Commit 4e01a62a authored by ph's avatar ph

add transport solver

parent 92eaefb2
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function
import pyopencl as cl
import numpy as np
import matplotlib.pyplot as plt
# number of conservative variables
m = 1
# boundary
wbord = 0.
# grid size
nx = 50
ny = 50
Lx = 1.
Ly = 1.
dx = Lx / nx
dy = Ly / ny
# transport velocity
vel = np.array([1., 1.])
vmax = np.sqrt(vel[0]**2+vel[1]**2)
# time stepping
Tmax = 1
cfl = 0.8
iterplot = 5000
dt = cfl * (dx * dy) / (2 * dx + 2 * dy) / vmax
def fluxnum(wL, wR, vnorm):
vdotn = vel[0] * vnorm[0] + vel[1] * vnorm[1]
vnp = 0
if vdotn > 0:
vnp = vdotn
vnm = vdotn - vnp
flux = vnm * wR + vnp * wL
return flux
def exact_sol(xy, t):
x = xy[0] - t * vel[0] - 0.5
y = xy[1] - t * vel[1] - 0.5
d2 = x * x + y *y
w = np.exp(-30*d2)
return w
def solve_python(wn):
#init
t = 0.
wn[:] = wbord
for i in range(1,nx-1):
for j in range(1,ny-1):
xy=[i*dx+dx/2,j*dy+dy/2]
wn[i,j] = exact_sol(xy, t)
#print("i j", xy,wn[i,j])
wnp1 = np.zeros((nx,ny))
np.copyto(wnp1, wn)
iter = 0
while t < Tmax:
t = t+dt
iter += 1
print("iter=",iter, " t=",t)
for i in range(1,nx-1):
for j in range(1,ny-1):
wnp1[i,j] -= dt / dx * fluxnum(wn[i,j],wn[i+1,j],np.array([1,0]))
wnp1[i,j] -= dt / dx * fluxnum(wn[i,j],wn[i-1,j],np.array([-1,0]))
wnp1[i,j] -= dt / dy * fluxnum(wn[i,j],wn[i,j+1],np.array([0,1]))
wnp1[i,j] -= dt / dy * fluxnum(wn[i,j],wn[i,j-1],np.array([0,-1]))
np.copyto(wn, wnp1)
source = open("transport_kernels.cl", "r").read()
source = source.replace("_nx_", "("+str(nx)+")")
source = source.replace("_ny_", "("+str(ny)+")")
source = source.replace("_dx_", "("+str(dx)+"f)")
source = source.replace("_dy_", "("+str(dy)+"f)")
source = source.replace("_dt_", "("+str(dt)+"f)")
source = source.replace("_m_", "("+str(m)+")")
source = source.replace("_wbord_", "("+str(wbord)+"f)")
source = source.replace("_vx_", "("+str(vel[0])+"f)")
source = source.replace("_vy_", "("+str(vel[1])+"f)")
#print(source)
#exit(0)
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags
# taille = 2**8
#
# x_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=(taille * np.dtype('float32').itemsize))
# y_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=(taille * np.dtype('float32').itemsize))
#
# z_cpu = np.empty((taille, ), dtype = np.float32)
# z_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, z_cpu.nbytes)
#
# verif_cpu = np.fromfunction(lambda i: i * i + i, (taille, ), dtype = np.float32)
wn_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(nx * ny * np.dtype('float32').itemsize))
wn_cpu = np.empty((nx * ny, ), dtype = np.float32)
wnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(nx * ny * np.dtype('float32').itemsize))
prg = cl.Program(ctx, source).build()
# init data
event = prg.init_sol(queue, (nx * ny, ), (32, ), wn_gpu).wait()
# time loop on cpu
# time loop on gpu
t = 0
iter = 0
while t < Tmax:
t = t + dt
iter = iter + 1
prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu).wait()
wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu
print("iter=",iter, " t=",t)
if iter % iterplot == 0:
cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait()
wplot = np.reshape(wn_cpu, (nx, ny))
plt.clf()
plt.imshow(wplot)
plt.pause(0.01)
if iter % 2 != 0:
wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu
# prg.K2(queue, (taille, ), None, y_gpu).wait()
# prg.K3(queue, (taille, ), None , x_gpu, y_gpu, z_gpu, wait_for = [event]).wait()
#
cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait()
#
wplot = np.reshape(wn_cpu,(nx, ny))
plt.clf()
plt.imshow(wplot)
plt.show()
solve_python(wplot)
plt.clf()
plt.imshow(wplot[1:nx-1,1:ny-1])
plt.show()
#define _NX _nx_
#define _NY _ny_
#define _DX _dx_
#define _DY _dy_
#define _DT _dt_
#define _M _m_
#define _VX _vx_
#define _VY _vy_
#define _WBORD _wbord_
#define _VOL (_DX * _DY)
__constant int dir[4][2] = { {1, 0}, {-1, 0},
{0, 1}, {0, -1}};
__constant float ds[4] = { _DY, _DY, _DX, _DX };
void fluxnum(float *wL, float *wR, float* vnorm, float* flux){
float vdotn = _VX * vnorm[0] + _VY * vnorm[1];
float vnp = vdotn > 0 ? vdotn : 0;
float vnm = vdotn - vnp;
for(int iv = 0; iv < _M; iv++){
flux[iv] = vnm * wR[iv] + vnp * wL[iv];
}
}
void exact_sol(float* xy, float t, float* w){
float x = xy[0] - t * _VX - 0.5f;
float y = xy[1] - t * _VY - 0.5f;
float d2 = x * x + y *y;
for(int iv = 0; iv < _M; iv++)
w[0] = exp(-30*d2);
}
__kernel void init_sol(__global float *wn){
int id = get_global_id(0);
int i = id % _NX;
int j = id / _NX;
int ngrid = _NX * _NY;
float wnow[_M];
float t = 0;
float xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2};
exact_sol(xy, t, wnow);
//printf("x=%f, y=%f \n",xy[0],xy[1]);
// load middle value
for(int iv = 0; iv < _M; iv++){
int imem = i + j * _NX + iv * ngrid;
wn[imem] = wnow[iv];
}
}
__kernel void time_step(__global float *wn, __global float *wnp1){
int id = get_global_id(0);
int i = id % _NX;
int j = id / _NX;
int ngrid = _NX * _NY;
float wnow[_M];
float wnext[_M];
// load middle value
for(int iv = 0; iv < _M; iv++){
int imem = i + j * _NX + iv * ngrid;
wnow[iv] = wn[imem];
wnext[iv] = wnow[iv];
}
float flux[_M];
// loop on all directions:
// idir = 0 (east)
// idir = 1 (west)
// idir = 2 (north)
// idir = 3 (south)
for(int idir = 0; idir < 4; idir++){
float vn[2];
vn[0] = dir[idir][0];
vn[1] = dir[idir][1];
int iR = i + dir[idir][0];
int jR = j + dir[idir][1];
// load neighbour values if accessible
float wR[_M];
for(int iv = 0; iv < _M; iv++) wR[iv] = _WBORD;
if ((iR >= 0) && (iR <= _NX - 1) && (jR >= 0) && (jR <= _NY - 1)){
for(int iv = 0; iv < _M; iv++){
int imem = iv * ngrid + iR + jR * _NX;
wR[iv] = wn[imem];
}
}
fluxnum(wnow, wR, vn, flux);
// time evolution
for(int iv = 0; iv < _M; iv++){
wnext[iv] -= _DT * ds[idir] / _VOL * flux[iv];
}
}
// copy wnext into global memory
for(int iv = 0; iv < _M; iv++){
int imem = iv * ngrid + i + j * _NX;
wnp1[imem] = wnext[iv];
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment