Commit d54bcd0b authored by ph's avatar ph

add transport solver

parent 4e01a62a
...@@ -19,6 +19,9 @@ ny = 50 ...@@ -19,6 +19,9 @@ ny = 50
Lx = 1. Lx = 1.
Ly = 1. Ly = 1.
# number of animation frames
nbplots = 10
dx = Lx / nx dx = Lx / nx
dy = Ly / ny dy = Ly / ny
...@@ -28,13 +31,19 @@ vel = np.array([1., 1.]) ...@@ -28,13 +31,19 @@ vel = np.array([1., 1.])
vmax = np.sqrt(vel[0]**2+vel[1]**2) vmax = np.sqrt(vel[0]**2+vel[1]**2)
# time stepping # time stepping
Tmax = 1 Tmax = 0.5 / vmax
cfl = 0.8 cfl = 0.8
iterplot = 5000
dt = cfl * (dx * dy) / (2 * dx + 2 * dy) / vmax dt = cfl * (dx * dy) / (2 * dx + 2 * dy) / vmax
itermax = int(np.floor(Tmax / dt))
iterplot = int(itermax / nbplots)
print("itermax=",itermax, " iterplot=",iterplot, "tmax=",itermax*dt)
#exit()
def fluxnum(wL, wR, vnorm): def fluxnum(wL, wR, vnorm):
vdotn = vel[0] * vnorm[0] + vel[1] * vnorm[1] vdotn = vel[0] * vnorm[0] + vel[1] * vnorm[1]
vnp = 0 vnp = 0
...@@ -61,7 +70,7 @@ def solve_python(wn): ...@@ -61,7 +70,7 @@ def solve_python(wn):
wn[i,j] = exact_sol(xy, t) wn[i,j] = exact_sol(xy, t)
#print("i j", xy,wn[i,j]) #print("i j", xy,wn[i,j])
wnp1 = np.zeros((nx,ny)) wnp1 = np.zeros((nx,ny), dtype='float32')
np.copyto(wnp1, wn) np.copyto(wnp1, wn)
iter = 0 iter = 0
while t < Tmax: while t < Tmax:
...@@ -128,17 +137,20 @@ while t < Tmax: ...@@ -128,17 +137,20 @@ while t < Tmax:
t = t + dt t = t + dt
iter = iter + 1 iter = iter + 1
prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu).wait() prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu).wait()
# exchange buffer references for avoiding a copy
wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu
print("iter=",iter, " t=",t) print("iter=",iter, " t=",t)
if iter % iterplot == 0: if iter % iterplot == 0:
cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait() cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait()
wplot = np.reshape(wn_cpu, (nx, ny)) wplot = np.reshape(wn_cpu, (nx, ny))
plt.clf() plt.clf()
plt.imshow(wplot) plt.imshow(wplot,vmin=0, vmax=1)
plt.colorbar()
plt.pause(0.01) plt.pause(0.01)
if iter % 2 != 0: # in case of an odd number of iterations, perform a last exchange
wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu #if iter % 2 != 0:
# wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu
# prg.K2(queue, (taille, ), None, y_gpu).wait() # prg.K2(queue, (taille, ), None, y_gpu).wait()
...@@ -149,14 +161,22 @@ cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait() ...@@ -149,14 +161,22 @@ cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait()
wplot = np.reshape(wn_cpu,(nx, ny)) wplot = np.reshape(wn_cpu,(nx, ny))
plt.clf() plt.clf()
plt.imshow(wplot) plt.imshow(wplot,vmin=0, vmax=1)
plt.colorbar()
plt.show() plt.show()
wplot_gpu = wplot.copy()
solve_python(wplot) solve_python(wplot)
plt.clf() plt.clf()
plt.imshow(wplot[1:nx-1,1:ny-1]) plt.imshow(wplot,vmin=0, vmax=1)
plt.colorbar()
plt.show() plt.show()
plt.clf()
plt.imshow(wplot-wplot_gpu)
plt.colorbar()
plt.show()
...@@ -58,6 +58,9 @@ __kernel void init_sol(__global float *wn){ ...@@ -58,6 +58,9 @@ __kernel void init_sol(__global float *wn){
for(int iv = 0; iv < _M; iv++){ for(int iv = 0; iv < _M; iv++){
int imem = i + j * _NX + iv * ngrid; int imem = i + j * _NX + iv * ngrid;
wn[imem] = wnow[iv]; wn[imem] = wnow[iv];
// boundary values
if (i == 0 || i == _NX - 1 || j == 0 || j == _NY - 1)
wn[imem] = _WBORD;
} }
} }
...@@ -72,50 +75,53 @@ __kernel void time_step(__global float *wn, __global float *wnp1){ ...@@ -72,50 +75,53 @@ __kernel void time_step(__global float *wn, __global float *wnp1){
int i = id % _NX; int i = id % _NX;
int j = id / _NX; int j = id / _NX;
int ngrid = _NX * _NY;
float wnow[_M]; int ngrid = _NX * _NY;
float wnext[_M];
// load middle value float wnow[_M];
for(int iv = 0; iv < _M; iv++){ float wnext[_M];
int imem = i + j * _NX + iv * ngrid;
wnow[iv] = wn[imem]; // load middle value
wnext[iv] = wnow[iv]; for(int iv = 0; iv < _M; iv++){
} int imem = i + j * _NX + iv * ngrid;
wnow[iv] = wn[imem];
wnext[iv] = wnow[iv];
}
float flux[_M]; if (i > 0 && i < _NX - 1 && j > 0 && j < _NY - 1){
float flux[_M];
// loop on all directions:
// idir = 0 (east) // loop on all directions:
// idir = 1 (west) // idir = 0 (east)
// idir = 2 (north) // idir = 1 (west)
// idir = 3 (south) // idir = 2 (north)
for(int idir = 0; idir < 4; idir++){ // idir = 3 (south)
float vn[2]; for(int idir = 0; idir < 4; idir++){
vn[0] = dir[idir][0]; float vn[2];
vn[1] = dir[idir][1]; vn[0] = dir[idir][0];
int iR = i + dir[idir][0]; vn[1] = dir[idir][1];
int jR = j + dir[idir][1]; int iR = i + dir[idir][0];
// load neighbour values if accessible int jR = j + dir[idir][1];
float wR[_M]; // load neighbour values if accessible
for(int iv = 0; iv < _M; iv++) wR[iv] = _WBORD; float wR[_M];
if ((iR >= 0) && (iR <= _NX - 1) && (jR >= 0) && (jR <= _NY - 1)){ for(int iv = 0; iv < _M; iv++) wR[iv] = _WBORD;
// if ((iR >= 0) && (iR <= _NX - 1) && (jR >= 0) && (jR <= _NY - 1)){
for(int iv = 0; iv < _M; iv++){ for(int iv = 0; iv < _M; iv++){
int imem = iv * ngrid + iR + jR * _NX; int imem = iv * ngrid + iR + jR * _NX;
wR[iv] = wn[imem]; wR[iv] = wn[imem];
} }
} //}
fluxnum(wnow, wR, vn, flux); fluxnum(wnow, wR, vn, flux);
// time evolution // time evolution
for(int iv = 0; iv < _M; iv++){ for(int iv = 0; iv < _M; iv++){
wnext[iv] -= _DT * ds[idir] / _VOL * flux[iv]; wnext[iv] -= _DT * ds[idir] / _VOL * flux[iv];
} }
} }
}
// copy wnext into global memory // copy wnext into global memory
for(int iv = 0; iv < _M; iv++){ for(int iv = 0; iv < _M; iv++){
...@@ -123,5 +129,6 @@ __kernel void time_step(__global float *wn, __global float *wnp1){ ...@@ -123,5 +129,6 @@ __kernel void time_step(__global float *wn, __global float *wnp1){
wnp1[imem] = wnext[iv]; wnp1[imem] = wnext[iv];
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment