Commit d54bcd0b by ph

### add transport solver

parent 4e01a62a
 ... @@ -19,6 +19,9 @@ ny = 50 ... @@ -19,6 +19,9 @@ ny = 50 Lx = 1. Lx = 1. Ly = 1. Ly = 1. # number of animation frames nbplots = 10 dx = Lx / nx dx = Lx / nx dy = Ly / ny dy = Ly / ny ... @@ -28,13 +31,19 @@ vel = np.array([1., 1.]) ... @@ -28,13 +31,19 @@ vel = np.array([1., 1.]) vmax = np.sqrt(vel[0]**2+vel[1]**2) vmax = np.sqrt(vel[0]**2+vel[1]**2) # time stepping # time stepping Tmax = 1 Tmax = 0.5 / vmax cfl = 0.8 cfl = 0.8 iterplot = 5000 dt = cfl * (dx * dy) / (2 * dx + 2 * dy) / vmax dt = cfl * (dx * dy) / (2 * dx + 2 * dy) / vmax itermax = int(np.floor(Tmax / dt)) iterplot = int(itermax / nbplots) print("itermax=",itermax, " iterplot=",iterplot, "tmax=",itermax*dt) #exit() def fluxnum(wL, wR, vnorm): def fluxnum(wL, wR, vnorm): vdotn = vel[0] * vnorm[0] + vel[1] * vnorm[1] vdotn = vel[0] * vnorm[0] + vel[1] * vnorm[1] vnp = 0 vnp = 0 ... @@ -61,7 +70,7 @@ def solve_python(wn): ... @@ -61,7 +70,7 @@ def solve_python(wn): wn[i,j] = exact_sol(xy, t) wn[i,j] = exact_sol(xy, t) #print("i j", xy,wn[i,j]) #print("i j", xy,wn[i,j]) wnp1 = np.zeros((nx,ny)) wnp1 = np.zeros((nx,ny), dtype='float32') np.copyto(wnp1, wn) np.copyto(wnp1, wn) iter = 0 iter = 0 while t < Tmax: while t < Tmax: ... @@ -128,17 +137,20 @@ while t < Tmax: ... @@ -128,17 +137,20 @@ while t < Tmax: t = t + dt t = t + dt iter = iter + 1 iter = iter + 1 prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu).wait() prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu).wait() # exchange buffer references for avoiding a copy wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu print("iter=",iter, " t=",t) print("iter=",iter, " t=",t) if iter % iterplot == 0: if iter % iterplot == 0: cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait() cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait() wplot = np.reshape(wn_cpu, (nx, ny)) wplot = np.reshape(wn_cpu, (nx, ny)) plt.clf() plt.clf() plt.imshow(wplot) plt.imshow(wplot,vmin=0, vmax=1) plt.colorbar() plt.pause(0.01) plt.pause(0.01) if iter % 2 != 0: # in case of an odd number of iterations, perform a last exchange wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu #if iter % 2 != 0: # wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu # prg.K2(queue, (taille, ), None, y_gpu).wait() # prg.K2(queue, (taille, ), None, y_gpu).wait() ... @@ -149,14 +161,22 @@ cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait() ... @@ -149,14 +161,22 @@ cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait() wplot = np.reshape(wn_cpu,(nx, ny)) wplot = np.reshape(wn_cpu,(nx, ny)) plt.clf() plt.clf() plt.imshow(wplot) plt.imshow(wplot,vmin=0, vmax=1) plt.colorbar() plt.show() plt.show() wplot_gpu = wplot.copy() solve_python(wplot) solve_python(wplot) plt.clf() plt.clf() plt.imshow(wplot[1:nx-1,1:ny-1]) plt.imshow(wplot,vmin=0, vmax=1) plt.colorbar() plt.show() plt.show() plt.clf() plt.imshow(wplot-wplot_gpu) plt.colorbar() plt.show()
 ... @@ -58,6 +58,9 @@ __kernel void init_sol(__global float *wn){ ... @@ -58,6 +58,9 @@ __kernel void init_sol(__global float *wn){ for(int iv = 0; iv < _M; iv++){ for(int iv = 0; iv < _M; iv++){ int imem = i + j * _NX + iv * ngrid; int imem = i + j * _NX + iv * ngrid; wn[imem] = wnow[iv]; wn[imem] = wnow[iv]; // boundary values if (i == 0 || i == _NX - 1 || j == 0 || j == _NY - 1) wn[imem] = _WBORD; } } } } ... @@ -72,50 +75,53 @@ __kernel void time_step(__global float *wn, __global float *wnp1){ ... @@ -72,50 +75,53 @@ __kernel void time_step(__global float *wn, __global float *wnp1){ int i = id % _NX; int i = id % _NX; int j = id / _NX; int j = id / _NX; int ngrid = _NX * _NY; float wnow[_M]; int ngrid = _NX * _NY; float wnext[_M]; // load middle value float wnow[_M]; for(int iv = 0; iv < _M; iv++){ float wnext[_M]; int imem = i + j * _NX + iv * ngrid; wnow[iv] = wn[imem]; // load middle value wnext[iv] = wnow[iv]; for(int iv = 0; iv < _M; iv++){ } int imem = i + j * _NX + iv * ngrid; wnow[iv] = wn[imem]; wnext[iv] = wnow[iv]; } float flux[_M]; if (i > 0 && i < _NX - 1 && j > 0 && j < _NY - 1){ float flux[_M]; // loop on all directions: // idir = 0 (east) // loop on all directions: // idir = 1 (west) // idir = 0 (east) // idir = 2 (north) // idir = 1 (west) // idir = 3 (south) // idir = 2 (north) for(int idir = 0; idir < 4; idir++){ // idir = 3 (south) float vn[2]; for(int idir = 0; idir < 4; idir++){ vn[0] = dir[idir][0]; float vn[2]; vn[1] = dir[idir][1]; vn[0] = dir[idir][0]; int iR = i + dir[idir][0]; vn[1] = dir[idir][1]; int jR = j + dir[idir][1]; int iR = i + dir[idir][0]; // load neighbour values if accessible int jR = j + dir[idir][1]; float wR[_M]; // load neighbour values if accessible for(int iv = 0; iv < _M; iv++) wR[iv] = _WBORD; float wR[_M]; if ((iR >= 0) && (iR <= _NX - 1) && (jR >= 0) && (jR <= _NY - 1)){ for(int iv = 0; iv < _M; iv++) wR[iv] = _WBORD; // if ((iR >= 0) && (iR <= _NX - 1) && (jR >= 0) && (jR <= _NY - 1)){ for(int iv = 0; iv < _M; iv++){ for(int iv = 0; iv < _M; iv++){ int imem = iv * ngrid + iR + jR * _NX; int imem = iv * ngrid + iR + jR * _NX; wR[iv] = wn[imem]; wR[iv] = wn[imem]; } } } //} fluxnum(wnow, wR, vn, flux); fluxnum(wnow, wR, vn, flux); // time evolution // time evolution for(int iv = 0; iv < _M; iv++){ for(int iv = 0; iv < _M; iv++){ wnext[iv] -= _DT * ds[idir] / _VOL * flux[iv]; wnext[iv] -= _DT * ds[idir] / _VOL * flux[iv]; } } } } } // copy wnext into global memory // copy wnext into global memory for(int iv = 0; iv < _M; iv++){ for(int iv = 0; iv < _M; iv++){ ... @@ -123,5 +129,6 @@ __kernel void time_step(__global float *wn, __global float *wnp1){ ... @@ -123,5 +129,6 @@ __kernel void time_step(__global float *wn, __global float *wnp1){ wnp1[imem] = wnext[iv]; wnp1[imem] = wnext[iv]; } } } }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment