From d54bcd0bfc314d9c4708d1e26ca45618176734c7 Mon Sep 17 00:00:00 2001 From: ph Date: Sun, 19 Nov 2017 19:18:39 +0100 Subject: [PATCH] add transport solver --- transport_cl.py | 36 +++++++++++++++++----- transport_kernels.cl | 73 ++++++++++++++++++++++++-------------------- 2 files changed, 68 insertions(+), 41 deletions(-) diff --git a/transport_cl.py b/transport_cl.py index 9a1fc35..e2b0ea9 100644 --- a/transport_cl.py +++ b/transport_cl.py @@ -19,6 +19,9 @@ ny = 50 Lx = 1. Ly = 1. +# number of animation frames +nbplots = 10 + dx = Lx / nx dy = Ly / ny @@ -28,13 +31,19 @@ vel = np.array([1., 1.]) vmax = np.sqrt(vel[0]**2+vel[1]**2) # time stepping -Tmax = 1 +Tmax = 0.5 / vmax cfl = 0.8 -iterplot = 5000 dt = cfl * (dx * dy) / (2 * dx + 2 * dy) / vmax +itermax = int(np.floor(Tmax / dt)) +iterplot = int(itermax / nbplots) + +print("itermax=",itermax, " iterplot=",iterplot, "tmax=",itermax*dt) +#exit() + + def fluxnum(wL, wR, vnorm): vdotn = vel[0] * vnorm[0] + vel[1] * vnorm[1] vnp = 0 @@ -61,7 +70,7 @@ def solve_python(wn): wn[i,j] = exact_sol(xy, t) #print("i j", xy,wn[i,j]) - wnp1 = np.zeros((nx,ny)) + wnp1 = np.zeros((nx,ny), dtype='float32') np.copyto(wnp1, wn) iter = 0 while t < Tmax: @@ -128,17 +137,20 @@ while t < Tmax: t = t + dt iter = iter + 1 prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu).wait() + # exchange buffer references for avoiding a copy wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu print("iter=",iter, " t=",t) if iter % iterplot == 0: cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait() wplot = np.reshape(wn_cpu, (nx, ny)) plt.clf() - plt.imshow(wplot) + plt.imshow(wplot,vmin=0, vmax=1) + plt.colorbar() plt.pause(0.01) -if iter % 2 != 0: - wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu +# in case of an odd number of iterations, perform a last exchange +#if iter % 2 != 0: +# wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu # prg.K2(queue, (taille, ), None, y_gpu).wait() @@ -149,14 +161,22 @@ cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait() wplot = np.reshape(wn_cpu,(nx, ny)) plt.clf() -plt.imshow(wplot) +plt.imshow(wplot,vmin=0, vmax=1) +plt.colorbar() plt.show() +wplot_gpu = wplot.copy() + solve_python(wplot) plt.clf() -plt.imshow(wplot[1:nx-1,1:ny-1]) +plt.imshow(wplot,vmin=0, vmax=1) +plt.colorbar() plt.show() +plt.clf() +plt.imshow(wplot-wplot_gpu) +plt.colorbar() +plt.show() diff --git a/transport_kernels.cl b/transport_kernels.cl index 8ebbe78..816be03 100644 --- a/transport_kernels.cl +++ b/transport_kernels.cl @@ -58,6 +58,9 @@ __kernel void init_sol(__global float *wn){ for(int iv = 0; iv < _M; iv++){ int imem = i + j * _NX + iv * ngrid; wn[imem] = wnow[iv]; + // boundary values + if (i == 0 || i == _NX - 1 || j == 0 || j == _NY - 1) + wn[imem] = _WBORD; } } @@ -72,50 +75,53 @@ __kernel void time_step(__global float *wn, __global float *wnp1){ int i = id % _NX; int j = id / _NX; - int ngrid = _NX * _NY; - float wnow[_M]; - float wnext[_M]; + int ngrid = _NX * _NY; - // load middle value - for(int iv = 0; iv < _M; iv++){ - int imem = i + j * _NX + iv * ngrid; - wnow[iv] = wn[imem]; - wnext[iv] = wnow[iv]; - } + float wnow[_M]; + float wnext[_M]; + + // load middle value + for(int iv = 0; iv < _M; iv++){ + int imem = i + j * _NX + iv * ngrid; + wnow[iv] = wn[imem]; + wnext[iv] = wnow[iv]; + } - float flux[_M]; - - // loop on all directions: - // idir = 0 (east) - // idir = 1 (west) - // idir = 2 (north) - // idir = 3 (south) - for(int idir = 0; idir < 4; idir++){ - float vn[2]; - vn[0] = dir[idir][0]; - vn[1] = dir[idir][1]; - int iR = i + dir[idir][0]; - int jR = j + dir[idir][1]; - // load neighbour values if accessible - float wR[_M]; - for(int iv = 0; iv < _M; iv++) wR[iv] = _WBORD; - if ((iR >= 0) && (iR <= _NX - 1) && (jR >= 0) && (jR <= _NY - 1)){ + if (i > 0 && i < _NX - 1 && j > 0 && j < _NY - 1){ + float flux[_M]; + + // loop on all directions: + // idir = 0 (east) + // idir = 1 (west) + // idir = 2 (north) + // idir = 3 (south) + for(int idir = 0; idir < 4; idir++){ + float vn[2]; + vn[0] = dir[idir][0]; + vn[1] = dir[idir][1]; + int iR = i + dir[idir][0]; + int jR = j + dir[idir][1]; + // load neighbour values if accessible + float wR[_M]; + for(int iv = 0; iv < _M; iv++) wR[iv] = _WBORD; + // if ((iR >= 0) && (iR <= _NX - 1) && (jR >= 0) && (jR <= _NY - 1)){ for(int iv = 0; iv < _M; iv++){ int imem = iv * ngrid + iR + jR * _NX; wR[iv] = wn[imem]; } - } - fluxnum(wnow, wR, vn, flux); + //} + fluxnum(wnow, wR, vn, flux); - // time evolution - for(int iv = 0; iv < _M; iv++){ - wnext[iv] -= _DT * ds[idir] / _VOL * flux[iv]; - } + // time evolution + for(int iv = 0; iv < _M; iv++){ + wnext[iv] -= _DT * ds[idir] / _VOL * flux[iv]; + } - } + } + } // copy wnext into global memory for(int iv = 0; iv < _M; iv++){ @@ -123,5 +129,6 @@ __kernel void time_step(__global float *wn, __global float *wnp1){ wnp1[imem] = wnext[iv]; } + } -- GitLab