Commit f2b3fae2 by Matthieu Boileau

### Use lbm_cl in a notebook

parent e3962aa7
.gitignore 0 → 100644
 .ipynb_checkpoints/
 ... @@ -57,7 +57,8 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ... @@ -57,7 +57,8 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, Tmax = _Tmax, vmax = _vmax, Tmax = _Tmax, vmax = _vmax, dx = _dx, dy = _dy, dx = _dx, dy = _dy, dt = _dt, dt = _dt, animate = False): animate = False, interactive=True): ff = "_F" ff = "_F" ... @@ -76,7 +77,7 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ... @@ -76,7 +77,7 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, source = source.replace("_lambda_", "("+str(vmax)+ ff + ")") source = source.replace("_lambda_", "("+str(vmax)+ ff + ")") source = source.replace("_F", "") source = source.replace("_F", "") print(source) #print(source) #exit(0) #exit(0) ... @@ -113,6 +114,11 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ... @@ -113,6 +114,11 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, elapsed = 0.; elapsed = 0.; fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float64) fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float64) if animate: fig = plt.gcf() fig.show() fig.canvas.draw() print("start OpenCL computations...") print("start OpenCL computations...") while t < Tmax: while t < Tmax: t = t + dt t = t + dt ... @@ -124,16 +130,20 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ... @@ -124,16 +130,20 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, elapsed += 1e-9 * (event.profile.end - event.profile.start) elapsed += 1e-9 * (event.profile.end - event.profile.start) # exchange buffer references for avoiding a copy # exchange buffer references for avoiding a copy fn_gpu, fnp1_gpu = fnp1_gpu, fn_gpu fn_gpu, fnp1_gpu = fnp1_gpu, fn_gpu print("iter=",iter, " t=",t, "elapsed (s)=",elapsed) title = "iter = {}, t = {:f}, elapsed (s) = {:f}".format(iter, t, elapsed) if iter % iterplot == 0 and animate: if animate: cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait() if iter % iterplot == 0: wplot = np.reshape(fn_cpu, (4, m, nx, ny)) cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait() plt.clf() wplot = np.reshape(fn_cpu, (4, m, nx, ny)) #plt.imshow(np.sum(wplot, axis = 0),vmin=0, vmax=1) plt.clf() plt.imshow(np.sum(wplot[:, _ivplot, :, :], axis = 0)) #plt.imshow(np.sum(wplot, axis = 0),vmin=0, vmax=1) plt.gca().invert_yaxis() fig.suptitle(title) plt.colorbar() plt.imshow(np.sum(wplot[:, _ivplot, :, :], axis = 0)) plt.pause(0.01) plt.gca().invert_yaxis() plt.colorbar() fig.canvas.draw() else: print(title, end='\r') # copy OpenCL data to CPU and return the results # copy OpenCL data to CPU and return the results cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait() cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait() ... @@ -141,28 +151,30 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ... @@ -141,28 +151,30 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, wplot_gpu = np.reshape(fn_cpu,(4, m, nx, ny)) wplot_gpu = np.reshape(fn_cpu,(4, m, nx, ny)) return wplot_gpu return wplot_gpu # gpu solve wplot_gpu = solve_ocl(animate = True) if __name__ == '__main__': #print(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0)) # gpu solve plt.clf() wplot_gpu = solve_ocl(animate=False) #plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0), vmin=0, vmax=1) #print(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0)) plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0)) plt.clf() plt.gca().invert_yaxis() #plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0), vmin=0, vmax=1) plt.colorbar() plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0)) plt.show() plt.gca().invert_yaxis() plt.colorbar() # for iv in range(4): plt.show() # plt.imshow(wplot_gpu[iv,:,:]) # plt.gca().invert_yaxis() # for iv in range(4): # plt.colorbar() # plt.imshow(wplot_gpu[iv,:,:]) # plt.show() # plt.gca().invert_yaxis() # plt.colorbar() # plt.show() # check difference # plt.clf() # plt.imshow(wplot_cpu-wplot_gpu) # check difference # plt.gca().invert_yaxis() # plt.clf() # plt.colorbar() # plt.imshow(wplot_cpu-wplot_gpu) # plt.show() # plt.gca().invert_yaxis() # plt.colorbar() # plt.show()
orszag-tang.ipynb 0 → 100644