Commit 7c8ba864 authored by helluy

This case works on NVIDIA, but does not work with a larger mesh.

parent 2c9cf8f6
...@@ -25,8 +25,8 @@ _n = 4 * _m ...@@ -25,8 +25,8 @@ _n = 4 * _m
_ivplot = 0 _ivplot = 0
# grid size # grid size
_nx = 256 _nx = 1024
_ny = 256 _ny = 1024
_Lx = 1 _Lx = 1
_Ly = 1 _Ly = 1
...@@ -43,7 +43,7 @@ _Tmax = 2. ...@@ -43,7 +43,7 @@ _Tmax = 2.
def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
Tmax = _Tmax, Tmax = _Tmax,
Lx = _Lx, Ly = _Ly, Lx = _Lx, Ly = _Ly,
animate = False, animate = True,
interactive=True, interactive=True,
precision="single"): precision="single"):
...@@ -94,8 +94,7 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ...@@ -94,8 +94,7 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
mf = cl.mem_flags mf = cl.mem_flags
# compile OpenCL C program # compile OpenCL C program
prg = cl.Program(ctx, source).build(options = "-cl-strict-aliasing \ prg = cl.Program(ctx, source).build(options = "")
-cl-fast-relaxed-math")
# create OpenCL buffers # create OpenCL buffers
fn_gpu = cl.Buffer(ctx, mf.READ_WRITE, fn_gpu = cl.Buffer(ctx, mf.READ_WRITE,
...@@ -111,6 +110,9 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ...@@ -111,6 +110,9 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
event = prg.init_sol(queue, (nx * ny, ), (32, ), fn_gpu) event = prg.init_sol(queue, (nx * ny, ), (32, ), fn_gpu)
event.wait() event.wait()
event = prg.init_sol(queue, (nx * ny, ), (32, ), fnp1_gpu)
event.wait()
# number of animation frames # number of animation frames
nbplots = 100 nbplots = 100
itermax = int(np.floor(Tmax / dt)) itermax = int(np.floor(Tmax / dt))
...@@ -130,7 +132,6 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ...@@ -130,7 +132,6 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
print("start OpenCL computations...") print("start OpenCL computations...")
while t < Tmax: while t < Tmax:
t = t + dt t = t + dt
iter = iter + 1
#event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu) #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu)
event = prg.time_step(queue, (nx * ny, ), (64, ), fn_gpu, fnp1_gpu) event = prg.time_step(queue, (nx * ny, ), (64, ), fn_gpu, fnp1_gpu)
#event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu, wait_for = [event]) #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu, wait_for = [event])
...@@ -153,6 +154,8 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ...@@ -153,6 +154,8 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
else: else:
print(title, end='\r') print(title, end='\r')
iter = iter + 1
# copy OpenCL data to CPU and return the results # copy OpenCL data to CPU and return the results
cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait() cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()
...@@ -162,7 +165,7 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ...@@ -162,7 +165,7 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
if __name__ == '__main__': if __name__ == '__main__':
# gpu solve # gpu solve
wplot_gpu = solve_ocl(animate=False) wplot_gpu = solve_ocl()
#print(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0)) #print(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0))
plt.clf() plt.clf()
#plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0), vmin=0, vmax=1) #plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0), vmin=0, vmax=1)
......
#pragma OPENCL EXTENSION cl_khr_fp64 : enable //#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#define real _real_ #define real _real_
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment