Commit 202c32e5 by Matthieu Boileau

### Double/single precision

parent 061146b8
```diff
@@ -28,37 +28,35 @@ _ivplot = 0
 _nx = 256
 _ny = 256
-Lx = 1
-Ly = 1
-_dx = Lx / _nx
-_dy = Ly / _ny
+_Lx = 1
+_Ly = 1
 # transport velocity
 vel = np.array([1., 1.])
-# lattice speed
-_vmax = 20.
-# time stepping
-_Tmax = 10. / _vmax
 _Tmax = 2.
-cfl = 1
-_dt = cfl * _dx / _vmax
 ############# end of default values
 def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
-              Tmax = _Tmax, vmax = _vmax,
-              dx = _dx, dy = _dy,
-              dt = _dt,
+              Tmax = _Tmax,
+              Lx = _Lx, Ly = _Ly,
               animate = False,
-              interactive=True):
+              interactive=True, precision="single"):
+    dx = Lx / nx
+    dy = Ly / ny
+    # lattice speed
+    vmax = 20.
+    # time stepping
+    cfl = 1
+    dt = cfl * dx / vmax
     ff = "_F"
 ...
@@ -75,7 +73,18 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
     source = source.replace("_vy_", "("+str(vel[1])+ ff + ")")
     source = source.replace("_lambda_", "("+str(vmax)+ ff + ")")
-    source = source.replace("_F", "")
+    if precision == "double":
+        source = source.replace("_F", "")
+        source = source.replace("_real_", "double")
+        np_real = 'float64'
+        dtype = np.float64
+    else:
+        print("prec:", precision)
+        source = source.replace("_F", "f")
+        source = source.replace("_real_", "float")
+        np_real = 'float32'
+        dtype = np.float32
     #print(source)
     #exit(0)
 ...
```
```diff
@@ -90,9 +99,9 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
     # create OpenCL buffers
     fn_gpu = cl.Buffer(ctx, mf.READ_WRITE,
-                       size=(4 * m * nx * ny * np.dtype('float64').itemsize))
+                       size=(4 * m * nx * ny * np.dtype(np_real).itemsize))
     fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE,
-                         size=(4 * m * nx * ny * np.dtype('float64').itemsize))
+                         size=(4 * m * nx * ny * np.dtype(np_real).itemsize))
     # create a queue (for submitting opencl operations)
     queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
 ...
@@ -111,7 +120,7 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
     t = 0
     iter = 0
     elapsed = 0.;
-    fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float64)
+    fn_cpu = np.empty((4 * m * nx * ny, ), dtype = dtype)
     if animate:
         fig = plt.gcf()
 ...
```
```diff
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
-#define real double
+#define real _real_
 #define _NX _nx_
 #define _NY _ny_
 ...
```