diff --git a/lbm_cl.py b/lbm_cl.py index 53fb8fa9ce85cc979e8b6fa2d4106c1054830437..c8e525e8c74daca08bf207ae75b8b7814de97ee1 100755 --- a/lbm_cl.py +++ b/lbm_cl.py @@ -28,37 +28,35 @@ _ivplot = 0 _nx = 256 _ny = 256 -Lx = 1 -Ly = 1 - - -_dx = Lx / _nx -_dy = Ly / _ny +_Lx = 1 +_Ly = 1 # transport velocity vel = np.array([1., 1.]) -# lattice speed -_vmax = 20. - - -# time stepping -_Tmax = 10. / _vmax _Tmax = 2. -cfl = 1 -_dt = cfl * _dx / _vmax ############# end of default values def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, - Tmax = _Tmax, vmax = _vmax, - dx = _dx, dy = _dy, - dt = _dt, + Tmax = _Tmax, + Lx = _Lx, Ly = _Ly, animate = False, - interactive=True): + interactive=True, + precision="single"): + dx = Lx / nx + dy = Ly / ny + + # lattice speed + vmax = 20. + + # time stepping + cfl = 1 + + dt = cfl * dx / vmax ff = "_F" @@ -75,7 +73,18 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, source = source.replace("_vy_", "("+str(vel[1])+ ff + ")") source = source.replace("_lambda_", "("+str(vmax)+ ff + ")") - source = source.replace("_F", "") + + if precision == "double": + source = source.replace("_F", "") + source = source.replace("_real_", "double") + np_real = 'float64' + dtype = np.float64 + else: + print("prec:", precision) + source = source.replace("_F", "f") + source = source.replace("_real_", "float") + np_real = 'float32' + dtype = np.float32 #print(source) #exit(0) @@ -90,9 +99,9 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, # create OpenCL buffers fn_gpu = cl.Buffer(ctx, mf.READ_WRITE, - size=(4 * m * nx * ny * np.dtype('float64').itemsize)) + size=(4 * m * nx * ny * np.dtype(np_real).itemsize)) fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, - size=(4 * m * nx * ny * np.dtype('float64').itemsize)) + size=(4 * m * nx * ny * np.dtype(np_real).itemsize)) # create a queue (for submitting opencl operations) queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) @@ -111,7 +120,7 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, t = 0 iter = 0 elapsed = 0.; - fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float64) + fn_cpu = np.empty((4 * m * nx * ny, ), dtype = dtype) if animate: fig = plt.gcf() diff --git a/lbm_kernels.cl b/lbm_kernels.cl index 386d4946b3ee5da32dc1ca43da48698821ea34d6..4e9965c27a1dc50951824520d2fc3aaa0df5f531 100644 --- a/lbm_kernels.cl +++ b/lbm_kernels.cl @@ -1,7 +1,7 @@ #pragma OPENCL EXTENSION cl_khr_fp64 : enable -#define real double +#define real _real_ #define _NX _nx_ #define _NY _ny_ diff --git a/orszag-tang.ipynb b/orszag-tang.ipynb index 0ee8a5005996908ca95308ca68ad68d1c6bfa2e6..ae0de6524ae781996c4b2bd826d8d11e2836ac9c 100644 --- a/orszag-tang.ipynb +++ b/orszag-tang.ipynb @@ -9,17 +9,6 @@ "## Orszag-Tang vortex on `gpu3.math.unistra.fr`" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from lbm_cl import solve_ocl\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -90,17 +79,73 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "scrolled": false + }, "outputs": [], "source": [ + "from lbm_cl import solve_ocl\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", "%matplotlib notebook\n", - "%timeit wplot_gpu = solve_ocl(animate=False)\n", + "%config InlineBackend.figure_format = 'retina'\n", + "%time wplot_gpu = solve_ocl(nx=2048, ny=2048, animate=True, precision=\"single\")\n", "plt.clf()\n", - "plt.imshow(np.sum(wplot_gpu[:, 0, :, :],axis=0))\n", + "plt.imshow(np.sum(wplot_gpu[:, 0, :, :], axis=0))\n", "plt.gca().invert_yaxis()\n", "plt.colorbar()\n", "plt.show()" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compare execution time with CPU\n", + "\n", + "Final physical time is set to 0.5 \n", + "\n", + "#### GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Tmax = 0.1\n", + "N = 512\n", + "%env PYOPENCL_CTX=0:1\n", + "result_gpu = %timeit -o wplot = solve_ocl(nx=N, ny=N, animate=False, Tmax=Tmax, precision=\"single\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### CPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%env PYOPENCL_CTX=0:4\n", + "result_cpu = %timeit -o wplot = solve_ocl(nx=N, ny=N, animate=False, Tmax=Tmax, precision=\"single\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"GPU [s]: {:f}\".format(result_gpu.best))\n", + "print(\"CPU [s]: {:f}\".format(result_cpu.best))\n" + ] } ], "metadata": {