Commit 202c32e5 authored by Matthieu Boileau's avatar Matthieu Boileau

Double/single precision

parent 061146b8
......@@ -28,37 +28,35 @@ _ivplot = 0
_nx = 256
_ny = 256
Lx = 1
Ly = 1
_dx = Lx / _nx
_dy = Ly / _ny
_Lx = 1
_Ly = 1
# transport velocity
vel = np.array([1., 1.])
# lattice speed
_vmax = 20.
# time stepping
_Tmax = 10. / _vmax
_Tmax = 2.
cfl = 1
_dt = cfl * _dx / _vmax
############# end of default values
def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
Tmax = _Tmax, vmax = _vmax,
dx = _dx, dy = _dy,
dt = _dt,
Tmax = _Tmax,
Lx = _Lx, Ly = _Ly,
animate = False,
interactive=True):
interactive=True,
precision="single"):
dx = Lx / nx
dy = Ly / ny
# lattice speed
vmax = 20.
# time stepping
cfl = 1
dt = cfl * dx / vmax
ff = "_F"
......@@ -75,7 +73,18 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
source = source.replace("_vy_", "("+str(vel[1])+ ff + ")")
source = source.replace("_lambda_", "("+str(vmax)+ ff + ")")
if precision == "double":
source = source.replace("_F", "")
source = source.replace("_real_", "double")
np_real = 'float64'
dtype = np.float64
else:
print("prec:", precision)
source = source.replace("_F", "f")
source = source.replace("_real_", "float")
np_real = 'float32'
dtype = np.float32
#print(source)
#exit(0)
......@@ -90,9 +99,9 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
# create OpenCL buffers
fn_gpu = cl.Buffer(ctx, mf.READ_WRITE,
size=(4 * m * nx * ny * np.dtype('float64').itemsize))
size=(4 * m * nx * ny * np.dtype(np_real).itemsize))
fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE,
size=(4 * m * nx * ny * np.dtype('float64').itemsize))
size=(4 * m * nx * ny * np.dtype(np_real).itemsize))
# create a queue (for submitting opencl operations)
queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
......@@ -111,7 +120,7 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
t = 0
iter = 0
elapsed = 0.;
fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float64)
fn_cpu = np.empty((4 * m * nx * ny, ), dtype = dtype)
if animate:
fig = plt.gcf()
......
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#define real double
#define real _real_
#define _NX _nx_
#define _NY _ny_
......
......@@ -9,17 +9,6 @@
"## Orszag-Tang vortex on `gpu3.math.unistra.fr`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from lbm_cl import solve_ocl\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"metadata": {},
......@@ -90,17 +79,73 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"from lbm_cl import solve_ocl\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"%matplotlib notebook\n",
"%timeit wplot_gpu = solve_ocl(animate=False)\n",
"%config InlineBackend.figure_format = 'retina'\n",
"%time wplot_gpu = solve_ocl(nx=2048, ny=2048, animate=True, precision=\"single\")\n",
"plt.clf()\n",
"plt.imshow(np.sum(wplot_gpu[:, 0, :, :],axis=0))\n",
"plt.imshow(np.sum(wplot_gpu[:, 0, :, :], axis=0))\n",
"plt.gca().invert_yaxis()\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Compare execution time with CPU\n",
"\n",
"Final physical time is set to 0.5 \n",
"\n",
"#### GPU"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Tmax = 0.1\n",
"N = 512\n",
"%env PYOPENCL_CTX=0:1\n",
"result_gpu = %timeit -o wplot = solve_ocl(nx=N, ny=N, animate=False, Tmax=Tmax, precision=\"single\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### CPU"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%env PYOPENCL_CTX=0:4\n",
"result_cpu = %timeit -o wplot = solve_ocl(nx=N, ny=N, animate=False, Tmax=Tmax, precision=\"single\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"GPU [s]: {:f}\".format(result_gpu.best))\n",
"print(\"CPU [s]: {:f}\".format(result_cpu.best))\n"
]
}
],
"metadata": {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment