Commit 5c11b3d4 authored by Romane Helie's avatar Romane Helie

equivalent equation

parent 48bf31ee
# How to run `lbm_cl.py` within a remote Jupyter notebook
- Connect to the remote machine, say `gpu3`, without X redirection:
```bash
ssh gpu3.math.unistra.fr
```
- First time: set your jupyter configuration
```bash
jupyter notebook --generate-config
```
Add these lines to your `~/.jupyter/jupyter_notebook_config.py` file:
```bash
c.NotebookApp.ip = 'gpu3.math.unistra.fr'
c.NotebookApp.open_browser = False
```
If you want to secure your notebook with a password, follow [these instructions](http://jupyter-notebook.readthedocs.io/en/stable/public_server.html#preparing-a-hashed-password).
- In `patapon` directory, run:
```bash
~/patapon$ jupyter-notebook
[I 17:42:03.286 NotebookApp] Serving notebooks from local directory: /ssd/home/boileau/patapon
[I 17:42:03.287 NotebookApp] 0 active kernels
[I 17:42:03.287 NotebookApp] The Jupyter Notebook is running at:
[I 17:42:03.287 NotebookApp] http://gpu3.math.unistra.fr:8888/
[I 17:42:03.287 NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).
[I 17:42:13.375 NotebookApp] 302 GET / (130.79.4.11) 1.15ms
```
- In your web browser, connect to the URL (here: <http://gpu3.math.unistra.fr:8888/>) and open `orszag-tang.ipynb`
- You may also run directly:
```
~/patapon$ jupyter-notebook orszag-tang.ipynb
```
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Resolution of a transport equation by the finite volume method on regular grid
"""
from __future__ import absolute_import, print_function
import pyopencl as cl
import numpy as np
import matplotlib.pyplot as plt
import h5py
from jinja2 import Template
import os
import sys
sys.path.append('..')
from utils import Figure, load_kernel, parse_cl_args, get_ite_title
# Default values
# number of conservative variables
_m = 7
# number of kinetic variables
_n = 4 * _m
_ivplot = 2
# grid size
_nx = 256
_ny = 256
_Lx = 6 #12 #6 #1
_Ly = 6 #12 #6 #1
# transport velocity
vel = np.array([1., 1.])
_Tmax = 1.
eRho = 0
eU = 1
eE = 2
eV = 3
eBy = 4
eBx = 5
ePsi = 6
H5_DIR = "h5"
def solve_ocl(m=_m, n=_n, nx=_nx, ny=_ny, Lx=_Lx, Ly=_Ly, Tmax=_Tmax,
animate=False, precision="single",
savekineticdata=False,
savemacrodata=False):
def dump_h5(filename, t, fn_cpu, kinetic_cpu):
# Create HDF5 file
h5filename = filename + ".h5"
h5filepath = os.path.join(H5_DIR, h5filename)
fdata = h5py.File(h5filepath, 'w')
fdata.attrs['file_name'] = h5filename
fdata.attrs['file_time'] = t
# heavydata = f.create_group('/')
data = np.reshape(fn_cpu, (4, m, ncprx, ncpry))
if savekineticdata:
for iv in range(4):
for iw in range(m):
indic = iw * 4 + iv
fdata.create_dataset('f{}'.format(indic), data=data[iv, iw, :, :])
if savemacrodata:
for iw in macrotobedump:
iwdata = np.sum(data[:, iw, :, :], axis=0)
indicname = 'w' + str(iw)
fdata.create_dataset(indicname, data=iwdata)
kineticplot = np.reshape(kinetic_cpu, (ncprx, ncpry))
fdata.create_dataset('kinetic', data=kineticplot)
fdata.close()
xdmffilename = filename + ".xmf"
xmdffilepath = os.path.join(H5_DIR, xdmffilename)
dcprx = Lx / ncprx
dcpry = Ly / ncpry
# Create corresponding xml file
ffield = open(xmdffilepath, 'w')
with open("template.xml") as xmf_file:
template = Template(xmf_file.read())
xmdffile = template.render(m=m, t=t, ncprx=ncprx, ncpry=ncpry, dcprx=dcprx, dcpry=dcpry,
h5filename=h5filename,
savekineticdata=savekineticdata,
savemacrodata=savemacrodata,
macrotobedump=macrotobedump)
ffield.write(xmdffile)
ffield.close()
# Create H5 dir if not exists
if (savekineticdata or savemacrodata) and not os.path.exists(H5_DIR):
os.makedirs(H5_DIR)
dx = Lx / nx
dy = Ly / ny
# lattice speed
vmax = 10 #10 #320. #20.
#10 should be the lamda according to the hyperbolic waves
# time stepping
cfl = 1
dt = cfl * dx / vmax
#dt /= 2
# compression for output data
ncpr = 10000 #1000 #1024
testname = "smooth-vortex_"
# testname = "orszag_"
# testname = "tilt-p1_"
ncprx = nx
ncpry = ny
if nx > ncpr:
ncprx = ncpr
ncpry = ncpr
# For plotting
x = np.linspace(0., Lx, num=ncprx)
y = np.linspace(0., Ly, num=ncpry)
parameters = {'nx': nx,
'ny': ny,
'dx': dx,
'dy': dy,
'dt': dt,
'm': m,
'n': n,
'vx': vel[0],
'vy': vel[1],
'lambda': vmax,
'ncpr': ncpr,
}
np_real, source = load_kernel("lbm_kernels_light.cl", parameters, module_file=__file__, precision=precision,
print_source=True)
# OpenCL init
ctx = cl.create_some_context()
mf = cl.mem_flags
# compile OpenCL C program
prg = cl.Program(ctx, source).build(options="")
# create OpenCL buffers
buffer_size = 4 * m * nx * ny * np.dtype(np_real).itemsize
kinetic_buffer_size = ncprx * ncpry * np.dtype(np_real).itemsize
fn_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=buffer_size)
kinetic_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=kinetic_buffer_size)
fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=buffer_size)
# divb_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=buffer_size)
if nx > ncpr:
fn_cpr_gpu = cl.Buffer(ctx, mf.READ_WRITE,
size=(4 * m * ncprx * ncpry * np.dtype(np_real).itemsize))
# create a queue (for submitting opencl operations)
queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
# init data
# event = prg.init_sol(queue, (nx * ny, ), None, fn_gpu)
# event = prg.init_nappes(queue, (nx * ny, ), None, fn_gpu)
# event = prg.init_tilt(queue, (nx * ny, ), None, fn_gpu)
event = prg.init_smooth_vortex(queue, (nx * ny, ), None, fn_gpu)
event.wait()
# event = prg.init_sol(queue, (nx * ny, ), None, fnp1_gpu)
# event = prg.init_nappes(queue, (nx * ny, ), None, fnp1_gpu)
# event = prg.init_tilt(queue, (nx * ny, ), None, fnp1_gpu)
event = prg.init_smooth_vortex(queue, (nx * ny, ), None, fnp1_gpu)
event.wait()
queue.finish()
# number of animation frames
nbplots = 8 #120
itermax = int(np.floor(Tmax / dt))
iterplot = int(itermax / nbplots)
macrotobedump = eRho, eBy, eBx, ePsi
# time loop
t = 0
ite = 0
elapsed = 0.
fn_cpu = np.zeros((4 * m * ncprx * ncpry, ), dtype=np_real)
kinetic_cpu = np.zeros((1 * ncprx * ncpry, ), dtype=np_real)
# divb_cpu = np.zeros((1 * nx * ny, ), dtype=np_real)
if animate:
plot_title = r"$n_x = {}, n_y = {}$".format(nx, ny)
# outputname = "fig/"+testname+str(nx)+"_0.png"
# plt.savefig(outputname)
# fig = Figure(title=plot_title,
# levels=np.linspace(1., 7., 64))
fig = Figure(title=plot_title)
print("start OpenCL computations...")
while t < Tmax + dt:
# print(itermax, iterplot, ite, " :: ", t)
ite_title = get_ite_title(ite, t, elapsed)
# plt.ioff()
if animate:
if ite % iterplot == 0:
if nx > ncpr:
event = prg.compress(queue, (nx*ny, ), None, fn_gpu, fn_cpr_gpu)
event.wait()
cl.enqueue_copy(queue, fn_cpu, fn_cpr_gpu).wait()
else:
cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()
wplot = np.reshape(fn_cpu, (4, m, ncprx, ncpry))
# event = prg.div_b(queue, (nx * ny, ), None, fn_gpu, divb_gpu).wait()
# cl.enqueue_copy(queue, divb_cpu, divb_gpu).wait()
#
# plt.clf()
# #plt.imshow(np.sum(wplot, axis = 0),vmin=0, vmax=1)
# fig.suptitle(ite_title)
# plt.imshow(np.sum(wplot[:, _ivplot, :, :], axis=0), cmap=cm.jet, extent=[0, Lx, 0, Ly])
# plt.colorbar()
#
# #cset = contour(np.sum(wplot[:, _ivplot, :, :], axis=0),np.arange(-1,1,0.2),linewidths=1, colors='k')
# #clabel(cset,inline=True,fmt='%1.1f',fontsize=10)
#
# strm = streamplot(np.arange(0,Lx,dx), np.arange(0,Ly,dy), -np.sum(wplot[:, 7, :, :], axis=0),
# np.sum(wplot[:, 5, :, :], axis=0), linewidth=1, color='k', arrowstyle='-',
# density=6)
# #start_points=strm_points.T)
#
# #plt.imshow(np.reshape(divb_cpu,(nx, ny)))
# #plt.gca().invert_yaxis()
# #plt.colorbar()
# plt.title(plot_title)
# plt.clim(0,6)
#
# if rcParams['backend'] == "nbAgg":
# fig.canvas.draw()
# else:
# plt.pause(1e-6)
#
# outputname = "fig/"+testname+str(nx)+"_"+str(ite)+".png"
# plt.savefig(outputname)
fig.update(x, y, np.sum(wplot[:, _ivplot, :, :], axis=0), suptitle=ite_title, cb=ite == 0)
else:
print(ite_title, end='\r')
if savekineticdata or savemacrodata:
if ite % iterplot == 0:
# print(itermax, iterplot, ite, " :: ", t)
event = prg.kinetic(queue, (nx * ny, ), None, fn_gpu, kinetic_gpu).wait()
cl.enqueue_copy(queue, kinetic_cpu, kinetic_gpu).wait()
if nx > ncpr:
event = prg.compress(queue, (nx*ny, ), None, fn_gpu, fn_cpr_gpu)
event.wait()
cl.enqueue_copy(queue, fn_cpu, fn_cpr_gpu).wait()
else:
cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()
filename = "{}{}_{}".format(testname, nx, ite)
dump_h5(filename, t, fn_cpu, kinetic_cpu)
t += dt
event = prg.time_step(queue, (nx * ny, ), None, fn_gpu, fnp1_gpu)
event.wait()
elapsed += 1e-9 * (event.profile.end - event.profile.start)
event = prg.shift_step(queue, (nx * ny, ), None, fnp1_gpu, fn_gpu)
event.wait()
queue.finish()
elapsed += 1e-9 * (event.profile.end - event.profile.start)
#event = prg.shift_step(queue, (nx * ny, ), None, fn_gpu, fnp1_gpu)
#event.wait()
#elapsed += 1e-9 * (event.profile.end - event.profile.start)
#event = prg.relax_step(queue, (nx * ny, ), None, fnp1_gpu, fn_gpu)
#event.wait()
#elapsed += 1e-9 * (event.profile.end - event.profile.start)
#event = prg.shift_step(queue, (nx * ny, ), None, fn_gpu, fnp1_gpu)
#event.wait()
#queue.finish()
#elapsed += 1e-9 * (event.profile.end - event.profile.start)
# exchange buffer references for avoiding a copy
#fn_sauv = fnp1_gpu
#fnp1_gpu = fn_gpu
#fn_gpu = fn_sauv
ite += 1
# copy OpenCL data to CPU and return the results
cl.enqueue_copy(queue, fn_cpu, fnp1_gpu).wait()
queue.finish()
wplot_gpu = np.reshape(fn_cpu, (4, m, ncprx, ncpry))
plt.show(block=True)
print(ite_title)
bandwidth = ite*buffer_size/elapsed/1e6
return x, y, wplot_gpu, bandwidth
if __name__ == '__main__':
args = parse_cl_args(n=_nx, L=_Lx, tmax=_Tmax,
description='Solve orszag-tang using LBM on PyOpenCL')
# gpu solve
x, y, wplot_gpu, bandwith = solve_ocl(**vars(args))
This diff is collapsed.
This diff is collapsed.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Resolution of a transport equation by the finite volume method on regular grid
"""
from __future__ import absolute_import, print_function
import pyopencl as cl
import numpy as np
from time import sleep
import matplotlib
#matplotlib.use('Agg')
#from matplotlib import rcParams
import matplotlib.pyplot as plt
from pylab import contour, cm, clabel, streamplot
import h5py
import sys
sys.path.append('..')
from utils import Figure, float2str, load_kernel, parse_cl_args, get_ite_title
## Default values
# number of conservative variables
_m = 9
# number of kinetic variables
_n = 4 * _m
_ivplot = 2
# grid size
_nx = 256
_ny = 256
_Lx = 1 #12 #6 #1
_Ly = 1 #12 #6 #1
# transport velocity
vel = np.array([1., 1.])
_Tmax = 1.
def solve_ocl(m=_m, n=_n, nx=_nx, ny=_ny, Lx=_Lx, Ly=_Ly, Tmax=_Tmax,
animate=False, precision="single",savekineticdata="False",
savemacrodata="False"):
dx = Lx / nx
dy = Ly / ny
# lattice speed
vmax = 20.
#10.
vmax=20.
# time stepping
cfl = 1
dt = cfl * dx / vmax
# compression for output data
ncpr = 10000 #1000 #1024
testname = "smooth-vortex_"
#testname = "orszag_"
#testname = "tilt-p1_"
foldername = "../h5/"
ncprx=nx
ncpry=ny
if (nx>ncpr):
ncprx=ncpr
ncpry=ncpr
# For plotting
x = np.linspace(0., Lx, num=ncprx)
y = np.linspace(0., Ly, num=ncpry)
parameters = {'nx': nx,
'ny': ny,
'dx': dx,
'dy': dy,
'dt': dt,
'm': m,
'n': n,
'vx': vel[0],
'vy': vel[1],
'lambda': vmax,
'ncpr': ncpr,
}
np_real, source = load_kernel("lbm_kernels.cl", parameters, precision=precision, print_source=True,
module_file=__file__)
# OpenCL init
ctx = cl.create_some_context()
mf = cl.mem_flags
# compile OpenCL C program
prg = cl.Program(ctx, source).build(options="")
# create OpenCL buffers
buffer_size = 4 * m * nx * ny * np.dtype(np_real).itemsize
kinetic_buffer_size = ncprx * ncpry * np.dtype(np_real).itemsize
fn_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=buffer_size)
kinetic_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=kinetic_buffer_size)
fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=buffer_size)
#divb_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=buffer_size)
if (nx>ncpr):
fn_cpr_gpu = cl.Buffer(ctx, mf.READ_WRITE,
size=(4 * m * ncprx * ncpry * np.dtype(np_real).itemsize))
# create a queue (for submitting opencl operations)
queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
# init data
event = prg.init_sol(queue, (nx * ny, ), None, fn_gpu)
#event = prg.init_nappes(queue, (nx * ny, ), None, fn_gpu)
#event = prg.init_tilt(queue, (nx * ny, ), None, fn_gpu)
#event = prg.init_smooth_vortex(queue, (nx * ny, ), None, fn_gpu)
event.wait()
event = prg.init_sol(queue, (nx * ny, ), None, fnp1_gpu)
#event = prg.init_nappes(queue, (nx * ny, ), None, fnp1_gpu)
#event = prg.init_tilt(queue, (nx * ny, ), None, fnp1_gpu)
#event = prg.init_smooth_vortex(queue, (nx * ny, ), None, fnp1_gpu)
event.wait()
queue.finish()
# number of animation frames
nbplots = 10 #120
itermax = int(np.floor(Tmax / dt))
iterplot = int(itermax / nbplots)
macrotobedump=[0,2,5,7,8] #[0,1,2,3,5,7,8]
# time loop
t = 0
ite = 0
elapsed = 0.
fn_cpu = np.zeros((4 * m * ncprx * ncpry, ), dtype=np_real)
kinetic_cpu = np.zeros((1 * ncprx * ncpry, ), dtype=np_real)
#divb_cpu = np.zeros((1 * nx * ny, ), dtype=np_real)
if animate:
plot_title = r"$n_x = {}, n_y = {}$".format(nx, ny)
#outputname = "fig/"+testname+str(nx)+"_0.png"
#plt.savefig(outputname)
fig = Figure(title=plot_title,
levels=np.linspace(0., 12., 64))
#fig = Figure(title=plot_title)
print("start OpenCL computations...")
while t < Tmax + dt:
#print(itermax, iterplot, ite, " :: ", t)
ite_title = get_ite_title(ite, t, elapsed)
#plt.ioff()
if animate:
if ite % iterplot == 0:
if (nx>ncpr):
event = prg.compress(queue, (nx*ny, ), None, fn_gpu, fn_cpr_gpu)
event.wait()
cl.enqueue_copy(queue, fn_cpu, fn_cpr_gpu).wait()
else:
cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()
wplot = np.reshape(fn_cpu, (4, m, ncprx, ncpry))
#event = prg.div_b(queue, (nx * ny, ), None, fn_gpu, divb_gpu).wait()
#cl.enqueue_copy(queue, divb_cpu, divb_gpu).wait()
#plt.clf()
##plt.imshow(np.sum(wplot, axis = 0),vmin=0, vmax=1)
#fig.suptitle(ite_title)
#plt.imshow(np.sum(wplot[:, _ivplot, :, :], axis=0), cmap=cm.jet, extent=[0, Lx, 0, Ly])
#plt.colorbar()
#
##cset = contour(np.sum(wplot[:, _ivplot, :, :], axis=0),np.arange(-1,1,0.2),linewidths=1, colors='k')
##clabel(cset,inline=True,fmt='%1.1f',fontsize=10)
#strm = streamplot(np.arange(0,Lx,dx), np.arange(0,Ly,dy), -np.sum(wplot[:, 7, :, :], axis=0), np.sum(wplot[:, 5, :, :], axis=0), linewidth=1, color='k', arrowstyle='-', density=6)
# #start_points=strm_points.T)
#
##plt.imshow(np.reshape(divb_cpu,(nx, ny)))
##plt.gca().invert_yaxis()
##plt.colorbar()
#plt.title(plot_title)
#plt.clim(0,6)
#
#if rcParams['backend'] == "nbAgg":
# fig.canvas.draw()
#else:
# plt.pause(1e-6)
#outputname = "fig/"+testname+str(nx)+"_"+str(ite)+".png"
#plt.savefig(outputname)
fig.update(x, y, np.sum(wplot[:, _ivplot, :, :], axis=0), suptitle=ite_title, cb=ite == 0)
else:
print(ite_title, end='\r')
if (savekineticdata or savemacrodata):
if ite % iterplot == 0:
event = prg.kinetic(queue, (nx * ny, ), None, fn_gpu, kinetic_gpu).wait()
cl.enqueue_copy(queue, kinetic_cpu, kinetic_gpu).wait()
if (nx>ncpr):
event = prg.compress(queue, (nx*ny, ), None, fn_gpu, fn_cpr_gpu)
event.wait()
cl.enqueue_copy(queue, fn_cpu, fn_cpr_gpu).wait()
else:
cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()
filename = testname+str(nx)+"_"+str(ite)
h5filename = filename+".h5"