Commit da06e04d authored by Matthieu Boileau's avatar Matthieu Boileau
Browse files

Merge branch 'master' of gitlab.math.unistra.fr:patapon/patapon

parents 72c1b75f 48bf31ee
...@@ -1924,6 +1924,15 @@ MRREVIEWER = {Philip D. Loewen}, ...@@ -1924,6 +1924,15 @@ MRREVIEWER = {Philip D. Loewen},
Year = {2015}, Year = {2015},
Bdsk-Url-1 = {https://tel.archives-ouvertes.fr/tel-01132856}} Bdsk-Url-1 = {https://tel.archives-ouvertes.fr/tel-01132856}}
@incollection{helluy2016asynchronous,
title={Asynchronous OpenCL/MPI numerical simulations of conservation laws},
author={Helluy, Philippe and Strub, Thomas and Massaro, Michel and Roberts, Malcolm},
booktitle={Software for Exascale Computing-SPPEXA 2013-2015},
pages={547--565},
year={2016},
publisher={Springer}
}
@unpublished{helluy:hal-01134222, @unpublished{helluy:hal-01134222,
Author = {Strub, Thomas and Helluy, Philippe and Massaro, Michel and Roberts, Malcolm}, Author = {Strub, Thomas and Helluy, Philippe and Massaro, Michel and Roberts, Malcolm},
File = {helluy-iwocl-2015.pdf:https\://hal.archives-ouvertes.fr/hal-01134222/file/helluy-iwocl-2015.pdf:PDF}, File = {helluy-iwocl-2015.pdf:https\://hal.archives-ouvertes.fr/hal-01134222/file/helluy-iwocl-2015.pdf:PDF},
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
#define _LAMBDA _lambda_ #define _LAMBDA _lambda_
#ifndef M_PI #ifndef M_PI
#define M_PI (3.14159265358979323846264338328_F) #define M_PI (3.14159265358979323846264338328)
#endif #endif
#define _VOL (_DX * _DY) #define _VOL (_DX * _DY)
...@@ -64,7 +64,7 @@ __constant real ds[4] = {_DY, _DY, _DX, _DX}; ...@@ -64,7 +64,7 @@ __constant real ds[4] = {_DY, _DY, _DX, _DX};
void flux_phy(const real *W, const real *vn, real *flux) { void flux_phy(const real *W, const real *vn, real *flux) {
real gam = 1.6666666666_F; real gam = 1.6666666666;
real un = (W[eU] * vn[eX] + W[eV] * vn[eY] + W[eW] * vn[eZ]) / W[eRho]; real un = (W[eU] * vn[eX] + W[eV] * vn[eY] + W[eW] * vn[eZ]) / W[eRho];
real bn = W[eBx] * vn[eX] + W[eBy] * vn[eY] + W[eBz] * vn[eZ]; real bn = W[eBx] * vn[eX] + W[eBy] * vn[eY] + W[eBz] * vn[eZ];
...@@ -143,7 +143,7 @@ void f2w(const real *f, real *w) { ...@@ -143,7 +143,7 @@ void f2w(const real *f, real *w) {
// mhd // mhd
void conservatives(real *y, real *w) { void conservatives(real *y, real *w) {
real gam = 1.6666666666_F; real gam = 1.6666666666;
w[eRho] = y[eRho]; // rho w[eRho] = y[eRho]; // rho
w[eU] = y[eRho] * y[eU]; // rho u w[eU] = y[eRho] * y[eU]; // rho u
...@@ -160,7 +160,7 @@ void conservatives(real *y, real *w) { ...@@ -160,7 +160,7 @@ void conservatives(real *y, real *w) {
// orszag tang init data // orszag tang init data
void exact_sol(real *x, real t, real *w) { void exact_sol(real *x, real t, real *w) {
real gam = 1.6666666666_F; real gam = 1.6666666666;
real yL[_M]; real yL[_M];
yL[eRho] = gam * gam; // rho yL[eRho] = gam * gam; // rho
...@@ -176,59 +176,32 @@ void exact_sol(real *x, real t, real *w) { ...@@ -176,59 +176,32 @@ void exact_sol(real *x, real t, real *w) {
conservatives(yL, w); conservatives(yL, w);
} }
// initial condition on the macro data real gauss(real r){
__kernel void init_sol(__global real *fn) {
int id = get_global_id(0);
int i = id % _NX;
int j = id / _NX;
int ngrid = _NX * _NY;
real wnow[_M];
real t = 0;
real xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2};
exact_sol(xy, t, wnow);
real fnow[_N];
w2f(wnow, fnow);
// printf("x=%f, y=%f \n",xy[0],xy[1]); return exp(-r*r/2);
// load middle value
for (int ik = 0; ik < _N; ik++) {
int imem = i + j * _NX + ik * ngrid;
fn[imem] = fnow[ik];
// fn[imem] = j;
}
} }
void exact_smooth_vortex(real *x, real t, real *w) { void exact_smooth_vortex(real *x, real t, real *w) {
#define MHD2D_VORTEX_KAPPA 1.0_F const real gam = 1.6666666666;
#define MHD2D_VORTEX_UREF 0.2_F const real rref2 = 1;
#define MHD2D_VORTEX_MU 1.0_F const real sigmam2 = 0.5;
#define MHD2D_VORTEX_BREF 0.2_F const real kappa = 1;
const real gam = 1.6666666666_F; const real uref = 0.2;
const real rref2 = 1.0_F; const real bref = 0.2;
const real sigmam2 = 0.5_F; const real mu = 1;
const real kappa = MHD2D_VORTEX_KAPPA; const real udrift[2] = {1.0, 1.0};
const real uref = MHD2D_VORTEX_UREF; const real xstart[2] = {10., 10.};
const real bref = MHD2D_VORTEX_BREF;
const real mu = MHD2D_VORTEX_MU;
const real udrift[2] = {1.0_F, 1.0_F};
const real xstart[2] = {5._F, 5._F};
// //
real xrel[3] = {x[0] - uref * udrift[0] * t - xstart[0], real xrel[3] = {x[0] - uref * udrift[0] * t - xstart[0],
x[1] - uref * udrift[1] * t - xstart[1], 0.0_F}; x[1] - uref * udrift[1] * t - xstart[1], 0.0};
// //
real r = sqrt(xrel[0] * xrel[0] + xrel[1] * xrel[1]); real r = sqrt(xrel[0] * xrel[0] + xrel[1] * xrel[1]);
real expf = exp(sigmam2 * (1.0_F - r * r)); real expf = exp(sigmam2) * gauss(r);
// rho // rho
w[eRho] = 1.0_F; w[eRho] = 1.0;
// qx = rho * ux // qx = rho * ux
w[eU] = uref * (udrift[0] - xrel[1] * kappa * expf); w[eU] = uref * (udrift[0] - xrel[1] * kappa * expf);
// qy = rho * uy // qy = rho * uy
...@@ -238,23 +211,26 @@ void exact_smooth_vortex(real *x, real t, real *w) { ...@@ -238,23 +211,26 @@ void exact_smooth_vortex(real *x, real t, real *w) {
// By // By
w[eBy] = bref * xrel[0] * mu * expf; w[eBy] = bref * xrel[0] * mu * expf;
// psi // psi
w[ePsi] = 0.0_F; w[ePsi] = 0.0;
// //
// real T = 1.0; // real T = 1.0;
// real e = T/(gam-1.0); // real e = T/(gam-1.0);
// //
real p = 1.0_F - 0.5_F * bref * bref * mu * mu * r * r * expf * expf; real p = 1.0 - 0.5 * bref * bref * mu * mu * r * r * expf * expf;
real qdotq = w[1] * w[1] + w[2] * w[2]; real qdotq = w[eU] * w[eU] + w[eV] * w[eV];
real BdotB = w[4] * w[4] + w[5] * w[5]; real BdotB = w[eBx] * w[eBx] + w[eBy] * w[eBy];
// //
w[eE] = p / (gam - 1.0_F) + 0.5_F * (qdotq / w[0] + BdotB); w[eE] = p / (gam - 1.0) + 0.5 * (qdotq / w[eRho] + BdotB);
w[eW] = 0.;
w[eBz] = 0.;
w[eW] = 0._F;
w[eBz] = 0._F;
} }
// initial condition on the macro data // initial condition on the macro data
__kernel void init_smooth_vortex(__global real *fn) { __kernel void init_sol(__global real *fn) {
int id = get_global_id(0); int id = get_global_id(0);
...@@ -268,8 +244,10 @@ __kernel void init_smooth_vortex(__global real *fn) { ...@@ -268,8 +244,10 @@ __kernel void init_smooth_vortex(__global real *fn) {
real t = 0; real t = 0;
real xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2}; real xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2};
//exact_sol(xy, t, wnow);
exact_smooth_vortex(xy, t, wnow); exact_smooth_vortex(xy, t, wnow);
real fnow[_N]; real fnow[_N];
w2f(wnow, fnow); w2f(wnow, fnow);
...@@ -282,6 +260,9 @@ __kernel void init_smooth_vortex(__global real *fn) { ...@@ -282,6 +260,9 @@ __kernel void init_smooth_vortex(__global real *fn) {
} }
} }
// one time step of the LBM scheme // one time step of the LBM scheme
__kernel void time_step(__global const real *fn, __global real *fnp1) { __kernel void time_step(__global const real *fn, __global real *fnp1) {
...@@ -302,25 +283,25 @@ __kernel void time_step(__global const real *fn, __global real *fnp1) { ...@@ -302,25 +283,25 @@ __kernel void time_step(__global const real *fn, __global real *fnp1) {
for (int iv = 0; iv < _M; iv++) { for (int iv = 0; iv < _M; iv++) {
int ik = d * _M + iv; int ik = d * _M + iv;
int imem = iR + jR * _NX + ik * ngrid; int imem = iR + jR * _NX + ik * ngrid;
#ifdef dirichlet_updown // #ifdef dirichlet_updown
// dirichlet condition on up and down borders // // dirichlet condition on up and down borders
// (values of border cells are unchanged) // // (values of border cells are unchanged)
if ((j == 0) || (j == _NY - 1)) { // if ((j == 0) || (j == _NY - 1)) {
imem = i + j * _NX + ik * ngrid; // imem = i + j * _NX + ik * ngrid;
} // }
#elif defined dirichlet_leftright // #elif defined dirichlet_leftright
// dirichlet condition on left and right borders // // dirichlet condition on left and right borders
// (values of border cells are unchanged) // // (values of border cells are unchanged)
if ((i == 0) || (i == _NX - 1)) { // if ((i == 0) || (i == _NX - 1)) {
imem = i + j * _NX + ik * ngrid; // imem = i + j * _NX + ik * ngrid;
} // }
#elif defined dirichlet // #elif defined dirichlet
// dirichlet condition on all borders // // dirichlet condition on all borders
// (values of border cells are unchanged) // // (values of border cells are unchanged)
if ((i == 0) || (i == _NX - 1) || (j == 0) || (j == _NY - 1)) { // if ((i == 0) || (i == _NX - 1) || (j == 0) || (j == _NY - 1)) {
imem = i + j * _NX + ik * ngrid; // imem = i + j * _NX + ik * ngrid;
} // }
#endif // #endif
fnow[ik] = fn[imem]; fnow[ik] = fn[imem];
} }
} }
...@@ -332,17 +313,18 @@ __kernel void time_step(__global const real *fn, __global real *fnp1) { ...@@ -332,17 +313,18 @@ __kernel void time_step(__global const real *fn, __global real *fnp1) {
// first order relaxation // first order relaxation
w2f(wnow + 0, fnext + 0); w2f(wnow + 0, fnext + 0);
real om = 2;
// second order relaxation // second order relaxation
for (int iv = 0; iv < _M; iv++) { for (int iv = 0; iv < _M; iv++) {
for (int d = 0; d < 4; d++) { for (int d = 0; d < 4; d++) {
int ik = d * _M + iv; int ik = d * _M + iv;
// if (iv == ePsi) { // if (iv == ePsi) {
// fnext[ik] = 1.0_F * fnext[ik]; // fnext[ik] = 1.0 * fnext[ik];
fnext[ik] = 1.9_F * fnext[ik] - 0.9_F * fnow[ik]; fnext[ik] = om * fnext[ik] - (om - 1) * fnow[ik];
//} else { //} else {
// fnext[ik] = 1.9_F * fnext[ik] - 0.9_F * fnow[ik]; // fnext[ik] = 1.9 * fnext[ik] - 0.9 * fnow[ik];
// fnext[ik] = 2.0_F * fnext[ik] - 1.0_F * fnow[ik]; // fnext[ik] = 2.0 * fnext[ik] - 1.0 * fnow[ik];
// fnext[ik] = 1.0_F * fnext[ik] - 0.0_F * fnow[ik]; // fnext[ik] = 1.0 * fnext[ik] - 0.0 * fnow[ik];
//} //}
} }
} }
...@@ -410,8 +392,8 @@ __kernel void kinetic(const real t, __global real *fn, __global real *kin) { ...@@ -410,8 +392,8 @@ __kernel void kinetic(const real t, __global real *fn, __global real *kin) {
real floc[_N]; real floc[_N];
// save // save
int imem = i + j * _NX; int imemk = i + j * _NX;
kin[imem] = 0.0_F; kin[imemk] = 0;
for (int ik = 0; ik < _N; ik++) { for (int ik = 0; ik < _N; ik++) {
int imem = i + j * _NX + ik * ngrid; int imem = i + j * _NX + ik * ngrid;
...@@ -423,8 +405,9 @@ __kernel void kinetic(const real t, __global real *fn, __global real *kin) { ...@@ -423,8 +405,9 @@ __kernel void kinetic(const real t, __global real *fn, __global real *kin) {
real xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2}; real xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2};
real wnow[_M]; real wnow[_M];
exact_smooth_vortex(xy, t, wnow); exact_smooth_vortex(xy, t, wnow);
real kinloc = (wn[eU]-wnow[eU]) * (wn[eU]-wnow[eU]); //real kinloc = (wn[eU]-wnow[eU]) * (wn[eU]-wnow[eU]);
real kinloc = fabs(wn[eU]-wnow[eU]);
//kinloc += wn[eV] * wn[eV]; //kinloc += wn[eV] * wn[eV];
//kinloc *= 0.5 / wn[eRho]; //kinloc *= 0.5 / wn[eRho];
kin[imem] += kinloc * _DX * _DY; kin[imemk] += kinloc * _DX * _DY;
} }
...@@ -35,17 +35,21 @@ _ivplot = 1 ...@@ -35,17 +35,21 @@ _ivplot = 1
_minplot = -0.05 _minplot = -0.05
_maxplot = 0.45 _maxplot = 0.45
# _minplot = -3
# _maxplot = 3
# grid size # grid size
_nx = 256 _nx = 256
_ny = 256 _ny = 256
_Lx = 10 # 12 #6 #1 _Lx = 20 # 12 #6 #1
_Ly = 10 # 12 #6 #1 _Ly = 20 # 12 #6 #1
# transport velocity # transport velocity
vel = np.array([1., 1.]) vel = np.array([1., 1.])
_Tmax = 5 _Tmax = 10
def solve_ocl(m=_m, n=_n, nx=_nx, ny=_ny, Lx=_Lx, Ly=_Ly, Tmax=_Tmax, def solve_ocl(m=_m, n=_n, nx=_nx, ny=_ny, Lx=_Lx, Ly=_Ly, Tmax=_Tmax,
...@@ -56,7 +60,7 @@ def solve_ocl(m=_m, n=_n, nx=_nx, ny=_ny, Lx=_Lx, Ly=_Ly, Tmax=_Tmax, ...@@ -56,7 +60,7 @@ def solve_ocl(m=_m, n=_n, nx=_nx, ny=_ny, Lx=_Lx, Ly=_Ly, Tmax=_Tmax,
dy = Ly / ny dy = Ly / ny
# lattice speed # lattice speed
vmax = 21. vmax = 20
# time stepping # time stepping
cfl = 1 cfl = 1
...@@ -101,33 +105,21 @@ def solve_ocl(m=_m, n=_n, nx=_nx, ny=_ny, Lx=_Lx, Ly=_Ly, Tmax=_Tmax, ...@@ -101,33 +105,21 @@ def solve_ocl(m=_m, n=_n, nx=_nx, ny=_ny, Lx=_Lx, Ly=_Ly, Tmax=_Tmax,
ctx = cl.create_some_context() ctx = cl.create_some_context()
mf = cl.mem_flags mf = cl.mem_flags
# compile OpenCL C program #opt = "-cl-denorms-are-zero -cl-fast-relaxed-math"
prg = cl.Program(ctx, source).build(options=["-I .", opt = ""
f"-D real={prec[precision]}"])
if precision == "single":
# create OpenCL buffers opt = opt + " -cl-single-precision-constant"
buffer_size = 4 * m * nx * ny * np.dtype(np_real).itemsize
kinetic_buffer_size = ncprx * ncpry * np.dtype(np_real).itemsize
fn_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=buffer_size)
kinetic_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=kinetic_buffer_size)
fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=buffer_size)
#divb_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=buffer_size)
if (nx > ncpr): print(opt)
fn_cpr_gpu = cl.Buffer(ctx, mf.READ_WRITE, # compile OpenCL C program
size=(4 * m * ncprx * ncpry * np.dtype(np_real).itemsize)) prg = cl.Program(ctx, source).build(options= opt)
# create a queue (for submitting opencl operations) # create a queue (for submitting opencl operations)
queue = cl.CommandQueue( queue = cl.CommandQueue(
ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
# init data
event = prg.init_smooth_vortex(queue, (nx * ny, ), None, fn_gpu)
event.wait()
event = prg.init_smooth_vortex(queue, (nx * ny, ), None, fnp1_gpu)
event.wait()
queue.finish()
# number of animation frames # number of animation frames
nbplots = 10 # 120 nbplots = 10 # 120
...@@ -141,7 +133,28 @@ def solve_ocl(m=_m, n=_n, nx=_nx, ny=_ny, Lx=_Lx, Ly=_Ly, Tmax=_Tmax, ...@@ -141,7 +133,28 @@ def solve_ocl(m=_m, n=_n, nx=_nx, ny=_ny, Lx=_Lx, Ly=_Ly, Tmax=_Tmax,
elapsed = 0. elapsed = 0.
fn_cpu = np.zeros((4 * m * ncprx * ncpry, ), dtype=np_real) fn_cpu = np.zeros((4 * m * ncprx * ncpry, ), dtype=np_real)
fnp1_cpu = np.zeros((4 * m * ncprx * ncpry, ), dtype=np_real)
kinetic_cpu = np.zeros((1 * ncprx * ncpry, ), dtype=np_real) kinetic_cpu = np.zeros((1 * ncprx * ncpry, ), dtype=np_real)
# create OpenCL buffers
#buffer_size = 4 * m * nx * ny * np.dtype(np_real).itemsize
#kinetic_buffer_size = ncprx * ncpry * np.dtype(np_real).itemsize
fn_gpu = cl.Buffer(ctx, mf.READ_WRITE| mf.COPY_HOST_PTR, hostbuf = fn_cpu)
fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE| mf.COPY_HOST_PTR, hostbuf = fnp1_cpu)
kinetic_gpu = cl.Buffer(ctx, mf.READ_WRITE| mf.COPY_HOST_PTR, hostbuf = kinetic_cpu)
#fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=buffer_size)
#divb_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=buffer_size)
# init data
event = prg.init_sol(queue, (nx * ny, ), None, fn_gpu)
event.wait()
#event = prg.init_smooth_vortex(queue, (nx * ny, ), None, fnp1_gpu)
#event.wait()
#queue.finish()
#divb_cpu = np.zeros((1 * nx * ny, ), dtype=np_real) #divb_cpu = np.zeros((1 * nx * ny, ), dtype=np_real)
if True: if True:
...@@ -174,7 +187,8 @@ def solve_ocl(m=_m, n=_n, nx=_nx, ny=_ny, Lx=_Lx, Ly=_Ly, Tmax=_Tmax, ...@@ -174,7 +187,8 @@ def solve_ocl(m=_m, n=_n, nx=_nx, ny=_ny, Lx=_Lx, Ly=_Ly, Tmax=_Tmax,
if np.isnan(np.sum(kinetic_cpu)): if np.isnan(np.sum(kinetic_cpu)):
exit(f"Nan in kinetic_cpu at ite {ite}") exit(f"Nan in kinetic_cpu at ite {ite}")
else: else:
print("L2 norm =", np.sqrt(np.sum(kinetic_cpu)/Lx/Ly)) #print("t=",t," L2 norm =", np.sqrt(np.sum(kinetic_cpu)/Lx/Ly))
print("t=",t," L1 norm =", np.sum(kinetic_cpu))
fig.update(x, y, np.sum( fig.update(x, y, np.sum(
wplot[:, _ivplot, :, :], axis=0), suptitle=ite_title, cb=ite == 0) wplot[:, _ivplot, :, :], axis=0), suptitle=ite_title, cb=ite == 0)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment