Commit e3962aa7 authored by ph's avatar ph

orsag tang ok

parent 1dccb9bd
...@@ -18,23 +18,21 @@ import time ...@@ -18,23 +18,21 @@ import time
##################" definition of default values ##################" definition of default values
# number of conservative variables # number of conservative variables
_m = 2 _m = 9
# number of kinetic variables # number of kinetic variables
_n = 4 * _m _n = 4 * _m
_ivplot = 1 _ivplot = 0
# grid size # grid size
_nx = 64 _nx = 256
_ny = 64 _ny = 256
Lx = 2 * 3.14159265358979323846264338328
Ly = 2 * 3.14159265358979323846264338328
Lx = 1 Lx = 1
Ly = 1 Ly = 1
_dx = Lx / _nx _dx = Lx / _nx
_dy = Ly / _ny _dy = Ly / _ny
...@@ -42,12 +40,12 @@ _dy = Ly / _ny ...@@ -42,12 +40,12 @@ _dy = Ly / _ny
vel = np.array([1., 1.]) vel = np.array([1., 1.])
# lattice speed # lattice speed
_vmax = 7. _vmax = 20.
# time stepping # time stepping
_Tmax = 10. / _vmax _Tmax = 10. / _vmax
#_Tmax = 0. _Tmax = 2.
cfl = 1 cfl = 1
_dt = cfl * _dx / _vmax _dt = cfl * _dx / _vmax
...@@ -61,20 +59,24 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ...@@ -61,20 +59,24 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
dt = _dt, dt = _dt,
animate = False): animate = False):
ff = "_F"
# load and adjust C program # load and adjust C program
source = open("lbm_kernels.cl", "r").read() source = open("lbm_kernels.cl", "r").read()
source = source.replace("_nx_", "("+str(nx)+")") source = source.replace("_nx_", "("+str(nx)+")")
source = source.replace("_ny_", "("+str(ny)+")") source = source.replace("_ny_", "("+str(ny)+")")
source = source.replace("_dx_", "("+str(dx)+"f)") source = source.replace("_dx_", "("+str(dx)+ ff + ")")
source = source.replace("_dy_", "("+str(dy)+"f)") source = source.replace("_dy_", "("+str(dy)+ ff + ")")
source = source.replace("_dt_", "("+str(dt)+"f)") source = source.replace("_dt_", "("+str(dt)+ ff + ")")
source = source.replace("_m_", "("+str(m)+")") source = source.replace("_m_", "("+str(m)+")")
source = source.replace("_n_", "("+str(n)+")") source = source.replace("_n_", "("+str(n)+")")
source = source.replace("_vx_", "("+str(vel[0])+"f)") source = source.replace("_vx_", "("+str(vel[0])+ ff + ")")
source = source.replace("_vy_", "("+str(vel[1])+"f)") source = source.replace("_vy_", "("+str(vel[1])+ ff + ")")
source = source.replace("_lambda_", "("+str(vmax)+"f)") source = source.replace("_lambda_", "("+str(vmax)+ ff + ")")
#print(source) source = source.replace("_F", "")
print(source)
#exit(0) #exit(0)
...@@ -88,9 +90,9 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ...@@ -88,9 +90,9 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
# create OpenCL buffers # create OpenCL buffers
fn_gpu = cl.Buffer(ctx, mf.READ_WRITE, fn_gpu = cl.Buffer(ctx, mf.READ_WRITE,
size=(4 * m * nx * ny * np.dtype('float32').itemsize)) size=(4 * m * nx * ny * np.dtype('float64').itemsize))
fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE,
size=(4 * m * nx * ny * np.dtype('float32').itemsize)) size=(4 * m * nx * ny * np.dtype('float64').itemsize))
# create a queue (for submitting opencl operations) # create a queue (for submitting opencl operations)
queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
...@@ -109,7 +111,7 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ...@@ -109,7 +111,7 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
t = 0 t = 0
iter = 0 iter = 0
elapsed = 0.; elapsed = 0.;
fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float32) fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float64)
print("start OpenCL computations...") print("start OpenCL computations...")
while t < Tmax: while t < Tmax:
......
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#define real double
#define _NX _nx_ #define _NX _nx_
#define _NY _ny_ #define _NY _ny_
#define _DX _dx_ #define _DX _dx_
...@@ -10,65 +15,66 @@ ...@@ -10,65 +15,66 @@
#define _LAMBDA _lambda_ #define _LAMBDA _lambda_
#define M_PI (3.14159265358979323846264338328_F)
#define _VOL (_DX * _DY) #define _VOL (_DX * _DY)
__constant int dir[4][2] = { {-1, 0}, {1, 0}, __constant int dir[4][2] = { {-1, 0}, {1, 0},
{0, -1}, {0, 1}}; {0, -1}, {0, 1}};
__constant float ds[4] = { _DY, _DY, _DX, _DX }; __constant real ds[4] = { _DY, _DY, _DX, _DX };
// physical flux of the hyperbolic system // physical flux of the hyperbolic system
void flux_phy(const float *w, const float* vnorm, float* flux){ /* void flux_phy(const real *w, const real* vnorm, real* flux){ */
float vdotn = _VX * vnorm[0] + _VY * vnorm[1]; /* real vdotn = _VX * vnorm[0] + _VY * vnorm[1]; */
int signe = 1; /* int signe = 1; */
for(int iv = 0; iv < _M; iv++){ /* for(int iv = 0; iv < _M; iv++){ */
flux[iv] = vdotn * w[iv] * signe; /* flux[iv] = vdotn * w[iv] * signe; */
signe = -signe; /* signe = -signe; */
} /* } */
} /* } */
/* void flux_phy(const float *W, const float *vn, float *flux) { */ void flux_phy(const real *W, const real *vn, real *flux) {
/* float gam = 1.6666666666f; */ real gam = 1.6666666666_F;
/* float un = W[1]/W[0]*vn[0]+W[3]/W[0]*vn[1]+W[4]/W[0]*vn[2]; */ real un = (W[1] * vn[0] + W[3] * vn[1] + W[4] * vn[2]) / W[0];
/* float bn = W[7]*vn[0]+W[5]*vn[1]+W[6]*vn[2]; */ real bn = W[7] * vn[0] + W[5] * vn[1] + W[6] * vn[2];
/* float p = (gam-1)*(W[2] - W[0]*(W[1]/W[0]*W[1]/W[0] */ real p = (gam - 1) * (W[2]
/* + W[3]/W[0]*W[3]/W[0] */ - (W[1] * W[1] + W[3] * W[3]
/* + W[4]/W[0]*W[4]/W[0])/2 */ + W[4] * W[4]) / 2 / W[0]
/* - (W[7]*W[7]+W[5]*W[5]+W[6]*W[6])/2); */ - (W[7] * W[7] + W[5] * W[5] + W[6] * W[6]) /2);
/* flux[0] = W[0]*un; */ flux[0] = W[0] * un;
/* flux[1] = W[0]*un*W[1]/W[0] + */ flux[1] = un*W[1] +
/* (p + (W[7]*W[7] + W[5]*W[5] + W[6]*W[6])/2) */ (p + (W[7] * W[7] + W[5] * W[5] + W[6] * W[6]) / 2)
/* *vn[0] - bn*W[7]; */ * vn[0] - bn * W[7];
/* flux[2] = (W[2] + p + (W[7]*W[7] + W[5]*W[5] + W[6]*W[6])/2)*un */ flux[2] = (W[2] + p + (W[7]*W[7] + W[5]*W[5] + W[6]*W[6])/2)*un
/* - (W[7]*W[1]/W[0] + W[5]*W[3]/W[0] + W[6]*W[4]/W[0])*bn; */ - (W[7]*W[1] + W[5]*W[3] + W[6]*W[4])*bn/W[0];
/* flux[3] = W[0]*un*W[3]/W[0] + (p + (W[7]*W[7] + W[5]*W[5] */ flux[3] = un*W[3] + (p + (W[7]*W[7] + W[5]*W[5]
/* + W[6]*W[6])/2)*vn[1] - bn*W[5]; */ + W[6]*W[6])/2)*vn[1] - bn*W[5];
/* flux[4] = W[0]*un*W[4]/W[0] + (p + (W[7]*W[7] + W[5]*W[5] */ flux[4] = un*W[4] + (p + (W[7]*W[7] + W[5]*W[5]
/* + W[6]*W[6])/2)*vn[2] - bn*W[6]; */ + W[6]*W[6])/2)*vn[2] - bn*W[6];
/* flux[5] = -bn*W[3]/W[0] + un*W[5] + W[8]*vn[1]; */ flux[5] = -bn*W[3]/W[0] + un*W[5] + W[8]*vn[1];
/* flux[6] = -bn*W[4]/W[0] + un*W[6] + W[8]*vn[2]; */ flux[6] = -bn*W[4]/W[0] + un*W[6] + W[8]*vn[2];
/* flux[7] = -bn*W[1]/W[0] + un*W[7] + W[8]*vn[0]; */ flux[7] = -bn*W[1]/W[0] + un*W[7] + W[8]*vn[0];
/* flux[8] = 6*6*bn; */ flux[8] = 6*6*bn;
/* } */ }
// equilibrium "maxwellian" from macro data w // equilibrium "maxwellian" from macro data w
void w2f(const float *w, float *f){ void w2f(const real *w, real *f){
for(int d = 0; d < 4; d++){ for(int d = 0; d < 4; d++){
float flux[_M]; real flux[_M];
float vnorm[2] = {dir[d][0], dir[d][1]}; real vnorm[2] = {dir[d][0], dir[d][1]};
flux_phy(w, vnorm, flux); flux_phy(w, vnorm, flux);
for(int iv = 0; iv < _M; iv++){ for(int iv = 0; iv < _M; iv++){
f[d * _M + iv] = w[iv] / 4 + flux[iv] / 2 / _LAMBDA; f[d * _M + iv] = w[iv] / 4 + flux[iv] / 2 / _LAMBDA;
...@@ -78,7 +84,7 @@ void w2f(const float *w, float *f){ ...@@ -78,7 +84,7 @@ void w2f(const float *w, float *f){
// macro data w from micro data f // macro data w from micro data f
void f2w(const float *f, float *w){ void f2w(const real *f, real *w){
for(int iv = 0; iv < _M; iv++) w[iv] = 0; for(int iv = 0; iv < _M; iv++) w[iv] = 0;
for(int d = 0; d < 4; d++){ for(int d = 0; d < 4; d++){
for(int iv = 0; iv < _M; iv++){ for(int iv = 0; iv < _M; iv++){
...@@ -90,56 +96,56 @@ void f2w(const float *f, float *w){ ...@@ -90,56 +96,56 @@ void f2w(const float *f, float *w){
// exact macro data w // exact macro data w
// transport // transport
void exact_sol(float* xy, float t, float* w){ /* void exact_sol(real* xy, real t, real* w){ */
int signe = 1; /* int signe = 1; */
for(int iv = 0; iv < _M; iv++){ /* for(int iv = 0; iv < _M; iv++){ */
float x = xy[0] - t * _VX * signe - 0.5f; /* real x = xy[0] - t * _VX * signe - 0.5_F; */
float y = xy[1] - t * _VY * signe - 0.5f; /* real y = xy[1] - t * _VY * signe - 0.5_F; */
float d2 = x * x + y *y; /* real d2 = x * x + y *y; */
w[iv] = exp(-30*d2); /* w[iv] = exp(-30*d2); */
signe = -signe; /* signe = -signe; */
} /* } */
} /* } */
// mhd // mhd
/* void conservatives(float *y, float *w) { */ void conservatives(real *y, real *w) {
/* float gam = 1.6666666666f; */ real gam = 1.6666666666_F;
/* w[0] = y[0]; */ w[0] = y[0]; // rho
/* w[1] = y[0]*y[1]; */ w[1] = y[0]*y[1]; // rho u
/* w[2] = y[2]/(gam-1) + y[0]*(y[1]*y[1]+y[3]*y[3]+y[4]*y[4])/2 */ w[2] = y[2]/(gam-1) + y[0]*(y[1]*y[1]+y[3]*y[3]+y[4]*y[4])/2
/* + (y[7]*y[7]+y[5]*y[5]+y[6]*y[6])/2; */ + (y[7]*y[7]+y[5]*y[5]+y[6]*y[6])/2; // rho E
/* w[3] = y[0]*y[3]; */ w[3] = y[0]*y[3]; // rho v
/* w[4] = y[0]*y[4]; */ w[4] = y[0]*y[4]; // rho w
/* w[5] = y[5]; */ w[5] = y[5]; // By
/* w[6] = y[6]; */ w[6] = y[6]; // Bz
/* w[7] = y[7]; // Bx */ w[7] = y[7]; // Bx
/* w[8] = y[8]; // psi */ w[8] = y[8]; // psi
/* } */ }
/* void exact_sol(float* x, float t, float* w){ */ void exact_sol(real* x, real t, real* w){
/* float gam = 1.6666666666f; */ real gam = 1.6666666666_F;
/* float yL[9]; */ real yL[9];
/* yL[0] = gam*gam; */ yL[0] = gam*gam; //rho
/* yL[1] = -sin(x[1]); */ yL[1] = -sin(2 * M_PI * x[1]); // u
/* yL[2] = gam; */ yL[2] = gam; // p
/* yL[3] = sin(x[0]); */ yL[3] = sin(2 * M_PI * x[0]); // v
/* yL[4] = 0; */ yL[4] = 0; // w
/* yL[5] = sin(2*(x[0])); */ yL[5] = sin(4 * M_PI * x[0]); // By
/* yL[6] = 0; */ yL[6] = 0; // Bz
/* yL[7] = -sin(x[1]); */ yL[7] = -sin(2 * M_PI * x[1]); // Bx
/* yL[8] = 0; */ yL[8] = 0; // psi
/* conservatives(yL, w); */ conservatives(yL, w);
/* } */ }
// initial condition on the macro data // initial condition on the macro data
__kernel void init_sol(__global float *fn){ __kernel void init_sol(__global real *fn){
int id = get_global_id(0); int id = get_global_id(0);
...@@ -148,14 +154,14 @@ __kernel void init_sol(__global float *fn){ ...@@ -148,14 +154,14 @@ __kernel void init_sol(__global float *fn){
int ngrid = _NX * _NY; int ngrid = _NX * _NY;
float wnow[_M]; real wnow[_M];
float t = 0; real t = 0;
float xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2}; real xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2};
exact_sol(xy, t, wnow); exact_sol(xy, t, wnow);
float fnow[_N]; real fnow[_N];
w2f(wnow, fnow); w2f(wnow, fnow);
//printf("x=%f, y=%f \n",xy[0],xy[1]); //printf("x=%f, y=%f \n",xy[0],xy[1]);
...@@ -171,7 +177,7 @@ __kernel void init_sol(__global float *fn){ ...@@ -171,7 +177,7 @@ __kernel void init_sol(__global float *fn){
// one time step of the LBM scheme // one time step of the LBM scheme
__kernel void time_step(__global float *fn, __global float *fnp1){ __kernel void time_step(__global real *fn, __global real *fnp1){
int id = get_global_id(0); int id = get_global_id(0);
...@@ -181,7 +187,7 @@ __kernel void time_step(__global float *fn, __global float *fnp1){ ...@@ -181,7 +187,7 @@ __kernel void time_step(__global float *fn, __global float *fnp1){
int ngrid = _NX * _NY; int ngrid = _NX * _NY;
float fnow[_N]; real fnow[_N];
// periodic shift in each direction // periodic shift in each direction
for(int d = 0; d < 4; d++){ for(int d = 0; d < 4; d++){
...@@ -195,16 +201,16 @@ __kernel void time_step(__global float *fn, __global float *fnp1){ ...@@ -195,16 +201,16 @@ __kernel void time_step(__global float *fn, __global float *fnp1){
} }
} }
float wnow[_M]; real wnow[_M];
f2w(fnow, wnow); f2w(fnow, wnow);
float fnext[_N]; real fnext[_N];
// first order relaxation // first order relaxation
w2f(wnow, fnext); w2f(wnow, fnext);
// second order relaxation // second order relaxation
for(int ik = 0; ik < _N; ik++) for(int ik = 0; ik < _N; ik++)
fnext[ik] = 2 * fnext[ik] - fnow[ik]; fnext[ik] = 1.9_F * fnext[ik] - 0.9_F * fnow[ik];
// save // save
for(int ik = 0; ik < _N; ik++){ for(int ik = 0; ik < _N; ik++){
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment