Commit e3962aa7 authored by ph's avatar ph

orsag tang ok

parent 1dccb9bd
......@@ -18,23 +18,21 @@ import time
##################" definition of default values
# number of conservative variables
_m = 2
_m = 9
# number of kinetic variables
_n = 4 * _m
_ivplot = 1
_ivplot = 0
# grid size
_nx = 64
_ny = 64
Lx = 2 * 3.14159265358979323846264338328
Ly = 2 * 3.14159265358979323846264338328
_nx = 256
_ny = 256
Lx = 1
Ly = 1
_dx = Lx / _nx
_dy = Ly / _ny
......@@ -42,12 +40,12 @@ _dy = Ly / _ny
vel = np.array([1., 1.])
# lattice speed
_vmax = 7.
_vmax = 20.
# time stepping
_Tmax = 10. / _vmax
#_Tmax = 0.
_Tmax = 2.
cfl = 1
_dt = cfl * _dx / _vmax
......@@ -61,20 +59,24 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
dt = _dt,
animate = False):
ff = "_F"
# load and adjust C program
source = open("lbm_kernels.cl", "r").read()
source = source.replace("_nx_", "("+str(nx)+")")
source = source.replace("_ny_", "("+str(ny)+")")
source = source.replace("_dx_", "("+str(dx)+"f)")
source = source.replace("_dy_", "("+str(dy)+"f)")
source = source.replace("_dt_", "("+str(dt)+"f)")
source = source.replace("_dx_", "("+str(dx)+ ff + ")")
source = source.replace("_dy_", "("+str(dy)+ ff + ")")
source = source.replace("_dt_", "("+str(dt)+ ff + ")")
source = source.replace("_m_", "("+str(m)+")")
source = source.replace("_n_", "("+str(n)+")")
source = source.replace("_vx_", "("+str(vel[0])+"f)")
source = source.replace("_vy_", "("+str(vel[1])+"f)")
source = source.replace("_lambda_", "("+str(vmax)+"f)")
source = source.replace("_vx_", "("+str(vel[0])+ ff + ")")
source = source.replace("_vy_", "("+str(vel[1])+ ff + ")")
source = source.replace("_lambda_", "("+str(vmax)+ ff + ")")
#print(source)
source = source.replace("_F", "")
print(source)
#exit(0)
......@@ -88,9 +90,9 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
# create OpenCL buffers
fn_gpu = cl.Buffer(ctx, mf.READ_WRITE,
size=(4 * m * nx * ny * np.dtype('float32').itemsize))
size=(4 * m * nx * ny * np.dtype('float64').itemsize))
fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE,
size=(4 * m * nx * ny * np.dtype('float32').itemsize))
size=(4 * m * nx * ny * np.dtype('float64').itemsize))
# create a queue (for submitting opencl operations)
queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
......@@ -109,7 +111,7 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
t = 0
iter = 0
elapsed = 0.;
fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float32)
fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float64)
print("start OpenCL computations...")
while t < Tmax:
......
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#define real double
#define _NX _nx_
#define _NY _ny_
#define _DX _dx_
......@@ -10,65 +15,66 @@
#define _LAMBDA _lambda_
#define M_PI (3.14159265358979323846264338328_F)
#define _VOL (_DX * _DY)
__constant int dir[4][2] = { {-1, 0}, {1, 0},
{0, -1}, {0, 1}};
__constant float ds[4] = { _DY, _DY, _DX, _DX };
__constant real ds[4] = { _DY, _DY, _DX, _DX };
// physical flux of the hyperbolic system
void flux_phy(const float *w, const float* vnorm, float* flux){
float vdotn = _VX * vnorm[0] + _VY * vnorm[1];
int signe = 1;
for(int iv = 0; iv < _M; iv++){
flux[iv] = vdotn * w[iv] * signe;
signe = -signe;
}
}
/* void flux_phy(const real *w, const real* vnorm, real* flux){ */
/* real vdotn = _VX * vnorm[0] + _VY * vnorm[1]; */
/* int signe = 1; */
/* for(int iv = 0; iv < _M; iv++){ */
/* flux[iv] = vdotn * w[iv] * signe; */
/* signe = -signe; */
/* } */
/* } */
/* void flux_phy(const float *W, const float *vn, float *flux) { */
void flux_phy(const real *W, const real *vn, real *flux) {
/* float gam = 1.6666666666f; */
real gam = 1.6666666666_F;
/* float un = W[1]/W[0]*vn[0]+W[3]/W[0]*vn[1]+W[4]/W[0]*vn[2]; */
/* float bn = W[7]*vn[0]+W[5]*vn[1]+W[6]*vn[2]; */
real un = (W[1] * vn[0] + W[3] * vn[1] + W[4] * vn[2]) / W[0];
real bn = W[7] * vn[0] + W[5] * vn[1] + W[6] * vn[2];
/* float p = (gam-1)*(W[2] - W[0]*(W[1]/W[0]*W[1]/W[0] */
/* + W[3]/W[0]*W[3]/W[0] */
/* + W[4]/W[0]*W[4]/W[0])/2 */
/* - (W[7]*W[7]+W[5]*W[5]+W[6]*W[6])/2); */
real p = (gam - 1) * (W[2]
- (W[1] * W[1] + W[3] * W[3]
+ W[4] * W[4]) / 2 / W[0]
- (W[7] * W[7] + W[5] * W[5] + W[6] * W[6]) /2);
/* flux[0] = W[0]*un; */
/* flux[1] = W[0]*un*W[1]/W[0] + */
/* (p + (W[7]*W[7] + W[5]*W[5] + W[6]*W[6])/2) */
/* *vn[0] - bn*W[7]; */
/* flux[2] = (W[2] + p + (W[7]*W[7] + W[5]*W[5] + W[6]*W[6])/2)*un */
/* - (W[7]*W[1]/W[0] + W[5]*W[3]/W[0] + W[6]*W[4]/W[0])*bn; */
/* flux[3] = W[0]*un*W[3]/W[0] + (p + (W[7]*W[7] + W[5]*W[5] */
/* + W[6]*W[6])/2)*vn[1] - bn*W[5]; */
/* flux[4] = W[0]*un*W[4]/W[0] + (p + (W[7]*W[7] + W[5]*W[5] */
/* + W[6]*W[6])/2)*vn[2] - bn*W[6]; */
flux[0] = W[0] * un;
flux[1] = un*W[1] +
(p + (W[7] * W[7] + W[5] * W[5] + W[6] * W[6]) / 2)
* vn[0] - bn * W[7];
flux[2] = (W[2] + p + (W[7]*W[7] + W[5]*W[5] + W[6]*W[6])/2)*un
- (W[7]*W[1] + W[5]*W[3] + W[6]*W[4])*bn/W[0];
flux[3] = un*W[3] + (p + (W[7]*W[7] + W[5]*W[5]
+ W[6]*W[6])/2)*vn[1] - bn*W[5];
flux[4] = un*W[4] + (p + (W[7]*W[7] + W[5]*W[5]
+ W[6]*W[6])/2)*vn[2] - bn*W[6];
/* flux[5] = -bn*W[3]/W[0] + un*W[5] + W[8]*vn[1]; */
/* flux[6] = -bn*W[4]/W[0] + un*W[6] + W[8]*vn[2]; */
/* flux[7] = -bn*W[1]/W[0] + un*W[7] + W[8]*vn[0]; */
flux[5] = -bn*W[3]/W[0] + un*W[5] + W[8]*vn[1];
flux[6] = -bn*W[4]/W[0] + un*W[6] + W[8]*vn[2];
flux[7] = -bn*W[1]/W[0] + un*W[7] + W[8]*vn[0];
/* flux[8] = 6*6*bn; */
/* } */
flux[8] = 6*6*bn;
}
// equilibrium "maxwellian" from macro data w
void w2f(const float *w, float *f){
void w2f(const real *w, real *f){
for(int d = 0; d < 4; d++){
float flux[_M];
float vnorm[2] = {dir[d][0], dir[d][1]};
real flux[_M];
real vnorm[2] = {dir[d][0], dir[d][1]};
flux_phy(w, vnorm, flux);
for(int iv = 0; iv < _M; iv++){
f[d * _M + iv] = w[iv] / 4 + flux[iv] / 2 / _LAMBDA;
......@@ -78,7 +84,7 @@ void w2f(const float *w, float *f){
// macro data w from micro data f
void f2w(const float *f, float *w){
void f2w(const real *f, real *w){
for(int iv = 0; iv < _M; iv++) w[iv] = 0;
for(int d = 0; d < 4; d++){
for(int iv = 0; iv < _M; iv++){
......@@ -90,56 +96,56 @@ void f2w(const float *f, float *w){
// exact macro data w
// transport
void exact_sol(float* xy, float t, float* w){
int signe = 1;
for(int iv = 0; iv < _M; iv++){
float x = xy[0] - t * _VX * signe - 0.5f;
float y = xy[1] - t * _VY * signe - 0.5f;
float d2 = x * x + y *y;
w[iv] = exp(-30*d2);
signe = -signe;
}
}
/* void exact_sol(real* xy, real t, real* w){ */
/* int signe = 1; */
/* for(int iv = 0; iv < _M; iv++){ */
/* real x = xy[0] - t * _VX * signe - 0.5_F; */
/* real y = xy[1] - t * _VY * signe - 0.5_F; */
/* real d2 = x * x + y *y; */
/* w[iv] = exp(-30*d2); */
/* signe = -signe; */
/* } */
/* } */
// mhd
/* void conservatives(float *y, float *w) { */
/* float gam = 1.6666666666f; */
/* w[0] = y[0]; */
/* w[1] = y[0]*y[1]; */
/* w[2] = y[2]/(gam-1) + y[0]*(y[1]*y[1]+y[3]*y[3]+y[4]*y[4])/2 */
/* + (y[7]*y[7]+y[5]*y[5]+y[6]*y[6])/2; */
/* w[3] = y[0]*y[3]; */
/* w[4] = y[0]*y[4]; */
/* w[5] = y[5]; */
/* w[6] = y[6]; */
/* w[7] = y[7]; // Bx */
/* w[8] = y[8]; // psi */
/* } */
void conservatives(real *y, real *w) {
real gam = 1.6666666666_F;
w[0] = y[0]; // rho
w[1] = y[0]*y[1]; // rho u
w[2] = y[2]/(gam-1) + y[0]*(y[1]*y[1]+y[3]*y[3]+y[4]*y[4])/2
+ (y[7]*y[7]+y[5]*y[5]+y[6]*y[6])/2; // rho E
w[3] = y[0]*y[3]; // rho v
w[4] = y[0]*y[4]; // rho w
w[5] = y[5]; // By
w[6] = y[6]; // Bz
w[7] = y[7]; // Bx
w[8] = y[8]; // psi
}
/* void exact_sol(float* x, float t, float* w){ */
/* float gam = 1.6666666666f; */
/* float yL[9]; */
void exact_sol(real* x, real t, real* w){
real gam = 1.6666666666_F;
real yL[9];
/* yL[0] = gam*gam; */
/* yL[1] = -sin(x[1]); */
/* yL[2] = gam; */
/* yL[3] = sin(x[0]); */
/* yL[4] = 0; */
/* yL[5] = sin(2*(x[0])); */
/* yL[6] = 0; */
/* yL[7] = -sin(x[1]); */
/* yL[8] = 0; */
yL[0] = gam*gam; //rho
yL[1] = -sin(2 * M_PI * x[1]); // u
yL[2] = gam; // p
yL[3] = sin(2 * M_PI * x[0]); // v
yL[4] = 0; // w
yL[5] = sin(4 * M_PI * x[0]); // By
yL[6] = 0; // Bz
yL[7] = -sin(2 * M_PI * x[1]); // Bx
yL[8] = 0; // psi
/* conservatives(yL, w); */
conservatives(yL, w);
/* } */
}
// initial condition on the macro data
__kernel void init_sol(__global float *fn){
__kernel void init_sol(__global real *fn){
int id = get_global_id(0);
......@@ -148,14 +154,14 @@ __kernel void init_sol(__global float *fn){
int ngrid = _NX * _NY;
float wnow[_M];
real wnow[_M];
float t = 0;
float xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2};
real t = 0;
real xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2};
exact_sol(xy, t, wnow);
float fnow[_N];
real fnow[_N];
w2f(wnow, fnow);
//printf("x=%f, y=%f \n",xy[0],xy[1]);
......@@ -171,7 +177,7 @@ __kernel void init_sol(__global float *fn){
// one time step of the LBM scheme
__kernel void time_step(__global float *fn, __global float *fnp1){
__kernel void time_step(__global real *fn, __global real *fnp1){
int id = get_global_id(0);
......@@ -181,7 +187,7 @@ __kernel void time_step(__global float *fn, __global float *fnp1){
int ngrid = _NX * _NY;
float fnow[_N];
real fnow[_N];
// periodic shift in each direction
for(int d = 0; d < 4; d++){
......@@ -195,16 +201,16 @@ __kernel void time_step(__global float *fn, __global float *fnp1){
}
}
float wnow[_M];
real wnow[_M];
f2w(fnow, wnow);
float fnext[_N];
real fnext[_N];
// first order relaxation
w2f(wnow, fnext);
// second order relaxation
for(int ik = 0; ik < _N; ik++)
fnext[ik] = 2 * fnext[ik] - fnow[ik];
fnext[ik] = 1.9_F * fnext[ik] - 0.9_F * fnow[ik];
// save
for(int ik = 0; ik < _N; ik++){
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment