Commit e3962aa7 by ph

### orsag tang ok

parent 1dccb9bd
 ... @@ -18,23 +18,21 @@ import time ... @@ -18,23 +18,21 @@ import time ##################" definition of default values ##################" definition of default values # number of conservative variables # number of conservative variables _m = 2 _m = 9 # number of kinetic variables # number of kinetic variables _n = 4 * _m _n = 4 * _m _ivplot = 1 _ivplot = 0 # grid size # grid size _nx = 64 _nx = 256 _ny = 64 _ny = 256 Lx = 2 * 3.14159265358979323846264338328 Ly = 2 * 3.14159265358979323846264338328 Lx = 1 Lx = 1 Ly = 1 Ly = 1 _dx = Lx / _nx _dx = Lx / _nx _dy = Ly / _ny _dy = Ly / _ny ... @@ -42,12 +40,12 @@ _dy = Ly / _ny ... @@ -42,12 +40,12 @@ _dy = Ly / _ny vel = np.array([1., 1.]) vel = np.array([1., 1.]) # lattice speed # lattice speed _vmax = 7. _vmax = 20. # time stepping # time stepping _Tmax = 10. / _vmax _Tmax = 10. / _vmax #_Tmax = 0. _Tmax = 2. cfl = 1 cfl = 1 _dt = cfl * _dx / _vmax _dt = cfl * _dx / _vmax ... @@ -61,20 +59,24 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ... @@ -61,20 +59,24 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, dt = _dt, dt = _dt, animate = False): animate = False): ff = "_F" # load and adjust C program # load and adjust C program source = open("lbm_kernels.cl", "r").read() source = open("lbm_kernels.cl", "r").read() source = source.replace("_nx_", "("+str(nx)+")") source = source.replace("_nx_", "("+str(nx)+")") source = source.replace("_ny_", "("+str(ny)+")") source = source.replace("_ny_", "("+str(ny)+")") source = source.replace("_dx_", "("+str(dx)+"f)") source = source.replace("_dx_", "("+str(dx)+ ff + ")") source = source.replace("_dy_", "("+str(dy)+"f)") source = source.replace("_dy_", "("+str(dy)+ ff + ")") source = source.replace("_dt_", "("+str(dt)+"f)") source = source.replace("_dt_", "("+str(dt)+ ff + ")") source = source.replace("_m_", "("+str(m)+")") source = source.replace("_m_", "("+str(m)+")") source = source.replace("_n_", "("+str(n)+")") source = source.replace("_n_", "("+str(n)+")") source = source.replace("_vx_", "("+str(vel[0])+"f)") source = source.replace("_vx_", "("+str(vel[0])+ ff + ")") source = source.replace("_vy_", "("+str(vel[1])+"f)") source = source.replace("_vy_", "("+str(vel[1])+ ff + ")") source = source.replace("_lambda_", "("+str(vmax)+"f)") source = source.replace("_lambda_", "("+str(vmax)+ ff + ")") #print(source) source = source.replace("_F", "") print(source) #exit(0) #exit(0) ... @@ -88,9 +90,9 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ... @@ -88,9 +90,9 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, # create OpenCL buffers # create OpenCL buffers fn_gpu = cl.Buffer(ctx, mf.READ_WRITE, fn_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(4 * m * nx * ny * np.dtype('float32').itemsize)) size=(4 * m * nx * ny * np.dtype('float64').itemsize)) fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(4 * m * nx * ny * np.dtype('float32').itemsize)) size=(4 * m * nx * ny * np.dtype('float64').itemsize)) # create a queue (for submitting opencl operations) # create a queue (for submitting opencl operations) queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) ... @@ -109,7 +111,7 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, ... @@ -109,7 +111,7 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, t = 0 t = 0 iter = 0 iter = 0 elapsed = 0.; elapsed = 0.; fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float32) fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float64) print("start OpenCL computations...") print("start OpenCL computations...") while t < Tmax: while t < Tmax: ... ...
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable #define real double #define _NX _nx_ #define _NX _nx_ #define _NY _ny_ #define _NY _ny_ #define _DX _dx_ #define _DX _dx_ ... @@ -10,65 +15,66 @@ ... @@ -10,65 +15,66 @@ #define _LAMBDA _lambda_ #define _LAMBDA _lambda_ #define M_PI (3.14159265358979323846264338328_F) #define _VOL (_DX * _DY) #define _VOL (_DX * _DY) __constant int dir[4][2] = { {-1, 0}, {1, 0}, __constant int dir[4][2] = { {-1, 0}, {1, 0}, {0, -1}, {0, 1}}; {0, -1}, {0, 1}}; __constant float ds[4] = { _DY, _DY, _DX, _DX }; __constant real ds[4] = { _DY, _DY, _DX, _DX }; // physical flux of the hyperbolic system // physical flux of the hyperbolic system void flux_phy(const float *w, const float* vnorm, float* flux){ /* void flux_phy(const real *w, const real* vnorm, real* flux){ */ float vdotn = _VX * vnorm[0] + _VY * vnorm[1]; /* real vdotn = _VX * vnorm[0] + _VY * vnorm[1]; */ int signe = 1; /* int signe = 1; */ for(int iv = 0; iv < _M; iv++){ /* for(int iv = 0; iv < _M; iv++){ */ flux[iv] = vdotn * w[iv] * signe; /* flux[iv] = vdotn * w[iv] * signe; */ signe = -signe; /* signe = -signe; */ } /* } */ } /* } */ /* void flux_phy(const float *W, const float *vn, float *flux) { */ void flux_phy(const real *W, const real *vn, real *flux) { /* float gam = 1.6666666666f; */ real gam = 1.6666666666_F; /* float un = W[1]/W[0]*vn[0]+W[3]/W[0]*vn[1]+W[4]/W[0]*vn[2]; */ real un = (W[1] * vn[0] + W[3] * vn[1] + W[4] * vn[2]) / W[0]; /* float bn = W[7]*vn[0]+W[5]*vn[1]+W[6]*vn[2]; */ real bn = W[7] * vn[0] + W[5] * vn[1] + W[6] * vn[2]; /* float p = (gam-1)*(W[2] - W[0]*(W[1]/W[0]*W[1]/W[0] */ real p = (gam - 1) * (W[2] /* + W[3]/W[0]*W[3]/W[0] */ - (W[1] * W[1] + W[3] * W[3] /* + W[4]/W[0]*W[4]/W[0])/2 */ + W[4] * W[4]) / 2 / W[0] /* - (W[7]*W[7]+W[5]*W[5]+W[6]*W[6])/2); */ - (W[7] * W[7] + W[5] * W[5] + W[6] * W[6]) /2); /* flux[0] = W[0]*un; */ flux[0] = W[0] * un; /* flux[1] = W[0]*un*W[1]/W[0] + */ flux[1] = un*W[1] + /* (p + (W[7]*W[7] + W[5]*W[5] + W[6]*W[6])/2) */ (p + (W[7] * W[7] + W[5] * W[5] + W[6] * W[6]) / 2) /* *vn[0] - bn*W[7]; */ * vn[0] - bn * W[7]; /* flux[2] = (W[2] + p + (W[7]*W[7] + W[5]*W[5] + W[6]*W[6])/2)*un */ flux[2] = (W[2] + p + (W[7]*W[7] + W[5]*W[5] + W[6]*W[6])/2)*un /* - (W[7]*W[1]/W[0] + W[5]*W[3]/W[0] + W[6]*W[4]/W[0])*bn; */ - (W[7]*W[1] + W[5]*W[3] + W[6]*W[4])*bn/W[0]; /* flux[3] = W[0]*un*W[3]/W[0] + (p + (W[7]*W[7] + W[5]*W[5] */ flux[3] = un*W[3] + (p + (W[7]*W[7] + W[5]*W[5] /* + W[6]*W[6])/2)*vn[1] - bn*W[5]; */ + W[6]*W[6])/2)*vn[1] - bn*W[5]; /* flux[4] = W[0]*un*W[4]/W[0] + (p + (W[7]*W[7] + W[5]*W[5] */ flux[4] = un*W[4] + (p + (W[7]*W[7] + W[5]*W[5] /* + W[6]*W[6])/2)*vn[2] - bn*W[6]; */ + W[6]*W[6])/2)*vn[2] - bn*W[6]; /* flux[5] = -bn*W[3]/W[0] + un*W[5] + W[8]*vn[1]; */ flux[5] = -bn*W[3]/W[0] + un*W[5] + W[8]*vn[1]; /* flux[6] = -bn*W[4]/W[0] + un*W[6] + W[8]*vn[2]; */ flux[6] = -bn*W[4]/W[0] + un*W[6] + W[8]*vn[2]; /* flux[7] = -bn*W[1]/W[0] + un*W[7] + W[8]*vn[0]; */ flux[7] = -bn*W[1]/W[0] + un*W[7] + W[8]*vn[0]; /* flux[8] = 6*6*bn; */ flux[8] = 6*6*bn; /* } */ } // equilibrium "maxwellian" from macro data w // equilibrium "maxwellian" from macro data w void w2f(const float *w, float *f){ void w2f(const real *w, real *f){ for(int d = 0; d < 4; d++){ for(int d = 0; d < 4; d++){ float flux[_M]; real flux[_M]; float vnorm[2] = {dir[d][0], dir[d][1]}; real vnorm[2] = {dir[d][0], dir[d][1]}; flux_phy(w, vnorm, flux); flux_phy(w, vnorm, flux); for(int iv = 0; iv < _M; iv++){ for(int iv = 0; iv < _M; iv++){ f[d * _M + iv] = w[iv] / 4 + flux[iv] / 2 / _LAMBDA; f[d * _M + iv] = w[iv] / 4 + flux[iv] / 2 / _LAMBDA; ... @@ -78,7 +84,7 @@ void w2f(const float *w, float *f){ ... @@ -78,7 +84,7 @@ void w2f(const float *w, float *f){ // macro data w from micro data f // macro data w from micro data f void f2w(const float *f, float *w){ void f2w(const real *f, real *w){ for(int iv = 0; iv < _M; iv++) w[iv] = 0; for(int iv = 0; iv < _M; iv++) w[iv] = 0; for(int d = 0; d < 4; d++){ for(int d = 0; d < 4; d++){ for(int iv = 0; iv < _M; iv++){ for(int iv = 0; iv < _M; iv++){ ... @@ -90,56 +96,56 @@ void f2w(const float *f, float *w){ ... @@ -90,56 +96,56 @@ void f2w(const float *f, float *w){ // exact macro data w // exact macro data w // transport // transport void exact_sol(float* xy, float t, float* w){ /* void exact_sol(real* xy, real t, real* w){ */ int signe = 1; /* int signe = 1; */ for(int iv = 0; iv < _M; iv++){ /* for(int iv = 0; iv < _M; iv++){ */ float x = xy[0] - t * _VX * signe - 0.5f; /* real x = xy[0] - t * _VX * signe - 0.5_F; */ float y = xy[1] - t * _VY * signe - 0.5f; /* real y = xy[1] - t * _VY * signe - 0.5_F; */ float d2 = x * x + y *y; /* real d2 = x * x + y *y; */ w[iv] = exp(-30*d2); /* w[iv] = exp(-30*d2); */ signe = -signe; /* signe = -signe; */ } /* } */ } /* } */ // mhd // mhd /* void conservatives(float *y, float *w) { */ void conservatives(real *y, real *w) { /* float gam = 1.6666666666f; */ real gam = 1.6666666666_F; /* w[0] = y[0]; */ w[0] = y[0]; // rho /* w[1] = y[0]*y[1]; */ w[1] = y[0]*y[1]; // rho u /* w[2] = y[2]/(gam-1) + y[0]*(y[1]*y[1]+y[3]*y[3]+y[4]*y[4])/2 */ w[2] = y[2]/(gam-1) + y[0]*(y[1]*y[1]+y[3]*y[3]+y[4]*y[4])/2 /* + (y[7]*y[7]+y[5]*y[5]+y[6]*y[6])/2; */ + (y[7]*y[7]+y[5]*y[5]+y[6]*y[6])/2; // rho E /* w[3] = y[0]*y[3]; */ w[3] = y[0]*y[3]; // rho v /* w[4] = y[0]*y[4]; */ w[4] = y[0]*y[4]; // rho w /* w[5] = y[5]; */ w[5] = y[5]; // By /* w[6] = y[6]; */ w[6] = y[6]; // Bz /* w[7] = y[7]; // Bx */ w[7] = y[7]; // Bx /* w[8] = y[8]; // psi */ w[8] = y[8]; // psi /* } */ } /* void exact_sol(float* x, float t, float* w){ */ void exact_sol(real* x, real t, real* w){ /* float gam = 1.6666666666f; */ real gam = 1.6666666666_F; /* float yL[9]; */ real yL[9]; /* yL[0] = gam*gam; */ yL[0] = gam*gam; //rho /* yL[1] = -sin(x[1]); */ yL[1] = -sin(2 * M_PI * x[1]); // u /* yL[2] = gam; */ yL[2] = gam; // p /* yL[3] = sin(x[0]); */ yL[3] = sin(2 * M_PI * x[0]); // v /* yL[4] = 0; */ yL[4] = 0; // w /* yL[5] = sin(2*(x[0])); */ yL[5] = sin(4 * M_PI * x[0]); // By /* yL[6] = 0; */ yL[6] = 0; // Bz /* yL[7] = -sin(x[1]); */ yL[7] = -sin(2 * M_PI * x[1]); // Bx /* yL[8] = 0; */ yL[8] = 0; // psi /* conservatives(yL, w); */ conservatives(yL, w); /* } */ } // initial condition on the macro data // initial condition on the macro data __kernel void init_sol(__global float *fn){ __kernel void init_sol(__global real *fn){ int id = get_global_id(0); int id = get_global_id(0); ... @@ -148,14 +154,14 @@ __kernel void init_sol(__global float *fn){ ... @@ -148,14 +154,14 @@ __kernel void init_sol(__global float *fn){ int ngrid = _NX * _NY; int ngrid = _NX * _NY; float wnow[_M]; real wnow[_M]; float t = 0; real t = 0; float xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2}; real xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2}; exact_sol(xy, t, wnow); exact_sol(xy, t, wnow); float fnow[_N]; real fnow[_N]; w2f(wnow, fnow); w2f(wnow, fnow); //printf("x=%f, y=%f \n",xy[0],xy[1]); //printf("x=%f, y=%f \n",xy[0],xy[1]); ... @@ -171,7 +177,7 @@ __kernel void init_sol(__global float *fn){ ... @@ -171,7 +177,7 @@ __kernel void init_sol(__global float *fn){ // one time step of the LBM scheme // one time step of the LBM scheme __kernel void time_step(__global float *fn, __global float *fnp1){ __kernel void time_step(__global real *fn, __global real *fnp1){ int id = get_global_id(0); int id = get_global_id(0); ... @@ -181,7 +187,7 @@ __kernel void time_step(__global float *fn, __global float *fnp1){ ... @@ -181,7 +187,7 @@ __kernel void time_step(__global float *fn, __global float *fnp1){ int ngrid = _NX * _NY; int ngrid = _NX * _NY; float fnow[_N]; real fnow[_N]; // periodic shift in each direction // periodic shift in each direction for(int d = 0; d < 4; d++){ for(int d = 0; d < 4; d++){ ... @@ -195,16 +201,16 @@ __kernel void time_step(__global float *fn, __global float *fnp1){ ... @@ -195,16 +201,16 @@ __kernel void time_step(__global float *fn, __global float *fnp1){ } } } } float wnow[_M]; real wnow[_M]; f2w(fnow, wnow); f2w(fnow, wnow); float fnext[_N]; real fnext[_N]; // first order relaxation // first order relaxation w2f(wnow, fnext); w2f(wnow, fnext); // second order relaxation // second order relaxation for(int ik = 0; ik < _N; ik++) for(int ik = 0; ik < _N; ik++) fnext[ik] = 2 * fnext[ik] - fnow[ik]; fnext[ik] = 1.9_F * fnext[ik] - 0.9_F * fnow[ik]; // save // save for(int ik = 0; ik < _N; ik++){ for(int ik = 0; ik < _N; ik++){ ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!