From 1dccb9bd7e1152da52c48e1535a9f198469c071d Mon Sep 17 00:00:00 2001 From: ph Date: Tue, 5 Dec 2017 09:21:42 +0100 Subject: [PATCH] up --- lbm_cl.py | 35 ++++++++++--------- lbm_kernels.cl | 91 ++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 101 insertions(+), 25 deletions(-) diff --git a/lbm_cl.py b/lbm_cl.py index 47df369..722fe42 100755 --- a/lbm_cl.py +++ b/lbm_cl.py @@ -18,18 +18,22 @@ import time ##################" definition of default values # number of conservative variables -_m = 1 +_m = 2 # number of kinetic variables _n = 4 * _m +_ivplot = 1 # grid size -_nx = 128 -_ny = 128 +_nx = 64 +_ny = 64 -Lx = 1. -Ly = 1. +Lx = 2 * 3.14159265358979323846264338328 +Ly = 2 * 3.14159265358979323846264338328 + +Lx = 1 +Ly = 1 _dx = Lx / _nx _dy = Ly / _ny @@ -38,7 +42,7 @@ _dy = Ly / _ny vel = np.array([1., 1.]) # lattice speed -_vmax = 3. +_vmax = 7. # time stepping @@ -49,19 +53,12 @@ cfl = 1 _dt = cfl * _dx / _vmax ############# end of default values -def exact_sol(xy, t): - x = xy[0] - t * vel[0] - 0.5 - y = xy[1] - t * vel[1] - 0.5 - d2 = x * x + y *y - w = np.exp(-30*d2) - return w - def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, Tmax = _Tmax, vmax = _vmax, dx = _dx, dy = _dy, - dt = _dt, exact_sol = exact_sol, + dt = _dt, animate = False): # load and adjust C program @@ -128,10 +125,10 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, print("iter=",iter, " t=",t, "elapsed (s)=",elapsed) if iter % iterplot == 0 and animate: cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait() - wplot = np.reshape(fn_cpu, (n, nx, ny)) + wplot = np.reshape(fn_cpu, (4, m, nx, ny)) plt.clf() #plt.imshow(np.sum(wplot, axis = 0),vmin=0, vmax=1) - plt.imshow(np.sum(wplot, axis = 0)) + plt.imshow(np.sum(wplot[:, _ivplot, :, :], axis = 0)) plt.gca().invert_yaxis() plt.colorbar() plt.pause(0.01) @@ -139,13 +136,15 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, # copy OpenCL data to CPU and return the results cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait() - wplot_gpu = np.reshape(fn_cpu,(n, nx, ny)) + wplot_gpu = np.reshape(fn_cpu,(4, m, nx, ny)) return wplot_gpu # gpu solve wplot_gpu = solve_ocl(animate = True) +#print(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0)) plt.clf() -plt.imshow(np.sum(wplot_gpu,axis=0), vmin=0, vmax=1) +#plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0), vmin=0, vmax=1) +plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0)) plt.gca().invert_yaxis() plt.colorbar() plt.show() diff --git a/lbm_kernels.cl b/lbm_kernels.cl index 6073926..76b517b 100644 --- a/lbm_kernels.cl +++ b/lbm_kernels.cl @@ -23,11 +23,47 @@ __constant float ds[4] = { _DY, _DY, _DX, _DX }; // physical flux of the hyperbolic system void flux_phy(const float *w, const float* vnorm, float* flux){ float vdotn = _VX * vnorm[0] + _VY * vnorm[1]; + int signe = 1; for(int iv = 0; iv < _M; iv++){ - flux[iv] = vdotn * w[iv]; + flux[iv] = vdotn * w[iv] * signe; + signe = -signe; } } +/* void flux_phy(const float *W, const float *vn, float *flux) { */ + +/* float gam = 1.6666666666f; */ + +/* float un = W[1]/W[0]*vn[0]+W[3]/W[0]*vn[1]+W[4]/W[0]*vn[2]; */ +/* float bn = W[7]*vn[0]+W[5]*vn[1]+W[6]*vn[2]; */ + +/* float p = (gam-1)*(W[2] - W[0]*(W[1]/W[0]*W[1]/W[0] */ +/* + W[3]/W[0]*W[3]/W[0] */ +/* + W[4]/W[0]*W[4]/W[0])/2 */ +/* - (W[7]*W[7]+W[5]*W[5]+W[6]*W[6])/2); */ + +/* flux[0] = W[0]*un; */ +/* flux[1] = W[0]*un*W[1]/W[0] + */ +/* (p + (W[7]*W[7] + W[5]*W[5] + W[6]*W[6])/2) */ +/* *vn[0] - bn*W[7]; */ +/* flux[2] = (W[2] + p + (W[7]*W[7] + W[5]*W[5] + W[6]*W[6])/2)*un */ +/* - (W[7]*W[1]/W[0] + W[5]*W[3]/W[0] + W[6]*W[4]/W[0])*bn; */ +/* flux[3] = W[0]*un*W[3]/W[0] + (p + (W[7]*W[7] + W[5]*W[5] */ +/* + W[6]*W[6])/2)*vn[1] - bn*W[5]; */ +/* flux[4] = W[0]*un*W[4]/W[0] + (p + (W[7]*W[7] + W[5]*W[5] */ +/* + W[6]*W[6])/2)*vn[2] - bn*W[6]; */ + +/* flux[5] = -bn*W[3]/W[0] + un*W[5] + W[8]*vn[1]; */ +/* flux[6] = -bn*W[4]/W[0] + un*W[6] + W[8]*vn[2]; */ +/* flux[7] = -bn*W[1]/W[0] + un*W[7] + W[8]*vn[0]; */ + +/* flux[8] = 6*6*bn; */ +/* } */ + + + + + // equilibrium "maxwellian" from macro data w void w2f(const float *w, float *f){ for(int d = 0; d < 4; d++){ @@ -53,14 +89,55 @@ void f2w(const float *f, float *w){ // exact macro data w +// transport void exact_sol(float* xy, float t, float* w){ - float x = xy[0] - t * _VX - 0.5f; - float y = xy[1] - t * _VY - 0.5f; - float d2 = x * x + y *y; - for(int iv = 0; iv < _M; iv++) + int signe = 1; + for(int iv = 0; iv < _M; iv++){ + float x = xy[0] - t * _VX * signe - 0.5f; + float y = xy[1] - t * _VY * signe - 0.5f; + float d2 = x * x + y *y; w[iv] = exp(-30*d2); + signe = -signe; + } } + +// mhd +/* void conservatives(float *y, float *w) { */ +/* float gam = 1.6666666666f; */ + +/* w[0] = y[0]; */ +/* w[1] = y[0]*y[1]; */ +/* w[2] = y[2]/(gam-1) + y[0]*(y[1]*y[1]+y[3]*y[3]+y[4]*y[4])/2 */ +/* + (y[7]*y[7]+y[5]*y[5]+y[6]*y[6])/2; */ +/* w[3] = y[0]*y[3]; */ +/* w[4] = y[0]*y[4]; */ +/* w[5] = y[5]; */ +/* w[6] = y[6]; */ +/* w[7] = y[7]; // Bx */ +/* w[8] = y[8]; // psi */ +/* } */ + + +/* void exact_sol(float* x, float t, float* w){ */ +/* float gam = 1.6666666666f; */ +/* float yL[9]; */ + +/* yL[0] = gam*gam; */ +/* yL[1] = -sin(x[1]); */ +/* yL[2] = gam; */ +/* yL[3] = sin(x[0]); */ +/* yL[4] = 0; */ +/* yL[5] = sin(2*(x[0])); */ +/* yL[6] = 0; */ +/* yL[7] = -sin(x[1]); */ +/* yL[8] = 0; */ + +/* conservatives(yL, w); */ + +/* } */ + + // initial condition on the macro data __kernel void init_sol(__global float *fn){ @@ -77,7 +154,7 @@ __kernel void init_sol(__global float *fn){ float xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2}; exact_sol(xy, t, wnow); - + float fnow[_N]; w2f(wnow, fnow); @@ -127,7 +204,7 @@ __kernel void time_step(__global float *fn, __global float *fnp1){ // second order relaxation for(int ik = 0; ik < _N; ik++) - //fnext[ik] = 2 * fnext[ik] - fnow[ik]; + fnext[ik] = 2 * fnext[ik] - fnow[ik]; // save for(int ik = 0; ik < _N; ik++){ -- GitLab