Commit 032043ed by Romane Helie

Merge branch 'master' of gitlab.math.unistra.fr:patapon/patapon

parents 5c11b3d4 52b622f2
 ... ... @@ -15,6 +15,7 @@ //#pragma OPENCL EXTENSION cl_khr_fp64 : enable #include "vortex_kernels.h" #define real _real_ #define _NX _nx_ ... ...
 void flux_phy(const real *W, const real *vn, real *flux);
 ... ... @@ -20,6 +20,7 @@ from utils import Figure, float2str, load_kernel, parse_cl_args, get_ite_title # matplotlib.use('Agg') #from matplotlib import rcParams prec = {'single': 'float', 'double': 'double'} # Default values ... ... @@ -183,14 +184,16 @@ def solve_ocl(m=_m, n=_n, nx=_nx, ny=_ny, Lx=_Lx, Ly=_Ly, Tmax=_Tmax, wplot = np.reshape(fn_cpu, (4, m, ncprx, ncpry)) fig.update(x, y, np.sum( wplot[:, _ivplot, :, :], axis=0), suptitle=ite_title, cb=ite == 0) if np.isnan(np.sum(kinetic_cpu)): exit(f"Nan in kinetic_cpu at ite {ite}") else: #print("t=",t," L2 norm =", np.sqrt(np.sum(kinetic_cpu)/Lx/Ly)) print("t=",t," L1 norm =", np.sum(kinetic_cpu)) fig.update(x, y, np.sum( wplot[:, _ivplot, :, :], axis=0), suptitle=ite_title, cb=ite == 0) else: print(ite_title, end='\r') ... ...
 ... ... @@ -34,7 +34,7 @@ vmax = np.sqrt(gpes * hmax) + umax # time stepping cfl = 0.8 _Tmax = 0.05 _Tmax = 1.0 def solve_ocl(m=_m, nx=_nx, ny=_ny, Tmax=_Tmax, Lx=_Lx, Ly=_Ly, animate=True, ivplot=0, precision="single", **kwargs): ... ... @@ -60,12 +60,13 @@ def solve_ocl(m=_m, nx=_nx, ny=_ny, Tmax=_Tmax, Lx=_Lx, Ly=_Ly, animate=True, iv np_real, source = load_kernel("stvenant_kernels.cl", parameters, precision=precision, print_source=True, module_file=__file__) # OpenCL init ctx = cl.create_some_context() mf = cl.mem_flags # compile OpenCL C program prg = cl.Program(ctx, source).build(options="-cl-fast-relaxed-math") prg = cl.Program(ctx, source).build(options="-cl-fast-relaxed-math -cl-single-precision-constant") dev = ctx.devices[0]; ... ...
 ... ... @@ -5,72 +5,61 @@ #define _DT _dt_ #define _M _m_ #define double float #define _LAMBDA _lambda_ #define _G (9.81_F) #define _VOL (_DX * _DY) __constant int dir[4][2] = { {1, 0}, {-1, 0}, {0, 1}, {0, -1}}; __constant int dir[4][2] = {{1, 0}, {-1, 0}, {0, 1}, {0, -1}}; __constant float ds[4] = { _DY, _DY, _DX, _DX }; #define double float __constant double ds[4] = {_DY, _DY, _DX, _DX}; void riem_stvenant(double *wL, double *wR, double xi, double *w); void riem_stvenant(double *wL, double *wR, double xi, double *w); double Z(double h1, double h2); double Z(double h1, double h2); double dZ(double h1, double h2); double dZ(double h1, double h2); void flux_riem_2d(double *wL, double *wR, double *vn, double *flux); void fluxphy(double *w, double *n, double *flux) { double h = w[0]; double u = w[1] / h; double v = w[2] / h; void fluxphy(float *w, float* n, float *flux){ float h = w[0]; float u = w[1] / h; float v=w[2] / h; float un = u * n[0] + v * n[1]; double un = u * n[0] + v * n[1]; flux[0] = h * un; flux[1] = h * u * un + _G * h * h / 2 * n[0]; flux[2] = h * v * un + _G * h * h / 2 * n[1]; flux[1] = h * u * un + _G * h * h / 2 * n[0]; flux[2] = h * v * un + _G * h * h / 2 * n[1]; } void fluxnum(float *wL, float *wR, float* vnorm, float* flux){ float fL[_M]; float fR[_M]; void fluxnum(double *wL, double *wR, double *vnorm, double *flux) { double fL[_M]; double fR[_M]; fluxphy(wL, vnorm, fL); fluxphy(wR, vnorm, fR); for(int iv = 0; iv < _M; iv++){ for (int iv = 0; iv < _M; iv++) { flux[iv] = 0.5f * (fL[iv] + fR[iv]) - 0.5f * _LAMBDA * (wR[iv] - wL[iv]); } } __constant double g = 9.81; void flux_riem_2d(double *wL, double *wR, double *vnorm, double *flux){ void flux_riem_2d(double *wL, double *wR, double *vnorm, double *flux) { double qnL = wL[1] * vnorm[0] + wL[2] * vnorm[1]; double qnR = wR[1] * vnorm[0] + wR[2] * vnorm[1]; double qnL = wL[1] * vnorm[0] + wL[2] * vnorm[1]; double qnR = wR[1] * vnorm[0] + wR[2] * vnorm[1]; double qtL = -wL[1] * vnorm[1] + wL[2] * vnorm[0]; double qtR = -wR[1] * vnorm[1] + wR[2] * vnorm[0]; double qtL = -wL[1] * vnorm[1] + wL[2] * vnorm[0]; double qtR = -wR[1] * vnorm[1] + wR[2] * vnorm[0]; double vL[2] = {wL[0], qnL}; double vR[2] = {wR[0], qnR}; ... ... @@ -100,38 +89,30 @@ void flux_riem_2d(double *wL, double *wR, double *vnorm, double *flux){ // puis calcul du flux. fluxphy(w, vnorm, flux); //if (wL[0] != wR[0]) //printf("flux=%f %f %f w=%f %f %f\n",flux[0], flux[1], flux[2], // if (wL[0] != wR[0]) // printf("flux=%f %f %f w=%f %f %f\n",flux[0], flux[1], flux[2], // w[0], w[1], w[2]); } void riem_stvenant(double *wL, double *wR, double xi, double *w){ void riem_stvenant(double *wL, double *wR, double xi, double *w) { double hL = wL[0]; double uL = wL[1]/wL[0]; double uL = wL[1] / wL[0]; double hR = wR[0]; double uR = wR[1]/wR[0]; double uR = wR[1] / wR[0]; double hs = 1e-10; int itermax = 10; for(int it = 0; it < itermax; it++){ double f = uL - (hs - hL) * Z(hs, hL) - uR - (hs - hR) * Z(hs, hR); double df = -(hs - hL) * dZ(hs, hL) - Z(hs, hL) - (hs - hR) * dZ(hs, hR) - Z(hs, hR); for (int it = 0; it < itermax; it++) { double f = uL - (hs - hL) * Z(hs, hL) - uR - (hs - hR) * Z(hs, hR); double df = -(hs - hL) * dZ(hs, hL) - Z(hs, hL) - (hs - hR) * dZ(hs, hR) - Z(hs, hR); double dhs = -f / df; hs += dhs; //printf("it=%d f=%e df=%e hs=%e dhs=%e\n",it,f,df,hs,dhs); // printf("it=%d f=%e df=%e hs=%e dhs=%e\n",it,f,df,hs,dhs); } double us = uL - (hs - hL) * Z(hs, hL); ... ... @@ -139,7 +120,7 @@ void riem_stvenant(double *wL, double v1m, v1p, v2m, v2p; // 1-onde if (hs < hL){ if (hs < hL) { v1m = uL - sqrt(g * hL); v1p = us - sqrt(g * hs); } else { ... ... @@ -151,7 +132,7 @@ void riem_stvenant(double *wL, } // 2 onde if (hs < hR){ if (hs < hR) { v2m = us + sqrt(g * hs); v2p = uR + sqrt(g * hR); } else { ... ... @@ -162,22 +143,22 @@ void riem_stvenant(double *wL, v2p = v2m; } //if (hL != hR) //printf("v=%f %f %f %f\n hs=%f us=%f\n", v1m,v1p,v2m,v2p, hs,us); // if (hL != hR) // printf("v=%f %f %f %f\n hs=%f us=%f\n", v1m,v1p,v2m,v2p, hs,us); if (xi < v1m) { w[0] = wL[0]; w[1] = wL[1]; } else if (xi < v1p){ double u = (uL + 2 * xi + 2 *sqrt(g * hL)) / 3; } else if (xi < v1p) { double u = (uL + 2 * xi + 2 * sqrt(g * hL)) / 3; double h = (u - xi) * (u - xi) / g; w[0] = h; w[1] = h * u; } else if (xi < v2m){ } else if (xi < v2m) { w[0] = hs; w[1] = hs * us; } else if (xi < v2p){ double u = (uR + 2 * xi - 2 *sqrt(g * hR)) / 3; } else if (xi < v2p) { double u = (uR + 2 * xi - 2 * sqrt(g * hR)) / 3; double h = (u - xi) * (u - xi) / g; w[0] = h; w[1] = h * u; ... ... @@ -185,152 +166,171 @@ void riem_stvenant(double *wL, w[0] = wR[0]; w[1] = wR[1]; } } double Heaviside(double x){ double Heaviside(double x) { if (x > 0) return 1; else return 0; } double Dirac(double x){ return 0; } double Dirac(double x) { return 0; } double Z(double hs, double h){ double Z(double hs, double h) { double t0 = 2.0*sqrt(g)/(sqrt(hs)+sqrt(h))*Heaviside(h-hs)+sqrt(2.0)*sqrt(g)*sqrt(h+hs)/sqrt(h*hs)/2.0-sqrt(2.0)*sqrt(g)*sqrt(h+hs)/sqrt(h*hs)*Heaviside(h-hs)/2.0; double t0 = 2.0 * sqrt(g) / (sqrt(hs) + sqrt(h)) * Heaviside(h - hs) + sqrt(2.0) * sqrt(g) * sqrt(h + hs) / sqrt(h * hs) / 2.0 - sqrt(2.0) * sqrt(g) * sqrt(h + hs) / sqrt(h * hs) * Heaviside(h - hs) / 2.0; return t0; return t0; } double dZ(double hs, double h){ double t0 = -sqrt(g)/pow(sqrt(hs)+sqrt(h),2.0f)*Heaviside(h-hs)/sqrt(hs)-2.0*sqrt (g)/(sqrt(hs)+sqrt(h))*Dirac(-h+hs)+sqrt(2.0)*sqrt(g)/sqrt(h+hs)/sqrt(h*hs)/4.0 -sqrt(2.0)*sqrt(g)*sqrt(h+hs)/sqrt(h*h*h*hs*hs*hs)*h/4.0-sqrt(2.0)*sqrt(g)/sqrt (h+hs)/sqrt(h*hs)*Heaviside(h-hs)/4.0+sqrt(2.0)*sqrt(g)*sqrt(h+hs)/sqrt(h*h*h* hs*hs*hs)*Heaviside(h-hs)*h/4.0+sqrt(2.0)*sqrt(g)*sqrt(h+hs)/sqrt(h*hs)*Dirac(- h+hs)/2.0; return t0; double dZ(double hs, double h) { double t0 = -sqrt(g) / pow(sqrt(hs) + sqrt(h), 2.0) * Heaviside(h - hs) / sqrt(hs) - 2.0 * sqrt(g) / (sqrt(hs) + sqrt(h)) * Dirac(-h + hs) + sqrt(2.0) * sqrt(g) / sqrt(h + hs) / sqrt(h * hs) / 4.0 - sqrt(2.0) * sqrt(g) * sqrt(h + hs) / sqrt(h * h * h * hs * hs * hs) * h / 4.0 - sqrt(2.0) * sqrt(g) / sqrt(h + hs) / sqrt(h * hs) * Heaviside(h - hs) / 4.0 + sqrt(2.0) * sqrt(g) * sqrt(h + hs) / sqrt(h * h * h * hs * hs * hs) * Heaviside(h - hs) * h / 4.0 + sqrt(2.0) * sqrt(g) * sqrt(h + hs) / sqrt(h * hs) * Dirac(-h + hs) / 2.0; return t0; } void exact_sol(double *xy, double t, double *w) { double h = 1; double x = xy[0]; // - 0.5f; double y = xy[1]; // - 0.5f; void exact_sol(float* xy, float t, float* w){ float h = 1; float x = xy[0];// - 0.5f; float y = xy[1];// - 0.5f; if (x < 0.75f && x > 0.25f && y < 0.75f && y > 0.25f) h = 2; //if (x * x + y * y < 0.2f * 0.2f) h = 2; if (x < 0.75f && x > 0.25f && y < 0.75f && y > 0.25f) h = 2; // if (x * x + y * y < 0.2f * 0.2f) h = 2; w[0] = h; w[1] = 0; w[2] = 0; } __kernel void init_sol(__global float *wn){ __kernel void init_sol(__global double *wn) { int id = get_global_id(0); int i = id % _NX; int j = id / _NX; int ngrid = _NX * _NY; float wnow[_M]; double wnow[_M]; double t = 0; double xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2}; float t = 0; float xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2}; exact_sol(xy, t, wnow); //printf("x=%f, y=%f \n",xy[0],xy[1]); // printf("x=%f, y=%f \n",xy[0],xy[1]); // load middle value for(int iv = 0; iv < _M; iv++){ for (int iv = 0; iv < _M; iv++) { int imem = i + j * _NX + iv * ngrid; wn[imem] = wnow[iv]; wn[imem] = wnow[iv]; // boundary values //if (i == 0 || i == _NX - 1 || j == 0 || j == _NY - 1) // if (i == 0 || i == _NX - 1 || j == 0 || j == _NY - 1) // wn[imem] = _WBORD; } } double minmod(double a, double b, double c) { double res = 0.0; if ((a > 0) && (b > 0) && (c > 0)) res = a < b ? (a < c ? a : c) : (b < c ? b : c); else if ((a < 0) && (b < 0) && (c < 0)) res = a > b ? (a > c ? a : c) : (b > c ? b : c); return res; } __kernel void muscl(__global const double *wn, __global double *dxwn, __global double *dywn, __global double *dtwn) { int id = get_global_id(0); int i = id % _NX; int j = id / _NX; int ngrid = _NX * _NY; // slope exists only for internal cells if (i > 0 && i < _NX - 1 && j > 0 && j < _NY - 1) { // etc... } } __kernel void time_step(__global float *wn, __global float *wnp1){ __kernel void time_step(__global const double *wn, __global double *wnp1) { int id = get_global_id(0); int i = id % _NX; int j = id / _NX; int ngrid = _NX * _NY; float wnow[_M]; float wnext[_M]; double wnow[_M]; double wnext[_M]; // load middle value for(int iv = 0; iv < _M; iv++){ for (int iv = 0; iv < _M; iv++) { int imem = i + j * _NX + iv * ngrid; wnow[iv] = wn[imem]; wnow[iv] = wn[imem]; // moralement: wn[iv][j][i] wnext[iv] = wnow[iv]; } } // only compute internal cells if (i > 0 && i < _NX - 1 && j > 0 && j < _NY - 1){ float flux[_M]; if (i > 0 && i < _NX - 1 && j > 0 && j < _NY - 1) { double flux[_M]; // loop on all directions: // idir = 0 (east) // idir = 1 (west) // idir = 2 (north) // idir = 3 (south) for(int idir = 0; idir < 4; idir++){ float vn[2]; for (int idir = 0; idir < 4; idir++) { double vn[2]; vn[0] = dir[idir][0]; vn[1] = dir[idir][1]; int iR = i + dir[idir][0]; int jR = j + dir[idir][1]; // load neighbour values float wR[_M]; for(int iv = 0; iv < _M; iv++){ int imem = iv * ngrid + iR + jR * _NX; wR[iv] = wn[imem]; double wR[_M]; for (int iv = 0; iv < _M; iv++) { int imem = iv * ngrid + iR + jR * _NX; wR[iv] = wn[imem]; } if (iR == 0 || iR == _NX - 1 || jR == 0 || jR == _NY - 1){ wR[0] = wnow[0]; float qn = wnow[1] * vn[0] + wnow[2] * vn[1]; wR[1] = wnow[1] - 2 * qn * vn[0]; wR[2] = wnow[2] - 2 * qn * vn[1]; if (iR == 0 || iR == _NX - 1 || jR == 0 || jR == _NY - 1) { wR[0] = wnow[0]; double qn = wnow[1] * vn[0] + wnow[2] * vn[1]; wR[1] = wnow[1] - 2 * qn * vn[0]; wR[2] = wnow[2] - 2 * qn * vn[1]; } //flux_riem_2d(wnow, wR, vn, flux); fluxnum(wnow, wR, vn, flux); // time evolution for(int iv = 0; iv < _M; iv++){ wnext[iv] -= _DT * ds[idir] / _VOL * flux[iv]; } for (int iv = 0; iv < _M; iv++) { wnext[iv] -= _DT * ds[idir] / _VOL * flux[iv]; } } } // end test for internal cells } // end test for internal cells // copy wnext into global memory // (including boundary cells) for(int iv = 0; iv < _M; iv++){ for (int iv = 0; iv < _M; iv++) { int imem = iv * ngrid + i + j * _NX; wnp1[imem] = wnext[iv]; } }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!