Commit c3e2d2d5 authored by coulette's avatar coulette
Browse files

up small tests

parent 7c266d3e
......@@ -191,7 +191,7 @@ if __name__ == '__main__':
# gpu solve
wplot_gpu = solve_ocl(**vars(args))
#print(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0))
print(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0))
plt.clf()
#plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0), vmin=0, vmax=1)
......
......@@ -19,7 +19,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%%bash\n",
......@@ -29,7 +31,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%env XAUTHORITY=/tmp/x11-auth-file\n",
......@@ -63,7 +67,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%env PYOPENCL_CTX=0:1"
......@@ -80,6 +86,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": false
},
"outputs": [],
......@@ -111,7 +118,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"Tmax = 0.1\n",
......@@ -130,7 +139,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%env PYOPENCL_CTX=0:4\n",
......@@ -140,7 +151,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"print(\"GPU [s]: {:f}\".format(result_gpu.best))\n",
......@@ -164,7 +177,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
"version": "3.6.3"
}
},
"nbformat": 4,
......
#define _NC _nc_
#define _NVS _nvs_
#define _NV _nv_
#define _NX _nx_
#define _NY _ny_
#define _DX _dx_
#define _DY _dy_
#define _VMAX _vmax_
#define _T _t_
#define real _real_
#define _NGRID (_NX * _NY)
__constant real vi[_NV][2]= _vi_;
__constant real v0[2] = {1.0_F, 0.0_F};
inline int get_memindex(int ix, int iy, int iv){
int index = 0;
index = iv * (_NGRID) + _NX * iy + ix;
return index;
}
inline int get_ivindex(int ivloc,int ivar){
int index = 0;
index = ivar * _NVS + ivloc;
return index;
}
void flux_phy( real w[_NC], real flux[_NC][2]){
for (int k = 0; k < _NC; k++){
flux[k][0] = v0[0] * w[k];
flux[k][1] = v0[1] * w[k];
}
}
void feq_D2Q4( real w[_NC], real f[_NV]){
real flux[_NC][2];
flux_phy(w,flux);
for (int k = 0; k < _NC ; k++){
for (int ivloc = 0; ivloc < _NVS ; ivloc++){
int iv = get_ivindex(ivloc,k);
f[iv] = 0.25_F * w[k] + (vi[ivloc][0] * flux[k][0] + vi[ivloc][1] * flux[k][1]) / (2.0 * _VMAX);
}
}
}
void analytical_sol(real t,real xin[2],real w[_NC]){
real x[2] = {xin[0] - 0.5_F - v0[0] * t, xin[1] - 0.5_F - v0[1] * t};
for (int k = 0; k < _NC; k++){
w[k] = exp (-100.0_F * (x[0] * x[0] + x[1] * x[1]));
}
}
__kernel void exact_sol(__global real *fn){
int id = get_global_id(0);
int i = id % _NX;
int j = id / _NX;
real x[2] = { i * _DX , j * _DY };
real w[_NC];
analytical_sol(_T,x,w);
real fnow[_NV];
feq_D2Q4(w,fnow);
for (int iv = 0; iv < _NV; iv++){
int imem = get_memindex(i,j,iv);
fn[imem] = fnow[iv];
}
}
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function
import pyopencl as cl
import numpy as np
#import matplotlib.pyplot as plt
import time
#define _NC _nc_
#define _NVS _nvs_
#define _NV _nv_
#define _NX _nx_
#define _NY _ny_
#define _RELCOEFF _relcoeff_
#define _VMAX _vmax_
#define real _real_
#define _NGRID (_NX * _NY)
#define _COMPRELCOEFF (1._F - _RELCOEFF)
__constant real vi[_NV][2]= _vi_;
__constant real v0[2] = {1.0_F, 0.0_F};
inline int get_memindex(int ix, int iy, int iv){
int index = 0;
index = iv * (_NGRID) + _NX * iy + ix;
return index;
}
inline int get_ivindex(int ivloc,int ivar){
int index = 0;
index = ivar * _NVS + ivloc;
return index;
}
void flux_phy( real w[_NC], real flux[_NC][2]){
for (int k = 0; k < _NC; k++){
flux[k][0] = v0[0] * w[k];
flux[k][1] = v0[1] * w[k];
}
}
void micro_to_macro( real f[_NV],real w[_NC]){
for (int k = 0; k < _NC; k++){
w[k] = 0._F;
for (int ivloc = 0; ivloc < _NVS; ivloc++){
w[k] += f[ k * _NVS + ivloc];
}
}
}
void feq_D2Q4( real w[_NC], real f[_NV]){
real flux[_NC][2];
flux_phy(w,flux);
for (int k = 0; k < _NC ; k++){
for (int ivloc = 0; ivloc < _NVS ; ivloc++){
int iv = get_ivindex(ivloc,k);
f[iv] = 0.25_F * w[k] + (vi[ivloc][0] * flux[k][0] + vi[ivloc][1] * flux[k][1]) / (2.0 * _VMAX);
}
}
}
__kernel void relax_bgk(__global real *fn, __global real *fnp1){
int id = get_global_id(0);
int i = id % _NX;
int j = id / _NX;
// get f
real fnow[_NV];
for (int iv = 0; iv < _NV; iv++){
int imem = get_memindex(i,j,iv);
fnow[iv] = fn[imem];
}
real wnow[_NC];
real feq[_NV];
feq_D2Q4(wnow,feq);
for (int iv = 0; iv < _NV ; iv++){
fnow[iv] = _RELCOEFF * feq[iv] + _COMPRELCOEFF * fnow[iv];
}
for (int iv = 0; iv < _NV; iv++){
int imem = get_memindex(i,j,iv);
fnp1[imem] = fnow[iv];
}
}
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function
import pyopencl as cl
import numpy as np
import matplotlib.pyplot as plt
import time
###########################################
# default values
#number of conservative variables
_nc = 1
# number of velocities per scalar quantity
_nvs = 4
_nv = _nc * _nvs
# discrete grid dimensions
_nx = 256
_ny = 256
# domain dimensions
Lx = 1
Ly = 1
# step sizes
_dx =Lx / _nx
_dy =Ly / _ny
hmin = min(_dx,_dy)
# velocity set
_vi = np.array([[1,0],[0,1],[-1,0],[0,-1]])
vmax = 2.0
#
_interp_d = 8
#
_cfl = 1.0
_dt = _cfl * hmin / vmax
#
_nt = 5000
##########################################
def pyarray2_to_C(arr,is_float =False):
ret = "{ "
for i in range(arr.shape[0]):
ret = ret +"{ "
for j in range(arr.shape[1]):
ret = ret + str(arr[i][j])
if (is_float):
ret = ret + '_F'
if (j < arr.shape[1]-1):
ret = ret + " , "
ret = ret + "}"
if (i < arr.shape[0]-1):
ret = ret + "," + "\n"
ret = ret + '}'
return ret
def lagragian_coeffs_old(x,d,dtype= np.float64):
res = np.zeros((2*d+2,),dtype =dtype)
if (0 == d):
res[0] = 1.0-x
res[1] = x
return res
a = 1.0
for i in range(2,d+2):
a = a * (x * x - i * i) / (d * d )
a = a * (x + 1.0) / d
a = a * (x - d - 1.0) / d
res[d] = a * (x-1.0) / d
res[d+1] = a * x / d
#
a = a * x *(x-1) / (d * d)
#
for i in range(-d,0):
res[i+d] = a / ( (x-i) / d )
for i in range(2,d+2):
res[i+d] = a / ( (x-i) / d )
#
a = 1.0
for i in range(-d, d+2):
res[i+d] = res[i+d] * a
a = a * d / ( d + i + 1.0 )
a = 1.0
for i in range(d+1, -d-1, -1):
res[i+d] = res[i+d] * a
a = a * d / ( i - d - 2.0)
return res
def lagragian_coeffs(x,d,dtype = np.float64):
res = np.zeros((2*d+2,),dtype =dtype)
base_x = -d + np.array(range(2*d+2))
for i in range(2*d +2):
res[i] = 1.0
xi = base_x[i]
for j in range(2*d+2):
if (i != j):
res[i] = res[i] * (x-base_x[j]) / (xi - base_x[j])
return res
def lagrange_interpol(d,xvals,fvals,istencil,alpha):
#coeff = lagragian_coeffs(alpha,d)
coeff = lagragian_coeffs(alpha,d)
print(coeff)
nf = fvals.shape[0]
res = 0.0
tmp_int = np.zeros((2 *d +2,),dtype= np.integer)
for i in range(2 * d +2):
ip = (istencil-d+i + nf) % nf
tmp_int[i] = ip
res = res + coeff[i] * fvals[ip]
return res
def lagrange_interpol_test(d):
npoint = 32
xmin = 0.0
xmax = 1.0
x = np.linspace(xmin,xmax,npoint)
h = x[1] - x[0]
print(" h "+str(h))
#f = 3.0 * x*x*x + 2.0 *x * x + 1.0
f = np. cos(10.0 *np.pi * x)
npoint2 = 5 * (npoint-1) +1
x2 = np.linspace(xmin,xmax,npoint2)
h2 = x2[1] -x2[0]
print(" h2 "+str(h2))
#f2exact = 3.0 * x2*x2*x2 + 2.0 *x2 * x2 + 1.0
f2exact = np.cos( 10.0 * np.pi * x2)
finterp = np.zeros((npoint2,),dtype= np.float64)
for i in range(npoint2):
xtest = x2[i] / h
itest = int(np.floor(xtest))
alphatest = xtest-itest
itest = (itest + npoint) % npoint
finterp[i]= lagrange_interpol(d,x,f,itest,alphatest)
fig = plt.figure()
plt.plot(x,f,'o',label='low exact')
plt.plot(x2,f2exact,'-',label='high exact')
plt.plot(x2,finterp,'+',label='interp')
plt.legend()
plt.show()
plt.close(fig)
def generate_interpol_array(locshift,idim,d):
coeff1 = np.zeros((locshift.shape[0],2*d+2),dtype= np.float64)
for i in range(locshift.shape[0]):
coeff1[i][:] = lagragian_coeffs(locshift[i][idim],d)
return coeff1
def generate_sl_transport_source(nc,nvs,nv,nx,ny,dx,dy,dt,vi,interp_d,precision):
shifts_full = np.zeros(vi.shape,dtype = np.float64)
vrel = np.array([dx, dt / dy])
for iv in range(nv):
shifts_full[iv][:] = vi[iv][:] * np.array([ dt / dx, dt / dy ])
nstencil = 2 * interp_d +2
shift_offsets = (np.floor(shifts_full)).astype(int)
shift_loc = shifts_full-shift_offsets
lag_coeffs_x = generate_interpol_array(shift_loc,0,interp_d)
lag_coeffs_y = generate_interpol_array(shift_loc,1,interp_d)
#
offset_str = pyarray2_to_C(shift_offsets)
locshift_str = pyarray2_to_C(shift_loc)
lag_coeffs_x_str = pyarray2_to_C(lag_coeffs_x)
lag_coeffs_y_str = pyarray2_to_C(lag_coeffs_y)
#
ff = "_F"
source = open("transportsl_kernels.cl", "r").read()
source = source.replace("_nc_", "("+str(nc)+")")
source = source.replace("_nx_", "("+str(nx)+")")
source = source.replace("_ny_", "("+str(ny)+")")
source = source.replace("_dx_", "("+str(dx)+ ff + ")")
source = source.replace("_dy_", "("+str(dy)+ ff + ")")
source = source.replace("_nvs_", "("+str(nvs)+")")
source = source.replace("_nv_", "("+str(nv)+")")
source = source.replace("_offset_", offset_str)
source = source.replace("_locshift_", locshift_str)
source = source.replace("_interp_d_", "("+str(interp_d)+")")
source = source.replace("_lag_coeffs_x_",lag_coeffs_x_str)
source = source.replace("_lag_coeffs_y_",lag_coeffs_y_str)
if precision == "double":
source = source.replace("_F", "")
source = source.replace("_real_", "double")
else:
print("prec:", precision)
source = source.replace("_F", "f")
source = source.replace("_real_", "float")
print(source)
return source
def generate_relax_bgk_source(nc,nvs,nv,nx,ny,dt,vi,vmax,tau,theta,precision):
vi_str = pyarray2_to_C(vi.astype(np.float64))
# relaxation coefficient
if (0 == tau and 0 == theta):
print(" Singular relaxation not possible with full explicit method // aborting")
exit()
relcoeff = dt / ( tau + dt * theta)
#
print( "Relaxatio coefficient " +str(relcoeff))
ff = "_F"
source = open("relax_kernels.cl", "r").read()
source = source.replace("_nc_", "("+str(nc)+")")
source = source.replace("_nx_", "("+str(nx)+")")
source = source.replace("_ny_", "("+str(ny)+")")
source = source.replace("_nvs_", "("+str(nvs)+")")
source = source.replace("_nv_", "("+str(nv)+")")
source = source.replace("_vmax_","("+str(vmax)+ff+")")
source = source.replace("_relcoeff_","("+str(relcoeff) + ff +")")
source = source.replace("_vi_", vi_str)
if precision == "double":
source = source.replace("_F", "")
source = source.replace("_real_", "double")
else:
print("prec:", precision)
source = source.replace("_F", "f")
source = source.replace("_real_", "float")
return source
def generate_exactsol_source(nc,nvs,nv,nx,ny,dx,dy,dt,vi,vmax,t,precision):
vi_str = pyarray2_to_C(vi.astype(np.float64))
# relaxation coefficient
ff = "_F"
source = open("exactsol_kernels.cl", "r").read()
source = source.replace("_nc_", "("+str(nc)+")")
source = source.replace("_nx_", "("+str(nx)+")")
source = source.replace("_ny_", "("+str(ny)+")")
source = source.replace("_dx_", "("+str(dx)+ ff + ")")
source = source.replace("_dy_", "("+str(dy)+ ff + ")")
source = source.replace("_nvs_", "("+str(nvs)+")")
source = source.replace("_nv_", "("+str(nv)+")")
source = source.replace("_vmax_","("+str(vmax)+ff+")")
source = source.replace("_t_","("+str(t)+ff+")")
source = source.replace("_vi_", vi_str)
if precision == "double":
source = source.replace("_F", "")
source = source.replace("_real_", "double")
else:
print("prec:", precision)
source = source.replace("_F", "f")
source = source.replace("_real_", "float")
return source
def plotwn(wn_cpu,it,t,nc,nvs,plot_macro=True,plot_micro=False):
title_str = r' it = '+str(it) + r' t = '+ str(t)
if (plot_micro):
for ic in range(nc):
for iv in range(nvs):
fig=plt.figure()
plt.title(title_str + "ic = " +str(ic) + "iv = "+str(iv))
plt.imshow(wn_cpu[iv,ic,:,:], title)
plt.colorbar()
plt.show()
plt.close(fig)
# macro plots
# if (plot_macro):
# for ic in range(nc):
# fig=plt.figure()
# plt.title(title_str + "ic = " +str(ic))
# plt.imshow(np.sum(wn_cpu[:,ic,:,:],axis = 0))
# plt.colorbar()
# plt.show()
# plt.close(fig)
if (plot_macro):
for ic in range(nc):
fig=plt.figure()
plt.title(title_str + "ic = " +str(ic))
xvals = np.linspace(0.0,1.0,wn_cpu.shape[2]);
iy = int((wn_cpu.shape[3]) / 2)
fvals = np.sum(wn_cpu[:,ic,:,iy],axis = 0)
plt.plot(xvals,fvals)
plt.show()
plt.close(fig)
def solve_transport_ocl(nc = _nc,nvs = _nvs,nv = _nv, nx = _nx, ny = _ny, dx = _dx, dy = _dy, dt = _dt, vi = _vi, nt = _nt, interp_d = _interp_d,precision = "single"):
if precision == "double":
np_real = 'float64'
dtype = np.float64
else:
np_real = 'float32'
dtype = np.float32
#
# plot options