Commit c6abce65 authored by ph's avatar ph

add transport solver

parent d27204ac
...@@ -6,24 +6,24 @@ import pyopencl as cl ...@@ -6,24 +6,24 @@ import pyopencl as cl
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
# number of conservative variables import time
m = 1
# boundary ##################" definition of default values
wbord = 0. # number of conservative variables
_m = 1
# boundary value
_wbord = 0.
# grid size # grid size
nx = 512 _nx = 512
ny = 512 _ny = 512
Lx = 1. Lx = 1.
Ly = 1. Ly = 1.
# number of animation frames _dx = Lx / _nx
nbplots = 10 _dy = Ly / _ny
dx = Lx / nx
dy = Ly / ny
# transport velocity # transport velocity
vel = np.array([1., 1.]) vel = np.array([1., 1.])
...@@ -31,17 +31,11 @@ vel = np.array([1., 1.]) ...@@ -31,17 +31,11 @@ vel = np.array([1., 1.])
vmax = np.sqrt(vel[0]**2+vel[1]**2) vmax = np.sqrt(vel[0]**2+vel[1]**2)
# time stepping # time stepping
Tmax = 0.5 / vmax _Tmax = 0.5 / vmax
cfl = 0.8 cfl = 0.8
_dt = cfl * (_dx * _dy) / (2 * _dx + 2 * _dy) / vmax
dt = cfl * (dx * dy) / (2 * dx + 2 * dy) / vmax ############# end of default values
itermax = int(np.floor(Tmax / dt))
iterplot = int(itermax / nbplots)
print("itermax=",itermax, " iterplot=",iterplot, "tmax=",itermax*dt)
#exit()
def fluxnum(wL, wR, vnorm): def fluxnum(wL, wR, vnorm):
...@@ -60,10 +54,12 @@ def exact_sol(xy, t): ...@@ -60,10 +54,12 @@ def exact_sol(xy, t):
w = np.exp(-30*d2) w = np.exp(-30*d2)
return w return w
def solve_python(wn): def solve_python(m = _m, nx = _nx, ny = _ny, Tmax = _Tmax, dx = _dx, dy = _dy,
dt = _dt, exact_sol = exact_sol, wbord = _wbord,
fluxnum = fluxnum):
#init #init
t = 0. t = 0.
wn[:] = wbord wn = np.full((nx,ny), wbord, dtype = 'float32')
for i in range(1,nx-1): for i in range(1,nx-1):
for j in range(1,ny-1): for j in range(1,ny-1):
xy=[i*dx+dx/2,j*dy+dy/2] xy=[i*dx+dx/2,j*dy+dy/2]
...@@ -73,10 +69,11 @@ def solve_python(wn): ...@@ -73,10 +69,11 @@ def solve_python(wn):
wnp1 = np.zeros((nx,ny), dtype='float32') wnp1 = np.zeros((nx,ny), dtype='float32')
np.copyto(wnp1, wn) np.copyto(wnp1, wn)
iter = 0 iter = 0
elapsed = 0
while t < Tmax: while t < Tmax:
t = t+dt t = t+dt
iter += 1 iter += 1
print("iter=",iter, " t=",t) start = time.time()
for i in range(1,nx-1): for i in range(1,nx-1):
for j in range(1,ny-1): for j in range(1,ny-1):
wnp1[i,j] -= dt / dx * fluxnum(wn[i,j],wn[i+1,j],np.array([1,0])) wnp1[i,j] -= dt / dx * fluxnum(wn[i,j],wn[i+1,j],np.array([1,0]))
...@@ -85,93 +82,104 @@ def solve_python(wn): ...@@ -85,93 +82,104 @@ def solve_python(wn):
wnp1[i,j] -= dt / dy * fluxnum(wn[i,j],wn[i,j-1],np.array([0,-1])) wnp1[i,j] -= dt / dy * fluxnum(wn[i,j],wn[i,j-1],np.array([0,-1]))
np.copyto(wn, wnp1) np.copyto(wn, wnp1)
end = time.time()
elapsed += end - start
print("iter=",iter, " t=",t, "elapsed (s) =", elapsed)
return wn
def solve_ocl(m = _m, nx = _nx, ny = _ny, Tmax = _Tmax, dx = _dx, dy = _dy,
dt = _dt, exact_sol = exact_sol, wbord = _wbord,
fluxnum = fluxnum, animate = False):
source = open("transport_kernels.cl", "r").read() # load and adjust C program
source = open("transport_kernels.cl", "r").read()
source = source.replace("_nx_", "("+str(nx)+")") source = source.replace("_nx_", "("+str(nx)+")")
source = source.replace("_ny_", "("+str(ny)+")") source = source.replace("_ny_", "("+str(ny)+")")
source = source.replace("_dx_", "("+str(dx)+"f)") source = source.replace("_dx_", "("+str(dx)+"f)")
source = source.replace("_dy_", "("+str(dy)+"f)") source = source.replace("_dy_", "("+str(dy)+"f)")
source = source.replace("_dt_", "("+str(dt)+"f)") source = source.replace("_dt_", "("+str(dt)+"f)")
source = source.replace("_m_", "("+str(m)+")") source = source.replace("_m_", "("+str(m)+")")
source = source.replace("_wbord_", "("+str(wbord)+"f)") source = source.replace("_wbord_", "("+str(wbord)+"f)")
source = source.replace("_vx_", "("+str(vel[0])+"f)") source = source.replace("_vx_", "("+str(vel[0])+"f)")
source = source.replace("_vy_", "("+str(vel[1])+"f)") source = source.replace("_vy_", "("+str(vel[1])+"f)")
#print(source)
#exit(0) #print(source)
ctx = cl.create_some_context() #exit(0)
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags
# taille = 2**8 # OpenCL init
# ctx = cl.create_some_context()
# x_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=(taille * np.dtype('float32').itemsize)) queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
# y_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=(taille * np.dtype('float32').itemsize)) mf = cl.mem_flags
#
# z_cpu = np.empty((taille, ), dtype = np.float32)
# z_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, z_cpu.nbytes)
#
# verif_cpu = np.fromfunction(lambda i: i * i + i, (taille, ), dtype = np.float32)
wn_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(nx * ny * np.dtype('float32').itemsize)) # create OpenCL buffers
wn_cpu = np.empty((nx * ny, ), dtype = np.float32) wn_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(nx * ny * np.dtype('float32').itemsize))
wnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(nx * ny * np.dtype('float32').itemsize)) wnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(nx * ny * np.dtype('float32').itemsize))
prg = cl.Program(ctx, source).build() prg = cl.Program(ctx, source).build()
# init data # init data
event = prg.init_sol(queue, (nx * ny, ), (32, ), wn_gpu).wait() event = prg.init_sol(queue, (nx * ny, ), (32, ), wn_gpu)
event.wait()
# time loop on cpu # number of animation frames
nbplots = 10
itermax = int(np.floor(Tmax / dt))
iterplot = int(itermax / nbplots)
# time loop on gpu # time loop on gpu
t = 0 t = 0
iter = 0 iter = 0
while t < Tmax: elapsed = 0.;
t = t + dt wn_cpu = np.empty((nx * ny, ), dtype = np.float32)
iter = iter + 1
prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu).wait()
# exchange buffer references for avoiding a copy
wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu
print("iter=",iter, " t=",t)
if iter % iterplot == 0:
cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait()
wplot = np.reshape(wn_cpu, (nx, ny))
plt.clf()
plt.imshow(wplot,vmin=0, vmax=1)
plt.colorbar()
plt.pause(0.01)
# prg.K2(queue, (taille, ), None, y_gpu).wait()
# prg.K3(queue, (taille, ), None , x_gpu, y_gpu, z_gpu, wait_for = [event]).wait()
cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait()
wplot = np.reshape(wn_cpu,(nx, ny))
while t < Tmax:
t = t + dt
iter = iter + 1
event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu)
#event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu, wait_for = [event])
event.wait()
elapsed += 1e-9 * (event.profile.end - event.profile.start)
# exchange buffer references for avoiding a copy
wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu
print("iter=",iter, " t=",t, "elapsed (s)=",elapsed)
if iter % iterplot == 0 and animate:
cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait()
wplot = np.reshape(wn_cpu, (nx, ny))
plt.clf()
plt.imshow(wplot,vmin=0, vmax=1)
plt.gca().invert_yaxis()
plt.colorbar()
plt.pause(0.01)
# copy OpenCL data to CPU and return the results
cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait()
wplot_gpu = np.reshape(wn_cpu,(nx, ny))
return wplot_gpu
# gpu solve
wplot_gpu = solve_ocl()
plt.clf() plt.clf()
plt.imshow(wplot,vmin=0, vmax=1) plt.imshow(wplot_gpu, vmin=0, vmax=1)
plt.gca().invert_yaxis()
plt.colorbar() plt.colorbar()
plt.show() plt.show()
wplot_gpu = wplot.copy() # cpu solve
wplot_cpu = solve_python()
solve_python(wplot)
plt.clf() plt.clf()
plt.imshow(wplot,vmin=0, vmax=1) plt.imshow(wplot_cpu,vmin=0, vmax=1)
plt.gca().invert_yaxis()
plt.colorbar() plt.colorbar()
plt.show() plt.show()
# check difference
plt.clf() plt.clf()
plt.imshow(wplot-wplot_gpu) plt.imshow(wplot_cpu-wplot_gpu)
plt.gca().invert_yaxis()
plt.colorbar() plt.colorbar()
plt.show() plt.show()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment