Commit c6abce65 authored by ph's avatar ph

add transport solver

parent d27204ac
......@@ -6,24 +6,24 @@ import pyopencl as cl
import numpy as np
import matplotlib.pyplot as plt
# number of conservative variables
m = 1
import time
# boundary
wbord = 0.
##################" definition of default values
# number of conservative variables
_m = 1
# boundary value
_wbord = 0.
# grid size
nx = 512
ny = 512
_nx = 512
_ny = 512
Lx = 1.
Ly = 1.
# number of animation frames
nbplots = 10
dx = Lx / nx
dy = Ly / ny
_dx = Lx / _nx
_dy = Ly / _ny
# transport velocity
vel = np.array([1., 1.])
......@@ -31,17 +31,11 @@ vel = np.array([1., 1.])
vmax = np.sqrt(vel[0]**2+vel[1]**2)
# time stepping
Tmax = 0.5 / vmax
_Tmax = 0.5 / vmax
cfl = 0.8
dt = cfl * (dx * dy) / (2 * dx + 2 * dy) / vmax
itermax = int(np.floor(Tmax / dt))
iterplot = int(itermax / nbplots)
print("itermax=",itermax, " iterplot=",iterplot, "tmax=",itermax*dt)
#exit()
_dt = cfl * (_dx * _dy) / (2 * _dx + 2 * _dy) / vmax
############# end of default values
def fluxnum(wL, wR, vnorm):
......@@ -60,10 +54,12 @@ def exact_sol(xy, t):
w = np.exp(-30*d2)
return w
def solve_python(wn):
def solve_python(m = _m, nx = _nx, ny = _ny, Tmax = _Tmax, dx = _dx, dy = _dy,
dt = _dt, exact_sol = exact_sol, wbord = _wbord,
fluxnum = fluxnum):
#init
t = 0.
wn[:] = wbord
wn = np.full((nx,ny), wbord, dtype = 'float32')
for i in range(1,nx-1):
for j in range(1,ny-1):
xy=[i*dx+dx/2,j*dy+dy/2]
......@@ -73,10 +69,11 @@ def solve_python(wn):
wnp1 = np.zeros((nx,ny), dtype='float32')
np.copyto(wnp1, wn)
iter = 0
elapsed = 0
while t < Tmax:
t = t+dt
iter += 1
print("iter=",iter, " t=",t)
start = time.time()
for i in range(1,nx-1):
for j in range(1,ny-1):
wnp1[i,j] -= dt / dx * fluxnum(wn[i,j],wn[i+1,j],np.array([1,0]))
......@@ -85,93 +82,104 @@ def solve_python(wn):
wnp1[i,j] -= dt / dy * fluxnum(wn[i,j],wn[i,j-1],np.array([0,-1]))
np.copyto(wn, wnp1)
end = time.time()
elapsed += end - start
print("iter=",iter, " t=",t, "elapsed (s) =", elapsed)
return wn
def solve_ocl(m = _m, nx = _nx, ny = _ny, Tmax = _Tmax, dx = _dx, dy = _dy,
dt = _dt, exact_sol = exact_sol, wbord = _wbord,
fluxnum = fluxnum, animate = False):
source = open("transport_kernels.cl", "r").read()
source = source.replace("_nx_", "("+str(nx)+")")
source = source.replace("_ny_", "("+str(ny)+")")
source = source.replace("_dx_", "("+str(dx)+"f)")
source = source.replace("_dy_", "("+str(dy)+"f)")
source = source.replace("_dt_", "("+str(dt)+"f)")
source = source.replace("_m_", "("+str(m)+")")
source = source.replace("_wbord_", "("+str(wbord)+"f)")
source = source.replace("_vx_", "("+str(vel[0])+"f)")
source = source.replace("_vy_", "("+str(vel[1])+"f)")
# load and adjust C program
source = open("transport_kernels.cl", "r").read()
source = source.replace("_nx_", "("+str(nx)+")")
source = source.replace("_ny_", "("+str(ny)+")")
source = source.replace("_dx_", "("+str(dx)+"f)")
source = source.replace("_dy_", "("+str(dy)+"f)")
source = source.replace("_dt_", "("+str(dt)+"f)")
source = source.replace("_m_", "("+str(m)+")")
source = source.replace("_wbord_", "("+str(wbord)+"f)")
source = source.replace("_vx_", "("+str(vel[0])+"f)")
source = source.replace("_vy_", "("+str(vel[1])+"f)")
#print(source)
#print(source)
#exit(0)
#exit(0)
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags
# OpenCL init
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
mf = cl.mem_flags
# taille = 2**8
#
# x_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=(taille * np.dtype('float32').itemsize))
# y_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=(taille * np.dtype('float32').itemsize))
#
# z_cpu = np.empty((taille, ), dtype = np.float32)
# z_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, z_cpu.nbytes)
#
# verif_cpu = np.fromfunction(lambda i: i * i + i, (taille, ), dtype = np.float32)
# create OpenCL buffers
wn_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(nx * ny * np.dtype('float32').itemsize))
wnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(nx * ny * np.dtype('float32').itemsize))
wn_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(nx * ny * np.dtype('float32').itemsize))
wn_cpu = np.empty((nx * ny, ), dtype = np.float32)
wnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(nx * ny * np.dtype('float32').itemsize))
prg = cl.Program(ctx, source).build()
prg = cl.Program(ctx, source).build()
# init data
event = prg.init_sol(queue, (nx * ny, ), (32, ), wn_gpu)
event.wait()
# init data
event = prg.init_sol(queue, (nx * ny, ), (32, ), wn_gpu).wait()
# number of animation frames
nbplots = 10
# time loop on cpu
itermax = int(np.floor(Tmax / dt))
iterplot = int(itermax / nbplots)
# time loop on gpu
t = 0
iter = 0
elapsed = 0.;
wn_cpu = np.empty((nx * ny, ), dtype = np.float32)
# time loop on gpu
t = 0
iter = 0
while t < Tmax:
while t < Tmax:
t = t + dt
iter = iter + 1
prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu).wait()
event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu)
#event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu, wait_for = [event])
event.wait()
elapsed += 1e-9 * (event.profile.end - event.profile.start)
# exchange buffer references for avoiding a copy
wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu
print("iter=",iter, " t=",t)
if iter % iterplot == 0:
print("iter=",iter, " t=",t, "elapsed (s)=",elapsed)
if iter % iterplot == 0 and animate:
cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait()
wplot = np.reshape(wn_cpu, (nx, ny))
plt.clf()
plt.imshow(wplot,vmin=0, vmax=1)
plt.gca().invert_yaxis()
plt.colorbar()
plt.pause(0.01)
# copy OpenCL data to CPU and return the results
cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait()
# prg.K2(queue, (taille, ), None, y_gpu).wait()
# prg.K3(queue, (taille, ), None , x_gpu, y_gpu, z_gpu, wait_for = [event]).wait()
cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait()
wplot = np.reshape(wn_cpu,(nx, ny))
wplot_gpu = np.reshape(wn_cpu,(nx, ny))
return wplot_gpu
# gpu solve
wplot_gpu = solve_ocl()
plt.clf()
plt.imshow(wplot,vmin=0, vmax=1)
plt.imshow(wplot_gpu, vmin=0, vmax=1)
plt.gca().invert_yaxis()
plt.colorbar()
plt.show()
wplot_gpu = wplot.copy()
solve_python(wplot)
# cpu solve
wplot_cpu = solve_python()
plt.clf()
plt.imshow(wplot,vmin=0, vmax=1)
plt.imshow(wplot_cpu,vmin=0, vmax=1)
plt.gca().invert_yaxis()
plt.colorbar()
plt.show()
# check difference
plt.clf()
plt.imshow(wplot-wplot_gpu)
plt.imshow(wplot_cpu-wplot_gpu)
plt.gca().invert_yaxis()
plt.colorbar()
plt.show()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment