Commit b465350d authored by Philippe Helluy's avatar Philippe Helluy

bug in 1d transport

parent f240b67c
This diff is collapsed.
...@@ -40,9 +40,9 @@ verif_cpu = np.fromfunction(lambda i: i * i + i, (taille, ), dtype = np.float32) ...@@ -40,9 +40,9 @@ verif_cpu = np.fromfunction(lambda i: i * i + i, (taille, ), dtype = np.float32)
prg = cl.Program(ctx, source).build() prg = cl.Program(ctx, source).build()
prg.K1(queue, (taille, ), None, x_gpu).wait() event = prg.K1(queue, (taille, ), None, x_gpu)
prg.K2(queue, (taille, ), None, y_gpu).wait() prg.K2(queue, (taille, ), None, y_gpu).wait()
prg.K3(queue, (taille, ), None, x_gpu, y_gpu, z_gpu).wait() prg.K3(queue, (taille, ), None , x_gpu, y_gpu, z_gpu, wait_for = [event]).wait()
cl.enqueue_copy(queue, z_cpu, z_gpu) cl.enqueue_copy(queue, z_cpu, z_gpu)
......
...@@ -5,23 +5,31 @@ from __future__ import absolute_import, print_function ...@@ -5,23 +5,31 @@ from __future__ import absolute_import, print_function
import pyopencl as cl import pyopencl as cl
import numpy as np import numpy as np
with open('kernels.cl', 'r') as f: with open('kernels.cl', 'r') as f:
source = f.read() source = f.read()
ctx = cl.create_some_context()
m = 2
raf = 1
deg = 1
source = source.replace("THE_DEG", str(deg))
source = source.replace("THE_RAF", str(raf))
source = source.replace("THE_M", str(m))
ctx = cl.create_some_context()
prg = cl.Program(ctx, source).build() prg = cl.Program(ctx, source).build()
point = [ ( 0.0, 0.0, 0.0), point_cpu = np.array([0.0, 0.0, 0.0,
( 1.0, 0.0, 0.0), 1.0, 0.0, 0.0,
( 1.0, 1.0, 0.0), 1.0, 1.0, 0.0,
( 0.0, 1.0, 0.0), 0.0, 1.0, 0.0,
( 0.0, 0.0, 1.0), 0.0, 0.0, 1.0,
( 1.0, 0.0, 1.0), 1.0, 0.0, 1.0,
( 1.0, 1.0, 1.0), 1.0, 1.0, 1.0,
( 0.0, 1.0, 1.0)] 0.0, 1.0, 1.0], dtype = np.float32)
element = [0,1,2,3,4,5,6,7] element = [0,1,2,3,4,5,6,7]
...@@ -32,4 +40,56 @@ face2node = [[0,1,5,4], ...@@ -32,4 +40,56 @@ face2node = [[0,1,5,4],
[5,6,7,4], [5,6,7,4],
[0,3,2,1]] [0,3,2,1]]
# Host-side driver: allocate device buffers, initialise the solution with the
# Init kernel, fetch node coordinates, then apply the DGFlux and DGVolume
# kernels and print the resulting time-derivative array.
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags

# wsize is the number of float32 values in the solution array and row_size the
# launch extent along each axis; note that wsize == m * row_size**3.
wsize = m * raf**3 * (deg + 1)**3
row_size = raf * (deg + 1)

# Point coordinates are copied to the device once and declared read-only.
point_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=point_cpu)

# wn_gpu is filled by Init and later handed to DGFlux/DGVolume.
# NOTE(review): those kernels presumably read wn; OpenCL leaves kernel reads
# from a WRITE_ONLY buffer undefined, so the buffer is READ_WRITE (a superset
# that stays valid whichever way the kernels use it) — confirm in kernels.cl.
wn_cpu = np.zeros(wsize, dtype=np.float32)
wn_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=wn_cpu.nbytes)

# dtwn starts as zeros on the device because the zeroed host array is copied in.
dtwn_cpu = np.zeros(wsize, dtype=np.float32)
dtwn_gpu = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=dtwn_cpu)

# First scalar argument of every kernel below.
# NOTE(review): looks like an element index — confirm against kernels.cl.
ie = np.int32(0)

# Run Init over a row_size^3 grid and bring wn back to the host.
prg.Init(queue, (row_size, row_size, row_size), None,
         ie, point_gpu, wn_gpu).wait()
cl.enqueue_copy(queue, wn_cpu, wn_gpu)
# print(wn_cpu)

# View wn as rows of row_size values. The row count is derived from m (the
# array holds m * row_size**3 values) so the reshape stays valid when m changes.
wn_out = wn_cpu.reshape((m * row_size**2, row_size))
# print(wn_out)

# Only needed for the diagnostic plot below.
import matplotlib.pyplot as plt

# get_nodes writes 3 * row_size**3 float32 values into x_gpu.
x_cpu = np.zeros(3 * row_size**3, dtype=np.float32)
x_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, x_cpu.nbytes)
prg.get_nodes(queue, (row_size, row_size, row_size), None,
              ie, point_gpu, x_gpu).wait()
cl.enqueue_copy(queue, x_cpu, x_gpu).wait()

# Plot the first two rows of wn against the first row_size entries of x_cpu.
# The figure is only built here; uncomment plt.show() to display it.
plt.plot(x_cpu[0:row_size], wn_out[0])
plt.plot(x_cpu[0:row_size], wn_out[1])
# plt.show()
# print(x_cpu[0:row_size])

tnow = np.float32(0)
npts = deg + 1

# Each launch is waited on before the next one is enqueued, so DGFlux has
# finished before DGVolume starts. DGFlux uses 1x1x1 work-groups over a
# (raf + 1, row_size, row_size) grid; DGVolume uses npts^3 work-groups over a
# row_size^3 grid.
prg.DGFlux(queue, (raf + 1, row_size, row_size), (1, 1, 1),
           ie, point_gpu, tnow, wn_gpu, dtwn_gpu).wait()
prg.DGVolume(queue, (row_size, row_size, row_size), (npts, npts, npts),
             ie, point_gpu, wn_gpu, dtwn_gpu).wait()

# Read the accumulated result back and print it.
cl.enqueue_copy(queue, dtwn_cpu, dtwn_gpu)
print(dtwn_cpu)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment