Commit 3c3de2af authored by Philippe Helluy

start patapon kernels

parent dc5c7c9a
// Compile-time sizing parameters shared by the kernels in this file.
// NOTE(review): _DEG looks like a per-direction polynomial degree and _RAF a
// per-direction subdivision count -- confirm against the host code.
#define _DEG 2
#define _RAF 3
// NOTE(review): _M is not used in the visible part of this file; presumably
// the number of variables stored per point -- confirm.
#define _M 2
// _NP is always _DEG + 1.
#define _NP (_DEG + 1)
// npg = _NP^3 * _RAF^3 (27 * 27 = 729 with the values above); it is the
// stride between consecutive values of iv in GenericVarindex().
__constant int npg = _NP * _NP * _NP * _RAF * _RAF * _RAF;
/*
 * Flat array index of entry (ipg, iv).
 *
 * Entries that share the same iv are stored contiguously: iv selects a
 * slab of npg entries and ipg is the position inside that slab.
 * No range checking is performed on either argument.
 */
int GenericVarindex(int ipg, int iv) {
  const int slab_start = npg * iv;
  return slab_start + ipg;
}
/*
 * Jacobian matrix of the trilinear hexahedron map at the reference point
 * (x, y, z).
 *
 * p     24 floats: the 8 nodes of the hexahedron, node k stored at
 *       p[3*k], p[3*k+1], p[3*k+2]. The coefficients below correspond to
 *       the reference node ordering
 *         (0,0,0) (1,0,0) (1,1,0) (0,1,0) (0,0,1) (1,0,1) (1,1,1) (0,1,1).
 * dtau  output, 3x3: dtau[i][j] is the derivative of physical coordinate i
 *       with respect to reference coordinate j.
 *
 * The weights are built from (x-1), (y-1), (z-1) rather than (1-x), ...,
 * so the signs of the shape-function derivatives are carried by the
 * explicit +/- in front of each term.
 */
void get_dtau(float x, float y, float z,
              __constant float *p, float dtau[][3]){
  float xm = -1 + x;
  float ym = -1 + y;
  float zm = -1 + z;

  /* weights of the derivative with respect to x */
  float ym_zm = zm * ym;
  float y_zm = y * zm;
  float ym_z = z * ym;
  float y_z = y * z;

  /* weights of the derivative with respect to y */
  float xm_zm = zm * xm;
  float x_zm = x * zm;
  float xm_z = z * xm;
  float x_z = x * z;

  /* weights of the derivative with respect to z */
  float xm_ym = ym * xm;
  float x_ym = x * ym;
  float x_y = x * y;
  float xm_y = y * xm;

  /* Row i uses component i of every node: p[i], p[3 + i], ..., p[21 + i]. */
  for (int i = 0; i < 3; i++) {
    dtau[i][0] = -ym_zm * p[i] + ym_zm * p[3 + i]
      - y_zm * p[6 + i] + y_zm * p[9 + i]
      + ym_z * p[12 + i] - ym_z * p[15 + i]
      + y_z * p[18 + i] - y_z * p[21 + i];

    dtau[i][1] = -xm_zm * p[i] + x_zm * p[3 + i]
      - x_zm * p[6 + i] + xm_zm * p[9 + i]
      + xm_z * p[12 + i] - x_z * p[15 + i]
      + x_z * p[18 + i] - xm_z * p[21 + i];

    dtau[i][2] = -xm_ym * p[i] + x_ym * p[3 + i]
      - x_y * p[6 + i] + xm_y * p[9 + i]
      + xm_ym * p[12 + i] - x_ym * p[15 + i]
      + x_y * p[18 + i] - xm_y * p[21 + i];
  }
}
...@@ -19,8 +19,8 @@ __kernel void K2(__global float *y){ ...@@ -19,8 +19,8 @@ __kernel void K2(__global float *y){
__kernel void K3(__global float const *x, __global float const *y, __global float *z){ __kernel void K3(__global float const *x, __global float const *y, __global float *z){
int i = get_global_id(0); int i = get_global_id(0);
//z[i] = x[i] + y[i]; z[i] = x[i] + y[i];
z[i] = get_local_id(0); //z[i] = get_local_id(0);
} }
""" """
...@@ -28,7 +28,7 @@ ctx = cl.create_some_context() ...@@ -28,7 +28,7 @@ ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx) queue = cl.CommandQueue(ctx)
mf = cl.mem_flags mf = cl.mem_flags
taille = 8 taille = 2**8
x_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=(taille * np.dtype('float32').itemsize)) x_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=(taille * np.dtype('float32').itemsize))
y_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=(taille * np.dtype('float32').itemsize)) y_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=(taille * np.dtype('float32').itemsize))
...@@ -36,6 +36,8 @@ y_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=(taille * np.dtype('float32').itemsiz ...@@ -36,6 +36,8 @@ y_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=(taille * np.dtype('float32').itemsiz
z_cpu = np.empty((taille, ), dtype = np.float32) z_cpu = np.empty((taille, ), dtype = np.float32)
z_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, z_cpu.nbytes) z_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, z_cpu.nbytes)
verif_cpu = np.fromfunction(lambda i: i * i + i, (taille, ), dtype = np.float32)
prg = cl.Program(ctx, source).build() prg = cl.Program(ctx, source).build()
prg.K1(queue, (taille, ), None, x_gpu).wait() prg.K1(queue, (taille, ), None, x_gpu).wait()
...@@ -44,7 +46,7 @@ prg.K3(queue, (taille, ), None, x_gpu, y_gpu, z_gpu).wait() ...@@ -44,7 +46,7 @@ prg.K3(queue, (taille, ), None, x_gpu, y_gpu, z_gpu).wait()
cl.enqueue_copy(queue, z_cpu, z_gpu) cl.enqueue_copy(queue, z_cpu, z_gpu)
print(z_cpu) print(z_cpu - verif_cpu)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function
import pyopencl as cl
import numpy as np
# Read the OpenCL kernel source that sits next to this script.
with open('kernels.cl', 'r') as f:
    source = f.read()

# Pick an OpenCL context and compile the kernels for it.
ctx = cl.create_some_context()
prg = cl.Program(ctx, source).build()

# The eight corners of the unit cube: bottom face (z = 0) first, then the
# top face (z = 1), each face listed in the same rotational order.
point = [
    (0.0, 0.0, 0.0),
    (1.0, 0.0, 0.0),
    (1.0, 1.0, 0.0),
    (0.0, 1.0, 0.0),
    (0.0, 0.0, 1.0),
    (1.0, 0.0, 1.0),
    (1.0, 1.0, 1.0),
    (0.0, 1.0, 1.0),
]

# A single hexahedron that uses the eight points in storage order.
element = list(range(8))

# Node indices (into `point`) of the four corners of each of the six faces.
face2node = [
    [0, 1, 5, 4],  # y = 0
    [1, 2, 6, 5],  # x = 1
    [2, 3, 7, 6],  # y = 1
    [0, 4, 7, 3],  # x = 0
    [5, 6, 7, 4],  # z = 1
    [0, 3, 2, 1],  # z = 0
]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment