Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
David Coulette
patapon
Commits
3c3de2af
Commit
3c3de2af
authored
Jul 08, 2016
by
Philippe Helluy
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
start patapon kernels
parent
dc5c7c9a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
82 additions
and
4 deletions
+82
-4
kernels.cl
kernels.cl
+41
-0
simple_kernels.py
simple_kernels.py
+6
-4
testtranspose.py
testtranspose.py
+35
-0
No files found.
kernels.cl
0 → 100644
View file @
3c3de2af
/* Compile-time sizing constants for the kernels in this file.
 * NOTE(review): the meanings below are inferred from the names only --
 * confirm against the host code before relying on them. */
#define _DEG 2          /* presumably the polynomial degree per direction */
#define _RAF 3          /* presumably the number of subdivisions per direction */
#define _M 2            /* presumably the number of variables per point; not used in the code shown here */
#define _NP (_DEG + 1)  /* _DEG + 1 */

/* npg = _NP^3 * _RAF^3 (27 * 27 = 729 with the values above).
 * Used by GenericVarindex as the stride between consecutive variables. */
__constant int npg = _NP * _NP * _NP * _RAF * _RAF * _RAF;
/* Flat storage index of variable `iv` at point `ipg`.
 *
 * Values are laid out variable by variable: each variable owns a
 * contiguous run of `npg` entries, and `ipg` is the offset inside
 * that run. No bounds checking is performed on either argument. */
int GenericVarindex(int ipg, int iv)
{
  const int run_start = npg * iv;
  return ipg + run_start;
}
/* Gradient of the trilinear 8-node map at reference point (x, y, z).
 *
 * p    : 24 floats, node k stored as p[3*k], p[3*k+1], p[3*k+2].
 *        The formulas correspond to the reference node order
 *        (0,0,0) (1,0,0) (1,1,0) (0,1,0) (0,0,1) (1,0,1) (1,1,1) (0,1,1).
 * dtau : output, dtau[i][j] = d(physical coordinate i) / d(reference
 *        coordinate j), with j = 0, 1, 2 meaning x, y, z.
 *
 * Each entry is sum_k dN_k/dref_j * p[3*k + i], where N_k is the
 * trilinear shape function of node k. The summation order is kept
 * fixed (node 0 to node 7) for every entry. */
void get_dtau(float x, float y, float z, __constant float *p, float dtau[][3])
{
  /* Reference coordinates shifted by one: (coordinate - 1). */
  const float xm = -1 + x;
  const float ym = -1 + y;
  const float zm = -1 + z;

  /* Shape-function derivative factors with respect to x. */
  const float dx_a = zm * ym;
  const float dx_b = y * zm;
  const float dx_c = z * ym;
  const float dx_d = y * z;

  /* Shape-function derivative factors with respect to y. */
  const float dy_a = zm * xm;
  const float dy_b = x * zm;
  const float dy_c = z * xm;
  const float dy_d = x * z;

  /* Shape-function derivative factors with respect to z. */
  const float dz_a = ym * xm;
  const float dz_b = x * ym;
  const float dz_c = x * y;
  const float dz_d = y * xm;

  /* One pass per physical component: q[3*k] is component c of node k. */
  for (int c = 0; c < 3; ++c) {
    __constant float *q = p + c;

    dtau[c][0] = -dx_a * q[0] + dx_a * q[3] - dx_b * q[6] + dx_b * q[9]
                 + dx_c * q[12] - dx_c * q[15] + dx_d * q[18] - dx_d * q[21];

    dtau[c][1] = -dy_a * q[0] + dy_b * q[3] - dy_b * q[6] + dy_a * q[9]
                 + dy_c * q[12] - dy_d * q[15] + dy_d * q[18] - dy_c * q[21];

    dtau[c][2] = -dz_a * q[0] + dz_b * q[3] - dz_c * q[6] + dz_d * q[9]
                 + dz_a * q[12] - dz_b * q[15] + dz_c * q[18] - dz_d * q[21];
  }
}
simple_kernels.py
View file @
3c3de2af
...
...
@@ -19,8 +19,8 @@ __kernel void K2(__global float *y){
__kernel void K3(__global float const *x, __global float const *y, __global float *z){
int i = get_global_id(0);
//
z[i] = x[i] + y[i];
z[i] = get_local_id(0);
z[i] = x[i] + y[i];
//
z[i] = get_local_id(0);
}
"""
...
...
@@ -28,7 +28,7 @@ ctx = cl.create_some_context()
queue
=
cl
.
CommandQueue
(
ctx
)
mf
=
cl
.
mem_flags
taille
=
8
taille
=
2
**
8
x_gpu
=
cl
.
Buffer
(
ctx
,
mf
.
WRITE_ONLY
,
size
=
(
taille
*
np
.
dtype
(
'float32'
).
itemsize
))
y_gpu
=
cl
.
Buffer
(
ctx
,
mf
.
WRITE_ONLY
,
size
=
(
taille
*
np
.
dtype
(
'float32'
).
itemsize
))
...
...
@@ -36,6 +36,8 @@ y_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=(taille * np.dtype('float32').itemsiz
z_cpu
=
np
.
empty
((
taille
,
),
dtype
=
np
.
float32
)
z_gpu
=
cl
.
Buffer
(
ctx
,
mf
.
WRITE_ONLY
,
z_cpu
.
nbytes
)
verif_cpu
=
np
.
fromfunction
(
lambda
i
:
i
*
i
+
i
,
(
taille
,
),
dtype
=
np
.
float32
)
prg
=
cl
.
Program
(
ctx
,
source
).
build
()
prg
.
K1
(
queue
,
(
taille
,
),
None
,
x_gpu
).
wait
()
...
...
@@ -44,7 +46,7 @@ prg.K3(queue, (taille, ), None, x_gpu, y_gpu, z_gpu).wait()
cl
.
enqueue_copy
(
queue
,
z_cpu
,
z_gpu
)
print
(
z_cpu
)
print
(
z_cpu
-
verif_cpu
)
testtranspose.py
0 → 100644
View file @
3c3de2af
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from
__future__
import
absolute_import
,
print_function
import
pyopencl
as
cl
import
numpy
as
np
# Load the OpenCL C source. The path is relative, so the script has to be
# started from the directory that contains kernels.cl.
with open('kernels.cl', 'r') as f:
    source = f.read()

# Let pyopencl pick a platform/device (it may ask interactively), then
# compile the kernel source for that context. A compilation error in
# kernels.cl surfaces here, at build() time.
ctx = cl.create_some_context()
prg = cl.Program(ctx, source).build()
# Corner coordinates of the unit cube, one (x, y, z) tuple per node.
# Nodes 0-3 run around the z = 0 face, nodes 4-7 around the z = 1 face.
point = [
    (0.0, 0.0, 0.0),
    (1.0, 0.0, 0.0),
    (1.0, 1.0, 0.0),
    (0.0, 1.0, 0.0),
    (0.0, 0.0, 1.0),
    (1.0, 0.0, 1.0),
    (1.0, 1.0, 1.0),
    (0.0, 1.0, 1.0),
]

# The single hexahedral element, given as indices into `point`.
element = list(range(8))

# Four node indices per face of the element, in the order
# y = 0, x = 1, y = 1, x = 0, z = 1, z = 0.
face2node = [
    [0, 1, 5, 4],
    [1, 2, 6, 5],
    [2, 3, 7, 6],
    [0, 4, 7, 3],
    [5, 6, 7, 4],
    [0, 3, 2, 1],
]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment