Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
David Coulette
patapon
Commits
e3962aa7
Commit
e3962aa7
authored
Dec 05, 2017
by
ph
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
orsag tang ok
parent
1dccb9bd
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
112 additions
and
104 deletions
+112
-104
lbm_cl.py
lbm_cl.py
+21
-19
lbm_kernels.cl
lbm_kernels.cl
+91
-85
No files found.
lbm_cl.py
View file @
e3962aa7
...
...
@@ -18,23 +18,21 @@ import time
##################" definition of default values
# number of conservative variables
_m
=
2
_m
=
9
# number of kinetic variables
_n
=
4
*
_m
_ivplot
=
1
_ivplot
=
0
# grid size
_nx
=
64
_ny
=
64
Lx
=
2
*
3.14159265358979323846264338328
Ly
=
2
*
3.14159265358979323846264338328
_nx
=
256
_ny
=
256
Lx
=
1
Ly
=
1
_dx
=
Lx
/
_nx
_dy
=
Ly
/
_ny
...
...
@@ -42,12 +40,12 @@ _dy = Ly / _ny
vel
=
np
.
array
([
1.
,
1.
])
# lattice speed
_vmax
=
7
.
_vmax
=
20
.
# time stepping
_Tmax
=
10.
/
_vmax
#
_Tmax =
0
.
_Tmax
=
2
.
cfl
=
1
_dt
=
cfl
*
_dx
/
_vmax
...
...
@@ -61,20 +59,24 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
dt
=
_dt
,
animate
=
False
):
ff
=
"_F"
# load and adjust C program
source
=
open
(
"lbm_kernels.cl"
,
"r"
).
read
()
source
=
source
.
replace
(
"_nx_"
,
"("
+
str
(
nx
)
+
")"
)
source
=
source
.
replace
(
"_ny_"
,
"("
+
str
(
ny
)
+
")"
)
source
=
source
.
replace
(
"_dx_"
,
"("
+
str
(
dx
)
+
"
f
)"
)
source
=
source
.
replace
(
"_dy_"
,
"("
+
str
(
dy
)
+
"
f
)"
)
source
=
source
.
replace
(
"_dt_"
,
"("
+
str
(
dt
)
+
"
f
)"
)
source
=
source
.
replace
(
"_dx_"
,
"("
+
str
(
dx
)
+
ff
+
")"
)
source
=
source
.
replace
(
"_dy_"
,
"("
+
str
(
dy
)
+
ff
+
")"
)
source
=
source
.
replace
(
"_dt_"
,
"("
+
str
(
dt
)
+
ff
+
")"
)
source
=
source
.
replace
(
"_m_"
,
"("
+
str
(
m
)
+
")"
)
source
=
source
.
replace
(
"_n_"
,
"("
+
str
(
n
)
+
")"
)
source
=
source
.
replace
(
"_vx_"
,
"("
+
str
(
vel
[
0
])
+
"
f
)"
)
source
=
source
.
replace
(
"_vy_"
,
"("
+
str
(
vel
[
1
])
+
"
f
)"
)
source
=
source
.
replace
(
"_lambda_"
,
"("
+
str
(
vmax
)
+
"
f
)"
)
source
=
source
.
replace
(
"_vx_"
,
"("
+
str
(
vel
[
0
])
+
ff
+
")"
)
source
=
source
.
replace
(
"_vy_"
,
"("
+
str
(
vel
[
1
])
+
ff
+
")"
)
source
=
source
.
replace
(
"_lambda_"
,
"("
+
str
(
vmax
)
+
ff
+
")"
)
#print(source)
source
=
source
.
replace
(
"_F"
,
""
)
print
(
source
)
#exit(0)
...
...
@@ -88,9 +90,9 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
# create OpenCL buffers
fn_gpu
=
cl
.
Buffer
(
ctx
,
mf
.
READ_WRITE
,
size
=
(
4
*
m
*
nx
*
ny
*
np
.
dtype
(
'float
32
'
).
itemsize
))
size
=
(
4
*
m
*
nx
*
ny
*
np
.
dtype
(
'float
64
'
).
itemsize
))
fnp1_gpu
=
cl
.
Buffer
(
ctx
,
mf
.
READ_WRITE
,
size
=
(
4
*
m
*
nx
*
ny
*
np
.
dtype
(
'float
32
'
).
itemsize
))
size
=
(
4
*
m
*
nx
*
ny
*
np
.
dtype
(
'float
64
'
).
itemsize
))
# create a queue (for submitting opencl operations)
queue
=
cl
.
CommandQueue
(
ctx
,
properties
=
cl
.
command_queue_properties
.
PROFILING_ENABLE
)
...
...
@@ -109,7 +111,7 @@ def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
t
=
0
iter
=
0
elapsed
=
0.
;
fn_cpu
=
np
.
empty
((
4
*
m
*
nx
*
ny
,
),
dtype
=
np
.
float
32
)
fn_cpu
=
np
.
empty
((
4
*
m
*
nx
*
ny
,
),
dtype
=
np
.
float
64
)
print
(
"start OpenCL computations..."
)
while
t
<
Tmax
:
...
...
lbm_kernels.cl
View file @
e3962aa7
#
pragma
OPENCL
EXTENSION
cl_khr_fp64
:
enable
#
define
real
double
#
define
_NX
_nx_
#
define
_NY
_ny_
#
define
_DX
_dx_
...
...
@@ -10,65 +15,66 @@
#
define
_LAMBDA
_lambda_
#
define
M_PI
(
3.14159265358979323846264338328_F
)
#
define
_VOL
(
_DX
*
_DY
)
__constant
int
dir[4][2]
=
{
{-1,
0},
{1,
0},
{0,
-1},
{0,
1}}
;
__constant
float
ds[4]
=
{
_DY,
_DY,
_DX,
_DX
}
;
__constant
real
ds[4]
=
{
_DY,
_DY,
_DX,
_DX
}
;
//
physical
flux
of
the
hyperbolic
system
void
flux_phy
(
const
float
*w,
const
float
*
vnorm,
float
*
flux
)
{
float
vdotn
=
_VX
*
vnorm[0]
+
_VY
*
vnorm[1]
;
int
signe
=
1
;
for
(
int
iv
=
0
; iv < _M; iv++){
flux[iv]
=
vdotn
*
w[iv]
*
signe
;
signe
=
-signe
;
}
}
/*
void
flux_phy
(
const
real
*w,
const
real
*
vnorm,
real
*
flux
)
{
*/
/*
real
vdotn
=
_VX
*
vnorm[0]
+
_VY
*
vnorm[1]
;
*/
/*
int
signe
=
1
;
*/
/*
for
(
int
iv
=
0
; iv < _M; iv++){
*/
/*
flux[iv]
=
vdotn
*
w[iv]
*
signe
;
*/
/*
signe
=
-signe
;
*/
/*
}
*/
/*
}
*/
/*
void
flux_phy
(
const
float
*W,
const
float
*vn,
float
*flux
)
{
*/
void
flux_phy
(
const
real
*W,
const
real
*vn,
real
*flux
)
{
/*
float
gam
=
1.6666666666
f
; */
real
gam
=
1.6666666666
_F
;
/*
float
un
=
W[1]
/W[0]*
vn[0]
+
W[3]
/W[0]*
vn[1]
+
W[4]
/W[0]*vn[2]
; */
/*
float
bn
=
W[7]
*
vn[0]
+
W[5]
*
vn[1]
+
W[6]
*
vn[2]
;
*/
real
un
=
(
W[1]
*
vn[0]
+
W[3]
*
vn[1]
+
W[4]
*
vn[2]
)
/
W[0]
;
real
bn
=
W[7]
*
vn[0]
+
W[5]
*
vn[1]
+
W[6]
*
vn[2]
;
/*
float
p
=
(
gam-1
)
*
(
W[2]
-
W[0]*
(
W[1]/W[0]*W[1]/W[0]
*/
/*
+
W[3]/W[0]*W[3]/W[0]
*/
/*
+
W[4]
/W[0]*W[4]/W[0]
)
/2
*/
/*
-
(
W[7]
*
W[7]
+
W[5]
*
W[5]
+
W[6]
*
W[6]
)
/2
)
;
*/
real
p
=
(
gam
-
1
)
*
(
W[2]
-
(
W[1]
*
W[1]
+
W[3]
*
W[3]
+
W[4]
*
W[4]
)
/
2
/
W[0]
-
(
W[7]
*
W[7]
+
W[5]
*
W[5]
+
W[6]
*
W[6]
)
/2
)
;
/*
flux[0]
=
W[0]
*un
; */
/*
flux[1]
=
W[0]*
un*W[1]
/W[0]
+
*/
/*
(
p
+
(
W[7]
*
W[7]
+
W[5]
*
W[5]
+
W[6]
*
W[6]
)
/2
)
*/
/*
*vn[0]
-
bn
*
W[7]
;
*/
/*
flux[2]
=
(
W[2]
+
p
+
(
W[7]*W[7]
+
W[5]*W[5]
+
W[6]*W[6]
)
/2
)
*un
*/
/*
-
(
W[7]*W[1]
/W[0]
+
W[5]*W[3]
/W[0]
+
W[6]*W[4]/W[0]
)
*bn
; */
/*
flux[3]
=
W[0]*
un*W[3]
/W[0]
+
(
p
+
(
W[7]*W[7]
+
W[5]*W[5]
*/
/*
+
W[6]*W[6]
)
/2
)
*vn[1]
-
bn*W[5]
;
*/
/*
flux[4]
=
W[0]*
un*W[4]
/W[0]
+
(
p
+
(
W[7]*W[7]
+
W[5]*W[5]
*/
/*
+
W[6]*W[6]
)
/2
)
*vn[2]
-
bn*W[6]
;
*/
flux[0]
=
W[0]
*
un
;
flux[1]
=
un*W[1]
+
(
p
+
(
W[7]
*
W[7]
+
W[5]
*
W[5]
+
W[6]
*
W[6]
)
/
2
)
*
vn[0]
-
bn
*
W[7]
;
flux[2]
=
(
W[2]
+
p
+
(
W[7]*W[7]
+
W[5]*W[5]
+
W[6]*W[6]
)
/2
)
*un
-
(
W[7]*W[1]
+
W[5]*W[3]
+
W[6]*W[4]
)
*bn
/W[0]
;
flux[3]
=
un*W[3]
+
(
p
+
(
W[7]*W[7]
+
W[5]*W[5]
+
W[6]*W[6]
)
/2
)
*vn[1]
-
bn*W[5]
;
flux[4]
=
un*W[4]
+
(
p
+
(
W[7]*W[7]
+
W[5]*W[5]
+
W[6]*W[6]
)
/2
)
*vn[2]
-
bn*W[6]
;
/*
flux[5]
=
-bn*W[3]/W[0]
+
un*W[5]
+
W[8]*vn[1]
;
*/
/*
flux[6]
=
-bn*W[4]/W[0]
+
un*W[6]
+
W[8]*vn[2]
;
*/
/*
flux[7]
=
-bn*W[1]/W[0]
+
un*W[7]
+
W[8]*vn[0]
;
*/
flux[5]
=
-bn*W[3]/W[0]
+
un*W[5]
+
W[8]*vn[1]
;
flux[6]
=
-bn*W[4]/W[0]
+
un*W[6]
+
W[8]*vn[2]
;
flux[7]
=
-bn*W[1]/W[0]
+
un*W[7]
+
W[8]*vn[0]
;
/*
flux[8]
=
6*6*bn
;
*/
/*
}
*/
flux[8]
=
6*6*bn
;
}
//
equilibrium
"maxwellian"
from
macro
data
w
void
w2f
(
const
float
*w,
float
*f
)
{
void
w2f
(
const
real
*w,
real
*f
)
{
for
(
int
d
=
0
; d < 4; d++){
float
flux[_M]
;
float
vnorm[2]
=
{dir[d][0],
dir[d][1]}
;
real
flux[_M]
;
real
vnorm[2]
=
{dir[d][0],
dir[d][1]}
;
flux_phy
(
w,
vnorm,
flux
)
;
for
(
int
iv
=
0
; iv < _M; iv++){
f[d
*
_M
+
iv]
=
w[iv]
/
4
+
flux[iv]
/
2
/
_LAMBDA
;
...
...
@@ -78,7 +84,7 @@ void w2f(const float *w, float *f){
//
macro
data
w
from
micro
data
f
void
f2w
(
const
float
*f,
float
*w
)
{
void
f2w
(
const
real
*f,
real
*w
)
{
for
(
int
iv
=
0
; iv < _M; iv++) w[iv] = 0;
for
(
int
d
=
0
; d < 4; d++){
for
(
int
iv
=
0
; iv < _M; iv++){
...
...
@@ -90,56 +96,56 @@ void f2w(const float *f, float *w){
//
exact
macro
data
w
//
transport
void
exact_sol
(
float
*
xy,
float
t
,
float
*
w
)
{
int
signe
=
1
;
for
(
int
iv
=
0
; iv < _M; iv++){
float
x
=
xy[0]
-
t
*
_VX
*
signe
-
0.5
f
;
float
y
=
xy[1]
-
t
*
_VY
*
signe
-
0.5
f
;
float
d2
=
x
*
x
+
y
*y
;
w[iv]
=
exp
(
-30*d2
)
;
signe
=
-signe
;
}
}
/*
void
exact_sol
(
real
*
xy,
real
t
,
real
*
w
)
{
*/
/*
int
signe
=
1
;
*/
/*
for
(
int
iv
=
0
; iv < _M; iv++){
*/
/*
real
x
=
xy[0]
-
t
*
_VX
*
signe
-
0.5
_F
; */
/*
real
y
=
xy[1]
-
t
*
_VY
*
signe
-
0.5
_F
; */
/*
real
d2
=
x
*
x
+
y
*y
;
*/
/*
w[iv]
=
exp
(
-30*d2
)
;
*/
/*
signe
=
-signe
;
*/
/*
}
*/
/*
}
*/
//
mhd
/*
void
conservatives
(
float
*y,
float
*w
)
{
*/
/*
float
gam
=
1.6666666666
f
; */
/*
w[0]
=
y[0]
;
*/
/*
w[1]
=
y[0]*y[1]
;
*
/
/*
w[2]
=
y[2]/
(
gam-1
)
+
y[0]*
(
y[1]*y[1]+y[3]*y[3]+y[4]*y[4]
)
/2
*/
/*
+
(
y[7]*y[7]+y[5]*y[5]+y[6]*y[6]
)
/2
;
*
/
/*
w[3]
=
y[0]*y[3]
;
*/
/*
w[4]
=
y[0]*y[4]
;
*
/
/*
w[5]
=
y[5]
;
*/
/*
w[6]
=
y[6]
;
*/
/*
w[7]
=
y[7]
; // Bx
*/
/*
w[8]
=
y[8]
; // psi
*/
/*
}
*/
void
conservatives
(
real
*y,
real
*w
)
{
real
gam
=
1.6666666666
_F
;
w[0]
=
y[0]
;
// rho
w[1]
=
y[0]*y[1]
; /
/ rho u
w[2]
=
y[2]/
(
gam-1
)
+
y[0]*
(
y[1]*y[1]+y[3]*y[3]+y[4]*y[4]
)
/2
+
(
y[7]*y[7]+y[5]*y[5]+y[6]*y[6]
)
/2
; /
/ rho E
w[3]
=
y[0]*y[3]
;
// rho v
w[4]
=
y[0]*y[4]
; /
/ rho w
w[5]
=
y[5]
;
// By
w[6]
=
y[6]
;
// Bz
w[7]
=
y[7]
; // Bx
w[8]
=
y[8]
; // psi
}
/*
void
exact_sol
(
float*
x,
float
t
,
float
*
w
)
{
*/
/*
float
gam
=
1.6666666666
f
; */
/*
float
yL[9]
;
*/
void
exact_sol
(
real*
x,
real
t
,
real
*
w
)
{
real
gam
=
1.6666666666
_F
;
real
yL[9]
;
/*
yL[0]
=
gam*gam
;
*/
/*
yL[1]
=
-sin
(
x[1]
)
;
*/
/*
yL[2]
=
gam
;
*/
/*
yL[3]
=
sin
(
x[0]
)
;
*/
/*
yL[4]
=
0
;
*/
/*
yL[5]
=
sin
(
2*
(
x[0]
)
)
;
*/
/*
yL[6]
=
0
;
*/
/*
yL[7]
=
-sin
(
x[1]
)
; */
/*
yL[8]
=
0
;
*/
yL[0]
=
gam*gam
;
//rho
yL[1]
=
-sin
(
2
*
M_PI
*
x[1]
)
;
// u
yL[2]
=
gam
;
// p
yL[3]
=
sin
(
2
*
M_PI
*
x[0]
)
;
// v
yL[4]
=
0
;
// w
yL[5]
=
sin
(
4
*
M_PI
*
x[0]
)
;
// By
yL[6]
=
0
;
// Bz
yL[7]
=
-sin
(
2
*
M_PI
*
x[1]
)
; // Bx
yL[8]
=
0
;
// psi
/*
conservatives
(
yL,
w
)
;
*/
conservatives
(
yL,
w
)
;
/*
}
*/
}
//
initial
condition
on
the
macro
data
__kernel
void
init_sol
(
__global
float
*fn
)
{
__kernel
void
init_sol
(
__global
real
*fn
)
{
int
id
=
get_global_id
(
0
)
;
...
...
@@ -148,14 +154,14 @@ __kernel void init_sol(__global float *fn){
int
ngrid
=
_NX
*
_NY
;
float
wnow[_M]
;
real
wnow[_M]
;
float
t
=
0
;
float
xy[2]
=
{i
*
_DX
+
_DX
/
2
,
j
*
_DY
+
_DY
/
2}
;
real
t
=
0
;
real
xy[2]
=
{i
*
_DX
+
_DX
/
2
,
j
*
_DY
+
_DY
/
2}
;
exact_sol
(
xy,
t
,
wnow
)
;
float
fnow[_N]
;
real
fnow[_N]
;
w2f
(
wnow,
fnow
)
;
//printf
(
"x=%f, y=%f \n"
,
xy[0],xy[1]
)
;
...
...
@@ -171,7 +177,7 @@ __kernel void init_sol(__global float *fn){
//
one
time
step
of
the
LBM
scheme
__kernel
void
time_step
(
__global
float
*fn,
__global
float
*fnp1
)
{
__kernel
void
time_step
(
__global
real
*fn,
__global
real
*fnp1
)
{
int
id
=
get_global_id
(
0
)
;
...
...
@@ -181,7 +187,7 @@ __kernel void time_step(__global float *fn, __global float *fnp1){
int
ngrid
=
_NX
*
_NY
;
float
fnow[_N]
;
real
fnow[_N]
;
//
periodic
shift
in
each
direction
for
(
int
d
=
0
; d < 4; d++){
...
...
@@ -195,16 +201,16 @@ __kernel void time_step(__global float *fn, __global float *fnp1){
}
}
float
wnow[_M]
;
real
wnow[_M]
;
f2w
(
fnow,
wnow
)
;
float
fnext[_N]
;
real
fnext[_N]
;
//
first
order
relaxation
w2f
(
wnow,
fnext
)
;
//
second
order
relaxation
for
(
int
ik
=
0
; ik < _N; ik++)
fnext[ik]
=
2
*
fnext[ik]
-
fnow[ik]
;
fnext[ik]
=
1.9_F
*
fnext[ik]
-
0.9_F
*
fnow[ik]
;
//
save
for
(
int
ik
=
0
; ik < _N; ik++){
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment