lbm_cl.py 4.35 KB
Newer Older
ph's avatar
ph committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
#!/usr/bin/env python3
# -*- coding: utf-8 -*-


# resolution of a transport equation by the finite volume method
# on regular grid

# regular python implementation compared to a pyopencl version


from __future__ import absolute_import, print_function
import pyopencl as cl
import numpy as np
import matplotlib.pyplot as plt

import time


##################" definition of default values
# number of conservative variables
ph's avatar
ph committed
21
_m = 9
ph's avatar
ph committed
22 23 24 25

# number of kinetic variables
_n = 4 * _m

ph's avatar
ph committed
26
_ivplot = 0
ph's avatar
ph committed
27 28

# grid size
ph's avatar
ph committed
29 30
_nx = 256
_ny = 256
ph's avatar
up  
ph committed
31 32 33

Lx = 1
Ly = 1
ph's avatar
ph committed
34

ph's avatar
ph committed
35

ph's avatar
ph committed
36 37 38 39 40 41 42
_dx = Lx / _nx
_dy = Ly / _ny

# transport velocity
vel = np.array([1., 1.])

# lattice speed
ph's avatar
ph committed
43
_vmax = 20.
ph's avatar
ph committed
44 45 46


# time stepping
ph's avatar
ph committed
47
_Tmax = 10. / _vmax
ph's avatar
ph committed
48
_Tmax = 2.
ph's avatar
ph committed
49 50 51 52 53 54 55 56 57 58
cfl = 1

_dt = cfl * _dx / _vmax
############# end of default values



def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
              Tmax = _Tmax, vmax = _vmax,
              dx = _dx, dy = _dy,
ph's avatar
up  
ph committed
59
              dt = _dt,
ph's avatar
ph committed
60 61
              animate = False):

ph's avatar
ph committed
62 63 64

    ff = "_F"
    
ph's avatar
ph committed
65 66 67 68
    # load and adjust  C program
    source = open("lbm_kernels.cl", "r").read()
    source = source.replace("_nx_", "("+str(nx)+")")
    source = source.replace("_ny_", "("+str(ny)+")")
ph's avatar
ph committed
69 70 71
    source = source.replace("_dx_", "("+str(dx)+ ff + ")")
    source = source.replace("_dy_", "("+str(dy)+ ff + ")")
    source = source.replace("_dt_", "("+str(dt)+ ff + ")")
ph's avatar
ph committed
72 73
    source = source.replace("_m_", "("+str(m)+")")
    source = source.replace("_n_", "("+str(n)+")")    
ph's avatar
ph committed
74 75 76
    source = source.replace("_vx_", "("+str(vel[0])+ ff + ")")
    source = source.replace("_vy_", "("+str(vel[1])+ ff + ")")
    source = source.replace("_lambda_", "("+str(vmax)+ ff + ")")
ph's avatar
ph committed
77

ph's avatar
ph committed
78 79
    source = source.replace("_F", "")
    print(source)
ph's avatar
ph committed
80 81 82 83 84 85 86 87 88 89 90 91 92

    #exit(0)

    # OpenCL init
    ctx = cl.create_some_context()
    mf = cl.mem_flags

    # compile OpenCL C program
    prg = cl.Program(ctx, source).build(options = "-cl-strict-aliasing  \
                                                  -cl-fast-relaxed-math")

    # create OpenCL buffers
    fn_gpu   = cl.Buffer(ctx, mf.READ_WRITE,
ph's avatar
ph committed
93
                         size=(4 * m * nx * ny * np.dtype('float64').itemsize))
ph's avatar
ph committed
94
    fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE,
ph's avatar
ph committed
95
                         size=(4 * m * nx * ny * np.dtype('float64').itemsize))
ph's avatar
ph committed
96 97 98 99 100 101 102 103 104 105

    # create a queue (for submitting opencl operations)
    queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)


    # init data
    event = prg.init_sol(queue, (nx * ny, ), (32, ), fn_gpu)
    event.wait()

    # number of animation frames
ph's avatar
ph committed
106
    nbplots = 100
ph's avatar
ph committed
107 108 109 110 111 112 113
    itermax = int(np.floor(Tmax / dt))
    iterplot = int(itermax / nbplots)

    # time loop
    t = 0
    iter = 0
    elapsed = 0.;
ph's avatar
ph committed
114
    fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float64)
ph's avatar
ph committed
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129

    print("start OpenCL computations...")
    while t < Tmax:
        t = t + dt
        iter = iter + 1
        #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu)
        event = prg.time_step(queue, (nx * ny, ), (64, ), fn_gpu, fnp1_gpu)
        #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu, wait_for = [event])
        event.wait()
        elapsed += 1e-9 * (event.profile.end - event.profile.start)
        # exchange buffer references for avoiding a copy
        fn_gpu, fnp1_gpu = fnp1_gpu, fn_gpu
        print("iter=",iter, " t=",t, "elapsed (s)=",elapsed)
        if iter % iterplot == 0 and animate:
            cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()
ph's avatar
up  
ph committed
130
            wplot = np.reshape(fn_cpu, (4, m, nx, ny))
ph's avatar
ph committed
131
            plt.clf()
ph's avatar
ph committed
132
            #plt.imshow(np.sum(wplot, axis = 0),vmin=0, vmax=1)
ph's avatar
up  
ph committed
133
            plt.imshow(np.sum(wplot[:, _ivplot, :, :], axis = 0))
ph's avatar
ph committed
134 135 136 137 138 139 140
            plt.gca().invert_yaxis()
            plt.colorbar()
            plt.pause(0.01)

    # copy OpenCL data to CPU and return the results
    cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()

ph's avatar
up  
ph committed
141
    wplot_gpu = np.reshape(fn_cpu,(4, m, nx, ny))
ph's avatar
ph committed
142 143 144
    return wplot_gpu

# gpu solve
ph's avatar
ph committed
145
wplot_gpu = solve_ocl(animate = True)
ph's avatar
up  
ph committed
146
#print(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0))
ph's avatar
ph committed
147
plt.clf()
ph's avatar
up  
ph committed
148 149
#plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0), vmin=0, vmax=1)
plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0))
ph's avatar
ph committed
150 151 152 153
plt.gca().invert_yaxis()
plt.colorbar()
plt.show()

ph's avatar
ph committed
154 155 156 157 158 159
# for iv in range(4):
#     plt.imshow(wplot_gpu[iv,:,:])
#     plt.gca().invert_yaxis()
#     plt.colorbar()
#     plt.show()

ph's avatar
ph committed
160 161 162 163 164 165 166 167 168

# check difference
# plt.clf()
# plt.imshow(wplot_cpu-wplot_gpu)
# plt.gca().invert_yaxis()
# plt.colorbar()
# plt.show()