lbm_cl.py 4.35 KB
Newer Older
ph's avatar
ph committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
#!/usr/bin/env python3
# -*- coding: utf-8 -*-


# resolution of a transport equation by the finite volume method
# on regular grid

# regular python implementation compared to a pyopencl version


from __future__ import absolute_import, print_function
import pyopencl as cl
import numpy as np
import matplotlib.pyplot as plt

import time


##################" definition of default values
# number of conservative variables
ph's avatar
up  
ph committed
21
_m = 2
ph's avatar
ph committed
22 23 24 25

# number of kinetic variables
_n = 4 * _m

ph's avatar
up  
ph committed
26
_ivplot = 1
ph's avatar
ph committed
27 28

# grid size
ph's avatar
up  
ph committed
29 30
_nx = 64
_ny = 64
ph's avatar
ph committed
31

ph's avatar
up  
ph committed
32 33 34 35 36
Lx = 2 * 3.14159265358979323846264338328
Ly = 2 * 3.14159265358979323846264338328

Lx = 1
Ly = 1
ph's avatar
ph committed
37 38 39 40 41 42 43 44

_dx = Lx / _nx
_dy = Ly / _ny

# transport velocity
vel = np.array([1., 1.])

# lattice speed
ph's avatar
up  
ph committed
45
_vmax = 7.
ph's avatar
ph committed
46 47 48


# time stepping
ph's avatar
ph committed
49 50
_Tmax = 10. / _vmax
#_Tmax = 0.
ph's avatar
ph committed
51 52 53 54 55 56 57 58 59 60
cfl = 1

_dt = cfl * _dx / _vmax
############# end of default values



def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
              Tmax = _Tmax, vmax = _vmax,
              dx = _dx, dy = _dy,
ph's avatar
up  
ph committed
61
              dt = _dt,
ph's avatar
ph committed
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
              animate = False):

    # load and adjust  C program
    source = open("lbm_kernels.cl", "r").read()
    source = source.replace("_nx_", "("+str(nx)+")")
    source = source.replace("_ny_", "("+str(ny)+")")
    source = source.replace("_dx_", "("+str(dx)+"f)")
    source = source.replace("_dy_", "("+str(dy)+"f)")
    source = source.replace("_dt_", "("+str(dt)+"f)")
    source = source.replace("_m_", "("+str(m)+")")
    source = source.replace("_n_", "("+str(n)+")")    
    source = source.replace("_vx_", "("+str(vel[0])+"f)")
    source = source.replace("_vy_", "("+str(vel[1])+"f)")
    source = source.replace("_lambda_", "("+str(vmax)+"f)")

    #print(source)

    #exit(0)

    # OpenCL init
    ctx = cl.create_some_context()
    mf = cl.mem_flags

    # compile OpenCL C program
    prg = cl.Program(ctx, source).build(options = "-cl-strict-aliasing  \
                                                  -cl-fast-relaxed-math")

    # create OpenCL buffers
    fn_gpu   = cl.Buffer(ctx, mf.READ_WRITE,
                         size=(4 * m * nx * ny * np.dtype('float32').itemsize))
    fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE,
                         size=(4 * m * nx * ny * np.dtype('float32').itemsize))

    # create a queue (for submitting opencl operations)
    queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)


    # init data
    event = prg.init_sol(queue, (nx * ny, ), (32, ), fn_gpu)
    event.wait()

    # number of animation frames
ph's avatar
ph committed
104
    nbplots = 100
ph's avatar
ph committed
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
    itermax = int(np.floor(Tmax / dt))
    iterplot = int(itermax / nbplots)

    # time loop
    t = 0
    iter = 0
    elapsed = 0.;
    fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float32)

    print("start OpenCL computations...")
    while t < Tmax:
        t = t + dt
        iter = iter + 1
        #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu)
        event = prg.time_step(queue, (nx * ny, ), (64, ), fn_gpu, fnp1_gpu)
        #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu, wait_for = [event])
        event.wait()
        elapsed += 1e-9 * (event.profile.end - event.profile.start)
        # exchange buffer references for avoiding a copy
        fn_gpu, fnp1_gpu = fnp1_gpu, fn_gpu
        print("iter=",iter, " t=",t, "elapsed (s)=",elapsed)
        if iter % iterplot == 0 and animate:
            cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()
ph's avatar
up  
ph committed
128
            wplot = np.reshape(fn_cpu, (4, m, nx, ny))
ph's avatar
ph committed
129
            plt.clf()
ph's avatar
ph committed
130
            #plt.imshow(np.sum(wplot, axis = 0),vmin=0, vmax=1)
ph's avatar
up  
ph committed
131
            plt.imshow(np.sum(wplot[:, _ivplot, :, :], axis = 0))
ph's avatar
ph committed
132 133 134 135 136 137 138
            plt.gca().invert_yaxis()
            plt.colorbar()
            plt.pause(0.01)

    # copy OpenCL data to CPU and return the results
    cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()

ph's avatar
up  
ph committed
139
    wplot_gpu = np.reshape(fn_cpu,(4, m, nx, ny))
ph's avatar
ph committed
140 141 142
    return wplot_gpu

# gpu solve
ph's avatar
ph committed
143
wplot_gpu = solve_ocl(animate = True)
ph's avatar
up  
ph committed
144
#print(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0))
ph's avatar
ph committed
145
plt.clf()
ph's avatar
up  
ph committed
146 147
#plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0), vmin=0, vmax=1)
plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0))
ph's avatar
ph committed
148 149 150 151
plt.gca().invert_yaxis()
plt.colorbar()
plt.show()

ph's avatar
ph committed
152 153 154 155 156 157
# for iv in range(4):
#     plt.imshow(wplot_gpu[iv,:,:])
#     plt.gca().invert_yaxis()
#     plt.colorbar()
#     plt.show()

ph's avatar
ph committed
158 159 160 161 162 163 164 165 166

# check difference
# plt.clf()
# plt.imshow(wplot_cpu-wplot_gpu)
# plt.gca().invert_yaxis()
# plt.colorbar()
# plt.show()