lbm_cl.py 4.72 KB
Newer Older
ph's avatar
ph committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
#!/usr/bin/env python3
# -*- coding: utf-8 -*-


# resolution of a transport equation by the finite volume method
# on regular grid

# regular python implementation compared to a pyopencl version


from __future__ import absolute_import, print_function
import pyopencl as cl
import numpy as np
import matplotlib.pyplot as plt

import time


##################" definition of default values
# number of conservative variables
ph's avatar
ph committed
21
_m = 9
ph's avatar
ph committed
22 23 24 25

# number of kinetic variables
_n = 4 * _m

ph's avatar
ph committed
26
_ivplot = 0
ph's avatar
ph committed
27 28

# grid size
ph's avatar
ph committed
29 30
_nx = 256
_ny = 256
ph's avatar
up  
ph committed
31 32 33

Lx = 1
Ly = 1
ph's avatar
ph committed
34

ph's avatar
ph committed
35

ph's avatar
ph committed
36 37 38 39 40 41 42
_dx = Lx / _nx
_dy = Ly / _ny

# transport velocity
vel = np.array([1., 1.])

# lattice speed
ph's avatar
ph committed
43
_vmax = 20.
ph's avatar
ph committed
44 45 46


# time stepping
ph's avatar
ph committed
47
_Tmax = 10. / _vmax
ph's avatar
ph committed
48
_Tmax = 2.
ph's avatar
ph committed
49 50 51 52 53 54 55 56 57 58
cfl = 1

_dt = cfl * _dx / _vmax
############# end of default values



def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
              Tmax = _Tmax, vmax = _vmax,
              dx = _dx, dy = _dy,
ph's avatar
up  
ph committed
59
              dt = _dt,
Matthieu Boileau's avatar
Matthieu Boileau committed
60 61
              animate = False,
              interactive=True):
ph's avatar
ph committed
62

ph's avatar
ph committed
63 64 65

    ff = "_F"
    
ph's avatar
ph committed
66 67 68 69
    # load and adjust  C program
    source = open("lbm_kernels.cl", "r").read()
    source = source.replace("_nx_", "("+str(nx)+")")
    source = source.replace("_ny_", "("+str(ny)+")")
ph's avatar
ph committed
70 71 72
    source = source.replace("_dx_", "("+str(dx)+ ff + ")")
    source = source.replace("_dy_", "("+str(dy)+ ff + ")")
    source = source.replace("_dt_", "("+str(dt)+ ff + ")")
ph's avatar
ph committed
73 74
    source = source.replace("_m_", "("+str(m)+")")
    source = source.replace("_n_", "("+str(n)+")")    
ph's avatar
ph committed
75 76 77
    source = source.replace("_vx_", "("+str(vel[0])+ ff + ")")
    source = source.replace("_vy_", "("+str(vel[1])+ ff + ")")
    source = source.replace("_lambda_", "("+str(vmax)+ ff + ")")
ph's avatar
ph committed
78

ph's avatar
ph committed
79
    source = source.replace("_F", "")
Matthieu Boileau's avatar
Matthieu Boileau committed
80
    #print(source)
ph's avatar
ph committed
81 82 83 84 85 86 87 88 89 90 91 92 93

    #exit(0)

    # OpenCL init
    ctx = cl.create_some_context()
    mf = cl.mem_flags

    # compile OpenCL C program
    prg = cl.Program(ctx, source).build(options = "-cl-strict-aliasing  \
                                                  -cl-fast-relaxed-math")

    # create OpenCL buffers
    fn_gpu   = cl.Buffer(ctx, mf.READ_WRITE,
ph's avatar
ph committed
94
                         size=(4 * m * nx * ny * np.dtype('float64').itemsize))
ph's avatar
ph committed
95
    fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE,
ph's avatar
ph committed
96
                         size=(4 * m * nx * ny * np.dtype('float64').itemsize))
ph's avatar
ph committed
97 98 99 100 101 102 103 104 105 106

    # create a queue (for submitting opencl operations)
    queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)


    # init data
    event = prg.init_sol(queue, (nx * ny, ), (32, ), fn_gpu)
    event.wait()

    # number of animation frames
ph's avatar
ph committed
107
    nbplots = 100
ph's avatar
ph committed
108 109 110 111 112 113 114
    itermax = int(np.floor(Tmax / dt))
    iterplot = int(itermax / nbplots)

    # time loop
    t = 0
    iter = 0
    elapsed = 0.;
ph's avatar
ph committed
115
    fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float64)
ph's avatar
ph committed
116

Matthieu Boileau's avatar
Matthieu Boileau committed
117 118 119 120 121
    if animate:
        fig = plt.gcf()
        fig.show()
        fig.canvas.draw()

ph's avatar
ph committed
122 123 124 125 126 127 128 129 130 131 132
    print("start OpenCL computations...")
    while t < Tmax:
        t = t + dt
        iter = iter + 1
        #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu)
        event = prg.time_step(queue, (nx * ny, ), (64, ), fn_gpu, fnp1_gpu)
        #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu, wait_for = [event])
        event.wait()
        elapsed += 1e-9 * (event.profile.end - event.profile.start)
        # exchange buffer references for avoiding a copy
        fn_gpu, fnp1_gpu = fnp1_gpu, fn_gpu
Matthieu Boileau's avatar
Matthieu Boileau committed
133 134 135 136 137 138 139 140 141 142 143 144 145 146
        title = "iter = {}, t = {:f}, elapsed (s) = {:f}".format(iter, t, elapsed)
        if animate:
            if iter % iterplot == 0:
                cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()
                wplot = np.reshape(fn_cpu, (4, m, nx, ny))
                plt.clf()
                #plt.imshow(np.sum(wplot, axis = 0),vmin=0, vmax=1)
                fig.suptitle(title)
                plt.imshow(np.sum(wplot[:, _ivplot, :, :], axis = 0))
                plt.gca().invert_yaxis()
                plt.colorbar()
                fig.canvas.draw()
        else:
            print(title, end='\r')
ph's avatar
ph committed
147 148 149 150

    # copy OpenCL data to CPU and return the results
    cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()

ph's avatar
up  
ph committed
151
    wplot_gpu = np.reshape(fn_cpu,(4, m, nx, ny))
ph's avatar
ph committed
152 153
    return wplot_gpu

Matthieu Boileau's avatar
Matthieu Boileau committed
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178

if __name__ == '__main__':
    # gpu solve
    wplot_gpu = solve_ocl(animate=False)
    #print(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0))
    plt.clf()
    #plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0), vmin=0, vmax=1)
    plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0))
    plt.gca().invert_yaxis()
    plt.colorbar()
    plt.show()

    # for iv in range(4):
    #     plt.imshow(wplot_gpu[iv,:,:])
    #     plt.gca().invert_yaxis()
    #     plt.colorbar()
    #     plt.show()


    # check difference
    # plt.clf()
    # plt.imshow(wplot_cpu-wplot_gpu)
    # plt.gca().invert_yaxis()
    # plt.colorbar()
    # plt.show()
ph's avatar
ph committed
179 180