lbm_cl.py 5.02 KB
Newer Older
ph's avatar
ph committed
1 2
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
Matthieu Boileau's avatar
Matthieu Boileau committed
3 4 5
"""
Resolution of a transport equation by the finite volume method
on regular grid
ph's avatar
ph committed
6

Matthieu Boileau's avatar
Matthieu Boileau committed
7 8
regular python implementation compared to a pyopencl version
"""
ph's avatar
ph committed
9 10 11 12 13 14 15 16 17 18 19

from __future__ import absolute_import, print_function
import pyopencl as cl
import numpy as np
import matplotlib.pyplot as plt

import time


##################" definition of default values
# number of conservative variables
ph's avatar
ph committed
20
_m = 9
ph's avatar
ph committed
21 22 23 24

# number of kinetic variables
_n = 4 * _m

ph's avatar
ph committed
25
_ivplot = 0
ph's avatar
ph committed
26 27

# grid size
28 29
_nx = 1024
_ny = 1024
ph's avatar
up  
ph committed
30

Matthieu Boileau's avatar
Matthieu Boileau committed
31 32
_Lx = 1
_Ly = 1
ph's avatar
ph committed
33 34 35 36

# transport velocity
vel = np.array([1., 1.])

ph's avatar
ph committed
37
_Tmax = 2.
ph's avatar
ph committed
38 39 40 41 42 43

############# end of default values



def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
Matthieu Boileau's avatar
Matthieu Boileau committed
44 45
              Tmax = _Tmax,
              Lx = _Lx, Ly = _Ly,
46
              animate = True,
Matthieu Boileau's avatar
Matthieu Boileau committed
47 48
              interactive=True,
              precision="single"):
ph's avatar
ph committed
49

Matthieu Boileau's avatar
Matthieu Boileau committed
50 51 52 53 54 55 56 57 58 59
    dx = Lx / nx
    dy = Ly / ny

    # lattice speed
    vmax = 20.

    # time stepping
    cfl = 1

    dt = cfl * dx / vmax
ph's avatar
ph committed
60 61 62

    ff = "_F"
    
ph's avatar
ph committed
63 64 65 66
    # load and adjust  C program
    source = open("lbm_kernels.cl", "r").read()
    source = source.replace("_nx_", "("+str(nx)+")")
    source = source.replace("_ny_", "("+str(ny)+")")
ph's avatar
ph committed
67 68 69
    source = source.replace("_dx_", "("+str(dx)+ ff + ")")
    source = source.replace("_dy_", "("+str(dy)+ ff + ")")
    source = source.replace("_dt_", "("+str(dt)+ ff + ")")
ph's avatar
ph committed
70 71
    source = source.replace("_m_", "("+str(m)+")")
    source = source.replace("_n_", "("+str(n)+")")    
ph's avatar
ph committed
72 73 74
    source = source.replace("_vx_", "("+str(vel[0])+ ff + ")")
    source = source.replace("_vy_", "("+str(vel[1])+ ff + ")")
    source = source.replace("_lambda_", "("+str(vmax)+ ff + ")")
ph's avatar
ph committed
75

Matthieu Boileau's avatar
Matthieu Boileau committed
76 77 78 79 80 81 82 83 84 85 86 87
    
    if precision == "double":
        source = source.replace("_F", "")
        source = source.replace("_real_", "double")
        np_real = 'float64'
        dtype = np.float64
    else:
        print("prec:", precision)
        source = source.replace("_F", "f")
        source = source.replace("_real_", "float")
        np_real = 'float32'
        dtype = np.float32
Matthieu Boileau's avatar
Matthieu Boileau committed
88
    #print(source)
ph's avatar
ph committed
89 90 91 92 93 94 95 96

    #exit(0)

    # OpenCL init
    ctx = cl.create_some_context()
    mf = cl.mem_flags

    # compile OpenCL C program
97
    prg = cl.Program(ctx, source).build(options = "")
ph's avatar
ph committed
98 99 100

    # create OpenCL buffers
    fn_gpu   = cl.Buffer(ctx, mf.READ_WRITE,
Matthieu Boileau's avatar
Matthieu Boileau committed
101
                         size=(4 * m * nx * ny * np.dtype(np_real).itemsize))
ph's avatar
ph committed
102
    fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE,
Matthieu Boileau's avatar
Matthieu Boileau committed
103
                         size=(4 * m * nx * ny * np.dtype(np_real).itemsize))
ph's avatar
ph committed
104 105 106 107 108 109 110 111 112

    # create a queue (for submitting opencl operations)
    queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)


    # init data
    event = prg.init_sol(queue, (nx * ny, ), (32, ), fn_gpu)
    event.wait()

113 114 115
    event = prg.init_sol(queue, (nx * ny, ), (32, ), fnp1_gpu)
    event.wait()

ph's avatar
ph committed
116
    # number of animation frames
ph's avatar
ph committed
117
    nbplots = 100
ph's avatar
ph committed
118 119 120 121 122 123 124
    itermax = int(np.floor(Tmax / dt))
    iterplot = int(itermax / nbplots)

    # time loop
    t = 0
    iter = 0
    elapsed = 0.;
Matthieu Boileau's avatar
Matthieu Boileau committed
125
    fn_cpu = np.empty((4 * m * nx * ny, ), dtype = dtype)
ph's avatar
ph committed
126

Matthieu Boileau's avatar
Matthieu Boileau committed
127 128 129 130 131
    if animate:
        fig = plt.gcf()
        fig.show()
        fig.canvas.draw()

ph's avatar
ph committed
132 133 134 135 136 137 138 139 140 141
    print("start OpenCL computations...")
    while t < Tmax:
        t = t + dt
        #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu)
        event = prg.time_step(queue, (nx * ny, ), (64, ), fn_gpu, fnp1_gpu)
        #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu, wait_for = [event])
        event.wait()
        elapsed += 1e-9 * (event.profile.end - event.profile.start)
        # exchange buffer references for avoiding a copy
        fn_gpu, fnp1_gpu = fnp1_gpu, fn_gpu
Matthieu Boileau's avatar
Matthieu Boileau committed
142 143 144 145 146 147 148 149 150 151 152 153 154 155
        title = "iter = {}, t = {:f}, elapsed (s) = {:f}".format(iter, t, elapsed)
        if animate:
            if iter % iterplot == 0:
                cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()
                wplot = np.reshape(fn_cpu, (4, m, nx, ny))
                plt.clf()
                #plt.imshow(np.sum(wplot, axis = 0),vmin=0, vmax=1)
                fig.suptitle(title)
                plt.imshow(np.sum(wplot[:, _ivplot, :, :], axis = 0))
                plt.gca().invert_yaxis()
                plt.colorbar()
                fig.canvas.draw()
        else:
            print(title, end='\r')
ph's avatar
ph committed
156

157 158
        iter = iter + 1

ph's avatar
ph committed
159 160 161
    # copy OpenCL data to CPU and return the results
    cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()

ph's avatar
up  
ph committed
162
    wplot_gpu = np.reshape(fn_cpu,(4, m, nx, ny))
ph's avatar
ph committed
163 164
    return wplot_gpu

Matthieu Boileau's avatar
Matthieu Boileau committed
165 166 167

if __name__ == '__main__':
    # gpu solve
168
    wplot_gpu = solve_ocl()
Matthieu Boileau's avatar
Matthieu Boileau committed
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
    #print(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0))
    plt.clf()
    #plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0), vmin=0, vmax=1)
    plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0))
    plt.gca().invert_yaxis()
    plt.colorbar()
    plt.show()

    # for iv in range(4):
    #     plt.imshow(wplot_gpu[iv,:,:])
    #     plt.gca().invert_yaxis()
    #     plt.colorbar()
    #     plt.show()


    # check difference
    # plt.clf()
    # plt.imshow(wplot_cpu-wplot_gpu)
    # plt.gca().invert_yaxis()
    # plt.colorbar()
    # plt.show()
ph's avatar
ph committed
190 191