lbm_cl.py 5.04 KB
Newer Older
ph's avatar
ph committed
1 2
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
Matthieu Boileau's avatar
Matthieu Boileau committed
3 4 5
"""
Resolution of a transport equation by the finite volume method
on regular grid
ph's avatar
ph committed
6

Matthieu Boileau's avatar
Matthieu Boileau committed
7 8
regular python implementation compared to a pyopencl version
"""
ph's avatar
ph committed
9 10 11 12 13 14 15 16 17 18 19

from __future__ import absolute_import, print_function
import pyopencl as cl
import numpy as np
import matplotlib.pyplot as plt

import time


##################" definition of default values
# number of conservative variables
ph's avatar
ph committed
20
_m = 9
ph's avatar
ph committed
21 22 23 24

# number of kinetic variables
_n = 4 * _m

ph's avatar
ph committed
25
_ivplot = 0
ph's avatar
ph committed
26 27

# grid size
ph's avatar
ph committed
28 29
_nx = 256
_ny = 256
ph's avatar
up  
ph committed
30

Matthieu Boileau's avatar
Matthieu Boileau committed
31 32
_Lx = 1
_Ly = 1
ph's avatar
ph committed
33 34 35 36

# transport velocity
vel = np.array([1., 1.])

ph's avatar
ph committed
37
_Tmax = 2.
ph's avatar
ph committed
38 39 40 41 42 43

############# end of default values



def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
Matthieu Boileau's avatar
Matthieu Boileau committed
44 45
              Tmax = _Tmax,
              Lx = _Lx, Ly = _Ly,
Matthieu Boileau's avatar
Matthieu Boileau committed
46
              animate = False,
Matthieu Boileau's avatar
Matthieu Boileau committed
47 48
              interactive=True,
              precision="single"):
ph's avatar
ph committed
49

Matthieu Boileau's avatar
Matthieu Boileau committed
50 51 52 53 54 55 56 57 58 59
    dx = Lx / nx
    dy = Ly / ny

    # lattice speed
    vmax = 20.

    # time stepping
    cfl = 1

    dt = cfl * dx / vmax
ph's avatar
ph committed
60 61 62

    ff = "_F"
    
ph's avatar
ph committed
63 64 65 66
    # load and adjust  C program
    source = open("lbm_kernels.cl", "r").read()
    source = source.replace("_nx_", "("+str(nx)+")")
    source = source.replace("_ny_", "("+str(ny)+")")
ph's avatar
ph committed
67 68 69
    source = source.replace("_dx_", "("+str(dx)+ ff + ")")
    source = source.replace("_dy_", "("+str(dy)+ ff + ")")
    source = source.replace("_dt_", "("+str(dt)+ ff + ")")
ph's avatar
ph committed
70 71
    source = source.replace("_m_", "("+str(m)+")")
    source = source.replace("_n_", "("+str(n)+")")    
ph's avatar
ph committed
72 73 74
    source = source.replace("_vx_", "("+str(vel[0])+ ff + ")")
    source = source.replace("_vy_", "("+str(vel[1])+ ff + ")")
    source = source.replace("_lambda_", "("+str(vmax)+ ff + ")")
ph's avatar
ph committed
75

Matthieu Boileau's avatar
Matthieu Boileau committed
76 77 78 79 80 81 82 83 84 85 86 87
    
    if precision == "double":
        source = source.replace("_F", "")
        source = source.replace("_real_", "double")
        np_real = 'float64'
        dtype = np.float64
    else:
        print("prec:", precision)
        source = source.replace("_F", "f")
        source = source.replace("_real_", "float")
        np_real = 'float32'
        dtype = np.float32
Matthieu Boileau's avatar
Matthieu Boileau committed
88
    #print(source)
ph's avatar
ph committed
89 90 91 92 93 94 95 96 97 98 99 100 101

    #exit(0)

    # OpenCL init
    ctx = cl.create_some_context()
    mf = cl.mem_flags

    # compile OpenCL C program
    prg = cl.Program(ctx, source).build(options = "-cl-strict-aliasing  \
                                                  -cl-fast-relaxed-math")

    # create OpenCL buffers
    fn_gpu   = cl.Buffer(ctx, mf.READ_WRITE,
Matthieu Boileau's avatar
Matthieu Boileau committed
102
                         size=(4 * m * nx * ny * np.dtype(np_real).itemsize))
ph's avatar
ph committed
103
    fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE,
Matthieu Boileau's avatar
Matthieu Boileau committed
104
                         size=(4 * m * nx * ny * np.dtype(np_real).itemsize))
ph's avatar
ph committed
105 106 107 108 109 110 111 112 113 114

    # create a queue (for submitting opencl operations)
    queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)


    # init data
    event = prg.init_sol(queue, (nx * ny, ), (32, ), fn_gpu)
    event.wait()

    # number of animation frames
ph's avatar
ph committed
115
    nbplots = 100
ph's avatar
ph committed
116 117 118 119 120 121 122
    itermax = int(np.floor(Tmax / dt))
    iterplot = int(itermax / nbplots)

    # time loop
    t = 0
    iter = 0
    elapsed = 0.;
Matthieu Boileau's avatar
Matthieu Boileau committed
123
    fn_cpu = np.empty((4 * m * nx * ny, ), dtype = dtype)
ph's avatar
ph committed
124

Matthieu Boileau's avatar
Matthieu Boileau committed
125 126 127 128 129
    if animate:
        fig = plt.gcf()
        fig.show()
        fig.canvas.draw()

ph's avatar
ph committed
130 131 132 133 134 135 136 137 138 139 140
    print("start OpenCL computations...")
    while t < Tmax:
        t = t + dt
        iter = iter + 1
        #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu)
        event = prg.time_step(queue, (nx * ny, ), (64, ), fn_gpu, fnp1_gpu)
        #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu, wait_for = [event])
        event.wait()
        elapsed += 1e-9 * (event.profile.end - event.profile.start)
        # exchange buffer references for avoiding a copy
        fn_gpu, fnp1_gpu = fnp1_gpu, fn_gpu
Matthieu Boileau's avatar
Matthieu Boileau committed
141 142 143 144 145 146 147 148 149 150 151 152 153 154
        title = "iter = {}, t = {:f}, elapsed (s) = {:f}".format(iter, t, elapsed)
        if animate:
            if iter % iterplot == 0:
                cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()
                wplot = np.reshape(fn_cpu, (4, m, nx, ny))
                plt.clf()
                #plt.imshow(np.sum(wplot, axis = 0),vmin=0, vmax=1)
                fig.suptitle(title)
                plt.imshow(np.sum(wplot[:, _ivplot, :, :], axis = 0))
                plt.gca().invert_yaxis()
                plt.colorbar()
                fig.canvas.draw()
        else:
            print(title, end='\r')
ph's avatar
ph committed
155 156 157 158

    # copy OpenCL data to CPU and return the results
    cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()

ph's avatar
up  
ph committed
159
    wplot_gpu = np.reshape(fn_cpu,(4, m, nx, ny))
ph's avatar
ph committed
160 161
    return wplot_gpu

Matthieu Boileau's avatar
Matthieu Boileau committed
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186

if __name__ == '__main__':
    # gpu solve
    wplot_gpu = solve_ocl(animate=False)
    #print(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0))
    plt.clf()
    #plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0), vmin=0, vmax=1)
    plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0))
    plt.gca().invert_yaxis()
    plt.colorbar()
    plt.show()

    # for iv in range(4):
    #     plt.imshow(wplot_gpu[iv,:,:])
    #     plt.gca().invert_yaxis()
    #     plt.colorbar()
    #     plt.show()


    # check difference
    # plt.clf()
    # plt.imshow(wplot_cpu-wplot_gpu)
    # plt.gca().invert_yaxis()
    # plt.colorbar()
    # plt.show()
ph's avatar
ph committed
187 188