lbm_cl.py 4.04 KB
Newer Older
ph's avatar
ph committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
#!/usr/bin/env python3
# -*- coding: utf-8 -*-


# resolution of a transport equation by the finite volume method
# on regular grid

# regular python implementation compared to a pyopencl version


from __future__ import absolute_import, print_function
import pyopencl as cl
import numpy as np
import matplotlib.pyplot as plt

import time


##################" definition of default values
# number of conservative variables
_m = 1

# number of kinetic variables
_n = 4 * _m


# grid size
_nx = 64
_ny = 64

Lx = 1.
Ly = 1.

_dx = Lx / _nx
_dy = Ly / _ny

# transport velocity
vel = np.array([1., 1.])

# lattice speed
_vmax = 2.


# time stepping
_Tmax = 0.5 / _vmax
cfl = 1

_dt = cfl * _dx / _vmax
############# end of default values

def exact_sol(xy, t):
    x = xy[0] - t * vel[0] - 0.5
    y = xy[1] - t * vel[1] - 0.5
    d2 = x * x + y *y
    w = np.exp(-30*d2)
    return w



def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny,
              Tmax = _Tmax, vmax = _vmax,
              dx = _dx, dy = _dy,
              dt = _dt, exact_sol = exact_sol,
              animate = False):

    # load and adjust  C program
    source = open("lbm_kernels.cl", "r").read()
    source = source.replace("_nx_", "("+str(nx)+")")
    source = source.replace("_ny_", "("+str(ny)+")")
    source = source.replace("_dx_", "("+str(dx)+"f)")
    source = source.replace("_dy_", "("+str(dy)+"f)")
    source = source.replace("_dt_", "("+str(dt)+"f)")
    source = source.replace("_m_", "("+str(m)+")")
    source = source.replace("_n_", "("+str(n)+")")    
    source = source.replace("_vx_", "("+str(vel[0])+"f)")
    source = source.replace("_vy_", "("+str(vel[1])+"f)")
    source = source.replace("_lambda_", "("+str(vmax)+"f)")

    #print(source)

    #exit(0)

    # OpenCL init
    ctx = cl.create_some_context()
    mf = cl.mem_flags

    # compile OpenCL C program
    prg = cl.Program(ctx, source).build(options = "-cl-strict-aliasing  \
                                                  -cl-fast-relaxed-math")

    # create OpenCL buffers
    fn_gpu   = cl.Buffer(ctx, mf.READ_WRITE,
                         size=(4 * m * nx * ny * np.dtype('float32').itemsize))
    fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE,
                         size=(4 * m * nx * ny * np.dtype('float32').itemsize))

    # create a queue (for submitting opencl operations)
    queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)


    # init data
    event = prg.init_sol(queue, (nx * ny, ), (32, ), fn_gpu)
    event.wait()

    # number of animation frames
    nbplots = 10
    itermax = int(np.floor(Tmax / dt))
    iterplot = int(itermax / nbplots)

    # time loop
    t = 0
    iter = 0
    elapsed = 0.;
    fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float32)

    print("start OpenCL computations...")
    while t < Tmax:
        t = t + dt
        iter = iter + 1
        #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu)
        event = prg.time_step(queue, (nx * ny, ), (64, ), fn_gpu, fnp1_gpu)
        #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu, wait_for = [event])
        event.wait()
        elapsed += 1e-9 * (event.profile.end - event.profile.start)
        # exchange buffer references for avoiding a copy
        fn_gpu, fnp1_gpu = fnp1_gpu, fn_gpu
        print("iter=",iter, " t=",t, "elapsed (s)=",elapsed)
        if iter % iterplot == 0 and animate:
            cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()
            wplot = np.reshape(fn_cpu, (nx, ny))
            plt.clf()
            plt.imshow(wplot,vmin=0, vmax=1)
            plt.gca().invert_yaxis()
            plt.colorbar()
            plt.pause(0.01)

    # copy OpenCL data to CPU and return the results
    cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait()

    wplot_gpu = np.reshape(fn_cpu,(nx, ny))
    return wplot_gpu

# gpu solve
wplot_gpu = solve_ocl()
plt.clf()
plt.imshow(wplot_gpu, vmin=0, vmax=1)
plt.gca().invert_yaxis()
plt.colorbar()
plt.show()


# check difference
# plt.clf()
# plt.imshow(wplot_cpu-wplot_gpu)
# plt.gca().invert_yaxis()
# plt.colorbar()
# plt.show()