lbm_cl.py 4.04 KB
 ph committed Nov 28, 2017 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 ``````#!/usr/bin/env python3 # -*- coding: utf-8 -*- # resolution of a transport equation by the finite volume method # on regular grid # regular python implementation compared to a pyopencl version from __future__ import absolute_import, print_function import pyopencl as cl import numpy as np import matplotlib.pyplot as plt import time ##################" definition of default values # number of conservative variables _m = 1 # number of kinetic variables _n = 4 * _m # grid size _nx = 64 _ny = 64 Lx = 1. Ly = 1. _dx = Lx / _nx _dy = Ly / _ny # transport velocity vel = np.array([1., 1.]) # lattice speed _vmax = 2. # time stepping _Tmax = 0.5 / _vmax cfl = 1 _dt = cfl * _dx / _vmax ############# end of default values def exact_sol(xy, t): x = xy[0] - t * vel[0] - 0.5 y = xy[1] - t * vel[1] - 0.5 d2 = x * x + y *y w = np.exp(-30*d2) return w def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, Tmax = _Tmax, vmax = _vmax, dx = _dx, dy = _dy, dt = _dt, exact_sol = exact_sol, animate = False): # load and adjust C program source = open("lbm_kernels.cl", "r").read() source = source.replace("_nx_", "("+str(nx)+")") source = source.replace("_ny_", "("+str(ny)+")") source = source.replace("_dx_", "("+str(dx)+"f)") source = source.replace("_dy_", "("+str(dy)+"f)") source = source.replace("_dt_", "("+str(dt)+"f)") source = source.replace("_m_", "("+str(m)+")") source = source.replace("_n_", "("+str(n)+")") source = source.replace("_vx_", "("+str(vel[0])+"f)") source = source.replace("_vy_", "("+str(vel[1])+"f)") source = source.replace("_lambda_", "("+str(vmax)+"f)") #print(source) #exit(0) # OpenCL init ctx = cl.create_some_context() mf = cl.mem_flags # compile OpenCL C program prg = cl.Program(ctx, source).build(options = "-cl-strict-aliasing \ -cl-fast-relaxed-math") # create OpenCL buffers fn_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(4 * m * nx * ny * np.dtype('float32').itemsize)) fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(4 * m * nx * ny * np.dtype('float32').itemsize)) # create a queue (for submitting opencl operations) queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) # init data event = prg.init_sol(queue, (nx * ny, ), (32, ), fn_gpu) event.wait() # number of animation frames nbplots = 10 itermax = int(np.floor(Tmax / dt)) iterplot = int(itermax / nbplots) # time loop t = 0 iter = 0 elapsed = 0.; fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float32) print("start OpenCL computations...") while t < Tmax: t = t + dt iter = iter + 1 #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu) event = prg.time_step(queue, (nx * ny, ), (64, ), fn_gpu, fnp1_gpu) #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu, wait_for = [event]) event.wait() elapsed += 1e-9 * (event.profile.end - event.profile.start) # exchange buffer references for avoiding a copy fn_gpu, fnp1_gpu = fnp1_gpu, fn_gpu print("iter=",iter, " t=",t, "elapsed (s)=",elapsed) if iter % iterplot == 0 and animate: cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait() wplot = np.reshape(fn_cpu, (nx, ny)) plt.clf() plt.imshow(wplot,vmin=0, vmax=1) plt.gca().invert_yaxis() plt.colorbar() plt.pause(0.01) # copy OpenCL data to CPU and return the results cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait() wplot_gpu = np.reshape(fn_cpu,(nx, ny)) return wplot_gpu # gpu solve wplot_gpu = solve_ocl() plt.clf() plt.imshow(wplot_gpu, vmin=0, vmax=1) plt.gca().invert_yaxis() plt.colorbar() plt.show() # check difference # plt.clf() # plt.imshow(wplot_cpu-wplot_gpu) # plt.gca().invert_yaxis() # plt.colorbar() # plt.show() ``````