#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Resolution of a transport equation by the finite volume method on regular grid regular python implementation compared to a pyopencl version """ from __future__ import absolute_import, print_function import pyopencl as cl import numpy as np import matplotlib.pyplot as plt import time ##################" definition of default values # number of conservative variables _m = 9 # number of kinetic variables _n = 4 * _m _ivplot = 0 # grid size _nx = 256 _ny = 256 Lx = 1 Ly = 1 _dx = Lx / _nx _dy = Ly / _ny # transport velocity vel = np.array([1., 1.]) # lattice speed _vmax = 20. # time stepping _Tmax = 10. / _vmax _Tmax = 2. cfl = 1 _dt = cfl * _dx / _vmax ############# end of default values def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, Tmax = _Tmax, vmax = _vmax, dx = _dx, dy = _dy, dt = _dt, animate = False, interactive=True): ff = "_F" # load and adjust C program source = open("lbm_kernels.cl", "r").read() source = source.replace("_nx_", "("+str(nx)+")") source = source.replace("_ny_", "("+str(ny)+")") source = source.replace("_dx_", "("+str(dx)+ ff + ")") source = source.replace("_dy_", "("+str(dy)+ ff + ")") source = source.replace("_dt_", "("+str(dt)+ ff + ")") source = source.replace("_m_", "("+str(m)+")") source = source.replace("_n_", "("+str(n)+")") source = source.replace("_vx_", "("+str(vel[0])+ ff + ")") source = source.replace("_vy_", "("+str(vel[1])+ ff + ")") source = source.replace("_lambda_", "("+str(vmax)+ ff + ")") source = source.replace("_F", "") #print(source) #exit(0) # OpenCL init ctx = cl.create_some_context() mf = cl.mem_flags # compile OpenCL C program prg = cl.Program(ctx, source).build(options = "-cl-strict-aliasing \ -cl-fast-relaxed-math") # create OpenCL buffers fn_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(4 * m * nx * ny * np.dtype('float64').itemsize)) fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(4 * m * nx * ny * np.dtype('float64').itemsize)) # create a queue (for submitting opencl operations) queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) # init data event = prg.init_sol(queue, (nx * ny, ), (32, ), fn_gpu) event.wait() # number of animation frames nbplots = 100 itermax = int(np.floor(Tmax / dt)) iterplot = int(itermax / nbplots) # time loop t = 0 iter = 0 elapsed = 0.; fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float64) if animate: fig = plt.gcf() fig.show() fig.canvas.draw() print("start OpenCL computations...") while t < Tmax: t = t + dt iter = iter + 1 #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu) event = prg.time_step(queue, (nx * ny, ), (64, ), fn_gpu, fnp1_gpu) #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu, wait_for = [event]) event.wait() elapsed += 1e-9 * (event.profile.end - event.profile.start) # exchange buffer references for avoiding a copy fn_gpu, fnp1_gpu = fnp1_gpu, fn_gpu title = "iter = {}, t = {:f}, elapsed (s) = {:f}".format(iter, t, elapsed) if animate: if iter % iterplot == 0: cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait() wplot = np.reshape(fn_cpu, (4, m, nx, ny)) plt.clf() #plt.imshow(np.sum(wplot, axis = 0),vmin=0, vmax=1) fig.suptitle(title) plt.imshow(np.sum(wplot[:, _ivplot, :, :], axis = 0)) plt.gca().invert_yaxis() plt.colorbar() fig.canvas.draw() else: print(title, end='\r') # copy OpenCL data to CPU and return the results cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait() wplot_gpu = np.reshape(fn_cpu,(4, m, nx, ny)) return wplot_gpu if __name__ == '__main__': # gpu solve wplot_gpu = solve_ocl(animate=False) #print(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0)) plt.clf() #plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0), vmin=0, vmax=1) plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0)) plt.gca().invert_yaxis() plt.colorbar() plt.show() # for iv in range(4): # plt.imshow(wplot_gpu[iv,:,:]) # plt.gca().invert_yaxis() # plt.colorbar() # plt.show() # check difference # plt.clf() # plt.imshow(wplot_cpu-wplot_gpu) # plt.gca().invert_yaxis() # plt.colorbar() # plt.show()