#!/usr/bin/env python3 # -*- coding: utf-8 -*- # resolution of a transport equation by the finite volume method # on regular grid # regular python implementation compared to a pyopencl version from __future__ import absolute_import, print_function import pyopencl as cl import numpy as np import matplotlib.pyplot as plt import time ##################" definition of default values # number of conservative variables _m = 2 # number of kinetic variables _n = 4 * _m _ivplot = 1 # grid size _nx = 64 _ny = 64 Lx = 2 * 3.14159265358979323846264338328 Ly = 2 * 3.14159265358979323846264338328 Lx = 1 Ly = 1 _dx = Lx / _nx _dy = Ly / _ny # transport velocity vel = np.array([1., 1.]) # lattice speed _vmax = 7. # time stepping _Tmax = 10. / _vmax #_Tmax = 0. cfl = 1 _dt = cfl * _dx / _vmax ############# end of default values def solve_ocl(m = _m, n = _n, nx = _nx, ny = _ny, Tmax = _Tmax, vmax = _vmax, dx = _dx, dy = _dy, dt = _dt, animate = False): # load and adjust C program source = open("lbm_kernels.cl", "r").read() source = source.replace("_nx_", "("+str(nx)+")") source = source.replace("_ny_", "("+str(ny)+")") source = source.replace("_dx_", "("+str(dx)+"f)") source = source.replace("_dy_", "("+str(dy)+"f)") source = source.replace("_dt_", "("+str(dt)+"f)") source = source.replace("_m_", "("+str(m)+")") source = source.replace("_n_", "("+str(n)+")") source = source.replace("_vx_", "("+str(vel[0])+"f)") source = source.replace("_vy_", "("+str(vel[1])+"f)") source = source.replace("_lambda_", "("+str(vmax)+"f)") #print(source) #exit(0) # OpenCL init ctx = cl.create_some_context() mf = cl.mem_flags # compile OpenCL C program prg = cl.Program(ctx, source).build(options = "-cl-strict-aliasing \ -cl-fast-relaxed-math") # create OpenCL buffers fn_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(4 * m * nx * ny * np.dtype('float32').itemsize)) fnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(4 * m * nx * ny * np.dtype('float32').itemsize)) # create a queue (for submitting opencl operations) queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) # init data event = prg.init_sol(queue, (nx * ny, ), (32, ), fn_gpu) event.wait() # number of animation frames nbplots = 100 itermax = int(np.floor(Tmax / dt)) iterplot = int(itermax / nbplots) # time loop t = 0 iter = 0 elapsed = 0.; fn_cpu = np.empty((4 * m * nx * ny, ), dtype = np.float32) print("start OpenCL computations...") while t < Tmax: t = t + dt iter = iter + 1 #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu) event = prg.time_step(queue, (nx * ny, ), (64, ), fn_gpu, fnp1_gpu) #event = prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu, wait_for = [event]) event.wait() elapsed += 1e-9 * (event.profile.end - event.profile.start) # exchange buffer references for avoiding a copy fn_gpu, fnp1_gpu = fnp1_gpu, fn_gpu print("iter=",iter, " t=",t, "elapsed (s)=",elapsed) if iter % iterplot == 0 and animate: cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait() wplot = np.reshape(fn_cpu, (4, m, nx, ny)) plt.clf() #plt.imshow(np.sum(wplot, axis = 0),vmin=0, vmax=1) plt.imshow(np.sum(wplot[:, _ivplot, :, :], axis = 0)) plt.gca().invert_yaxis() plt.colorbar() plt.pause(0.01) # copy OpenCL data to CPU and return the results cl.enqueue_copy(queue, fn_cpu, fn_gpu).wait() wplot_gpu = np.reshape(fn_cpu,(4, m, nx, ny)) return wplot_gpu # gpu solve wplot_gpu = solve_ocl(animate = True) #print(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0)) plt.clf() #plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0), vmin=0, vmax=1) plt.imshow(np.sum(wplot_gpu[:, _ivplot, :, :],axis=0)) plt.gca().invert_yaxis() plt.colorbar() plt.show() # for iv in range(4): # plt.imshow(wplot_gpu[iv,:,:]) # plt.gca().invert_yaxis() # plt.colorbar() # plt.show() # check difference # plt.clf() # plt.imshow(wplot_cpu-wplot_gpu) # plt.gca().invert_yaxis() # plt.colorbar() # plt.show()