diff --git a/transport_cl.py b/transport_cl.py new file mode 100644 index 0000000000000000000000000000000000000000..9a1fc3591f8008aa03486ade84ec9be6ad1bb153 --- /dev/null +++ b/transport_cl.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from __future__ import absolute_import, print_function +import pyopencl as cl +import numpy as np +import matplotlib.pyplot as plt + +# number of conservative variables +m = 1 + +# boundary +wbord = 0. + + +# grid size +nx = 50 +ny = 50 +Lx = 1. +Ly = 1. + +dx = Lx / nx +dy = Ly / ny + +# transport velocity +vel = np.array([1., 1.]) + +vmax = np.sqrt(vel[0]**2+vel[1]**2) + +# time stepping +Tmax = 1 +cfl = 0.8 + +iterplot = 5000 + +dt = cfl * (dx * dy) / (2 * dx + 2 * dy) / vmax + +def fluxnum(wL, wR, vnorm): + vdotn = vel[0] * vnorm[0] + vel[1] * vnorm[1] + vnp = 0 + if vdotn > 0: + vnp = vdotn + vnm = vdotn - vnp + flux = vnm * wR + vnp * wL + return flux + +def exact_sol(xy, t): + x = xy[0] - t * vel[0] - 0.5 + y = xy[1] - t * vel[1] - 0.5 + d2 = x * x + y *y + w = np.exp(-30*d2) + return w + +def solve_python(wn): + #init + t = 0. + wn[:] = wbord + for i in range(1,nx-1): + for j in range(1,ny-1): + xy=[i*dx+dx/2,j*dy+dy/2] + wn[i,j] = exact_sol(xy, t) + #print("i j", xy,wn[i,j]) + + wnp1 = np.zeros((nx,ny)) + np.copyto(wnp1, wn) + iter = 0 + while t < Tmax: + t = t+dt + iter += 1 + print("iter=",iter, " t=",t) + for i in range(1,nx-1): + for j in range(1,ny-1): + wnp1[i,j] -= dt / dx * fluxnum(wn[i,j],wn[i+1,j],np.array([1,0])) + wnp1[i,j] -= dt / dx * fluxnum(wn[i,j],wn[i-1,j],np.array([-1,0])) + wnp1[i,j] -= dt / dy * fluxnum(wn[i,j],wn[i,j+1],np.array([0,1])) + wnp1[i,j] -= dt / dy * fluxnum(wn[i,j],wn[i,j-1],np.array([0,-1])) + + np.copyto(wn, wnp1) + + + +source = open("transport_kernels.cl", "r").read() + +source = source.replace("_nx_", "("+str(nx)+")") +source = source.replace("_ny_", "("+str(ny)+")") +source = source.replace("_dx_", "("+str(dx)+"f)") +source = source.replace("_dy_", "("+str(dy)+"f)") +source = source.replace("_dt_", "("+str(dt)+"f)") +source = source.replace("_m_", "("+str(m)+")") +source = source.replace("_wbord_", "("+str(wbord)+"f)") +source = source.replace("_vx_", "("+str(vel[0])+"f)") +source = source.replace("_vy_", "("+str(vel[1])+"f)") + +#print(source) + +#exit(0) + +ctx = cl.create_some_context() +queue = cl.CommandQueue(ctx) +mf = cl.mem_flags + +# taille = 2**8 +# +# x_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=(taille * np.dtype('float32').itemsize)) +# y_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=(taille * np.dtype('float32').itemsize)) +# +# z_cpu = np.empty((taille, ), dtype = np.float32) +# z_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, z_cpu.nbytes) +# +# verif_cpu = np.fromfunction(lambda i: i * i + i, (taille, ), dtype = np.float32) + +wn_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(nx * ny * np.dtype('float32').itemsize)) +wn_cpu = np.empty((nx * ny, ), dtype = np.float32) +wnp1_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=(nx * ny * np.dtype('float32').itemsize)) + +prg = cl.Program(ctx, source).build() + +# init data +event = prg.init_sol(queue, (nx * ny, ), (32, ), wn_gpu).wait() + +# time loop on cpu + + +# time loop on gpu +t = 0 +iter = 0 +while t < Tmax: + t = t + dt + iter = iter + 1 + prg.time_step(queue, (nx * ny, ), (32, ), wn_gpu, wnp1_gpu).wait() + wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu + print("iter=",iter, " t=",t) + if iter % iterplot == 0: + cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait() + wplot = np.reshape(wn_cpu, (nx, ny)) + plt.clf() + plt.imshow(wplot) + plt.pause(0.01) + +if iter % 2 != 0: + wn_gpu, wnp1_gpu = wnp1_gpu, wn_gpu + + +# prg.K2(queue, (taille, ), None, y_gpu).wait() +# prg.K3(queue, (taille, ), None , x_gpu, y_gpu, z_gpu, wait_for = [event]).wait() +# +cl.enqueue_copy(queue, wn_cpu, wn_gpu).wait() +# +wplot = np.reshape(wn_cpu,(nx, ny)) + +plt.clf() +plt.imshow(wplot) +plt.show() + +solve_python(wplot) + +plt.clf() +plt.imshow(wplot[1:nx-1,1:ny-1]) +plt.show() + + + diff --git a/transport_kernels.cl b/transport_kernels.cl new file mode 100644 index 0000000000000000000000000000000000000000..8ebbe78d0aca88d220d4ccc064bd18bc17c6c92e --- /dev/null +++ b/transport_kernels.cl @@ -0,0 +1,127 @@ +#define _NX _nx_ +#define _NY _ny_ +#define _DX _dx_ +#define _DY _dy_ +#define _DT _dt_ +#define _M _m_ + +#define _VX _vx_ +#define _VY _vy_ + +#define _WBORD _wbord_ + + +#define _VOL (_DX * _DY) + +__constant int dir[4][2] = { {1, 0}, {-1, 0}, + {0, 1}, {0, -1}}; + +__constant float ds[4] = { _DY, _DY, _DX, _DX }; + + + +void fluxnum(float *wL, float *wR, float* vnorm, float* flux){ + float vdotn = _VX * vnorm[0] + _VY * vnorm[1]; + float vnp = vdotn > 0 ? vdotn : 0; + float vnm = vdotn - vnp; + + for(int iv = 0; iv < _M; iv++){ + flux[iv] = vnm * wR[iv] + vnp * wL[iv]; + } +} + +void exact_sol(float* xy, float t, float* w){ + float x = xy[0] - t * _VX - 0.5f; + float y = xy[1] - t * _VY - 0.5f; + float d2 = x * x + y *y; + for(int iv = 0; iv < _M; iv++) + w[0] = exp(-30*d2); +} + +__kernel void init_sol(__global float *wn){ + + int id = get_global_id(0); + + int i = id % _NX; + int j = id / _NX; + + int ngrid = _NX * _NY; + + float wnow[_M]; + + float t = 0; + float xy[2] = {i * _DX + _DX / 2, j * _DY + _DY / 2}; + + exact_sol(xy, t, wnow); + //printf("x=%f, y=%f \n",xy[0],xy[1]); + // load middle value + for(int iv = 0; iv < _M; iv++){ + int imem = i + j * _NX + iv * ngrid; + wn[imem] = wnow[iv]; + } + +} + + + + +__kernel void time_step(__global float *wn, __global float *wnp1){ + + int id = get_global_id(0); + + int i = id % _NX; + int j = id / _NX; + + int ngrid = _NX * _NY; + + float wnow[_M]; + float wnext[_M]; + + // load middle value + for(int iv = 0; iv < _M; iv++){ + int imem = i + j * _NX + iv * ngrid; + wnow[iv] = wn[imem]; + wnext[iv] = wnow[iv]; + } + + float flux[_M]; + + // loop on all directions: + // idir = 0 (east) + // idir = 1 (west) + // idir = 2 (north) + // idir = 3 (south) + for(int idir = 0; idir < 4; idir++){ + float vn[2]; + vn[0] = dir[idir][0]; + vn[1] = dir[idir][1]; + int iR = i + dir[idir][0]; + int jR = j + dir[idir][1]; + // load neighbour values if accessible + float wR[_M]; + for(int iv = 0; iv < _M; iv++) wR[iv] = _WBORD; + if ((iR >= 0) && (iR <= _NX - 1) && (jR >= 0) && (jR <= _NY - 1)){ + for(int iv = 0; iv < _M; iv++){ + int imem = iv * ngrid + iR + jR * _NX; + wR[iv] = wn[imem]; + } + } + fluxnum(wnow, wR, vn, flux); + + // time evolution + for(int iv = 0; iv < _M; iv++){ + wnext[iv] -= _DT * ds[idir] / _VOL * flux[iv]; + } + + + } + + // copy wnext into global memory + + for(int iv = 0; iv < _M; iv++){ + int imem = iv * ngrid + i + j * _NX; + wnp1[imem] = wnext[iv]; + } + + +}