Commit 1a3be6c8 authored by Philippe Helluy's avatar Philippe Helluy

kernel volume computation ok

parent fb338180
......@@ -5,12 +5,164 @@
#define _NP (_DEG + 1)
#define _NPG_ROW ((_DEG + 1) * _RAF)
#define _NPG_FACE (_NPG_ROW * _NPG_ROW)
#define _NPG_MACROCELL (_NPG_ROW * _NPG_FACE)
// number of nodes in each hexaedron cell
#define _NHEXA 8
__constant int npg = _NP * _NP * _NP * _RAF * _RAF * _RAF;
int GenericVarindex(int ipg, int iv) {
//! Gauss LObatto Points (GLOP) up to order 4
__constant float gauss_lob_point[] = {
0.5,
0,
1,
0,
0.5,
1,
0,
0.276393202250021030359082633127,
0.723606797749978969640917366873,
1,
0,
0.172673164646011428100853771877,
0.5,
0.827326835353988571899146228123,
1
};
//! GLOP weights up to order 4
__constant float gauss_lob_weight[] = {
1,
0.5,
0.5,
0.166666666666666666666666666667,
0.666666666666666666666666666668,
0.166666666666666666666666666667,
0.0833333333333333333333333333333,
0.416666666666666666666666666666,
0.416666666666666666666666666666,
0.0833333333333333333333333333333,
0.05,
0.272222222222222222222222222223,
0.355555555555555555555555555556,
0.272222222222222222222222222219,
0.05
};
//! indirection for finding the GLOP
//! data for a given degree in the previous arrays
__constant int gauss_lob_offset[] = {0, 1, 3, 6, 10};
__constant float gauss_lob_dpsi[] = {
0.0,
-1.,
-1.,
1.,
1.,
-3.,
-1.,
1.,
4.,
0.,
-4.,
-1.,
1.,
3.,
-6,
-1.61803398874989484820458683436,
.618033988749894848204586834362,
-1,
8.09016994374947424102293417177,
0,
-2.23606797749978969640917366872,
3.09016994374947424102293417184,
-3.09016994374947424102293417182,
2.23606797749978969640917366872,
0,
-8.09016994374947424102293417177,
1,
-.618033988749894848204586834362,
1.61803398874989484820458683436,
6,
-10,
-2.48198050606196571569743868436,
.75,
-.518019493938034284302561315632,
1,
13.5130049774484800076860550594,
0,
-2.67316915539090667050969419631,
1.52752523165194666886268239794,
-2.82032835588485332564727827404,
-5.33333333333333333333333333336,
3.49148624377587810025755976667,
0,
-3.49148624377587810025755976662,
5.33333333333333333333333333336,
2.82032835588485332564727827399,
-1.52752523165194666886268239791,
2.67316915539090667050969419635,
0,
-13.5130049774484800076860550594,
-1,
.518019493938034284302561315631,
-.75,
2.48198050606196571569743868437,
10
};
//! indirection for finding the GLOP
//! data for a given degree in the previous arrays
__constant int gauss_lob_dpsi_offset[] = {0, 1, 5, 14, 30};
float dlag(int deg, int ib, int ipg) {
return gauss_lob_dpsi[gauss_lob_dpsi_offset[deg] + ib * (deg + 1) + ipg];
}
//! \brief 1d GLOP weights for a given degree
//! \param[in] deg degree
//! \param[in] i glop index
//! \returns the glop weight
float wglop(int deg, int i) {
return gauss_lob_weight[gauss_lob_offset[deg] + i];
}
float glop(int deg, int i){
return gauss_lob_point[gauss_lob_offset[deg] + i];
}
int Varindex(int ipg, int iv) {
return ipg + npg * iv;
}
void NumFlux(float *wL, float *wR, float *vnorm, float *flux)
{
const float transport_v2d[] = {sqrt(0.5), sqrt(0.5), 0};
//const real transport_v2d[] = {1,0, 0};
float vn
= transport_v2d[0] * vnorm[0]
+ transport_v2d[1] * vnorm[1]
+ transport_v2d[2] * vnorm[2];
float vnp = vn > 0 ? vn : 0;
float vnm = vn - vnp;
flux[0] = vnp * wL[0] + vnm * wR[0];
/* if (fabs(vnorm[2])>1e-6) { */
/* printf("vnds %lf %lf %lf \n", vnorm[0], vnorm[1], vnorm[2]); */
/* } */
// verify that 2d computations are actually
// activated
//assert(fabs(vnorm[2]) < 1e-8);
}
void get_dtau(float x, float y, float z,
__constant float *p, float dtau[][3]){
float t1 = -1 + z;
......@@ -39,3 +191,104 @@ dtau[2][1] = -t17 * p[2] + t19 * p[5] - t19 * p[8] + t17 * p[11] + t23 * p[14] -
dtau[2][2] = -t30 * p[2] + t32 * p[5] - t34 * p[8] + t36 * p[11] + t30 * p[14] - t32 * p[17] + t34 * p[20] - t36 * p[23];
}
__kernel
void DGVolume(int ie, // 1: macrocel index
__constant float *physnodes, // 2: macrocell nodes
__global float *wn, // 3: field values
__global float *dtwn // 4: time derivative
)
{
__constant float *physnode = physnodes + ie * 3 * _NHEXA;
int ix = get_global_id(0);
int iy = get_global_id(1);
int iz = get_global_id(2);
int woffset = ie * _M * _NPG_MACROCELL;
// subcell id
int icL[3];
icL[0] = ix / _RAF;
icL[1] = iy / _RAF;
icL[2] = iz / _RAF;
int p[3];
p[0] = ix % _RAF;
p[1] = iy % _RAF;
p[2] = iz % _RAF;
// ref coordinates
float hx = 1. / _RAF;
float hy = 1. / _RAF;
float hz = 1. / _RAF;
int offset[3] = {gauss_lob_offset[_DEG] + p[0],
gauss_lob_offset[_DEG] + p[1],
gauss_lob_offset[_DEG] + p[2]};
float x = hx * (icL[0] + gauss_lob_point[offset[0]]);
float y = hy * (icL[1] + gauss_lob_point[offset[1]]);
float z = hz * (icL[2] + gauss_lob_point[offset[2]]);
float wpg = hx * hy * hz
* gauss_lob_weight[offset[0]]
* gauss_lob_weight[offset[1]]
* gauss_lob_weight[offset[2]];
float codtau[3][3];
{
float dtau[3][3];
get_dtau(x, y, z, physnode, dtau); // 1296 mults
codtau[0][0] = dtau[1][1] * dtau[2][2] - dtau[1][2] * dtau[2][1];
codtau[0][1] = -dtau[1][0] * dtau[2][2] + dtau[1][2] * dtau[2][0];
codtau[0][2] = dtau[1][0] * dtau[2][1] - dtau[1][1] * dtau[2][0];
codtau[1][0] = -dtau[0][1] * dtau[2][2] + dtau[0][2] * dtau[2][1];
codtau[1][1] = dtau[0][0] * dtau[2][2] - dtau[0][2] * dtau[2][0];
codtau[1][2] = -dtau[0][0] * dtau[2][1] + dtau[0][1] * dtau[2][0];
codtau[2][0] = dtau[0][1] * dtau[1][2] - dtau[0][2] * dtau[1][1];
codtau[2][1] = -dtau[0][0] * dtau[1][2] + dtau[0][2] * dtau[1][0];
codtau[2][2] = dtau[0][0] * dtau[1][1] - dtau[0][1] * dtau[1][0];
}
float wL[_M];
int ipgL = ix + _NPG_ROW * (iy + _NPG_ROW * iz);
for(int iv = 0; iv < _M; iv++){
int imemL = Varindex(ipgL, iv) + woffset;
wL[iv] = wn[imemL];
}
float flux[_M];
int q[3] = {p[0], p[1], p[2]};
// Loop on the "cross" points
for(int iq = 0; iq < _NP; iq++) {
q[0] = (p[0] + iq) % _NP;
float dphiref[3] = {0, 0, 0};
dphiref[0] = dlag(_DEG, q[0], p[0]) * _RAF;
float dphi[3];
for(int ii = 0; ii < 3; ii++) {
float *codtauii = codtau[ii];
dphi[ii]
= codtauii[0] * dphiref[0]
+ codtauii[1] * dphiref[1]
+ codtauii[2] * dphiref[2];
}
NumFlux(wL, wL, dphi, flux); // 3m mults when using NumFlux
int ipgR = (ix + iq) % _NP + _NPG_ROW * (iy + _NPG_ROW * iz);
for(int iv = 0; iv < _M; iv++) {
int imemR = Varindex(ipgR, iv) + woffset;
dtwn[imemR] += flux[iv] * wpg;
}
}
}
File mode changed from 100644 to 100755
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment