非常感谢,我的Kernel是:
__global__ void KerCalFluidRepForce(int n, double3* totalcoor, int3* cc, int* ncube, int* sortpart, float* outerReigon, int2* beginendcube, float3* Repforce)
{
const int p = blockIdx.x*blockDim.x + threadIdx.x; //-Number of particle.
if (p < n)//n 指的是全体粒子数
{
const int p1 = p;
double shift = 0.0;
if(sortpart[p1] >= 0 && sortpart[p1] <CONSTANTS.nBound2Start)
{
Repforce[sortpart[p1]].x=0.0;
Repforce[sortpart[p1]].y=0.0;
Repforce[sortpart[p1]].z=0.0;
}
//-Obtains basic data of particle p1.
const double posp1x = __ldg(&totalcoor[p1].x);//获取当前p1粒子位置
const double posp1y = __ldg(&totalcoor[p1].y);//获取当前p1粒子位置
const double posp1z = __ldg(&totalcoor[p1].z);//获取当前p1粒子位置
//-Obtains neighborhood search limits.获取邻居搜索的极限
int ini1, fin1, ini2, fin2, ini3, fin3;//三个方向上的极限,
ini1 = cc[p1].x - 1;//cc.x为粒子所在格子的x方向坐标
fin1 = cc[p1].x +1;
ini2 = cc[p1].y - min(cc[p1].y, 1);//cc.y为粒子所在格子的y方向坐标
fin2 = cc[p1].y + ((ncube[1] - cc[p1].y - 1)>0 ? 1 : 0);
ini3 = cc[p1].z - min(cc[p1].z, 1);//cc.z为粒子所在格子的z方向坐标
fin3 = cc[p1].z + ((ncube[2] - cc[p1].z - 1)>0 ? 1 : 0);
//-Interaction with Particles.
for (int m3 = ini3; m3 < fin3 + 1; m3++)
{
for (int m2 = ini2; m2 < fin2 + 1; m2++)
{
for (int m1 = ini1; m1 < fin1+1; m1++)
{
//periodic condition on the x directions
int k1;
if (m1 == -1)
{
k1 = ncube[0] - 1;
shift = outerReigon[0];
}
else if (m1 == ncube[0])
{
k1 = 0;
shift = -outerReigon[0];
}
else
{
k1 = m1;
shift = 0.0;
}
int pini, pfin = 0;
cusph::ParticleRange(beginendcube, ncube, k1, m2, m3, pini, pfin);
for (int p2 = pini; p2 < pfin; p2++)
{
if (sortpart[p1] >= 0 && sortpart[p1] <CONSTANTS.nBound2Start && sortpart[p2] >= CONSTANTS.nBound1Start && sortpart[p2] < CONSTANTS.nTotalPar)//sortpart[p1]-FluidPar,sortpart[p2]-Bound1Par
{
double FRep;
double chi, eta, feta;
float h = 0.5;//initial distance between the boundary particle and fluid particle
float RepCoef = 0.01;
const double posp2x = __ldg(&totalcoor[p2].x);//获取当前p2粒子位置
const double posp2y = __ldg(&totalcoor[p2].y);//获取当前p2粒子位置
const double posp2z = __ldg(&totalcoor[p2].z);//获取当前p2粒子位置
double drx = posp1x - posp2x + shift;
double dry = posp1y - posp2y;
double drz = posp1z - posp2z;
double rr2 = drx*drx + dry*dry + drz*drz;
double rr = sqrt(rr2);
if (rr < 1.5*h)
chi = 1.0 - rr / (1.5*h);
else
chi = 0.0;
eta = rr / (0.75*h);
if (eta < 2.0 / 3.0)
feta = 2.0 / 3.0;
else if (eta < 1.0)
feta = 2.0 * eta - 1.5*eta*eta;
else if (eta < 2.0)
feta = 0.5*(2 - eta)*(2 - eta);
else
feta = 0.0;
if (chi>0 && feta>0)
{
FRep = RepCoef*CONSTANTS.SoundSpeed*CONSTANTS.SoundSpeed*chi * feta / (rr*rr) *CONSTANTS.Mass;
Repforce[sortpart[p1]].x += FRep*drx;
Repforce[sortpart[p1]].y += FRep*dry;
Repforce[sortpart[p1]].z += FRep*drz;
}
}//sortpart[p1]-FluidPar,sortpart[p2]-Bound1Par
}//p2
}//m1
}//m2
}//m3
if(p1==3203)
printf("%d, %f,%f ,%f BBBB\n ",sortpart[p1],Repforce[sortpart[p1]].x,Repforce[sortpart[p1]].y,Repforce[sortpart[p1]].z);
}//if (p < n)
}
它是计算粒子排斥力的一个核函数,我在最后输出的if(p1==3203)就是输出经过核函数计算之后的sortpart[p1]粒子的排斥力三个分量,这个输出可以得到正确结果