7
« 于: 八月 16, 2022, 04:25:52 pm »
代码如下所示。d_m_rgbbuffer 指向的是全局内存,该函数的作用是将 YUV 转换为 RGB。注释部分(*lpbmp++ 部分)就是把 RGB 数据写入全局内存,这一部分所花的时间占整个时间的 70%(一个线程处理 192 个数据)。请问应该如何进行优化?
/*
 * Convert one 8x8 block from three 64-entry planes to packed 24-bit pixels
 * and write it into the output image.
 *
 * QtZzMCUBuffer  192 shorts: entries [0,64) are read as y, [64,128) as u and
 *                [128,192) as v, each plane stored row-major as 8x8.
 *                u and v are used as-is; no bias is subtracted here.
 * dx, dy         pixel coordinates of the block's top-left corner in the image.
 * d_m_rgbbuffer  output image, 3 bytes per pixel in B, G, R order, with a row
 *                stride of 3 * d_m_width bytes.
 *
 * Rows at or beyond d_m_height and columns at or beyond d_m_width are skipped,
 * so blocks that overhang the bottom/right edge are written only partially.
 *
 * NOTE: if the three stores below are removed, nothing computed in this
 * function is observable any more, so the compiler is free to eliminate the
 * whole conversion. Timing the function with the stores commented out
 * therefore does not isolate the cost of the stores themselves.
 *
 * NOTE(review): each call writes up to 24 contiguous bytes per row; whether the
 * writes of neighbouring threads combine into wide memory transactions depends
 * on how the caller assigns dx/dy to threads -- confirm against the kernel.
 */
__device__ void StoreBuffer(short * QtZzMCUBuffer, short dx, short dy, unsigned char* d_m_rgbbuffer)
{
    // The bounds tests live in the loop conditions: the first out-of-range
    // row/column ends the loop, exactly like an explicit `else break`.
    for (int i = 0; i < 8 && (dy + i) < d_m_height; ++i)
    {
        const short* row = QtZzMCUBuffer + i * 8;

        // 64-bit offset so that very large images cannot overflow int.
        unsigned char* lpbmp =
            d_m_rgbbuffer + ((long long)(dy + i) * d_m_width + dx) * 3;

        for (int j = 0; j < 8 && (dx + j) < d_m_width; ++j)
        {
            const int y = row[j];
            const int u = row[j + 64];
            const int v = row[j + 128];

            // Fixed-point conversion: coefficients are scaled by 256 and the
            // sum is scaled back with an arithmetic shift. `y * 256` is used
            // instead of `y << 8` to avoid left-shifting a negative value.
            const int yy = y * 256;
            const int rr = (yy + 18 * u + 367 * v) >> 8;
            const int gg = (yy - 159 * u - 220 * v) >> 8;
            const int bb = (yy + 411 * u - 29 * v) >> 8;

            // Clamp each channel to [0, 255] and store the pixel as B, G, R.
            lpbmp[0] = (unsigned char)(bb < 0 ? 0 : (bb > 255 ? 255 : bb));
            lpbmp[1] = (unsigned char)(gg < 0 ? 0 : (gg > 255 ? 255 : gg));
            lpbmp[2] = (unsigned char)(rr < 0 ? 0 : (rr > 255 ? 255 : rr));
            lpbmp += 3;
        }
    }
}