/* *********************************************************************************
* FILE: yuv2rgbIVP.c
*
* DESCRIPTION:
*
* This file contains the IVP implementations of RGB to YUV conversion
* and YUV to RGB conversion. The following two sets of formulae are taken
* from Keith Jack's excellent book "Video Demystified" (ISBN 1-878707-09-4).
*
* RGB to YUV Conversion
* Y = (0.257 * R) + (0.504 * G) + (0.098 * B) + 16
* Cr = V = (0.439 * R) - (0.368 * G) - (0.071 * B) + 128
* Cb = U = -(0.148 * R) - (0.291 * G) + (0.439 * B) + 128
*
* YUV to RGB Conversion
* B = 1.164(Y - 16) + 2.018(U - 128)
* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
* R = 1.164(Y - 16) + 1.596(V - 128)
*
* ********************************************************************************* */
/* *********************************************************************************
* FUNCTION: xvConvertRgbToYuv()
*
* IVP-EP vectorized conversion of three separate R, G and B planes into one
* planar YUV image.
*
* DESCRIPTION
*
* RGB to YUV Conversion
* Y      =  (0.257 * R) + (0.504 * G) + (0.098 * B) + 16
* Cr = V =  (0.439 * R) - (0.368 * G) - (0.071 * B) + 128
* Cb = U = -(0.148 * R) - (0.291 * G) + (0.439 * B) + 128
*
* The coefficients come from YC1..YC3, VC1..VC3 and UC1..UC3, which are
* defined outside this function. Each weighted sum is shifted right by
* 7 bits when it is packed back to 8 bits, which matches coefficients that
* were scaled by 128.
*
* The image is processed in vertical strips that are one xb_vec2Nx8 vector
* wide; inside a strip the code steps from row to row by advancing every
* pointer by `width` bytes. Only width / (2 * XCHAL_IVPN_SIMD_WIDTH) strips
* are visited, so columns beyond the last complete strip are neither read
* nor written.
* NOTE(review): the vector loads/stores presumably need the plane pointers
* and `width` to satisfy the vector alignment - confirm against the callers.
*
* INPUTS:
* uint8_t *pr      pointer to input component R
* uint8_t *pg      pointer to input component G
* uint8_t *pb      pointer to input component B
* int32_t  width   image width
* int32_t  height  image height
*
* OUTPUTS:
* uint8_t *pyuv    pointer to output image (yuv): Y plane at offset 0,
*                  U plane at width x height, V plane at 2 x width x height
*
* RETURNS:
* Always 1.
*
* ********************************************************************************* */
int32_t xvConvertRgbToYuv(uint8_t *pr, uint8_t *pg, uint8_t *pb, uint8_t *pyuv, int32_t width, int32_t height)
{
    int32_t strip, row;
    int32_t stripCount;
    int32_t planeSize;
    int32_t packShift;

    /* Start of each output plane inside pyuv. */
    uint8_t *planeY;
    uint8_t *planeU;
    uint8_t *planeV;

    /* Offsets added after packing and the nine per-channel coefficients. */
    xb_vec2Nx8 offsetLuma, offsetChroma;
    xb_vec2Nx8 coefYR, coefYG, coefYB;
    xb_vec2Nx8 coefVR, coefVG, coefVB;
    xb_vec2Nx8 coefUR, coefUG, coefUB;

    /* Working pixel vectors and wide accumulators. */
    xb_vec2Nx8 vecR, vecG, vecB;
    xb_vec2Nx8 vecY, vecU, vecV;
    xb_vec2Nx24 accY, accV, accU;

    /* Top-of-strip cursors (advance one vector per strip). */
    xb_vec2Nx8 * __restrict stripR;
    xb_vec2Nx8 * __restrict stripG;
    xb_vec2Nx8 * __restrict stripB;
    xb_vec2Nx8 * __restrict stripY;
    xb_vec2Nx8 * __restrict stripV;
    xb_vec2Nx8 * __restrict stripU;

    /* Row cursors (advance `width` bytes per row inside a strip). */
    xb_vec2Nx8 * __restrict rowR;
    xb_vec2Nx8 * __restrict rowG;
    xb_vec2Nx8 * __restrict rowB;
    xb_vec2Nx8 * __restrict rowY;
    xb_vec2Nx8 * __restrict rowV;
    xb_vec2Nx8 * __restrict rowU;

    planeSize = width * height;
    planeY = (uint8_t *) pyuv;
    planeU = pyuv + planeSize;
    planeV = pyuv + (2 * planeSize);

    packShift = 7;

    coefYR = YC1;
    coefYG = YC2;
    coefYB = YC3;
    coefVR = VC1;
    coefVG = VC2;
    coefVB = VC3;
    coefUR = UC1;
    coefUG = UC2;
    coefUB = UC3;

    offsetChroma = 128;
    offsetLuma = 16;

    stripR = (xb_vec2Nx8 *) pr;
    stripG = (xb_vec2Nx8 *) pg;
    stripB = (xb_vec2Nx8 *) pb;
    stripY = (xb_vec2Nx8 *) planeY;
    stripU = (xb_vec2Nx8 *) planeU;
    stripV = (xb_vec2Nx8 *) planeV;

    stripCount = width / (XCHAL_IVPN_SIMD_WIDTH * 2);

    for (strip = 0; strip < stripCount; strip++)
    {
        /* Restart every row cursor at the top of the current strip ... */
        rowR = stripR;
        rowG = stripG;
        rowB = stripB;
        rowY = stripY;
        rowV = stripV;
        rowU = stripU;

        /* ... and move the strip cursors on to the next strip. */
        stripR++;
        stripG++;
        stripB++;
        stripY++;
        stripV++;
        stripU++;

        for (row = 0; row < height; row++)
        {
            /* Fetch one vector of each colour, then step down one row. */
            IVP_LV2NX8_XP(vecR, rowR, width);
            IVP_LV2NX8_XP(vecG, rowG, width);
            IVP_LV2NX8_XP(vecB, rowB, width);

            /* Y = (0.257 * R) + (0.504 * G) + (0.098 * B) + 16 */
            accY = IVP_MULUUP2NX8(vecR, coefYR, vecG, coefYG);
            IVP_MULUUA2NX8(accY, vecB, coefYB);
            vecY = IVP_PACKVRU2NX24(accY, packShift);  /* unsigned pack to 8 bits */
            vecY = IVP_ADD2NX8(vecY, offsetLuma);      /* +16, wrap-around add */
            IVP_SV2NX8_XP(vecY, rowY, width);

            /* V = (0.439 * R) - (0.368 * G) - (0.071 * B) + 128 */
            accV = IVP_MULUSP2NX8(vecG, coefVG, vecB, coefVB);
            IVP_MULUUA2NX8(accV, vecR, coefVR);
            vecV = IVP_PACKVR2NX24(accV, packShift);   /* signed pack to 8 bits */
            vecV = IVP_ADD2NX8(vecV, offsetChroma);    /* +128, wrap-around add */
            IVP_SV2NX8_XP(vecV, rowV, width);

            /* U = -(0.148 * R) - (0.291 * G) + (0.439 * B) + 128 */
            accU = IVP_MULUSP2NX8(vecG, coefUG, vecB, coefUB);
            IVP_MULUSA2NX8(accU, vecR, coefUR);
            vecU = IVP_PACKVR2NX24(accU, packShift);   /* signed pack to 8 bits */
            vecU = IVP_ADD2NX8(vecU, offsetChroma);    /* +128, wrap-around add */
            IVP_SV2NX8_XP(vecU, rowU, width);
        }
    }

    return(1);
}
/* *********************************************************************************
* FUNCTION: xvConvertYuvToRgb()
*
* IVP-EP vectorized implementation to convert from YUV pixel values to RGB format.
*
* DESCRIPTION
*
* YUV to RGB Conversion
* R = 1.164(Y - 16) + 1.596(V - 128)
* B = 1.164(Y - 16) + 2.018(U - 128)
* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
*
* The coefficients come from C1164, C2018, C1596, C0813 and C0391, which are
* defined outside this function. Each weighted sum is shifted right by
* 7 bits when it is packed back to 8 bits, which matches coefficients that
* were scaled by 128.
*
* The luma offset is applied inside the wide accumulator as
* Y * C1164 + (-16) * C1164 instead of subtracting 16 from the 8-bit pixel
* first. An 8-bit subtract wraps for Y < 16, and the wrapped value would then
* be multiplied as a large unsigned number, turning below-black luma into
* near-white output.
*
* The image is processed in vertical strips that are one xb_vec2Nx8 vector
* wide; inside a strip the code steps from row to row by advancing every
* pointer by `width` bytes. Only width / (2 * XCHAL_IVPN_SIMD_WIDTH) strips
* are visited, so columns beyond the last complete strip are neither read
* nor written.
* NOTE(review): the vector loads/stores presumably need the plane pointers
* and `width` to satisfy the vector alignment - confirm against the callers.
*
* INPUTS:
* uint8_t *pyuv    pointer to input image (yuv): Y plane at offset 0,
*                  U plane at width x height, V plane at 2 x width x height
* int32_t  width   image width
* int32_t  height  image height
*
* OUTPUTS:
* uint8_t *pR      pointer to output component R
* uint8_t *pG      pointer to output component G
* uint8_t *pB      pointer to output component B
*
* RETURNS:
* Always 0.
* NOTE(review): xvConvertRgbToYuv() returns 1 instead; the value is kept as
* is here because callers may depend on it.
*
* ********************************************************************************* */
int32_t xvConvertYuvToRgb(uint8_t *pyuv, uint8_t *pR, uint8_t *pG, uint8_t *pB, int32_t width, int32_t height)
{
    int32_t i, j;
    int32_t numStrips;
    uint8_t *pY, *pV, *pU;
    int32_t shift, Const_0813_0391;
    int16_t Const_1164, Const_2018, Const_1596;
    xb_vec2Nx8 dvecY, dvecU, dvecV, dvecR, dvecG, dvecB;
    xb_vec2Nx8 dvecUm128, dvecVm128;
    xb_vec2Nx24 daccY;
    xb_vec2Nx8 dvecNeg16, dvec128;
    xb_vec2Nx8 * __restrict pdvecDataY;
    xb_vec2Nx8 * __restrict pdvecDataV;
    xb_vec2Nx8 * __restrict pdvecDataU;
    xb_vec2Nx8 * __restrict pdvecDataR;
    xb_vec2Nx8 * __restrict pdvecDataG;
    xb_vec2Nx8 * __restrict pdvecDataB;
    xb_vec2Nx8 * __restrict pdvecDataRTemp;
    xb_vec2Nx8 * __restrict pdvecDataGTemp;
    xb_vec2Nx8 * __restrict pdvecDataBTemp;
    xb_vec2Nx8 * __restrict pdvecDataYTemp;
    xb_vec2Nx8 * __restrict pdvecDataUTemp;
    xb_vec2Nx8 * __restrict pdvecDataVTemp;

    // pointers to planar y, u and v components
    pY = pyuv;
    pU = pyuv + (width * height);
    pV = pyuv + (width * height * 2);

    pdvecDataY = (xb_vec2Nx8 *) pY;
    pdvecDataV = (xb_vec2Nx8 *) pV;
    pdvecDataU = (xb_vec2Nx8 *) pU;
    pdvecDataR = (xb_vec2Nx8 *) pR;
    pdvecDataG = (xb_vec2Nx8 *) pG;
    pdvecDataB = (xb_vec2Nx8 *) pB;

    // load constants
    dvecNeg16 = -16;   // signed luma offset, multiplied by Const_1164 in the accumulator
    dvec128   = 128;   // chroma offset
    shift     = 7;
    Const_1164 = C1164;
    Const_2018 = C2018;
    Const_1596 = C1596;

    // Two 16b constants used in MULP are merged into one 32b constant as per syntax requirement of MULP:
    // -C0813 goes into the upper 16 bits, -C0391 into the lower 16 bits.
    Const_0813_0391 = (-1 * (C0813 << 16));
    Const_0813_0391 = XT_OR((Const_0813_0391 & 0xFFFF0000), ((C0391 * -1) & 0x0000FFFF));

    // number of complete vector-wide strips; leftover columns are not processed
    numStrips = width / (XCHAL_IVPN_SIMD_WIDTH*2);

    for (i = 0; i < numStrips; i++)
    {
        // start every row cursor at the top of the current strip, then advance to the next strip
        pdvecDataYTemp = pdvecDataY++;
        pdvecDataUTemp = pdvecDataU++;
        pdvecDataVTemp = pdvecDataV++;
        pdvecDataRTemp = pdvecDataR++;
        pdvecDataBTemp = pdvecDataB++;
        pdvecDataGTemp = pdvecDataG++;

        for (j = 0; j < height; j++)
        {
            // fetch one vector of each component, then step down one row
            IVP_LV2NX8_XP(dvecY, pdvecDataYTemp, width);
            IVP_LV2NX8_XP(dvecU, pdvecDataUTemp, width);
            IVP_LV2NX8_XP(dvecV, pdvecDataVTemp, width);

            // Chroma is re-centred with a wrap-around subtract; the result is then
            // consumed by the signed multiplies below, so it reads as -128..127.
            dvecUm128 = dvecU - dvec128;
            dvecVm128 = dvecV - dvec128;

            // Luma is NOT pre-subtracted in 8 bits (that would wrap for Y < 16 and be
            // read back as 240..255 by the unsigned multiply). Instead each channel
            // accumulates Y * C1164 followed by (-16) * C1164.

            // Red: R = 1.164(Y - 16) + 1.596(V - 128)
            daccY = IVP_MULUS2N8XR16(dvecY, Const_1164);
            IVP_MULA2N8XR16(daccY, dvecNeg16, Const_1164);
            IVP_MULA2N8XR16(daccY, dvecVm128, Const_1596);
            dvecR = IVP_PACKVRU2NX24(daccY, shift); // unsigned pack to 8 bits; relied on to clamp out-of-range sums
            IVP_SV2NX8_XP(dvecR, pdvecDataRTemp, width);

            // Blue: B = 1.164(Y - 16) + 2.018(U - 128)
            daccY = IVP_MULUS2N8XR16(dvecY, Const_1164);
            IVP_MULA2N8XR16(daccY, dvecNeg16, Const_1164);
            IVP_MULA2N8XR16(daccY, dvecUm128, Const_2018);
            dvecB = IVP_PACKVRU2NX24(daccY, shift); // unsigned pack to 8 bits
            IVP_SV2NX8_XP(dvecB, pdvecDataBTemp, width);

            // Green: G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
            daccY = IVP_MULP2N8XR16(dvecVm128, dvecUm128, Const_0813_0391);
            IVP_MULUSA2N8XR16(daccY, dvecY, Const_1164);
            IVP_MULA2N8XR16(daccY, dvecNeg16, Const_1164);
            dvecG = IVP_PACKVRU2NX24(daccY, shift); // unsigned pack to 8 bits
            IVP_SV2NX8_XP(dvecG, pdvecDataGTemp, width);
        }
    }

    return(0);
}