Hi3519 DSP Demo (二)

/* *********************************************************************************
 * FILE:  yuv2rgbIVP.c
 *
 * DESCRIPTION:
 *
 * This file contains the IVP implementations of RGB-to-YUV conversion
 * and YUV-to-RGB conversion. The following two sets of formulae are taken
 * from Keith Jack's excellent book "Video Demystified" (ISBN 1-878707-09-4).
 *
 * RGB to YUV Conversion
 * Y  =      (0.257 * R) + (0.504 * G) + (0.098 * B) + 16
 * Cr = V =  (0.439 * R) - (0.368 * G) - (0.071 * B) + 128
 * Cb = U = -(0.148 * R) - (0.291 * G) + (0.439 * B) + 128
 *
 * YUV to RGB Conversion
 * B = 1.164(Y - 16)                   + 2.018(U - 128)
 * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
 * R = 1.164(Y - 16) + 1.596(V - 128)
 *
 * ********************************************************************************* */


 

/* *********************************************************************************
 * FUNCTION: xvConvertRgbToYuv()
 *
 *      IVP-EP vectorized implementation to convert from RGB pixel values to YUV format.
 *
 * DESCRIPTION
 *
 *      RGB to YUV Conversion
 *      Y  =      (0.257 * R) + (0.504 * G) + (0.098 * B) + 16
 *      Cr = V =  (0.439 * R) - (0.368 * G) - (0.071 * B) + 128
 *      Cb = U = -(0.148 * R) - (0.291 * G) + (0.439 * B) + 128
 *
 *      Floating point Constants in the above equations are converted to
 *      fixed point by multiplying with 128.
 *
 * INPUTS:
 *      uint8_t *pr				pointer to input component R
 *      uint8_t *pg				pointer to input component G
 *      uint8_t *pb				pointer to input component B
 *      uint32_t width          image width
 *      uint32_t height         image height
 *
 * OUTPUTS:
 *      uint8_t * pyuv			pointer to output image (yuv)
 *
 * ********************************************************************************* */
int32_t xvConvertRgbToYuv(uint8_t *pr, uint8_t *pg, uint8_t *pb, uint8_t *pyuv, int32_t width, int32_t height)
{
  int32_t i, j;
  uint8_t *py = (uint8_t *) pyuv;
  uint8_t *pu = pyuv + (width * height);
  uint8_t *pv = pyuv + (2 * width * height);
  xb_vec2Nx8 dvec128, dvec16;

  xb_vec2Nx8 * __restrict pdvecDataR;
  xb_vec2Nx8 * __restrict pdvecDataG;
  xb_vec2Nx8 * __restrict pdvecDataB;

  xb_vec2Nx8 * __restrict pdvecDataY;
  xb_vec2Nx8 * __restrict pdvecDataV;
  xb_vec2Nx8 * __restrict pdvecDataU;

  xb_vec2Nx8 * __restrict pdvecDataRTemp;
  xb_vec2Nx8 * __restrict pdvecDataGTemp;
  xb_vec2Nx8 * __restrict pdvecDataBTemp;

  xb_vec2Nx8 * __restrict pdvecDataYTemp;
  xb_vec2Nx8 * __restrict pdvecDataVTemp;
  xb_vec2Nx8 * __restrict pdvecDataUTemp;


  xb_vec2Nx8 dvecYc1, dvecYc2, dvecYc3;
  xb_vec2Nx8 dvecVc1, dvecVc2, dvecVc3;
  xb_vec2Nx8 dvecUc1, dvecUc2, dvecUc3;

  xb_vec2Nx8 dvecR, dvecG, dvecB;
  xb_vec2Nx8 dvecY, dvecU, dvecV;
  xb_vec2Nx24 daccSumY, daccSumV, daccSumU;
  int32_t shift = 7;
  dvecYc1 = YC1;
  dvecYc2 = YC2;
  dvecYc3 = YC3;

  dvecVc1 = VC1;
  dvecVc2 = VC2;
  dvecVc3 = VC3;

  dvecUc1 = UC1;
  dvecUc2 = UC2;
  dvecUc3 = UC3;

  pdvecDataR = (xb_vec2Nx8 *) pr;
  pdvecDataG = (xb_vec2Nx8 *) pg;
  pdvecDataB = (xb_vec2Nx8 *) pb;

  pdvecDataY = (xb_vec2Nx8 *) py;
  pdvecDataU = (xb_vec2Nx8 *) pu;
  pdvecDataV = (xb_vec2Nx8 *) pv;

  dvec128 = 128;
  dvec16  = 16;
  for (i = 0; i < (width / (XCHAL_IVPN_SIMD_WIDTH*2)); i++)
  {
    pdvecDataRTemp = pdvecDataR++;
    pdvecDataGTemp = pdvecDataG++;
    pdvecDataBTemp = pdvecDataB++;
    pdvecDataYTemp = pdvecDataY++;
    pdvecDataVTemp = pdvecDataV++;
    pdvecDataUTemp = pdvecDataU++;

    for (j = 0; j < height; j++)
    {
      IVP_LV2NX8_XP(dvecR, pdvecDataRTemp, width);
      IVP_LV2NX8_XP(dvecG, pdvecDataGTemp, width);
      IVP_LV2NX8_XP(dvecB, pdvecDataBTemp, width);
      // calculate Y
      // Y = (0.257 * R) + (0.504 * G) + (0.098 * B) + 16
      daccSumY = IVP_MULUUP2NX8(dvecR, dvecYc1, dvecG, dvecYc2);
      IVP_MULUUA2NX8(daccSumY, dvecB, dvecYc3);
      dvecY = IVP_PACKVRU2NX24(daccSumY, shift);           //pack signed pixel values into xb_dvec2NX8
      dvecY = IVP_ADD2NX8(dvecY, dvec16);                  //add 16 with wrap around
      IVP_SV2NX8_XP(dvecY, pdvecDataYTemp, width);
      // V = (0.439 * R) - (0.368 * G) - (0.071 * B) + 128
      daccSumV = IVP_MULUSP2NX8(dvecG, dvecVc2, dvecB, dvecVc3);
      IVP_MULUUA2NX8(daccSumV, dvecR, dvecVc1);

      dvecV = IVP_PACKVR2NX24(daccSumV, shift);            //pack signed pixel values into xb_dvec2NX8
      dvecV = IVP_ADD2NX8(dvecV, dvec128);                 //add 128 with wrap around
      IVP_SV2NX8_XP(dvecV, pdvecDataVTemp, width);

      // U = -(0.148 * R) - (0.291 * G) + (0.439 * B) + 128
      daccSumU = IVP_MULUSP2NX8(dvecG, dvecUc2, dvecB, dvecUc3);
      IVP_MULUSA2NX8(daccSumU, dvecR, dvecUc1);
      dvecU = IVP_PACKVR2NX24(daccSumU, shift);            //pack signed pixel values into xb_dvec2NX8
      dvecU = IVP_ADD2NX8(dvecU, dvec128);                 //add 128 with wrap around
      IVP_SV2NX8_XP(dvecU, pdvecDataUTemp, width);
    }
  }
  return(1);
}

/* *********************************************************************************
 * FUNCTION: xvConvertYuvToRgb()
 *
 *      IVP-EP vectorized implementation to convert from RGB pixel values to YUV format.
 *
 * DESCRIPTION
 *
 *      YUV to RGB Conversion
 *      R = 1.164(Y - 16) + 1.596(V - 128)
 *      B = 1.164(Y - 16) + 2.018(U - 128)
 *      G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
 *
 *
 *      Floating point Constants in the above equations are converted to
 *      fixed point by multiplying with 128.
 *
 * INPUTS:
 *      uint8_t * pyuv			pointer to input image (yuv)
 *      uint32_t width          image width
 *      uint32_t height         image height
 *
 * OUTPUTS:
 *      uint8_t *pr				pointer to output component R
 *      uint8_t *pg				pointer to output component G
 *      uint8_t *pb				pointer to output component B
 *
 * ********************************************************************************* */
int32_t xvConvertYuvToRgb(uint8_t *pyuv, uint8_t *pR, uint8_t *pG, uint8_t *pB, int32_t width, int32_t height)
{
  int32_t i, j;
  uint8_t *pY, *pV, *pU;
  int32_t shift, Const_0813_0391;
  int16_t Const_1164, Const_2018, Const_1596;
  xb_vec2Nx8 dvecY, dvecU, dvecV, dvecR, dvecG, dvecB;
  xb_vec2Nx8 dvecYm16, dvecUm126, dvecVm126;
  xb_vec2Nx24 daccY;
  xb_vec2Nx8 dvec16, dvec128;
  xb_vec2Nx8 * __restrict pdvecDataY;
  xb_vec2Nx8 * __restrict pdvecDataV;
  xb_vec2Nx8 * __restrict pdvecDataU;
  xb_vec2Nx8 * __restrict pdvecDataR;
  xb_vec2Nx8 * __restrict pdvecDataG;
  xb_vec2Nx8 * __restrict pdvecDataB;
  xb_vec2Nx8 * __restrict pdvecDataRTemp;
  xb_vec2Nx8 * __restrict pdvecDataGTemp;
  xb_vec2Nx8 * __restrict pdvecDataBTemp;
  xb_vec2Nx8 * __restrict pdvecDataYTemp;
  xb_vec2Nx8 * __restrict pdvecDataUTemp;
  xb_vec2Nx8 * __restrict pdvecDataVTemp;

  // pointers to planar y, u and v components
  pY = pyuv;
  pU = pyuv + (width * height);
  pV = pyuv + (width * height * 2);

  pdvecDataY = (xb_vec2Nx8 *) pY;
  pdvecDataV = (xb_vec2Nx8 *) pV;
  pdvecDataU = (xb_vec2Nx8 *) pU;

  pdvecDataR = (xb_vec2Nx8 *) pR;
  pdvecDataG = (xb_vec2Nx8 *) pG;
  pdvecDataB = (xb_vec2Nx8 *) pB;

  // load constants
  dvec16  = 16;
  dvec128 = 128;
  shift   = 7;

  Const_1164      = C1164;
  Const_2018      = C2018;
  Const_1596      = C1596;
  // Two 16b constants used in MULP are merged into one 32b constant as per syntax requirement of MULP
  Const_0813_0391 = (-1 * (C0813 << 16));
  Const_0813_0391 = XT_OR((Const_0813_0391 & 0xFFFF0000), ((C0391 * -1) & 0x0000FFFF));

  for (i = 0; i < (width / (XCHAL_IVPN_SIMD_WIDTH*2)); i++)
  {
    pdvecDataYTemp = pdvecDataY++;
    pdvecDataUTemp = pdvecDataU++;
    pdvecDataVTemp = pdvecDataV++;

    pdvecDataRTemp = pdvecDataR++;
    pdvecDataBTemp = pdvecDataB++;
    pdvecDataGTemp = pdvecDataG++;

    for (j = 0; j < height; j++)
    {
      IVP_LV2NX8_XP(dvecY, pdvecDataYTemp, width);
      IVP_LV2NX8_XP(dvecU, pdvecDataUTemp, width);
      IVP_LV2NX8_XP(dvecV, pdvecDataVTemp, width);
      dvecYm16  = dvecY - dvec16;
      dvecUm126 = dvecU - dvec128;
      dvecVm126 = dvecV - dvec128;

      //RED:R = 1.164(Y - 16) + 1.596(V - 128)
      daccY = IVP_MULUS2N8XR16(dvecYm16, Const_1164);
      IVP_MULA2N8XR16(daccY, dvecVm126, Const_1596);
      dvecR = IVP_PACKVRU2NX24(daccY, shift);               //pack signed pixel values into xb_dvec2NX8
      IVP_SV2NX8_XP(dvecR, pdvecDataRTemp, width);

      //Blue:  B = 1.164(Y - 16)+ 2.018(U - 128)
      daccY = IVP_MULUS2N8XR16(dvecYm16, Const_1164);
      IVP_MULA2N8XR16(daccY, dvecUm126, Const_2018);
      dvecB = IVP_PACKVRU2NX24(daccY, shift);               //pack signed pixel values into xb_dvec2NX8
      IVP_SV2NX8_XP(dvecB, pdvecDataBTemp, width);

      //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
      daccY = IVP_MULP2N8XR16(dvecVm126, dvecUm126, Const_0813_0391);
      IVP_MULUSA2N8XR16(daccY, dvecYm16, Const_1164);
      dvecG = IVP_PACKVRU2NX24(daccY, shift);               //pack signed pixel values into xb_dvec2NX8
      IVP_SV2NX8_XP(dvecG, pdvecDataGTemp, width);
    }
  }

  return(0);
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

月光下的麦克

您的犒赏是我最大的动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值