NV12转BGR24算法总结

2019年11月4日 205次阅读

概述

项目中用到了 YUV NV12 转 BGR24 的算法，现将几种常用实现的代码总结如下。

直接转换

/* Clamp an intermediate colour value to the 0..255 byte range. */
static int nv12_clamp_byte(int value)
{
    if (value > 255)
        return 255;
    if (value < 0)
        return 0;
    return value;
}

/*
 * Convert a semi-planar YUV 4:2:0 frame to packed BGR, 3 bytes per pixel.
 *
 * width, height : frame size in pixels
 * yuyv          : source buffer; width*height luma bytes followed by the
 *                 interleaved chroma bytes (one pair per 2x2 pixel block)
 * bgr           : destination buffer, width*height*3 bytes
 *
 * Within each chroma pair the FIRST byte is used as V and the second as U.
 * Output rows are stored in reverse vertical order: source row 0 is written
 * to the last row of bgr.
 */
void NV12_T_BGR(unsigned int width, unsigned int height, unsigned char *yuyv,
         unsigned char *bgr) {
    const int chroma_base = width * height;
    int row, col;
    int luma_pos = 0;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            /* Start of the chroma pair shared by this pixel's 2x2 block. */
            const int pair_pos = row / 2 * width + col - col % 2;
            /* Destination pixel index, mirrored vertically. */
            const int out_pos = luma_pos % width + (height - row - 1) * width;

            const unsigned char luma = yuyv[luma_pos];
            const unsigned char cr = yuyv[chroma_base + pair_pos];
            const unsigned char cb = yuyv[chroma_base + pair_pos + 1];

            /* Integer approximation using coefficients scaled by 100. */
            const int red = luma + (140 * (cr - 128)) / 100;
            const int green = luma - (34 * (cb - 128)) / 100
                                   - (71 * (cr - 128)) / 100;
            const int blue = luma + (177 * (cb - 128)) / 100;

            bgr[out_pos * 3 + 0] = nv12_clamp_byte(blue);
            bgr[out_pos * 3 + 1] = nv12_clamp_byte(green);
            bgr[out_pos * 3 + 2] = nv12_clamp_byte(red);

            luma_pos++;
        }
    }
}

直接转换（查表优化）

/* Per-byte products of (value - 128) with each chroma coefficient. */
static int crv_tab[256];
static int cbu_tab[256];
static int cgu_tab[256];
static int cgv_tab[256];
/* Per-byte products of (value - 16) with the luma coefficient 76309. */
static int tab_76309[256];
/* Clip table: 384 zeros, then 0..255, then 384 entries of 255. */
static unsigned char clp[1024];
/* Set to 1 once the tables above have been filled. */
static int inityuv420p = 0;

/*
 * Fill the conversion lookup tables on the first call; later calls return
 * immediately.
 *
 * The coefficients are large integers (76309, 104597, 132201, 25675, 53279);
 * divided by 65536 they are roughly 1.164, 1.596, 2.017, 0.392 and 0.813.
 */
void init_yuv420p_table()
{
    const int coef_rv = 104597;
    const int coef_bu = 132201;
    const int coef_gu = 25675;
    const int coef_gv = 53279;
    int level;
    int slot;

    if (inityuv420p == 1)
        return;

    for (level = 0; level < 256; level++) {
        const int centred = level - 128;

        crv_tab[level] = centred * coef_rv;
        cbu_tab[level] = centred * coef_bu;
        cgu_tab[level] = centred * coef_gu;
        cgv_tab[level] = centred * coef_gv;
        tab_76309[level] = 76309 * (level - 16);
    }

    /* Build the saturating clip table in a single pass. */
    for (slot = 0; slot < 1024; slot++) {
        if (slot < 384)
            clp[slot] = 0;
        else if (slot < 640)
            clp[slot] = (unsigned char)(slot - 384);
        else
            clp[slot] = 255;
    }

    inityuv420p = 1;
}

void yuv420sp_to_rgb24(int type, unsigned char* yuvbuffer,unsigned char* rgbbuffer, int width,int height)
{
    if (inityuv420p != 1) init_yuv420p_table();
    int y1, y2, u = 0, v = 0;
    unsigned char *py1, *py2;
    int i, j, c1, c2, c3, c4;
    unsigned char *d1, *d2;
    unsigned char *src_u;

    src_u = yuvbuffer + width * height;   // u

    py1 = yuvbuffer;                      // y
    py2 = py1 + width;

    d2 = rgbbuffer + 3 * width * (height - 2);
    d1 = d2 + 3 * width;

    int dotline = 9 * width;
    static unsigned char *pclp384 = clp + 384;
    for (j = 0; j < height; j += 2)
    {
        for (i = 0; i < width; i += 2)
        {
            u = *src_u++;
            v = *src_u++;      // v紧跟u，在u的下一个位置

            c1 = crv_tab[v];
            //c2 = cgu_tab[u];
            //c3 = cgv_tab[v];
            c4 = cbu_tab[u];

            int c2c3 = cgu_tab[u] + cgv_tab[v];
            //RGB

            //up-left
            /*y1 = tab_76309[*py1++];
            *d1++ = clp[384+((y1 + c1)>>16)];
            *d1++ = clp[384+((y1 - c2 - c3)>>16)];
            *d1++ = clp[384+((y1 + c4)>>16)];

            //down-left
            y2 = tab_76309[*py2++];
            *d2++ = clp[384+((y2 + c1)>>16)];
            *d2++ = clp[384+((y2 - c2 - c3)>>16)];
            *d2++ = clp[384+((y2 + c4)>>16)];

            //up-right
            y1 = tab_76309[*py1++];
            *d1++ = clp[384+((y1 + c1)>>16)];
            *d1++ = clp[384+((y1 - c2 - c3)>>16)];
            *d1++ = clp[384+((y1 + c4)>>16)];

            //down-right
            y2 = tab_76309[*py2++];
            *d2++ = clp[384+((y2 + c1)>>16)];
            *d2++ = clp[384+((y2 - c2 - c3)>>16)];
            *d2++ = clp[384+((y2 + c4)>>16)];*/

            y1 = tab_76309[*py1++];
            *d1++ = pclp384[((y1 + c1))>>16];
            *d1++ = pclp384[((y1 - c2c3))>>16];
            *d1++ = pclp384[((y1 + c4))>>16];

            //down-left
            y2 = tab_76309[*py2++];
            *d2++ = pclp384[((y2 + c1))>>16];
            *d2++ = pclp384[((y2 - c2c3))>>16];
            *d2++ = pclp384[((y2 + c4))>>16];

            //up-right
            y1 = tab_76309[*py1++];
            *d1++ = pclp384[((y1 + c1))>>16];
            *d1++ = pclp384[((y1 - c2c3))>>16];
            *d1++ = pclp384[((y1 + c4))>>16];

            //down-right
            y2 = tab_76309[*py2++];
            *d2++ = pclp384[((y2 + c1))>>16];
            *d2++ = pclp384[((y2 - c2c3))>>16];
            *d2++ = pclp384[((y2 + c4))>>16];
        }

        d1  -= dotline;
        d2  -= dotline;

        py1 += width;
        py2 += width;
    }
}

Neon优化

#include "arm_neon.h"
const uint8_t Y_SUBS[8] = { 16, 16, 16, 16, 16, 16, 16, 16 };
const uint8_t UV_SUBS[8] = { 128, 128, 128, 128, 128, 128, 128, 128 };

/*
 * Convert 8 luma samples to 8 packed 3-byte pixels and store them at dst.
 *
 * first_term / mid_term / last_term are the chroma contributions for the
 * first, second and third output byte of each pixel (already multiplied by
 * their coefficients). All arithmetic is in signed 16-bit lanes with a final
 * >> 6 and unsigned saturation to 8 bits.
 */
static inline void neon_store_8px(unsigned char *dst, const unsigned char *luma,
                                  uint8x8_t luma_offset, int16x8_t first_term,
                                  int16x8_t mid_term, int16x8_t last_term)
{
    /* (Y - 16) saturating at 0, times 74: at most 239 * 74 = 17686. */
    const int16x8_t y = vreinterpretq_s16_u16(
        vmulq_n_u16(vmovl_u8(vqsub_u8(vld1_u8(luma), luma_offset)), 74));
    uint8x8x3_t px;

    /* 17686 + 129 * 127 exceeds INT16_MAX, so this add must saturate;
     * a wrapping add turned very bright pixels into 0 instead of 255. */
    px.val[0] = vqmovun_s16(vshrq_n_s16(vqaddq_s16(y, first_term), 6));
    px.val[1] = vqmovun_s16(vshrq_n_s16(vsubq_s16(y, mid_term), 6));
    px.val[2] = vqmovun_s16(vshrq_n_s16(vaddq_s16(y, last_term), 6));
    vst3_u8(dst, px);
}

/*
 * NEON conversion of a semi-planar YUV 4:2:0 frame to packed 3-byte pixels.
 *
 * src    : width*height luma bytes followed by interleaved chroma pairs
 * width  : frame width; the loop handles 8 pixels per step, so it must be a
 *          positive multiple of 8 (the original note asked for multiples of 16)
 * height : frame height; must be positive and even
 * RGBOut : destination, width*height*3 bytes
 *
 * In each chroma pair the first byte feeds the x102 term (third output byte)
 * and the second byte feeds the x129 term (first output byte); the middle
 * byte subtracts 52*first + 25*second. Coefficients are scaled by 64.
 * Rows are written bottom-up: source row 0 lands in the last output row.
 *
 * Calls with a NULL buffer or unsupported dimensions return without writing.
 */
void yv12_to_rgb24_neon(unsigned char *src, int width, int height, unsigned char *RGBOut)
{
    if (!src || !RGBOut)
        return;
    if (width <= 0 || height <= 0 || (width & 7) || (height & 1))
        return;

    const int row_bytes = 3 * width;
    const unsigned char *pY1 = src;                 /* upper source row */
    const unsigned char *pY2 = src + width;         /* lower source row */
    const unsigned char *pUV = src + width * height;

    const uint8x8_t luma_offset = vld1_u8(Y_SUBS);
    const uint8x8_t chroma_offset = vld1_u8(UV_SUBS);

    /* Upper source row must land BELOW the lower one for a true vertical
     * flip; previously the two rows of each pair were stored swapped. */
    unsigned char *out_lower = RGBOut + row_bytes * (height - 2);
    unsigned char *out_upper = out_lower + row_bytes;

    for (int j = 0; j < height; j += 2)
    {
        for (int i = 0; i < width; i += 8)
        {
            /* De-interleave 4 chroma pairs: first bytes, then second bytes. */
            unsigned char planar[8];
            planar[0] = pUV[0];
            planar[1] = pUV[2];
            planar[2] = pUV[4];
            planar[3] = pUV[6];
            planar[4] = pUV[1];
            planar[5] = pUV[3];
            planar[6] = pUV[5];
            planar[7] = pUV[7];
            pUV += 8;

            /* Widening subtract gives (byte - 128) in -128..127 per lane. */
            const int16x8_t centred = vreinterpretq_s16_u16(
                vsubl_u8(vld1_u8(planar), chroma_offset));

            /* Duplicate every chroma sample for its two horizontal pixels. */
            const int16x4_t first4 = vget_low_s16(centred);
            const int16x4_t second4 = vget_high_s16(centred);
            const int16x4x2_t first_zip = vzip_s16(first4, first4);
            const int16x4x2_t second_zip = vzip_s16(second4, second4);
            const int16x8_t c_first = vcombine_s16(first_zip.val[0], first_zip.val[1]);
            const int16x8_t c_second = vcombine_s16(second_zip.val[0], second_zip.val[1]);

            const int16x8_t term_last = vmulq_n_s16(c_first, 102);
            const int16x8_t term_first = vmulq_n_s16(c_second, 129);
            const int16x8_t term_mid =
                vmlaq_n_s16(vmulq_n_s16(c_first, 52), c_second, 25);

            neon_store_8px(out_upper, pY1, luma_offset, term_first, term_mid, term_last);
            pY1 += 8;
            out_upper += 24;

            neon_store_8px(out_lower, pY2, luma_offset, term_first, term_mid, term_last);
            pY2 += 8;
            out_lower += 24;
        }

        /* Advanced one row (3*width); move to two rows above the start. */
        out_upper -= 3 * row_bytes;
        out_lower -= 3 * row_bytes;
        /* Skip the row the partner pointer just consumed. */
        pY1 += width;
        pY2 += width;
    }
}

使用ffmpeg中的转换函数实现

//ffmpeg变量
AVPicture pFrameYUV,pFrameBGR;
uint8_t * ptmp;
struct SwsContext* imgCtx = NULL;

void Init(){
//初始化ffmpeg
{
//LOGI_MATRIX(“Start->Sws”);
imgCtx = sws_getContext(instance->pWidth,instance->pHeight,AV_PIX_FMT_NV21,instance->pWidth,instance->pHeight,PIX_FMT_BGR24,SWS_BILINEAR,0,0,0);

}

void YV12ToBGR24_FFmpeg(unsigned char* pYUV,unsigned char* pBGR24,int width,int height)
{
    if (width < 1 || height < 1 || pYUV == NULL || pBGR24 == NULL)
        return ;
    //int srcNumBytes,dstNumBytes;
    //uint8_t *pSrc,*pDst;
    //LOGI_MATRIX("Start->trans");


    //pFrameYUV = avpicture_alloc();
    //srcNumBytes = avpicture_get_size(PIX_FMT_YUV420P,width,height);
    //pSrc = (uint8_t *)malloc(sizeof(uint8_t) * srcNumBytes);
    // LOGI_MATRIX("Start->avpicture_fill");

         avpicture_fill(&pFrameYUV,pYUV,AV_PIX_FMT_NV12,instance->pWidth,instance->pHeight);
                //U,V互换
        // ptmp =pFrameYUV.data[1];
        // pFrameYUV.data[1]=pFrameYUV.data[2];
        // pFrameYUV.data [2]=ptmp;

                //pFrameBGR = avcodec_alloc_frame();
                //dstNumBytes = avpicture_get_size(PIX_FMT_BGR24,width,height);
                //pDst = (uint8_t *)malloc(sizeof(uint8_t) * dstNumBytes);
                 // LOGI_MATRIX("Start->avpicture_fill");

    if (imgCtx != NULL){
    // LOGI_MATRIX("imgCtx->Sws");
        sws_scale(imgCtx,pFrameYUV.data,pFrameYUV.linesize,0,height,pFrameBGR.data,pFrameBGR.linesize);
        //
        //if(imgCtx){
          // sws_freeContext(imgCtx);
          // imgCtx = NULL;
      // }
        return ;
    }
    else{
        LOGI_MATRIX("imgCtx==NULL");
        //sws_freeContext(imgCtx);
       // imgCtx = NULL;
        return ;
    }
}

ffmpeg编译测试博客链接
希望对您有所帮助！