/*
 * This file contains the YUV output functions for YUV422 modes.
 */

#include <string.h>
#include "inttypes.h"
#include "ka_drawers.h"
#include <assert.h>

// 256-entry lookup tables declared here and defined outside this file. Every
// converter below passes each 8-bit source sample through one of them before
// storing it: ka_Y_Map for luma bytes, ka_UV_Map for Cb/Cr bytes.
extern uint8_t ka_Y_Map[256];
extern uint8_t ka_UV_Map[256];

/**
 * Maps size bytes from src to dst through a 256-entry lookup table.
 *
 * Complete groups of four bytes are handled one 32-bit word at a time: every
 * byte lane of the word is looked up and written back to the same lane, so
 * the result does not depend on the byte order of the machine. The 1..3
 * bytes that may remain are translated one by one, which guarantees that
 * nothing is read or written beyond size bytes.
 *
 * Assumes src and dst are word aligned.
 *
 * @param  dst   destination buffer, at least size bytes long
 * @param  src   source buffer, at least size bytes long
 * @param  size  number of bytes to translate; nothing is done when <= 0
 * @param  map   lookup table, indexed by the source byte (256 entries)
 */
static void translate(uint8_t* dst, const uint8_t* src, int size, const uint8_t* map)
{
  uint32_t* dst32 = (uint32_t*) dst;
  const uint32_t* src32 = (const uint32_t*) src;
  int i;

  // Whole words only: stop as soon as fewer than 4 bytes are left, otherwise
  // the last word access would overrun both buffers.
  for (i = size; i >= 4; i -= 4)
  {
    uint32_t vals = *src32++;
    uint32_t vald = map[vals >> 24];
    vald = map[(vals >> 16) & 0xff] | (vald << 8);
    vald = map[(vals >> 8) & 0xff] | (vald << 8);
    vald = map[vals & 0xff] | (vald << 8);
    *dst32++ = vald;
  }

  // Trailing 1..3 bytes
  if (i > 0)
  {
    dst = (uint8_t*) dst32;
    src = (const uint8_t*) src32;

    for (; i > 0; i--)
      *dst++ = map[*src++];
  }
}

/**
 * Converts an YUV422 source into a NV12 destination, 100% zoom.
 * NV12: Plane 0 is 8bpp Y, Plane 1 is 16bpp &Cr.Cb, 2x2 sub-sampled
 *
 * Each pass of the main loop translates two luma rows through ka_Y_Map (only
 * one on the last pass of an odd yc_height) and emits one interleaved chroma
 * row built through ka_UV_Map. The destination is sub-sampled vertically and
 * the source is not, so the second source chroma row of every pair is
 * skipped rather than averaged with the first.
 *
 * The 8-pair fast path packs two Cb/Cr pairs per 32-bit word with the first
 * pair in the low half; this produces the same bytes as the 16-bit tail path
 * on a little-endian target only.
 * NOTE(review): the 32-bit stores presumably require paint->dst_u to be
 * 4-byte aligned -- confirm against the callers.
 *
 * @param  paint  draw parameters
 */
void ka_drawyuv422_z1_NV12(const ka_paint_t* paint)
{
  int height;
  uint8_t *dst_y;
  uint16_t *dst_uv;
  uint8_t *y, *u, *v;
  int uv_skip, dst_uv_skip;
  int i;

  dst_y = paint->dst;
  dst_uv = (uint16_t*) paint->dst_u;
  // Gap between the end of one destination chroma row and the start of the
  // next, in 16-bit units
  dst_uv_skip = (paint->dst_u_bpr >> 1) - (paint->yc_width >> 1); // cf. int16_t

  y = paint->base.y;                // luminance address
  u = paint->base.cb;               // chroma b source address
  v = paint->base.cr;               // chroma r source address
  // Beware, destination is sub-sampled vertically, not source: once a chroma
  // row has been consumed, step over the remainder of that row and the whole
  // following row (paint->uv_skip is assumed to be the per-row gap -- TODO
  // confirm).
  uv_skip = (paint->yc_width >> 1) + 2*paint->uv_skip;

  // Loop: copy 2 lines of luma, 1 half-line of uv chroma
  for (height = paint->yc_height; height > 0; height -= 2)
  {
    // Copy luma row
    translate(dst_y, y, paint->yc_width, ka_Y_Map);
    y += paint->yc_bpr;
    dst_y += paint->dst_bpr;
    // Copy odd luma row (beware of uneven number of rows)
    if (height != 1)
    {
      translate(dst_y, y, paint->yc_width, ka_Y_Map);
      y += paint->yc_bpr;
      dst_y += paint->dst_bpr;
    }

    // Copy chroma, half-line of uv chroma, 8 Cb/Cr pairs per iteration.
    // Note: should actually take mean of 2 uv lines.
    // The table entries are widened to uint32_t before shifting: a uint8_t
    // promotes to int, and shifting a value >= 0x80 left by 24 would overflow
    // a signed int.
    for (i = paint->yc_width >> 1; i >= 8; i -= 8)
    {
      uint32_t val1, val2, val3, val4;

      val1 = ka_UV_Map[*u++];
      val1 |= (uint32_t) ka_UV_Map[*u++] << 16;
      val2 = ka_UV_Map[*u++];
      val2 |= (uint32_t) ka_UV_Map[*u++] << 16;
      val3 = ka_UV_Map[*u++];
      val3 |= (uint32_t) ka_UV_Map[*u++] << 16;
      val4 = ka_UV_Map[*u++];
      val4 |= (uint32_t) ka_UV_Map[*u++] << 16;

      val1 |= (uint32_t) ka_UV_Map[*v++] << 8;
      val1 |= (uint32_t) ka_UV_Map[*v++] << 24;
      val2 |= (uint32_t) ka_UV_Map[*v++] << 8;
      val2 |= (uint32_t) ka_UV_Map[*v++] << 24;
      val3 |= (uint32_t) ka_UV_Map[*v++] << 8;
      val3 |= (uint32_t) ka_UV_Map[*v++] << 24;
      val4 |= (uint32_t) ka_UV_Map[*v++] << 8;
      val4 |= (uint32_t) ka_UV_Map[*v++] << 24;

      ((uint32_t *)dst_uv)[0] = val1;
      ((uint32_t *)dst_uv)[1] = val2;
      ((uint32_t *)dst_uv)[2] = val3;
      ((uint32_t *)dst_uv)[3] = val4;
      dst_uv += 8;
    }

    // Remaining 0..7 pairs, one 16-bit word (Cb low, Cr high) at a time
    for (; i > 0; i--)
    {
      *dst_uv++ = (uint16_t) (ka_UV_Map[*u++] | (ka_UV_Map[*v++] << 8));
    }

    u += uv_skip;
    v += uv_skip;
    dst_uv += dst_uv_skip;
  }

  // Post-conditions. One chroma row is written, and two source chroma rows
  // are stepped over, for every started pair of luma rows, so an odd
  // yc_height counts as the next even number on the chroma side.
  assert(dst_y == paint->dst + paint->yc_height * paint->dst_bpr);
  assert(dst_uv == (uint16_t*) (paint->dst_u + ((paint->yc_height + 1) >> 1) * paint->dst_u_bpr));
  assert(y == paint->base.y + paint->yc_height * paint->yc_bpr);
  assert(u == paint->base.cb + ((paint->yc_height + 1) & ~1) * (paint->yc_bpr >> 1));
  assert(v == paint->base.cr + ((paint->yc_height + 1) & ~1) * (paint->yc_bpr >> 1));
}

/**
 * Converts an YUV422 source into a NV21 destination, 100% zoom.
 * NV21: Plane 0 is 8bpp Y, Plane 1 is 16bpp &Cb.Cr, 2x2 sub-sampled
 *
 * Each pass of the main loop translates two luma rows through ka_Y_Map (only
 * one on the last pass of an odd yc_height) and emits one interleaved chroma
 * row built through ka_UV_Map, with Cr in the low byte and Cb in the high
 * byte of every 16-bit pair. The destination is sub-sampled vertically and
 * the source is not, so the second source chroma row of every pair is
 * skipped rather than averaged with the first.
 *
 * The 8-pair fast path packs two Cr/Cb pairs per 32-bit word with the first
 * pair in the low half; this produces the same bytes as the 16-bit tail path
 * on a little-endian target only.
 * NOTE(review): the 32-bit stores presumably require paint->dst_u to be
 * 4-byte aligned -- confirm against the callers.
 *
 * @param  paint  draw parameters
 */
void ka_drawyuv422_z1_NV21(const ka_paint_t* paint)
{
  int height;
  uint8_t *dst_y;
  uint16_t *dst_uv;
  uint8_t *y, *u, *v;
  int uv_skip, dst_uv_skip;
  int i;

  dst_y = paint->dst;
  dst_uv = (uint16_t*) paint->dst_u;
  // Gap between the end of one destination chroma row and the start of the
  // next, in 16-bit units
  dst_uv_skip = (paint->dst_u_bpr >> 1) - (paint->yc_width >> 1); // cf. int16_t

  y = paint->base.y;                // luminance address
  u = paint->base.cb;               // chroma b source address
  v = paint->base.cr;               // chroma r source address
  // Beware, destination is sub-sampled vertically, not source: once a chroma
  // row has been consumed, step over the remainder of that row and the whole
  // following row (paint->uv_skip is assumed to be the per-row gap -- TODO
  // confirm).
  uv_skip = (paint->yc_width >> 1) + 2*paint->uv_skip;

  // Loop: copy 2 lines of luma, 1 half-line of uv chroma
  for (height = paint->yc_height; height > 0; height -= 2)
  {
    // Copy luma row
    translate(dst_y, y, paint->yc_width, ka_Y_Map);
    y += paint->yc_bpr;
    dst_y += paint->dst_bpr;
    // Copy odd luma row (beware of uneven number of rows)
    if (height != 1)
    {
      translate(dst_y, y, paint->yc_width, ka_Y_Map);
      y += paint->yc_bpr;
      dst_y += paint->dst_bpr;
    }

    // Copy chroma, half-line of uv chroma, 8 Cr/Cb pairs per iteration.
    // Note: should actually take mean of 2 uv lines.
    // The table entries are widened to uint32_t before shifting: a uint8_t
    // promotes to int, and shifting a value >= 0x80 left by 24 would overflow
    // a signed int.
    for (i = paint->yc_width >> 1; i >= 8; i -= 8)
    {
      uint32_t val1, val2, val3, val4;

      val1 = ka_UV_Map[*v++];
      val1 |= (uint32_t) ka_UV_Map[*v++] << 16;
      val2 = ka_UV_Map[*v++];
      val2 |= (uint32_t) ka_UV_Map[*v++] << 16;
      val3 = ka_UV_Map[*v++];
      val3 |= (uint32_t) ka_UV_Map[*v++] << 16;
      val4 = ka_UV_Map[*v++];
      val4 |= (uint32_t) ka_UV_Map[*v++] << 16;

      val1 |= (uint32_t) ka_UV_Map[*u++] << 8;
      val1 |= (uint32_t) ka_UV_Map[*u++] << 24;
      val2 |= (uint32_t) ka_UV_Map[*u++] << 8;
      val2 |= (uint32_t) ka_UV_Map[*u++] << 24;
      val3 |= (uint32_t) ka_UV_Map[*u++] << 8;
      val3 |= (uint32_t) ka_UV_Map[*u++] << 24;
      val4 |= (uint32_t) ka_UV_Map[*u++] << 8;
      val4 |= (uint32_t) ka_UV_Map[*u++] << 24;

      ((uint32_t *)dst_uv)[0] = val1;
      ((uint32_t *)dst_uv)[1] = val2;
      ((uint32_t *)dst_uv)[2] = val3;
      ((uint32_t *)dst_uv)[3] = val4;
      dst_uv += 8;
    }

    // Remaining 0..7 pairs, one 16-bit word (Cr low, Cb high) at a time
    for (; i > 0; i--)
    {
      *dst_uv++ = (uint16_t) (ka_UV_Map[*v++] | (ka_UV_Map[*u++] << 8));
    }

    u += uv_skip;
    v += uv_skip;
    dst_uv += dst_uv_skip;
  }

  // Post-conditions. One chroma row is written, and two source chroma rows
  // are stepped over, for every started pair of luma rows, so an odd
  // yc_height counts as the next even number on the chroma side.
  assert(dst_y == paint->dst + paint->yc_height * paint->dst_bpr);
  assert(dst_uv == (uint16_t*) (paint->dst_u + ((paint->yc_height + 1) >> 1) * paint->dst_u_bpr));
  assert(y == paint->base.y + paint->yc_height * paint->yc_bpr);
  assert(u == paint->base.cb + ((paint->yc_height + 1) & ~1) * (paint->yc_bpr >> 1));
  assert(v == paint->base.cr + ((paint->yc_height + 1) & ~1) * (paint->yc_bpr >> 1));
}

/**
 * Converts an YUV422 source into a UYVY destination, 100% zoom.
 * UYVY: 32bpp words of &Y1.Cr.Y0.Cb, 2x1 sub-sampled
 *
 * Every destination word holds two pixels: Cb in bits 0-7, the first luma
 * sample in bits 8-15, Cr in bits 16-23 and the second luma sample in bits
 * 24-31. Luma goes through ka_Y_Map and chroma through ka_UV_Map. Rows are
 * processed four pixels at a time, followed by one extra pair when
 * yc_width & 2 is set; a trailing single pixel of an odd width is not
 * emitted.
 *
 * @param  paint  draw parameters
 */
void ka_drawyuv422_z1_UYVY(const ka_paint_t* paint)
{
  int height;
  uint32_t *dst;
  uint8_t *y, *u, *v;
  int dst_skip, yc_skip, uv_skip;
  int i;

  dst = (uint32_t*) paint->dst;    // even row screen address

  y = paint->base.y;               // even row luminance address
  u = paint->base.cb;              // chroma b source address
  v = paint->base.cr;              // chroma r source address

  // dst advances in 32-bit words; paint->dst_skip is presumably expressed in
  // bytes, hence the division by 4 -- TODO confirm.
  dst_skip = paint->dst_skip >> 2; // cf. int32_t
  yc_skip = paint->yc_skip;
  uv_skip = paint->uv_skip;

  // 1 row at a time
  for (height = paint->yc_height; height > 0; height--)
  {
    uint32_t yuv1, yuv2;

    // 4 pixels at a time.
    // Table entries are widened to uint32_t before shifting: a uint8_t
    // promotes to int, and shifting a value >= 0x80 left by 24 would overflow
    // a signed int.
    for (i = paint->yc_width >> 2; i > 0; i--)
    {
      // Chroma for the first block of 2 pixels
      yuv1 = ka_UV_Map[*u++] | ((uint32_t) ka_UV_Map[*v++] << 16);

      // Chroma for the second block of 2 pixels
      yuv2 = ka_UV_Map[*u++] | ((uint32_t) ka_UV_Map[*v++] << 16);

      // Luma for the 4 pixels
      yuv1 |= (uint32_t) ka_Y_Map[*y++] << 8;
      yuv1 |= (uint32_t) ka_Y_Map[*y++] << 24;
      yuv2 |= (uint32_t) ka_Y_Map[*y++] << 8;
      yuv2 |= (uint32_t) ka_Y_Map[*y++] << 24;

      *dst++ = yuv1;
      *dst++ = yuv2;
    }

    // 2 pixels at a time
    if (paint->yc_width & 2)
    {
      // Chroma for the block of 2 pixels
      yuv1 = ka_UV_Map[*u++] | ((uint32_t) ka_UV_Map[*v++] << 16);
      // Luma for the 2 pixels
      yuv1 |= (uint32_t) ka_Y_Map[*y++] << 8;
      yuv1 |= (uint32_t) ka_Y_Map[*y++] << 24;
      *dst++ = yuv1;
    }

    // Move every pointer to the start of its next row
    u += uv_skip;
    v += uv_skip;
    y += yc_skip;
    dst += dst_skip;
  }

  assert(dst == (uint32_t*) (paint->dst + paint->yc_height * paint->dst_bpr));
  assert(y == paint->base.y + paint->yc_height * paint->yc_bpr);
  assert(u == paint->base.cb + paint->yc_height * (paint->yc_bpr >> 1));
  assert(v == paint->base.cr + paint->yc_height * (paint->yc_bpr >> 1));
}

/**
 * Converts an YUV422 source into a YUY2 destination, 100% zoom.
 * YUY2: 32bpp words of &Cr.Y1.Cb.Y0, 2x1 sub-sampled
 *
 * Every destination word holds two pixels: the first luma sample in bits
 * 0-7, Cb in bits 8-15, the second luma sample in bits 16-23 and Cr in bits
 * 24-31. Luma goes through ka_Y_Map and chroma through ka_UV_Map. Rows are
 * processed four pixels at a time, followed by one extra pair when
 * yc_width & 2 is set; a trailing single pixel of an odd width is not
 * emitted.
 *
 * @param  paint  draw parameters
 */
void ka_drawyuv422_z1_YUY2(const ka_paint_t* paint)
{
  int height;
  uint32_t *dst;
  uint8_t *y, *u, *v;
  int dst_skip, yc_skip, uv_skip;
  int i;

  dst = (uint32_t*) paint->dst;    // even row screen address

  y = paint->base.y;               // even row luminance address
  u = paint->base.cb;              // chroma b source address
  v = paint->base.cr;              // chroma r source address

  // dst advances in 32-bit words; paint->dst_skip is presumably expressed in
  // bytes, hence the division by 4 -- TODO confirm.
  dst_skip = paint->dst_skip >> 2; // cf. int32_t
  yc_skip = paint->yc_skip;
  uv_skip = paint->uv_skip;

  // 1 row at a time
  for (height = paint->yc_height; height > 0; height--)
  {
    uint32_t yuv1, yuv2;

    // 4 pixels at a time.
    // Table entries are widened to uint32_t before shifting: a uint8_t
    // promotes to int, and shifting a value >= 0x80 left by 24 would overflow
    // a signed int.
    for (i = paint->yc_width >> 2; i > 0; i--)
    {
      // Chroma for the first block of 2 pixels
      yuv1 = ((uint32_t) ka_UV_Map[*u++] << 8) | ((uint32_t) ka_UV_Map[*v++] << 24);

      // Chroma for the second block of 2 pixels
      yuv2 = ((uint32_t) ka_UV_Map[*u++] << 8) | ((uint32_t) ka_UV_Map[*v++] << 24);

      // Luma for the 4 pixels
      yuv1 |= (uint32_t) ka_Y_Map[*y++];
      yuv1 |= (uint32_t) ka_Y_Map[*y++] << 16;
      yuv2 |= (uint32_t) ka_Y_Map[*y++];
      yuv2 |= (uint32_t) ka_Y_Map[*y++] << 16;

      *dst++ = yuv1;
      *dst++ = yuv2;
    }

    // 2 pixels at a time
    if (paint->yc_width & 2)
    {
      // Chroma for the block of 2 pixels
      yuv1 = ((uint32_t) ka_UV_Map[*u++] << 8) | ((uint32_t) ka_UV_Map[*v++] << 24);
      // Luma for the 2 pixels
      yuv1 |= (uint32_t) ka_Y_Map[*y++];
      yuv1 |= (uint32_t) ka_Y_Map[*y++] << 16;
      *dst++ = yuv1;
    }

    // Move every pointer to the start of its next row
    u += uv_skip;
    v += uv_skip;
    y += yc_skip;
    dst += dst_skip;
  }

  assert(dst == (uint32_t*) (paint->dst + paint->yc_height * paint->dst_bpr));
  assert(y == paint->base.y + paint->yc_height * paint->yc_bpr);
  assert(u == paint->base.cb + paint->yc_height * (paint->yc_bpr >> 1));
  assert(v == paint->base.cr + paint->yc_height * (paint->yc_bpr >> 1));
}

/**
 * Converts an YUV422 source into a YV12 destination, 100% zoom.
 * YV12: Plane 0 is 8bpp Y, Plane 1 Cb, Plane 2 Cr, 2x2 sub-sampled
 *
 * Each pass of the main loop translates two luma rows through ka_Y_Map (only
 * one on the last pass of an odd yc_height) and one row each of Cb and Cr
 * through ka_UV_Map into paint->dst_u and paint->dst_v. The destination is
 * sub-sampled vertically and the source is not, so the second source chroma
 * row of every pair is skipped rather than averaged with the first.
 *
 * @param  paint  draw parameters
 */
void ka_drawyuv422_z1_YV12(const ka_paint_t* paint)
{
  int height;
  uint8_t *dst_y, *dst_u, *dst_v;
  uint8_t *y, *u, *v;
  int uv_delta;

  dst_y = paint->dst;
  dst_u = paint->dst_u;
  dst_v = paint->dst_v;

  y = paint->base.y;                // luminance address
  u = paint->base.cb;               // chroma b source address
  v = paint->base.cr;               // chroma r source address
  // Beware, destination is sub-sampled vertically, not source.
  // Advance by 2 source u/v lines per pass. One line is (yc_width >> 1)
  // samples plus uv_skip, the row pitch used by the other converters in this
  // file; computing it as yc_width + 2*uv_skip would be one byte too many
  // for an odd yc_width.
  uv_delta = 2 * ((paint->yc_width >> 1) + paint->uv_skip);

  // Loop: copy 2 lines of luma, 1 half-line of u/v chroma
  for (height = paint->yc_height; height > 0; height -= 2)
  {
    // Copy luma row
    translate(dst_y, y, paint->yc_width, ka_Y_Map);
    y += paint->yc_bpr;
    dst_y += paint->dst_bpr;
    // Copy odd luma row (beware of uneven number of rows)
    if (height != 1)
    {
      translate(dst_y, y, paint->yc_width, ka_Y_Map);
      y += paint->yc_bpr;
      dst_y += paint->dst_bpr;
    }

    // Copy chroma, 1 half-line of u chroma, 1 half-line of v chroma
    // Note: should actually also take mean of 2 u/v lines.
    translate(dst_u, u, paint->yc_width >> 1, ka_UV_Map);
    translate(dst_v, v, paint->yc_width >> 1, ka_UV_Map);
    dst_u += paint->dst_u_bpr;
    dst_v += paint->dst_v_bpr;
    u += uv_delta;
    v += uv_delta;
  }

  // Post-conditions. One chroma row is written, and two source chroma rows
  // are stepped over, for every started pair of luma rows, so an odd
  // yc_height counts as the next even number on the chroma side.
  assert(dst_y == paint->dst + paint->yc_height * paint->dst_bpr);
  assert(dst_u == paint->dst_u + ((paint->yc_height + 1) >> 1) * paint->dst_u_bpr);
  assert(dst_v == paint->dst_v + ((paint->yc_height + 1) >> 1) * paint->dst_v_bpr);
  assert(y == paint->base.y + paint->yc_height * paint->yc_bpr);
  assert(u == paint->base.cb + ((paint->yc_height + 1) & ~1) * (paint->yc_bpr >> 1));
  assert(v == paint->base.cr + ((paint->yc_height + 1) & ~1) * (paint->yc_bpr >> 1));
}

/**
 * Converts an YUV422 source into a YV16 destination, 100% zoom.
 * YV16: Plane 0 is 8bpp Y, Plane 1 Cb, Plane 2 Cr, 2x1 sub-sampled
 *
 * Source and destination share the same chroma sub-sampling, so every luma
 * row is paired with exactly one Cb row and one Cr row. Luma bytes go
 * through ka_Y_Map, chroma bytes through ka_UV_Map.
 *
 * @param  paint  draw parameters
 */
void ka_drawyuv422_z1_YV16(const ka_paint_t* paint)
{
  uint8_t *luma_out = paint->dst;
  uint8_t *cb_out = paint->dst_u;
  uint8_t *cr_out = paint->dst_v;

  uint8_t *luma_in = paint->base.y;   // luminance address
  uint8_t *cb_in = paint->base.cb;    // chroma b source address
  uint8_t *cr_in = paint->base.cr;    // chroma r source address

  const int luma_width = paint->yc_width;
  const int chroma_width = luma_width >> 1;
  // Distance between the starts of two consecutive source u/v lines
  const int chroma_pitch = chroma_width + paint->uv_skip;
  int row;

  // One luma line and one half-width line of each chroma plane per pass
  for (row = 0; row < paint->yc_height; row++)
  {
    translate(luma_out, luma_in, luma_width, ka_Y_Map);
    translate(cb_out, cb_in, chroma_width, ka_UV_Map);
    translate(cr_out, cr_in, chroma_width, ka_UV_Map);

    // Step all six cursors to their next line
    luma_in += paint->yc_bpr;
    luma_out += paint->dst_bpr;
    cb_in += chroma_pitch;
    cr_in += chroma_pitch;
    cb_out += paint->dst_u_bpr;
    cr_out += paint->dst_v_bpr;
  }

  // Every cursor must have landed exactly one past its last line
  assert(luma_out == paint->dst + paint->yc_height * paint->dst_bpr);
  assert(cb_out == paint->dst_u + paint->yc_height * paint->dst_u_bpr);
  assert(cr_out == paint->dst_v + paint->yc_height * paint->dst_v_bpr);
  assert(luma_in == paint->base.y + paint->yc_height * paint->yc_bpr);
  assert(cb_in == paint->base.cb + paint->yc_height * (paint->yc_bpr >> 1));
  assert(cr_in == paint->base.cr + paint->yc_height * (paint->yc_bpr >> 1));
}
