;
; yuv444_argb32.s
; Copyright (C) 2002 P.Everett <peter@everett9981.freeserve.co.uk>
;
; This file is part of KinoAMP, a free RISCOS MPEG program stream decoder.
;
; KinoAMP is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
;
; KinoAMP is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
;

 GET hdr.ka_drawer

; Thanks to André Timmermans for some improvements, and the zoom mode code.

; yuv-rgb matrix conversion and display output functions for 32 bit
; colour depth displays.
;
; The colour space conversion equations are,
;
;   Ur  = KUr(u - 128)
;   Vb  = KVb(v - 128)
;   UVg = KUg(u - 128) + KVg(v - 128)
;   Yc  = KYc(y - 16)
;    r  = Yc + Ur
;    g  = Yc - UVg
;    b  = Yc + Vb
;
; where (with 10 bit scaling),
;   KYc = 1192 = 1.164 * 1024
;   KUr = 1634 = 1.596 * 1024
;   KUg =  833 = 0.813 * 1024
;   KVb = 2066 = 2.018 * 1024
;   KVg =  400 = 0.391 * 1024
;
; These calculations are performed on initialisation and are stored in the
; ka_U_Table, ka_V_Table, and ka_Yc_Table. See the file custom.c
;
; Register and stack usage for all functions,
;
; on input to loops,
; v4 = dst1          even line screen address
; v5 = bytesperrow   vertical display step
; v6 = y1            even line luminance input address
; a3 = u             Cr chroma input address
; a4 = v             Cb chroma input address
; [sp+ 0] = dst_skip
; [sp+ 4] = yc_skip
; [sp+ 8] = uv_skip
; [sp+12] = width
; [sp+16] = height
;
; within loops,
; sl = Ur
; a2 = UVg
; a1 = Vb
; fp = Yc
; v2 = pix
; v1 = temp
; v3 = width
;
; note. The input arrays are byte arrays and the screen memory is a word array.
;
; Byte offsets, relative to sp, of the five loop parameters that every
; function in this file writes with a single STMIA sp, {a4-v3, lr}
; (register order a4, v1, v2, v3, lr gives the slot order below).
; Only yc_height is referenced by name in the code; the other slots are
; read back together by the LDMIA sp, {a2, v1, v2, v3, lr} at the end of
; each row.
; NOTE(review): the "(paint + n)" figures look like offsets into a caller
; side structure that is not visible in this file - confirm before relying
; on them.
dst_skip            EQU  0 ; + sp        (paint + 32)
yc_skip             EQU  4 ; + sp        (paint + 36)
uv_skip             EQU  8 ; + sp        (paint + 40)
yc_width            EQU 12 ; + sp        (paint + 28)
yc_height           EQU 16 ; + sp        (paint + 24)

; Chroma macro
;
; Reads one byte from each chroma plane, looks both up in the packed
; chroma tables and adds the results to the table base held in ip, giving
; three addresses that the caller indexes with the luminance byte.
;
; Argument: number of bytes to advance both chroma pointers by after the
;           read (1 in the full size plotters, 2 in the 50% plotter).
;
; In  a3 = u ptr
;     a4 = v ptr
;     ip = Yc0              (table base, loaded from Yc0_Tab by the callers)
;
; Out sl = Yc0 + Ur
;     a2 = Yc0 - (Ug + Vg)
;     a1 = Yc0 + Vb
;     a3 += macro argument
;     a4 += macro argument
;
; Corrupts v1 (left pointing at the V table)
;
; Each table word holds two signed 16 bit terms: the top half is taken with
; "asr #16", the bottom half is shifted up with "lsl #16" first so that the
; later "asr #16" sign extends it.
; The V table is addressed as the U table plus 256 words; this relies on
; the two tables being laid out back to back where ka_U_Table is defined.

  MACRO
  Chroma $off
  LDRB  a1, [a3], #$off     ; a1 = *u, then advance u
  LDRB  a2, [a4], #$off     ; a2 = *v, then advance v
  LDR   v1, U_Tab           ; v1 -> U table
  LDR   sl, [v1, a1, lsl #2]; sl = U table word = (Ur<<16 + Ug)
  ADD   v1, v1, #256<<2     ; v1 -> V table, 256 words further on
  LDR   a1, [v1, a2, lsl #2]; a1 = V table word = (Vb<<16 + Vg)
  MOV   a2, sl, lsl #16     ; Ug moved into the top half
  ADD   sl, ip, sl, asr #16 ; Yc0 + Ur
  ADD   a2, a2, a1, lsl #16 ; (Ug + Vg), still in the top half
  ADD   a1, ip, a1, asr #16 ; Yc0 + Vb
  SUB   a2, ip, a2, asr #16 ; Yc0 - (Ug + Vg)
  MEND

; All code and the two literal words below live in one read-only code area.
  AREA |A$$code|, CODE, READONLY

; Lookup tables defined outside this file (the header comment points at
; custom.c for their initialisation).
  IMPORT ka_U_Table
  IMPORT ka_Yc0_Table_8bit

; Literal words holding the table addresses. They are fetched with
; pc-relative LDRs: U_Tab inside the Chroma macro, Yc0_Tab once per
; function before the loops start.
  ALIGN 32
U_Tab        DCD ka_U_Table
Yc0_Tab      DCD ka_Yc0_Table_8bit


; ka_drawyuv444_z05_argb32 : 16m cols 50% zoom
; -------------------------
; Plots one destination pixel for every second source sample on every
; second source line.
;
; In   a1 = parameter block. Fields read here:
;             [a1, #0]  luminance plane address
;             [a1, #4]  v plane address
;             [a1, #8]  u plane address
;             [a1, #drawer_yc_bpr] onwards, eight consecutive words:
;               yc_bpr, dst1, dst_bpr, yc_height,
;               yc_width, dst_skip, yc_skip, uv_skip
; Out  v1-v6, sl, fp, ip and sp restored; a1-a4 and the flags are not.
;
; Pixel word as assembled below: bits 0-7 come from a1 (Yc0 + Vb, the blue
; term by the Chroma macro's naming), bits 8-15 from a2 (green), bits 16-23
; from sl (Yc0 + Ur, the red term), bits 24-31 are set to &FF.
;
; Both loops are bottom tested and count down in steps of two, so the
; caller must supply a positive width and height.
  EXPORT ka_drawyuv444_z05_argb32
  ALIGN 32
ka_drawyuv444_z05_argb32
  STMFD sp!, {v1-v6, sl, fp, ip, lr}
  SUB   sp, sp, #20             ; five words for the saved loop parameters

  ADD   a2, a1, #drawer_yc_bpr
  LDR   v6, [a1]                ; v6 = luminance read pointer
  LDMIA a2!, {v3-v5, lr}        ; yc_bpr, dst1, dst_bpr, yc_height
  LDMIA a2, {a3, a4, v1, v2}    ; yc_width, dst_skip, yc_skip, uv_skip
  ADD   v1, v1, v3              ; yc_skip += yc_bpr, stepping over the odd line
  ADD   v2, v2, v3              ; uv_skip += yc_bpr, likewise for chroma
  MOV   v3, a3                  ; v3 = column counter
  STMIA sp, {a4, v1-v3, lr}     ; dst_skip, yc_skip, uv_skip, width, height

  LDR   a4, [a1, #4]            ; a4 = v read pointer
  LDR   a3, [a1, #8]            ; a3 = u read pointer

  LDR   ip, Yc0_Tab             ; ip = table base used by Chroma

z05_row_loop
  STR   lr, [sp, #yc_height]    ; lr is scratch inside the column loop
z05_col_loop

  LDRB  fp, [v6], #2            ; fp = luminance byte, skip the next sample

; build the pixel
  Chroma 2
  LDRB  v2, [a1, fp]            ; blue  : table byte at (Yc0 + Vb) + y
  LDRB  v1, [a2, fp]            ; green : table byte at (Yc0 - UVg) + y
  LDRB  lr, [sl, fp]            ; red   : table byte at (Yc0 + Ur) + y
  ORR   v2, v2, v1, lsl #8
  ORR   v2, v2, lr, lsl #16
  ORR   v2, v2, #&FF000000      ; top byte forced to &FF
; plot it
  STR   v2, [v4], #4            ; store, then one pixel right

  SUBS  v3, v3, #2              ; two source samples consumed
  BGT   z05_col_loop

; step every pointer on to the next pair of source lines
  LDMIA sp, {a2, v1-v3, lr}     ; dst_skip, yc_skip, uv_skip, width, height
  ADD   v4, v4, a2              ; dst += dst_skip
  ADD   v6, v6, v1              ; y   += yc_skip
  ADD   a3, a3, v2              ; u   += uv_skip
  ADD   a4, a4, v2              ; v   += uv_skip

  SUBS  lr, lr, #2              ; two source lines consumed
  BGT   z05_row_loop

  ADD   sp, sp, #20
  LDMFD sp!, {v1-v6, sl, fp, ip, pc}

; ka_drawyuv444_z1_argb32 : 16m cols 100% zoom
; ------------------------
; Plots one destination pixel per source sample, two samples per pass of
; the column loop.
;
; In   a1 = parameter block. Fields read here:
;             [a1, #0]  luminance plane address
;             [a1, #4]  v plane address
;             [a1, #8]  u plane address
;             [a1, #drawer_yc_bpr] onwards, eight consecutive words:
;               yc_bpr, dst1, dst_bpr, yc_height,
;               yc_width, dst_skip, yc_skip, uv_skip
; Out  v1-v6, sl, fp, ip and sp restored; a1-a4 and the flags are not.
;
; Pixel word as assembled below: bits 0-7 come from a1 (Yc0 + Vb, the blue
; term by the Chroma macro's naming), bits 8-15 from a2 (green), bits 16-23
; from sl (Yc0 + Ur, the red term), bits 24-31 are set to &FF.
;
; Both loops are bottom tested and the column loop consumes two samples
; per pass, so the caller must supply a positive height and a positive,
; even width.
  EXPORT ka_drawyuv444_z1_argb32
  ALIGN 32
ka_drawyuv444_z1_argb32
  STMFD sp!, {v1-v6, sl, fp, ip, lr}
  SUB   sp, sp, #20             ; five words for the saved loop parameters

  ADD   a2, a1, #drawer_yc_bpr
  LDR   v6, [a1]                ; v6 = luminance read pointer
  LDMIA a2!, {v3-v5, lr}        ; yc_bpr, dst1, dst_bpr, yc_height
  LDMIA a2, {a3, a4, v1, v2}    ; yc_width, dst_skip, yc_skip, uv_skip
  MOV   v3, a3                  ; v3 = column counter
  STMIA sp, {a4, v1-v3, lr}     ; dst_skip, yc_skip, uv_skip, width, height

  LDR   a4, [a1, #4]            ; a4 = v read pointer
  LDR   a3, [a1, #8]            ; a3 = u read pointer

  LDR   ip, Yc0_Tab             ; ip = table base used by Chroma

z1_row_loop
  STR   lr, [sp, #yc_height]    ; lr is scratch inside the column loop
z1_col_loop

  LDRB  fp, [v6], #1            ; fp = luminance byte of the west sample

; west pixel
  Chroma 1
  LDRB  v2, [a1, fp]            ; blue  : table byte at (Yc0 + Vb) + y
  LDRB  v1, [a2, fp]            ; green : table byte at (Yc0 - UVg) + y
  LDRB  lr, [sl, fp]            ; red   : table byte at (Yc0 + Ur) + y
  LDRB  fp, [v6], #1            ; fetch the east luminance byte early
  ORR   v2, v2, v1, lsl #8
  ORR   v2, v2, lr, lsl #16
  ORR   v2, v2, #&FF000000      ; top byte forced to &FF
  STR   v2, [v4], #4            ; store, then one pixel right

; east pixel
  Chroma 1
  LDRB  v2, [a1, fp]            ; blue
  LDRB  v1, [a2, fp]            ; green
  LDRB  lr, [sl, fp]            ; red
  ORR   v2, v2, v1, lsl #8
  ORR   v2, v2, lr, lsl #16
  ORR   v2, v2, #&FF000000
  STR   v2, [v4], #4            ; store, then one pixel right

  SUBS  v3, v3, #2              ; two samples done
  BGT   z1_col_loop

; step every pointer on to the next source line
  LDMIA sp, {a2, v1-v3, lr}     ; dst_skip, yc_skip, uv_skip, width, height
  ADD   v4, v4, a2              ; dst += dst_skip
  ADD   v6, v6, v1              ; y   += yc_skip
  ADD   a3, a3, v2              ; u   += uv_skip
  ADD   a4, a4, v2              ; v   += uv_skip

  SUBS  lr, lr, #1              ; one source line done
  BGT   z1_row_loop

  ADD   sp, sp, #20
  LDMFD sp!, {v1-v6, sl, fp, ip, pc}

; ka_drawyuv444_z2_argb32 : 16m cols 200% zoom
; ------------------------
; Plots every source sample as a 2 x 2 block of identical pixels, two
; samples per pass of the column loop.
;
; In   a1 = parameter block. Fields read here:
;             [a1, #0]  luminance plane address
;             [a1, #4]  v plane address
;             [a1, #8]  u plane address
;             [a1, #drawer_yc_bpr] onwards, eight consecutive words:
;               yc_bpr, dst1, dst_bpr, yc_height,
;               yc_width, dst_skip, yc_skip, uv_skip
; Out  v1-v6, sl, fp, ip and sp restored; a1-a4 and the flags are not.
;
; The west block is drawn top row first and leaves dst on the bottom row;
; the east block is drawn bottom row first and leaves dst back on the top
; row. One extra dst_bpr is therefore folded into dst_skip to reach the
; next pair of screen rows.
;
; Pixel word as assembled below: bits 0-7 come from a1 (Yc0 + Vb, the blue
; term by the Chroma macro's naming), bits 8-15 from a2 (green), bits 16-23
; from sl (Yc0 + Ur, the red term), bits 24-31 are set to &FF.
;
; Both loops are bottom tested and the column loop consumes two samples
; per pass, so the caller must supply a positive height and a positive,
; even width.
  EXPORT ka_drawyuv444_z2_argb32
  ALIGN 32
ka_drawyuv444_z2_argb32
  STMFD sp!, {v1-v6, sl, fp, ip, lr}
  SUB   sp, sp, #20             ; five words for the saved loop parameters

  ADD   a2, a1, #drawer_yc_bpr
  LDR   v6, [a1]                ; v6 = luminance read pointer
  LDMIA a2!, {v3-v5, lr}        ; yc_bpr, dst1, dst_bpr, yc_height
  LDMIA a2, {a3, a4, v1, v2}    ; yc_width, dst_skip, yc_skip, uv_skip
  MOV   v3, a3                  ; v3 = column counter
  ADD   a4, a4, v5              ; dst_skip += dst_bpr
  STMIA sp, {a4, v1-v3, lr}     ; dst_skip, yc_skip, uv_skip, width, height

  LDR   a4, [a1, #4]            ; a4 = v read pointer
  LDR   a3, [a1, #8]            ; a3 = u read pointer

  LDR   ip, Yc0_Tab             ; ip = table base used by Chroma

z2_row_loop
  STR   lr, [sp, #yc_height]    ; lr is scratch inside the column loop
z2_col_loop

  LDRB  fp, [v6], #1            ; fp = luminance byte of the west sample

; west pixel
  Chroma 1
  LDRB  v2, [a1, fp]            ; blue  : table byte at (Yc0 + Vb) + y
  LDRB  v1, [a2, fp]            ; green : table byte at (Yc0 - UVg) + y
  LDRB  lr, [sl, fp]            ; red   : table byte at (Yc0 + Ur) + y
  LDRB  fp, [v6], #1            ; fetch the east luminance byte early
  ORR   v2, v2, v1, lsl #8
  ORR   v2, v2, lr, lsl #16
  ORR   v2, v2, #&FF000000      ; top byte forced to &FF
; west 2 x 2 block, top row then bottom row
  STR   v2, [v4, #4]            ; top row, second pixel
  STR   v2, [v4], v5            ; top row, first pixel, then down a row
  STR   v2, [v4, #4]            ; bottom row, second pixel
  STR   v2, [v4], #8            ; bottom row, first pixel, then right

; east pixel
  Chroma 1
  LDRB  v2, [a1, fp]            ; blue
  LDRB  v1, [a2, fp]            ; green
  LDRB  lr, [sl, fp]            ; red
  ORR   v2, v2, v1, lsl #8
  ORR   v2, v2, lr, lsl #16
  ORR   v2, v2, #&FF000000
; east 2 x 2 block, bottom row then top row
  STR   v2, [v4, #4]            ; bottom row, second pixel
  STR   v2, [v4], -v5           ; bottom row, first pixel, then up a row
  STR   v2, [v4, #4]            ; top row, second pixel
  STR   v2, [v4], #8            ; top row, first pixel, then right

  SUBS  v3, v3, #2              ; two samples done
  BGT   z2_col_loop

; step every pointer on to the next source line
  LDMIA sp, {a2, v1-v3, lr}     ; dst_skip, yc_skip, uv_skip, width, height
  ADD   v4, v4, a2              ; dst += dst_skip
  ADD   v6, v6, v1              ; y   += yc_skip
  ADD   a3, a3, v2              ; u   += uv_skip
  ADD   a4, a4, v2              ; v   += uv_skip

  SUBS  lr, lr, #1              ; one source line done
  BGT   z2_row_loop

  ADD   sp, sp, #20
  LDMFD sp!, {v1-v6, sl, fp, ip, pc}

; ka_drawyuv444_z3_argb32 : 16m cols 300% zoom
; ------------------------
; Plots every source sample as a 3 x 3 block of identical pixels, two
; samples per pass of the column loop.
;
; In   a1 = parameter block. Fields read here:
;             [a1, #0]  luminance plane address
;             [a1, #4]  v plane address
;             [a1, #8]  u plane address
;             [a1, #drawer_yc_bpr] onwards, eight consecutive words:
;               yc_bpr, dst1, dst_bpr, yc_height,
;               yc_width, dst_skip, yc_skip, uv_skip
; Out  v1-v6, sl, fp, ip and sp restored; a1-a4 and the flags are not.
;
; The west block is drawn downwards and leaves dst on its third row; the
; east block is drawn upwards and leaves dst back on the first row. Two
; extra dst_bpr are therefore folded into dst_skip to reach the next
; group of three screen rows.
;
; Pixel word as assembled below: bits 0-7 come from a1 (Yc0 + Vb, the blue
; term by the Chroma macro's naming), bits 8-15 from a2 (green), bits 16-23
; from sl (Yc0 + Ur, the red term), bits 24-31 are set to &FF.
;
; Both loops are bottom tested and the column loop consumes two samples
; per pass, so the caller must supply a positive height and a positive,
; even width.
  EXPORT ka_drawyuv444_z3_argb32
  ALIGN 32
ka_drawyuv444_z3_argb32
  STMFD sp!, {v1-v6, sl, fp, ip, lr}
  SUB   sp, sp, #20             ; five words for the saved loop parameters

  ADD   a2, a1, #drawer_yc_bpr
  LDR   v6, [a1]                ; v6 = luminance read pointer
  LDMIA a2!, {v3-v5, lr}        ; yc_bpr, dst1, dst_bpr, yc_height
  LDMIA a2, {a3, a4, v1, v2}    ; yc_width, dst_skip, yc_skip, uv_skip
  MOV   v3, a3                  ; v3 = column counter
  ADD   a4, a4, v5, lsl #1      ; dst_skip += 2 * dst_bpr
  STMIA sp, {a4, v1-v3, lr}     ; dst_skip, yc_skip, uv_skip, width, height

  LDR   a4, [a1, #4]            ; a4 = v read pointer
  LDR   a3, [a1, #8]            ; a3 = u read pointer

  LDR   ip, Yc0_Tab             ; ip = table base used by Chroma

z3_row_loop
  STR   lr, [sp, #yc_height]    ; lr is scratch inside the column loop
z3_col_loop

  LDRB  fp, [v6], #1            ; fp = luminance byte of the west sample

; west pixel
  Chroma 1
  LDRB  v2, [a1, fp]            ; blue  : table byte at (Yc0 + Vb) + y
  LDRB  v1, [a2, fp]            ; green : table byte at (Yc0 - UVg) + y
  LDRB  lr, [sl, fp]            ; red   : table byte at (Yc0 + Ur) + y
  LDRB  fp, [v6], #1            ; fetch the east luminance byte early
  ORR   v2, v2, v1, lsl #8
  ORR   v2, v2, lr, lsl #16
  ORR   v2, v2, #&FF000000      ; top byte forced to &FF
; west 3 x 3 block, drawn downwards
  STR   v2, [v4, #4]            ; row 1, second pixel
  STR   v2, [v4, #8]            ; row 1, third pixel
  STR   v2, [v4], v5            ; row 1, first pixel, then down a row
  STR   v2, [v4, #4]            ; row 2, second pixel
  STR   v2, [v4, #8]            ; row 2, third pixel
  STR   v2, [v4], v5            ; row 2, first pixel, then down a row
  STR   v2, [v4, #4]            ; row 3, second pixel
  STR   v2, [v4, #8]            ; row 3, third pixel
  STR   v2, [v4], #12           ; row 3, first pixel, then right

; east pixel
  Chroma 1
  LDRB  v2, [a1, fp]            ; blue
  LDRB  v1, [a2, fp]            ; green
  LDRB  lr, [sl, fp]            ; red
  ORR   v2, v2, v1, lsl #8
  ORR   v2, v2, lr, lsl #16
  ORR   v2, v2, #&FF000000
; east 3 x 3 block, drawn upwards
  STR   v2, [v4, #4]            ; row 3, second pixel
  STR   v2, [v4, #8]            ; row 3, third pixel
  STR   v2, [v4], -v5           ; row 3, first pixel, then up a row
  STR   v2, [v4, #4]            ; row 2, second pixel
  STR   v2, [v4, #8]            ; row 2, third pixel
  STR   v2, [v4], -v5           ; row 2, first pixel, then up a row
  STR   v2, [v4, #4]            ; row 1, second pixel
  STR   v2, [v4, #8]            ; row 1, third pixel
  STR   v2, [v4], #12           ; row 1, first pixel, then right

  SUBS  v3, v3, #2              ; two samples done
  BGT   z3_col_loop

; step every pointer on to the next source line
  LDMIA sp, {a2, v1-v3, lr}     ; dst_skip, yc_skip, uv_skip, width, height
  ADD   v4, v4, a2              ; dst += dst_skip
  ADD   v6, v6, v1              ; y   += yc_skip
  ADD   a3, a3, v2              ; u   += uv_skip
  ADD   a4, a4, v2              ; v   += uv_skip

  SUBS  lr, lr, #1              ; one source line done
  BGT   z3_row_loop

  ADD   sp, sp, #20
  LDMFD sp!, {v1-v6, sl, fp, ip, pc}

; ka_drawyuv444_z4_argb32 : 16m cols 400% zoom
; ------------------------
; Plots every source sample as a 4 x 4 block of identical pixels, two
; samples per pass of the column loop.
;
; In   a1 = parameter block. Fields read here:
;             [a1, #0]  luminance plane address
;             [a1, #4]  v plane address
;             [a1, #8]  u plane address
;             [a1, #drawer_yc_bpr] onwards, eight consecutive words:
;               yc_bpr, dst1, dst_bpr, yc_height,
;               yc_width, dst_skip, yc_skip, uv_skip
; Out  v1-v6, sl, fp, ip and sp restored; a1-a4 and the flags are not.
;
; The west block is drawn downwards and leaves dst on its fourth row; the
; east block is drawn upwards and leaves dst back on the first row. Three
; extra dst_bpr are therefore folded into dst_skip to reach the next
; group of four screen rows.
;
; Pixel word as assembled below: bits 0-7 come from a1 (Yc0 + Vb, the blue
; term by the Chroma macro's naming), bits 8-15 from a2 (green), bits 16-23
; from sl (Yc0 + Ur, the red term), bits 24-31 are set to &FF.
;
; Both loops are bottom tested and the column loop consumes two samples
; per pass, so the caller must supply a positive height and a positive,
; even width.
  EXPORT ka_drawyuv444_z4_argb32
  ALIGN 32
ka_drawyuv444_z4_argb32
  STMFD sp!, {v1-v6, sl, fp, ip, lr}
  SUB   sp, sp, #20             ; five words for the saved loop parameters

  ADD   a2, a1, #drawer_yc_bpr
  LDR   v6, [a1]                ; v6 = luminance read pointer
  LDMIA a2!, {v3-v5, lr}        ; yc_bpr, dst1, dst_bpr, yc_height
  LDMIA a2, {a3, a4, v1, v2}    ; yc_width, dst_skip, yc_skip, uv_skip
  MOV   v3, a3                  ; v3 = column counter
  ADD   a4, a4, v5, lsl #1      ; dst_skip += 2 * dst_bpr ...
  ADD   a4, a4, v5              ; ... + dst_bpr, i.e. 3 * dst_bpr in total
  STMIA sp, {a4, v1-v3, lr}     ; dst_skip, yc_skip, uv_skip, width, height

  LDR   a4, [a1, #4]            ; a4 = v read pointer
  LDR   a3, [a1, #8]            ; a3 = u read pointer

  LDR   ip, Yc0_Tab             ; ip = table base used by Chroma

z4_row_loop
  STR   lr, [sp, #yc_height]    ; lr is scratch inside the column loop
z4_col_loop

  LDRB  fp, [v6], #1            ; fp = luminance byte of the west sample

; west pixel
  Chroma 1
  LDRB  v2, [a1, fp]            ; blue  : table byte at (Yc0 + Vb) + y
  LDRB  v1, [a2, fp]            ; green : table byte at (Yc0 - UVg) + y
  LDRB  lr, [sl, fp]            ; red   : table byte at (Yc0 + Ur) + y
  LDRB  fp, [v6], #1            ; fetch the east luminance byte early
  ORR   v2, v2, v1, lsl #8
  ORR   v2, v2, lr, lsl #16
  ORR   v2, v2, #&FF000000      ; top byte forced to &FF
; west 4 x 4 block, drawn downwards
  STR   v2, [v4, #4]            ; row 1, second pixel
  STR   v2, [v4, #8]            ; row 1, third pixel
  STR   v2, [v4, #12]           ; row 1, fourth pixel
  STR   v2, [v4], v5            ; row 1, first pixel, then down a row
  STR   v2, [v4, #4]            ; row 2, second pixel
  STR   v2, [v4, #8]            ; row 2, third pixel
  STR   v2, [v4, #12]           ; row 2, fourth pixel
  STR   v2, [v4], v5            ; row 2, first pixel, then down a row
  STR   v2, [v4, #4]            ; row 3, second pixel
  STR   v2, [v4, #8]            ; row 3, third pixel
  STR   v2, [v4, #12]           ; row 3, fourth pixel
  STR   v2, [v4], v5            ; row 3, first pixel, then down a row
  STR   v2, [v4, #4]            ; row 4, second pixel
  STR   v2, [v4, #8]            ; row 4, third pixel
  STR   v2, [v4, #12]           ; row 4, fourth pixel
  STR   v2, [v4], #16           ; row 4, first pixel, then right

; east pixel
  Chroma 1
  LDRB  v2, [a1, fp]            ; blue
  LDRB  v1, [a2, fp]            ; green
  LDRB  lr, [sl, fp]            ; red
  ORR   v2, v2, v1, lsl #8
  ORR   v2, v2, lr, lsl #16
  ORR   v2, v2, #&FF000000
; east 4 x 4 block, drawn upwards
  STR   v2, [v4, #4]            ; row 4, second pixel
  STR   v2, [v4, #8]            ; row 4, third pixel
  STR   v2, [v4, #12]           ; row 4, fourth pixel
  STR   v2, [v4], -v5           ; row 4, first pixel, then up a row
  STR   v2, [v4, #4]            ; row 3, second pixel
  STR   v2, [v4, #8]            ; row 3, third pixel
  STR   v2, [v4, #12]           ; row 3, fourth pixel
  STR   v2, [v4], -v5           ; row 3, first pixel, then up a row
  STR   v2, [v4, #4]            ; row 2, second pixel
  STR   v2, [v4, #8]            ; row 2, third pixel
  STR   v2, [v4, #12]           ; row 2, fourth pixel
  STR   v2, [v4], -v5           ; row 2, first pixel, then up a row
  STR   v2, [v4, #4]            ; row 1, second pixel
  STR   v2, [v4, #8]            ; row 1, third pixel
  STR   v2, [v4, #12]           ; row 1, fourth pixel
  STR   v2, [v4], #16           ; row 1, first pixel, then right

  SUBS  v3, v3, #2              ; two samples done
  BGT   z4_col_loop

; step every pointer on to the next source line
  LDMIA sp, {a2, v1-v3, lr}     ; dst_skip, yc_skip, uv_skip, width, height
  ADD   v4, v4, a2              ; dst += dst_skip
  ADD   v6, v6, v1              ; y   += yc_skip
  ADD   a3, a3, v2              ; u   += uv_skip
  ADD   a4, a4, v2              ; v   += uv_skip

  SUBS  lr, lr, #1              ; one source line done
  BGT   z4_row_loop

  ADD   sp, sp, #20
  LDMFD sp!, {v1-v6, sl, fp, ip, pc}

  ALIGN 32
  END
