;
; yuv444_abgr15.s
; Copyright (C) 2002 P.Everett <peter@everett9981.freeserve.co.uk>
;
; This file is part of KinoAMP, a free RISCOS MPEG program stream decoder.
;
; KinoAMP is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
;
; KinoAMP is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
;

 GET hdr.ka_drawer

; Thanks to André Timmermans for some improvements, and the zoom mode code.

; yuv-rgb matrix conversion and display output functions for 16 bit
; colour depth displays.
;
; The colour space conversion equations are,
;
;   Ur  = KUr(u - 128)
;   Vb  = KVb(v - 128)
;   UVg = KUg(u - 128) + KVg(v - 128)
;   Yc  = KYc(y - 16)
;    r  = Yc + Ur
;    g  = Yc - UVg
;    b  = Yc + Vb
;
; where (with 10 bit scaling),
;   KYc = 1192 = 1.164 * 1024
;   KUr = 1634 = 1.596 * 1024
;   KUg =  833 = 0.813 * 1024
;   KVb = 2066 = 2.018 * 1024
;   KVg =  400 = 0.391 * 1024
;
; These calculations are performed on initialisation and are stored in the
; ka_U_Table, ka_V_Table, and ka_Yc_Table. See the file custom.c
;
; Register and stack usage for all of the drawing functions,
;
; on input to loops,
; v4 = dst1          even line screen address
; v5 = bytesperrow   vertical display step
; v6 = y1            even line luminance input address
; a3 = u             Cr chroma input address
; a4 = v             Cb chroma input address
; [sp+ 0] = dst_skip
; [sp+ 4] = yc_skip
; [sp+ 8] = uv_skip
; [sp+12] = width
; [sp+16] = height
;
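; within loops,
; a1 = Yc0 table + Ur    (red lookup base)
; a2 = Yc0 table - UVg   (green lookup base)
; sl = Yc0 table + Vb    (blue lookup base)
; ip = Yc0 table base
; fp = y sample, used as the index into the three lookups
; v2 = pix
; v1 = temp
; v3 = width
; lr = height (remaining rows); also used as a temp in some loops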
;
; note. The input arrays are byte arrays and the screen memory is a word array.
;
; Offsets from sp of the five words written by "stmia sp, {a4-v3, lr}" in
; each function prologue. Only yc_height is referenced by name in this file;
; the others are reloaded together with "ldmia sp, {...}".
; NOTE(review): the "(paint + N)" figures look like the offsets of the same
; fields in the structure passed in a1 - confirm against hdr.ka_drawer.
dst_skip            EQU  0 ; + sp        (paint + 32)
yc_skip             EQU  4 ; + sp        (paint + 36)
uv_skip             EQU  8 ; + sp        (paint + 40)
yc_width            EQU 12 ; + sp        (paint + 28)
yc_height           EQU 16 ; + sp        (paint + 24)

; Chroma $off
;
; Fetch one u and one v sample, look up their table entries and fold the
; results into the luminance table base, giving one byte-lookup base per
; colour channel. $off is the post-increment applied to both chroma pointers.
;
; In  a3 = u ptr
;     a4 = v ptr
;     ip = Yc0
;
; Out a1 = Yc0 + Ur
;     a2 = Yc0 - (Ug + Vg)
;     sl = Yc0 + Vb
;     a3 += $off
;     a4 += $off
;
; Corrupts v1

  MACRO
  Chroma $off
  LDRB  a1, [a3], #$off     ; a1 = *u, u += $off
  LDRB  a2, [a4], #$off     ; a2 = *v, v += $off
  LDR   v1, U_Tab           ; U table
  LDR   a1, [v1, a1, lsl #2]; (Ur<<16 + Ug)
  ADD   v1, v1, #256<<2     ; V table, taken to start 256 words after the U table
  LDR   sl, [v1, a2, lsl #2]; (Vb<<16 + Vg)
  MOV   a2, a1, lsl #16     ; Ug, moved to the top half so asr can sign-extend it later
  ADD   a1, ip, a1, asr #16 ; Yc0 + Ur
  ADD   a2, a2, sl, lsl #16 ; Ug + Vg (still in the top half)
  ADD   sl, ip, sl, asr #16 ; Yc0 + Vb
  SUB   a2, ip, a2, asr #16 ; Yc0 - (Ug + Vg)
  MEND

; ChromaHQ $off
;
; Same inputs, outputs and corrupted register (v1) as Chroma, but every
; chroma term is multiplied by 4 (lsl #2) before being combined with ip.
; The caller indexes the resulting bases with "fp, lsl #2" word loads, so
; the table addressed through ip is accessed as words rather than bytes.
  MACRO
  ChromaHQ $off
  LDRB  a1, [a3], #$off     ; a1 = *u, u += $off
  LDRB  a2, [a4], #$off     ; a2 = *v, v += $off
  LDR   v1, U_Tab           ; U table
  LDR   a1, [v1, a1, lsl #2]; (Ur<<16 + Ug)
  ADD   v1, v1, #256<<2     ; V table, taken to start 256 words after the U table
  LDR   sl, [v1, a2, lsl #2]; (Vb<<16 + Vg)
  MOV   a2, a1, lsl #16     ; Ug, moved to the top half
  MOV   a1, a1, asr #16     ; Ur, sign-extended
  ADD   a1, ip, a1, lsl #2  ; Yc0 + Ur (word offset)
  ADD   a2, a2, sl, lsl #16 ; Ug + Vg (still in the top half)
  MOV   sl, sl, asr #16     ; Vb, sign-extended
  MOV   a2, a2, asr #16     ; Ug + Vg, sign-extended
  ADD   sl, ip, sl, lsl #2  ; Yc0 + Vb (word offset)
  SUB   a2, ip, a2, lsl #2  ; Yc0 - (Ug + Vg) (word offset)
  MEND

  AREA |A$$code|, CODE, READONLY

; Lookup tables defined outside this file.
  IMPORT ka_U_Table
  IMPORT ka_Yc0_Table_5bit
  IMPORT ka_Yc0_Table_5bit_hi_q

; Address words kept in the code area so that the macros and functions can
; fetch the table addresses with a pc-relative "LDR reg, label".
  ALIGN 32
U_Tab        DCD ka_U_Table
Yc0_Tab      DCD ka_Yc0_Table_5bit
Yc0_Tab_hi_q DCD ka_Yc0_Table_5bit_hi_q



; ka_drawyuv444_z05_abgr15 : 32k cols 50% zoom
; -----------------------
; In  a1 = block holding the plane pointers (+0 y, +4 v, +8 u) and, from
;          drawer_yc_bpr onwards, yc_bpr, dst1, dst_bpr, yc_height,
;          yc_width, dst_skip, yc_skip, uv_skip.
;
; Each pass of the inner loop reads two samples spaced two bytes apart from
; every plane and writes them as one word (two pixels). Width therefore
; counts down by 4 and height by 2, and yc_bpr is added to both input skips
; so that a whole extra source line is stepped over after each row.
  EXPORT ka_drawyuv444_z05_abgr15
  ALIGN 32
ka_drawyuv444_z05_abgr15
  stmfd sp!, {v1, v2, v3, v4, v5, v6, sl, fp, ip, lr}
  sub   sp, sp, #20         ; room for the five per-row words

  add   a2, a1, #drawer_yc_bpr
  ldmia a2!, {v3, v4, v5, lr} ; yc_bpr, dst1, dst_bpr, yc_height
  ldr   v6, [a1, #0]        ; y1 = frame->base[0]
  ldmia a2, {a3-v2}         ; yc_width, dst_skip, yc_skip, uv_skip
  add   v2, v2, v3          ; uv_skip += yc_bpr
  add   v1, v1, v3          ; yc_skip += yc_bpr
  mov   v3, a3              ; v3 = width counter
  stmia sp, {a4-v3, lr}     ; save dst_skip, yc_skip, uv_skip, width, height

  ldr   a3, [a1, #8]        ; u = frame->base[2]
  ldr   a4, [a1, #4]        ; v = frame->base[1]

  ldr   ip, Yc0_Tab         ; ip = luminance lookup table base

height_05loop
  str   lr, [sp, #yc_height] ; lr is reused as a temp below, so park the row count
width_05loop

  ldrb  fp, [v6], #2        ; Yc = *y1, y1 += 2

; Chroma for the first pixel
  Chroma 2

; W1 pixel luminance
  ldrb  v2, [a1, fp]        ; red
  ldrb  v1, [a2, fp]        ; green
  ldrb  lr, [sl, fp]        ; blue
  ldrb  fp, [v6], #2        ; Yc = *y1, y1 += 2 (for the second pixel)
  orr   v2, v2, v1, lsl #5
  orr   v2, v2, lr, lsl #10
  orr   v2, v2, #&8000      ; set bit 15 of the low pixel

; Chroma for the second pixel
  Chroma 2

; W2 pixel luminance
  ldrb  v1, [a1, fp]        ; red
  ldrb  lr, [a2, fp]        ; green
  ldrb  fp, [sl, fp]        ; blue
  orr   v2, v2, v1, lsl #16
  orr   v2, v2, lr, lsl #21
  orr   v2, v2, fp, lsl #26
  orr   v2, v2, #&80000000  ; set bit 15 of the high pixel
; display double pixel
  str   v2, [v4], #4        ; *dst = pix, then right

  subs  v3, v3, #4          ; width -= 4 (two samples read, two skipped)
  bgt   width_05loop

  ldmia sp,{a2, v1, v2, v3, lr} ; dst_skip, yc_skip, uv_skip, width, height
  add   a3, a3, v2          ; u += uv_skip
  add   a4, a4, v2          ; v += uv_skip
  add   v6, v6, v1          ; y1 += yc_skip
  add   v4, v4, a2          ; dst += dst_skip

  subs  lr, lr, #2          ; height -= 2 (one line drawn, one skipped)
  bgt   height_05loop

  add   sp, sp, #20
  ldmfd sp!, {v1, v2, v3, v4, v5, v6, sl, fp, ip, pc}

; ka_drawyuv444_z1_abgr15 : 32k cols 100% zoom
; ----------------------
; In  a1 = block holding the plane pointers (+0 y, +4 v, +8 u) and, from
;          drawer_yc_bpr onwards, yc_bpr, dst1, dst_bpr, yc_height,
;          yc_width, dst_skip, yc_skip, uv_skip.
;
; Two source pixels are converted per pass and written as one word. lr holds
; the remaining row count for the whole width loop; its bit 0 decides which
; of the two pixels has the constant 2*224+256+2*224 added to its lookup
; index, so the choice alternates both along a row and from row to row.
  EXPORT ka_drawyuv444_z1_abgr15
  ALIGN 32
ka_drawyuv444_z1_abgr15
  stmfd sp!, {v1, v2, v3, v4, v5, v6, sl, fp, ip, lr}
  sub   sp, sp, #20         ; room for the five per-row words

  add   a2, a1, #drawer_yc_bpr
  ldmia a2!, {v3, v4, v5, lr} ; yc_bpr, dst1, dst_bpr, yc_height
  ldr   v6, [a1, #0]        ; y1 = frame->base[0]
  ldmia a2, {a3-v2}         ; yc_width, dst_skip, yc_skip, uv_skip
  mov   v3, a3              ; v3 = width counter
  stmia sp, {a4-v3, lr}     ; save dst_skip, yc_skip, uv_skip, width, height

  ldr   a3, [a1, #8]        ; u = frame->base[2]
  ldr   a4, [a1, #4]        ; v = frame->base[1]

  ldr   ip, Yc0_Tab         ; ip = luminance lookup table base

height_loop
  str   lr, [sp, #yc_height] ; keep the stack copy in step for the ldmia below
width_loop

  ldrb  fp, [v6], #1        ; Yc = *y1++

; W pixel luminance
  Chroma 1
  tst   lr,#1               ; odd remaining-row count?
  addne fp, fp, #2*224+256+2*224 ; yes: shift the west pixel's lookup index
  ldrb  v2, [a1, fp]        ; red
  ldrb  v1, [a2, fp]        ; green
  orr   v2, v2, v1, lsl #5
  ldrb  v1, [sl, fp]        ; blue
  ldrb  fp, [v6], #1        ; Yc = *y1++ (for the east pixel)
  orr   v2, v2, v1, lsl #10
  orr   v2, v2, #&8000      ; set bit 15 of the low pixel

; E pixel luminance
  Chroma 1
  tst   lr,#1               ; even remaining-row count?
  addeq fp, fp, #2*224+256+2*224 ; yes: shift the east pixel's lookup index
  ldrb  v1, [a1, fp]        ; red
  orr   v2, v2, v1, lsl #16
  ldrb  v1, [a2, fp]        ; green
  orr   v2, v2, v1, lsl #21
  ldrb  v1, [sl, fp]        ; blue
  orr   v2, v2, v1, lsl #26
  orr   v2, v2, #&80000000  ; set bit 15 of the high pixel
; display double pixel
  str   v2, [v4], #4        ; *dst++ = pix

  subs  v3, v3, #2          ; width -= 2
  bgt   width_loop

  ldmia sp,{a2, v1, v2, v3, lr} ; dst_skip, yc_skip, uv_skip, width, height
  add   a3, a3, v2          ; u += uv_skip
  add   a4, a4, v2          ; v += uv_skip
  add   v6, v6, v1          ; y1 += yc_skip
  add   v4, v4, a2          ; dst += dst_skip

  subs  lr, lr, #1          ; height--
  bgt   height_loop

  add   sp, sp, #20
  ldmfd sp!, {v1, v2, v3, v4, v5, v6, sl, fp, ip, pc}

; ka_drawyuv444_z2_abgr15 : 32k cols 200% zoom
; ----------------------
; In  a1 = block holding the plane pointers (+0 y, +4 v, +8 u) and, from
;          drawer_yc_bpr onwards, yc_bpr, dst1, dst_bpr, yc_height,
;          yc_width, dst_skip, yc_skip, uv_skip.
;
; Every source pixel becomes a 2x2 block: the pixel is duplicated into both
; halves of a word and that word is stored on two consecutive screen rows.
; The west block is written top-to-bottom and the east block bottom-to-top,
; so dst ends each pass back on the upper row. dst_bpr is added to dst_skip
; because each source row fills two screen rows.
  EXPORT ka_drawyuv444_z2_abgr15
  ALIGN 32
ka_drawyuv444_z2_abgr15
  stmfd sp!, {v1, v2, v3, v4, v5, v6, sl, fp, ip, lr}
  sub   sp, sp, #20         ; room for the five per-row words

  add   a2, a1, #drawer_yc_bpr
  ldmia a2!, {v3, v4, v5, lr} ; yc_bpr, dst1, dst_bpr, yc_height
  ldr   v6, [a1, #0]        ; y1 = frame->base[0]
  ldmia a2, {a3-v2}         ; yc_width, dst_skip, yc_skip, uv_skip
  add   a4, a4, v5          ; dst_skip += dst_bpr
  mov   v3, a3              ; v3 = width counter
  stmia sp, {a4-v3, lr}     ; save dst_skip, yc_skip, uv_skip, width, height

  ldr   a3, [a1, #8]        ; u = frame->base[2]
  ldr   a4, [a1, #4]        ; v = frame->base[1]

  ldr   ip, Yc0_Tab         ; ip = luminance lookup table base

height2_loop
  str   lr, [sp, #yc_height] ; keep the stack copy in step for the ldmia below
width2_loop

  ldrb  fp, [v6], #1        ; Yc = *y1++

; W pixel luminance
  Chroma 1
  tst   lr,#1               ; odd remaining-row count?
  addne fp, fp, #2*224+256+2*224 ; yes: shift the west pixel's lookup index
  ldrb  v2, [a1, fp]        ; red
  ldrb  v1, [a2, fp]        ; green
  orr   v2, v2, v1, lsl #5
  ldrb  v1, [sl, fp]        ; blue
  ldrb  fp, [v6], #1        ; Yc = *y1++ (for the east pixel)
  orr   v2, v2, v1, lsl #10
  orr   v2, v2, #&8000      ; set bit 15
; display west double pixel pair
  orr   v2, v2, v2, lsl #16 ; double pixel
  str   v2, [v4], v5        ; *dst = pix, then down
  str   v2, [v4], #4        ; *dst = pix, then right

; E pixel luminance
  Chroma 1
  tst   lr,#1               ; even remaining-row count?
  addeq fp, fp, #2*224+256+2*224 ; yes: shift the east pixel's lookup index
  ldrb  v2, [a1, fp]        ; red
  ldrb  v1, [a2, fp]        ; green
  orr   v2, v2, v1, lsl #5
  ldrb  v1, [sl, fp]        ; blue
  orr   v2, v2, v1, lsl #10
  orr   v2, v2, #&8000      ; set bit 15
; display east double pixel pair
  orr   v2, v2, v2, lsl #16 ; double pixel
  str   v2, [v4], -v5       ; *dst = pix, then up
  str   v2, [v4], #4        ; *dst = pix, then right

  subs  v3, v3, #2          ; width -= 2
  bgt   width2_loop

  ldmia sp,{a2, v1, v2, v3, lr} ; dst_skip, yc_skip, uv_skip, width, height
  add   a3, a3, v2          ; u += uv_skip
  add   a4, a4, v2          ; v += uv_skip
  add   v6, v6, v1          ; y1 += yc_skip
  add   v4, v4, a2          ; dst += dst_skip

  subs  lr, lr, #1          ; height--
  bgt   height2_loop

  add   sp, sp, #20
  ldmfd sp!, {v1, v2, v3, v4, v5, v6, sl, fp, ip, pc}


; ka_drawyuv444_z2_abgr15_hq : 32k cols 200% zoom, high quality dither
; -------------------------
; In  a1 = block holding the plane pointers (+0 y, +4 v, +8 u) and, from
;          drawer_yc_bpr onwards, yc_bpr, dst1, dst_bpr, yc_height,
;          yc_width, dst_skip, yc_skip, uv_skip.
;
; Like the plain 200% routine each source pixel fills a 2x2 block, but the
; lookups are word loads from the hi_q table and the combined word is stored
; as-is on the upper row and rotated by 16 bits on the lower row.
; NOTE(review): presumably every table word carries two differently dithered
; 5 bit values, one per halfword - confirm against the table generator.
; NOTE(review): only bit 15 is OR-ed in here, whereas the other routines in
; this file set bit 15 of both halfwords. Check whether bit 31 is meant to
; come from the table or has been left out.
  EXPORT ka_drawyuv444_z2_abgr15_hq
  ALIGN 32
ka_drawyuv444_z2_abgr15_hq
  stmfd sp!, {v1, v2, v3, v4, v5, v6, sl, fp, ip, lr}
  sub   sp, sp, #20         ; room for the five per-row words

  add   a2, a1, #drawer_yc_bpr
  ldmia a2!, {v3, v4, v5, lr} ; yc_bpr, dst1, dst_bpr, yc_height
  ldr   v6, [a1, #0]        ; y1 = frame->base[0]
  ldmia a2, {a3-v2}         ; yc_width, dst_skip, yc_skip, uv_skip
  add   a4, a4, v5          ; dst_skip += dst_bpr
  mov   v3, a3              ; v3 = width counter
  stmia sp, {a4-v3, lr}     ; save dst_skip, yc_skip, uv_skip, width, height

  ldr   a3, [a1, #8]        ; u = frame->base[2]
  ldr   a4, [a1, #4]        ; v = frame->base[1]

  ldr   ip, Yc0_Tab_hi_q    ; ip = high quality luminance lookup table base

height2_loop_hi_q
  str   lr, [sp, #yc_height] ; lr is reused as a temp below, so park the row count
width2_loop_hi_q

  ldrb  fp, [v6], #1        ; Yc = *y1++

; W pixel luminance
  ChromaHQ 1
  ldr   v2, [a1, fp, lsl#2] ; red
  ldr   v1, [a2, fp, lsl#2] ; green
  ldr   lr, [sl, fp, lsl#2] ; blue
  ldrb  fp, [v6], #1        ; Yc = *y1++ (for the east pixel)
  orr   v2, v2, v1, lsl #5
  orr   v2, v2, lr, lsl #10
  orr   v2, v2, #&8000      ; set bit 15 (low halfword only)
; display west double pixel pair
  str   v2, [v4], v5        ; *dst = pix, then down
  mov   v2, v2, ror #16     ; swap the halfwords for the lower row
  str   v2, [v4], #4        ; *dst = pix, then right

; E pixel luminance
  ChromaHQ 1
  ldr   v2, [a1, fp, lsl#2] ; red
  ldr   v1, [a2, fp, lsl#2] ; green
  ldr   lr, [sl, fp, lsl#2] ; blue
  orr   v2, v2, v1, lsl #5
  orr   v2, v2, lr, lsl #10
  orr   v2, v2, #&8000      ; set bit 15 (low halfword only)
; display east double pixel pair
  mov   v2, v2, ror #16     ; lower row first: swapped halfwords
  str   v2, [v4], -v5       ; *dst = pix, then up
  mov   v2, v2, ror #16     ; back to the unswapped word for the upper row
  str   v2, [v4], #4        ; *dst = pix, then right

  subs  v3, v3, #2          ; width -= 2
  bgt   width2_loop_hi_q

  ldmia sp,{a2, v1, v2, v3, lr} ; dst_skip, yc_skip, uv_skip, width, height
  add   a3, a3, v2          ; u += uv_skip
  add   a4, a4, v2          ; v += uv_skip
  add   v6, v6, v1          ; y1 += yc_skip
  add   v4, v4, a2          ; dst += dst_skip

  subs  lr, lr, #1          ; height--
  bgt   height2_loop_hi_q

  add   sp, sp, #20
  ldmfd sp!, {v1, v2, v3, v4, v5, v6, sl, fp, ip, pc}


; ka_drawyuv444_z3_abgr15 : 32k cols 300% zoom
; ----------------------
; In  a1 = block holding the plane pointers (+0 y, +4 v, +8 u) and, from
;          drawer_yc_bpr onwards, yc_bpr, dst1, dst_bpr, yc_height,
;          yc_width, dst_skip, yc_skip, uv_skip.
;
; Two source pixels (W and E) are converted per pass and expanded to six
; screen pixels, i.e. three words (W:W, W:E, E:E), written on three
; consecutive screen rows. 2*dst_bpr is added to dst_skip because each
; source row fills three screen rows.
;
; lr holds the remaining row count for the whole width loop: its bit 0
; selects which pixel of the pair has the constant 2*224+256+2*224 added to
; its lookup index. The east pixel is therefore assembled in a1 (which is
; dead once its red lookup has been made) and NOT in lr; building it in lr
; made every "tst lr,#1" after the first pass of a row test a bit of the
; previous pixel instead of the row parity.
  EXPORT ka_drawyuv444_z3_abgr15
  ALIGN 32
ka_drawyuv444_z3_abgr15
  stmfd sp!, {v1, v2, v3, v4, v5, v6, sl, fp, ip, lr}
  sub   sp, sp, #20         ; room for the five per-row words

  add   a2, a1, #drawer_yc_bpr
  ldmia a2!, {v3, v4, v5, lr} ; yc_bpr, dst1, dst_bpr, yc_height
  ldr   v6, [a1, #0]        ; y1 = frame->base[0]
  ldmia a2, {a3-v2}         ; yc_width, dst_skip, yc_skip, uv_skip
  add   a4, a4, v5, lsl #1  ; dst_skip += 2*dst_bpr
  mov   v3, a3              ; v3 = width counter
  stmia sp, {a4-v3, lr}     ; save dst_skip, yc_skip, uv_skip, width, height

  ldr   a3, [a1, #8]        ; u = frame->base[2]
  ldr   a4, [a1, #4]        ; v = frame->base[1]

  ldr   ip, Yc0_Tab         ; ip = luminance lookup table base

height3_loop
  str   lr, [sp, #yc_height] ; keep the stack copy in step for the ldmia below
width3_loop

  ldrb  fp, [v6], #1        ; Yc = *y1++

; W pixel luminance
  Chroma 1
  tst   lr,#1               ; odd remaining-row count?
  addne fp, fp, #2*224+256+2*224 ; yes: shift the west pixel's lookup index
  ldrb  v2, [a1, fp]        ; red
  ldrb  v1, [a2, fp]        ; green
  orr   v2, v2, v1, lsl #5
  ldrb  v1, [sl, fp]        ; blue
  ldrb  fp, [v6], #1        ; Yc = *y1++ (for the east pixel)
  orr   v2, v2, v1, lsl #10
  orr   v2, v2, #&8000      ; set bit 15
; E pixel luminance
  Chroma 1
  tst   lr,#1               ; even remaining-row count?
  addeq fp, fp, #2*224+256+2*224 ; yes: shift the east pixel's lookup index
  ldrb  a1, [a1, fp]        ; red; a1 stops being a lookup base from here on
  ldrb  v1, [a2, fp]        ; green
  orr   a1, a1, v1, lsl #5
  ldrb  v1, [sl, fp]        ; blue
  orr   a1, a1, v1, lsl #10
  orr   a1, a1, #&8000      ; set bit 15
; create the 3 double pixels
  orr   fp, v2, a1, lsl #16 ; centre
  orr   v2, v2, v2, lsl #16 ; left
  orr   a1, a1, a1, lsl #16 ; right
; display the pixels
  str   fp, [v4, #4]        ; *(dst + 4) = centre
  str   a1, [v4, #8]        ; *(dst + 8) = right
  str   v2, [v4], v5        ; *dst = left, then down
  str   fp, [v4, #4]        ; *(dst + 4) = centre
  str   a1, [v4, #8]        ; *(dst + 8) = right
  str   v2, [v4], v5        ; *dst = left, then down
  str   fp, [v4, #4]        ; *(dst + 4) = centre
  str   a1, [v4, #8]        ; *(dst + 8) = right
  str   v2, [v4], #12       ; *dst = left, then right

; subtract 2*v5 from dst to position it for next
  sub   v4, v4, v5, lsl #1

  subs  v3, v3, #2          ; width -= 2
  bgt   width3_loop

  ldmia sp,{a2, v1, v2, v3, lr} ; dst_skip, yc_skip, uv_skip, width, height
  add   a3, a3, v2          ; u += uv_skip
  add   a4, a4, v2          ; v += uv_skip
  add   v6, v6, v1          ; y1 += yc_skip
  add   v4, v4, a2          ; dst += dst_skip

  subs  lr, lr, #1          ; height--
  bgt   height3_loop

  add   sp, sp, #20
  ldmfd sp!, {v1, v2, v3, v4, v5, v6, sl, fp, ip, pc}


; ka_drawyuv444_z4_abgr15 : 32k cols 400% zoom
; ----------------------
; In  a1 = block holding the plane pointers (+0 y, +4 v, +8 u) and, from
;          drawer_yc_bpr onwards, yc_bpr, dst1, dst_bpr, yc_height,
;          yc_width, dst_skip, yc_skip, uv_skip.
;
; Every source pixel becomes a 4x4 block: the pixel is duplicated into both
; halves of a word and that word is stored twice on each of four screen
; rows. The west block is written top-to-bottom and the east block
; bottom-to-top, so dst ends each pass back on the top row. 3*dst_bpr is
; added to dst_skip because each source row fills four screen rows.
;
; As in the 100%, 200% and 300% routines, bit 0 of the remaining row count
; in lr selects which pixel of the pair has the constant 2*224+256+2*224
; added to its lookup index: the west pixel when the count is odd (addne),
; the east pixel when it is even (addeq). The west pixel previously used
; addeq as well, so both pixels of a pair always took the same table.
  EXPORT ka_drawyuv444_z4_abgr15
  ALIGN 32
ka_drawyuv444_z4_abgr15
  stmfd sp!, {v1, v2, v3, v4, v5, v6, sl, fp, ip, lr}
  sub   sp, sp, #20         ; room for the five per-row words

  add   a2, a1, #drawer_yc_bpr
  ldmia a2!, {v3, v4, v5, lr} ; yc_bpr, dst1, dst_bpr, yc_height
  ldr   v6, [a1, #0]        ; y1 = frame->base[0]
  ldmia a2, {a3-v2}         ; yc_width, dst_skip, yc_skip, uv_skip
  add   a4, a4, v5, lsl #1  ; dst_skip += 3*dst_bpr
  add   a4, a4, v5
  mov   v3, a3              ; v3 = width counter
  stmia sp, {a4-v3, lr}     ; save dst_skip, yc_skip, uv_skip, width, height

  ldr   a3, [a1, #8]        ; u = frame->base[2]
  ldr   a4, [a1, #4]        ; v = frame->base[1]

  ldr   ip, Yc0_Tab         ; ip = luminance lookup table base

height4_loop
  str   lr, [sp, #yc_height] ; keep the stack copy in step for the ldmia below
width4_loop

  ldrb  fp, [v6], #1        ; Yc = *y1++

; W pixel luminance
  Chroma 1
  tst   lr,#1               ; odd remaining-row count?
  addne fp, fp, #2*224+256+2*224 ; yes: shift the west pixel's lookup index
  ldrb  v2, [a1, fp]        ; red
  ldrb  v1, [a2, fp]        ; green
  orr   v2, v2, v1, lsl #5
  ldrb  v1, [sl, fp]        ; blue
  ldrb  fp, [v6], #1        ; Yc = *y1++ (for the east pixel)
  orr   v2, v2, v1, lsl #10
  orr   v2, v2, #&8000      ; set bit 15
; display west double pixel pair
  orr   v2, v2, v2, lsl #16 ; double pixel
  str   v2, [v4, #4]        ; *(dst + 4) = pix
  str   v2, [v4], v5        ; *dst = pix, then down
  str   v2, [v4, #4]        ; *(dst + 4) = pix
  str   v2, [v4], v5        ; *dst = pix, then down
  str   v2, [v4, #4]        ; *(dst + 4) = pix
  str   v2, [v4], v5        ; *dst = pix, then down
  str   v2, [v4, #4]        ; *(dst + 4) = pix
  str   v2, [v4], #8        ; *dst = pix, then right

; E pixel luminance
  Chroma 1
  tst   lr,#1               ; even remaining-row count?
  addeq fp, fp, #2*224+256+2*224 ; yes: shift the east pixel's lookup index
  ldrb  v2, [a1, fp]        ; red
  ldrb  v1, [a2, fp]        ; green
  orr   v2, v2, v1, lsl #5
  ldrb  v1, [sl, fp]        ; blue
  orr   v2, v2, v1, lsl #10
  orr   v2, v2, #&8000      ; set bit 15
; display east double pixel pair
  orr   v2, v2, v2, lsl #16 ; double pixel
  str   v2, [v4, #4]        ; *(dst + 4) = pix
  str   v2, [v4], -v5       ; *dst = pix, then up
  str   v2, [v4, #4]        ; *(dst + 4) = pix
  str   v2, [v4], -v5       ; *dst = pix, then up
  str   v2, [v4, #4]        ; *(dst + 4) = pix
  str   v2, [v4], -v5       ; *dst = pix, then up
  str   v2, [v4, #4]        ; *(dst + 4) = pix
  str   v2, [v4], #8        ; *dst = pix, then right

  subs  v3, v3, #2          ; width -= 2
  bgt   width4_loop

  ldmia sp,{a2, v1, v2, v3, lr} ; dst_skip, yc_skip, uv_skip, width, height
  add   a3, a3, v2          ; u += uv_skip
  add   a4, a4, v2          ; v += uv_skip
  add   v6, v6, v1          ; y1 += yc_skip
  add   v4, v4, a2          ; dst += dst_skip

  subs  lr, lr, #1          ; height--
  bgt   height4_loop

  add   sp, sp, #20
  ldmfd sp!, {v1, v2, v3, v4, v5, v6, sl, fp, ip, pc}

  ALIGN 32
  END
