mc.s 5.14 KB
#include "mc.h"

/*
 * mc -- motion-compensation entry point.
 *
 * 1. Stores the addresses of the four kernels (x0y0, x1y0, x0y1, x1y1)
 *    into a four-word table at MC_SWITCH_TABLE.
 * 2. First pass (Y addresses, see mcFixYAddrs): picks a kernel from bit 0
 *    of mc_in_mvx / mc_in_mvy and calls it four times.  Each kernel call
 *    in this file handles four 16-byte rows and advances the pointers by
 *    64, so this pass covers 16 rows.
 * 3. Chroma pass: halves both motion-vector components in place, picks a
 *    kernel again, and calls it twice (8 rows).
 *
 * Reads:    mc_in_mvx, mc_in_mvy, mc_in_dat, mc_out_dat
 *           (mc_in_blend is read by the kernels' shared tail, finish).
 * Modifies: mc_in_mvx, mc_in_mvy (left holding the halved values),
 *           dum, dum2, xh, yh, mc4, xLbase, xRbase, xObase, rtn2,
 *           plus whatever the helpers and kernels modify.
 *
 * NOTE(review): register names, `ret` and the *Offset constants are
 * presumably defined in mc.h -- not visible here.
 * NOTE(review): the nop after every jal/jalr is presumably filling a
 * MIPS-style branch delay slot -- confirm before removing any of them.
 */
mc:
	addi	rtn2,	ret_reg, 0		/* keep the caller's return address; restored just before the final ret */

	/* Build the kernel jump table; entry index = (2*yh)+xh. */
	addi	dum2,	rzero, MC_SWITCH_TABLE

	la	dum,	x0y0
	sw	dum,	0(dum2)			/* index 0: xh=0, yh=0 */

	la	dum,	x1y0
	sw	dum,	4(dum2)			/* index 1: xh=1, yh=0 */

	la	dum,	x0y1
	sw	dum,	8(dum2)			/* index 2: xh=0, yh=1 */

	la	dum,	x1y1
	sw	dum,	12(dum2)		/* index 3: xh=1, yh=1 */

	nop
	nop					/* Break here to get input */
	nop

/* xyh shares a register with dum2: any later write to dum2 destroys xyh. */
#define	xyh	dum2

	andi	xh,	mc_in_mvx, 1		/* xh = low bit of the x component */
	andi	yh,	mc_in_mvy, 1		/* yh = low bit of the y component */
	sll	xyh,	yh,  1
	or	xyh,	xyh, xh			/* XYH = (2*yh)+xh */

	sll	dum,	xyh,  2			/* word index -> byte offset into the table */
	lw	mc4, MC_SWITCH_TABLE(dum)	/* mc4 = address of the selected kernel */

	sra	dum,	mc_in_mvx, 1			/* toss the fraction */
	andi	dum,	dum, 0xf		/* remaining offset within MB */
	add	xLbase,	dum, mc_in_dat		/* left source pointer */
	addi	xRbase,	xLbase, ROffset		/* right source pointer, a fixed ROffset away */
	addi	xObase,	mc_out_dat, 0		/* output pointer */
	jal	mcFixYAddrs			/* set up Lbasep1 / Rbasep1 */
	nop
	/* Four kernel calls; each one advances xLbase/xRbase/xObase by 64. */
	jalr	mc4
	nop
	jalr	mc4
	nop
	jalr	mc4
	nop
	jalr	mc4
	nop

	/* Chroma */

	/*
	 * Halve each component, rounding toward zero: the sra by 31 yields
	 * all ones for a negative value; ANDed with the low bit (xh / yh,
	 * still holding the values computed above) this adds 1 to negative
	 * odd values before the arithmetic shift.
	 * The write to dum2 also overwrites xyh, which is no longer needed
	 * because mc4 has already been loaded.
	 */
	sra	dum2,	mc_in_mvx, 31
	and	dum2,	dum2, xh
	add	mc_in_mvx,	dum2, mc_in_mvx
	sra	mc_in_mvx,	mc_in_mvx, 1

	sra	dum,	mc_in_mvy, 31
	and	dum,	dum, yh
	add	mc_in_mvy,	dum, mc_in_mvy
	sra	mc_in_mvy,	mc_in_mvy, 1

	/* Optional self-check: compare the halved vector against the words
	   stored at MVCX / MVCY and branch to error on any mismatch. */
#ifdef MC_CHROMA_CHECK
	lw	dum,	MVCX(rzero)
	lw	dum2,	MVCY(rzero)

	bne	dum,  mc_in_mvx, error
	nop
	bne	dum2, mc_in_mvy, error
	nop
#endif

	/* Re-select the kernel from the low bits of the halved vector. */
	andi	xh,	mc_in_mvx, 1
	andi	yh,	mc_in_mvy, 1
	sll	xyh,	yh,  1
	or	xyh,	xyh, xh			/* XYH = (2*yh)+xh */

	sll	dum,	xyh,  2			/* word index -> byte offset into the table */
	lw	mc4, MC_SWITCH_TABLE(dum)

	sra	dum,	mc_in_mvx, 1			/* toss the fraction */
	andi	dum,	dum, 0x7		/* remaining offset within MB */
	sll	dum,	dum, 1			/* Fix for UV interleave */
	add	xLbase,	dum, mc_in_dat
	addi	xLbase,	xLbase, CrOffset	/* chroma source sits CrOffset into the input buffer */
	addi	xRbase,	xLbase, RCrOffset
	addi	xObase,	mc_out_dat, OCrOffset	/* chroma output sits OCrOffset into the output buffer */

	jal	mcFixCrAddrs			/* set up Lbasep1 / Rbasep1 with a stride of 2 */
	nop
	/* Two kernel calls for the chroma pass. */
	jalr	mc4
	nop
	jalr	mc4
	nop

	nop
	nop					/* Break here to write output */
	nop

	addi	ret_reg, rtn2, 0		/* restore the return address saved on entry */
	ret
	nop

/*
 * mcFixYAddrs -- set up the "+1" source pointers used by the x1y0 and
 * x1y1 kernels during the first (Y) pass.
 *
 * In:       mc_in_mvx, xLbase, xRbase
 * Out:      Lbasep1 = xLbase + 1
 *           Rbasep1 = xRbase + 1   (only when bit 0 of mc_in_mvx is set)
 *           If (mc_in_mvx & 0x1f) == 31, Lbasep1 is replaced by
 *           Rbasep1 - 16.
 * Clobbers: dum
 *
 * NOTE(review): the instruction directly after each blez is presumably in
 * a MIPS-style branch delay slot, i.e. it runs whether or not the branch
 * is taken.  Under that assumption Lbasep1 is written on every call and
 * only Rbasep1 is conditional -- confirm the assembler does not reorder.
 */
mcFixYAddrs:
	andi	dum,	mc_in_mvx,	1
	blez	dum,	no_fix			/* bit 0 clear: nothing more to set up */
	    addi	Lbasep1, xLbase, 1	/* sits directly after the branch (see note above) */
	    addi	Rbasep1, xRbase, 1

	    andi	dum,	mc_in_mvx, 0x1f	/* What about 15.5 case? */
	    addi	dum,	dum, -30	/* > 0 only when the masked value is 31 */
	    blez 	dum,	no_fix
	    nop					/* Could be useful */
		addi	Lbasep1, Rbasep1, -16	/* xLbase + 1 would step past offset 15; derive the pointer from Rbasep1 instead */
no_fix:	ret
	nop

/*
 * mcFixCrAddrs -- chroma counterpart of mcFixYAddrs: set up the "+1"
 * source pointers with a stride of 2 (mc doubles the chroma offset for
 * UV interleave).
 *
 * In:       mc_in_mvx (already halved by mc), xLbase, xRbase
 * Out:      Lbasep1 = xLbase + 2
 *           Rbasep1 = xRbase + 2   (only when bit 0 of mc_in_mvx is set)
 *           If (mc_in_mvx & 0xf) == 15, Lbasep1 is replaced by
 *           Rbasep1 - 16.
 * Clobbers: dum
 *
 * NOTE(review): as in mcFixYAddrs, the instruction directly after each
 * blez is presumably in a branch delay slot and therefore always runs --
 * confirm the assembler does not reorder.
 */
mcFixCrAddrs:
	andi	dum,	mc_in_mvx,	1
	blez	dum,	no_fix2			/* bit 0 clear: nothing more to set up */
	    addi	Lbasep1, xLbase, 2	/* sits directly after the branch (see note above) */
	    addi	Rbasep1, xRbase, 2

	    andi	dum,	mc_in_mvx, 0xf	/* What about 7.5 case? */
	    addi	dum,	dum, -14	/* > 0 only when the masked value is 15 */
	    blez 	dum,	no_fix2
	    nop					/* Could be useful */
		addi	Lbasep1, Rbasep1, -16	/* xLbase + 2 would step past offset 14; derive the pointer from Rbasep1 instead */

					/* We don't need to fix Rbasep1 since
					   it is page QW aligned already, so
					   LRQ has no effect! */
					/* NOTE(review): "LRQ" presumably refers to the
					   lrv loads issued by the kernels -- confirm. */
no_fix2: ret
	nop

/*
 * error -- target of the MC_CHROMA_CHECK mismatch branches in mc.
 * Consists only of nops, presumably as a place to set a breakpoint.
 *
 * NOTE(review): there is no jump, ret or halt here, so execution falls
 * through into whatever follows this stub in the file.  Confirm that is
 * acceptable, or terminate this path explicitly.
 */
error:
	nop
	nop					/* OOPS! */
	nop

/*
 * x0y0 -- kernel for xh=0, yh=0: copies four 16-byte rows with no
 * averaging, then runs straight into the shared tail `finish`.
 *
 * In:  xLbase, xRbase (source pointers), xObase (output pointer),
 *      mc_in_blend
 * Out: v0..v3 stored at 0/16/32/48(xObase);
 *      xLbase, xRbase and xObase each advanced by 64.
 *
 * NOTE(review): each lqv/lrv pair on the same register presumably
 * assembles one 16-byte row that straddles a quadword boundary, lqv
 * supplying the part from xLbase and lrv the remainder from xRbase --
 * confirm against the vector load definitions.
 */
x0y0:
	lqv	v0[0],	 0(xLbase)		/* row 0 */
	lrv	v0[0],	 0(xRbase)
	lqv	v1[0],	16(xLbase)		/* row 1 */
	lrv	v1[0],	16(xRbase)
	lqv	v2[0],	32(xLbase)		/* row 2 */
	lrv	v2[0],	32(xRbase)
	lqv	v3[0],	48(xLbase)		/* row 3 */
	lrv	v3[0],	48(xRbase)
/*
 * finish -- shared tail; x1y0, x0y1 and x1y1 jump here with their four
 * result rows in v0..v3.  Advances the source pointers, optionally blends
 * with the rows already at xObase, stores, and returns.
 */
finish:
	addi	xLbase,	xLbase, 64		/* step the source pointers past the four rows just handled */
	addi	xRbase,	xRbase, 64
	blez	mc_in_blend,	dont_blend	/* mc_in_blend <= 0: store without blending */
	nop

	/* Blend: fetch the four rows currently in the output buffer ... */
	lqv	t0[0],	 0(xObase)
	lqv	t1[0],	16(xObase)
	lqv	t2[0],	32(xObase)
	lqv	t3[0],	48(xObase)

	/* ... and combine them with the new rows.
	   NOTE(review): unlike the other kernels there is no nop padding
	   between these loads and their first use -- confirm that is safe. */
	vaddb	v0,	v0, t0[1]		/* Avg */
	vaddb	v1,	v1, t1[1]		/* Avg */
	vaddb	v2,	v2, t2[1]		/* Avg */
	vaddb	v3,	v3, t3[1]		/* Avg */

dont_blend:
	sqv	v0[0],	 0(xObase)		/* write the four result rows */
	sqv	v1[0],	16(xObase)
	sqv	v2[0],	32(xObase)
	sqv	v3[0],	48(xObase)
	addi	xObase,	xObase, 64		/* step the output pointer to the next four rows */

	ret
	nop

/*
 * x1y0 -- kernel for xh=1, yh=0: for each of four rows, combines the row
 * read through xLbase/xRbase with the row read through Lbasep1/Rbasep1
 * (the "+1" pointers set up by mcFixYAddrs / mcFixCrAddrs).
 *
 * In:  xLbase, xRbase, Lbasep1, Rbasep1
 * Out: v0..v3 = the four combined rows; Lbasep1 and Rbasep1 advanced by
 *      64.  Control then passes to finish, which advances xLbase/xRbase,
 *      stores the rows and returns.
 * Scratch: t1
 *
 * NOTE(review): the five nops before each vaddb are presumably padding
 * for load-to-use latency -- confirm before removing or reordering.
 */
x1y0:
	/* Row 0 */
	lqv	v0[0],	 0(xLbase)
	lrv	v0[0],	 0(xRbase)
	lqv	t1[0],	 0(Lbasep1)
	lrv	t1[0],	 0(Rbasep1)
	nop
	nop
	nop
	nop
	nop
	vaddb	v0,	v0, t1[1]		/* Avg */

	/* Row 1 */
	lqv	v1[0],	 16(xLbase)
	lrv	v1[0],	 16(xRbase)
	lqv	t1[0],	 16(Lbasep1)
	lrv	t1[0],	 16(Rbasep1)
	nop
	nop
	nop
	nop
	nop
	vaddb	v1,	v1, t1[1]		/* Avg */

	/* Row 2 */
	lqv	v2[0],	 32(xLbase)
	lrv	v2[0],	 32(xRbase)
	lqv	t1[0],	 32(Lbasep1)
	lrv	t1[0],	 32(Rbasep1)
	nop
	nop
	nop
	nop
	nop
	vaddb	v2,	v2, t1[1]		/* Avg */

	/* Row 3 */
	lqv	v3[0],	 48(xLbase)
	lrv	v3[0],	 48(xRbase)
	lqv	t1[0],	 48(Lbasep1)
	lrv	t1[0],	 48(Rbasep1)
	nop
	nop
	nop
	nop
	nop
	vaddb	v3,	v3, t1[1]		/* Avg */

	/* finish only advances xLbase/xRbase, so step the +1 pointers here. */
	addi	Lbasep1, Lbasep1, 64
	addi	Rbasep1, Rbasep1, 64

	j	finish
	nop

/*
 * x0y1 -- kernel for xh=0, yh=1: each output row combines two consecutive
 * source rows (offsets k and k+16).  Five rows (offsets 0..64) are read to
 * produce four outputs; t1/t2/t3 are rotated so each row is loaded once
 * per call.
 *
 * In:  xLbase, xRbase
 * Out: v0..v3 = the four combined rows.  Control then passes to finish,
 *      which advances xLbase/xRbase, stores the rows and returns.
 * Scratch: t1, t2, t3.  Lbasep1 / Rbasep1 are not used.
 *
 * NOTE(review): the five nops before each vaddb are presumably padding
 * for load-to-use latency -- confirm before removing or reordering.
 */
x0y1:
	lqv	t1[0],	 0(xLbase)		/* row 0 */
	lrv	t1[0],	 0(xRbase)
	lqv	t2[0],	 16(xLbase)		/* row 1 */
	lrv	t2[0],	 16(xRbase)
	nop
	nop
	nop
	nop
	nop
	vaddb	v0,	t1, t2[1]		/* Avg */

	lqv	t3[0],	 32(xLbase)		/* row 2 */
	lrv	t3[0],	 32(xRbase)
	nop
	nop
	nop
	nop
	nop
	vaddb	v1,	t2, t3[1]		/* Avg */

	lqv	t1[0],	 48(xLbase)		/* row 3 (t1 reused) */
	lrv	t1[0],	 48(xRbase)
	nop
	nop
	nop
	nop
	nop
	vaddb	v2,	t3, t1[1]		/* Avg */

	lqv	t2[0],	 64(xLbase)		/* row 4: one row past the four this call outputs */
	lrv	t2[0],	 64(xRbase)
	nop
	nop
	nop
	nop
	nop
	vaddb	v3,	t1, t2[1]		/* Avg */

	j	finish
	nop

/*
 * x1y1 -- kernel for xh=1, yh=1: each output row combines four inputs --
 * rows k and k+16 read through xLbase/xRbase and the same two rows read
 * through Lbasep1/Rbasep1.  Five rows (offsets 0..64) are read through
 * each pointer pair; the t1/t1p1 and t2/t2p1 register pairs alternate so
 * every row is loaded once per call.
 *
 * In:  xLbase, xRbase, Lbasep1, Rbasep1
 * Out: v0..v3 = the four combined rows; Lbasep1 and Rbasep1 advanced by
 *      64.  Control then passes to finish, which advances xLbase/xRbase,
 *      stores the rows and returns.
 * Scratch: t1, t1p1, t2, t2p1
 *
 * NOTE(review): each vaddb/vaccb pair presumably accumulates then
 * averages four values, per the existing "Avg of 4" comments -- the
 * definition of these ops is not visible here.
 * NOTE(review): the nop runs are presumably load-to-use latency padding.
 * The first group has four nops where the later groups have five --
 * confirm that is intentional before touching them.
 */
x1y1:
	lqv	t1[0],	 0(xLbase)		/* Prefetch */
	lrv	t1[0],	 0(xRbase)
	lqv	t1p1[0], 0(Lbasep1)
	lrv	t1p1[0], 0(Rbasep1)

	/* Output row 0 = source rows 0 and 1 */
	lqv	t2[0],	 16(xLbase)
	lrv	t2[0],	 16(xRbase)
	lqv	t2p1[0], 16(Lbasep1)
	lrv	t2p1[0], 16(Rbasep1)
	nop
	nop
	nop
	nop
	vaddb	v0,	t1, t1p1[2]		/* Avg of 4 */
	vaccb	v0,	t2, t2p1[2]		/* Avg of 4 */

	/* Output row 1 = source rows 1 (still in t2/t2p1) and 2 */
	lqv	t1[0],	 32(xLbase)
	lrv	t1[0],	 32(xRbase)
	lqv	t1p1[0], 32(Lbasep1)
	lrv	t1p1[0], 32(Rbasep1)
	nop
	nop
	nop
	nop
	nop
	vaddb	v1,	t1, t1p1[2]		/* Avg of 4 */
	vaccb	v1,	t2, t2p1[2]		/* Avg of 4 */

	/* Output row 2 = source rows 2 (still in t1/t1p1) and 3 */
	lqv	t2[0],	 48(xLbase)
	lrv	t2[0],	 48(xRbase)
	lqv	t2p1[0], 48(Lbasep1)
	lrv	t2p1[0], 48(Rbasep1)
	nop
	nop
	nop
	nop
	nop
	vaddb	v2,	t1, t1p1[2]		/* Avg of 4 */
	vaccb	v2,	t2, t2p1[2]		/* Avg of 4 */

	/* Output row 3 = source rows 3 (still in t2/t2p1) and 4 */
	lqv	t1[0],	 64(xLbase)
	lrv	t1[0],	 64(xRbase)
	lqv	t1p1[0], 64(Lbasep1)
	lrv	t1p1[0], 64(Rbasep1)
	nop
	nop
	nop
	nop
	nop
	vaddb	v3,	t1, t1p1[2]		/* Avg of 4 */
	vaccb	v3,	t2, t2p1[2]		/* Avg of 4 */

	/* finish only advances xLbase/xRbase, so step the +1 pointers here. */
	addi	Lbasep1, Lbasep1, 64
	addi	Rbasep1, Rbasep1, 64

	j	finish
	nop

#include "mc_uname.h"