remap.s 4.96 KB

 #########################################################################
 #
 # Remap the DCT outputs to give 64 values appropriately mirror-imaged
 # for the Overlap-Add stage
 #
 # NOTES:
 # ------
 #
 # - This may be better off coupled into the DCT itself
 #
 # - This is another case where load/store alternates would help

		.ent	remap
 #
 # Remap: straight-line routine. Reads 32 four-byte entries starting at
 # RSP_DCTOUT_OFFSET, writes a 64-entry table starting at
 # RSP_OLAIN_OFFSET, then jumps to OLA.
 #
 # Input layout, as read by the loads below: for entry i the halfword
 # at byte 4*i goes to an h register and the halfword at byte 4*i+2
 # goes to the matching l register. The vsubc/vsub pairs further down
 # treat l and h as the low and high halves of one number, so each
 # entry is presumably a 32-bit value (TODO confirm against the DCT).
 #
 # Output layout, as written by the stores below: 64 h halfwords at
 # rmapOh, followed by 64 l halfwords at rmapOl = rmapOh + 128.
 # With dct[] the input entries and out[] the output entries:
 #
 #   out[i]    =  dct[16+i]    0 <= i < 16
 #   out[16]   =  0
 #   out[32-i] = -dct[16+i]    0 <= i < 16
 #   out[48-i] = -dct[i]       0 <= i < 16
 #   out[48+i] = -dct[i]       0 <= i < 16
 #
 # Registers written: $1, $2, $3 and $v1 to $v9. vconst is read but is
 # named outside this routine.
 #
 # outNl / outNh hold the l / h halfwords of input entries 8*N .. 8*N+7
.name	out0l,	$v1
.name	out1l,	$v2
.name	out2l,	$v3
.name	out3l,	$v4
.name	out0h,	$v5
.name	out1h,	$v6
.name	out2h,	$v7
.name	out3h,	$v8
.name	vzero, 	$v9
	
 # rmapI  = read pointer into the DCT output
 # rmapOh = base of the h halfword output table
 # rmapOl = base of the l halfword output table
.name	rmapI,	$1
.name	rmapOl, $2
.name	rmapOh, $3


Remap:
		addi	rmapI,	zero, RSP_DCTOUT_OFFSET
		addi	rmapOh,	zero, RSP_OLAIN_OFFSET
 # 128 bytes = 64 halfwords, so the l table directly follows the h table
		addi	rmapOl,	rmapOh, 128

 # Entries 0..7: gather the halfwords at bytes 0,4,..,28 into out0h
		lsv	out0h[0], 0(rmapI)
		lsv	out0h[2], 4(rmapI)
		lsv	out0h[4], 8(rmapI)
		lsv	out0h[6], 12(rmapI)
		lsv	out0h[8], 16(rmapI)
		lsv	out0h[10], 20(rmapI)
		lsv	out0h[12], 24(rmapI)
		lsv	out0h[14], 28(rmapI)
		
 # Entries 0..7: gather the halfwords at bytes 2,6,..,30 into out0l
		lsv	out0l[0], 2(rmapI)
		lsv	out0l[2], 6(rmapI)
		lsv	out0l[4], 10(rmapI)
		lsv	out0l[6], 14(rmapI)
		lsv	out0l[8], 18(rmapI)
		lsv	out0l[10], 22(rmapI)
		lsv	out0l[12], 26(rmapI)
		lsv	out0l[14], 30(rmapI)

 # Step past the 8 entries just read (8 * 4 bytes)
		addi	rmapI, rmapI, 32
		
 # Entries 8..15 into out1h / out1l, same pattern as above
		lsv	out1h[0], 0(rmapI)
		lsv	out1h[2], 4(rmapI)
		lsv	out1h[4], 8(rmapI)
		lsv	out1h[6], 12(rmapI)
		lsv	out1h[8], 16(rmapI)
		lsv	out1h[10], 20(rmapI)
		lsv	out1h[12], 24(rmapI)
		lsv	out1h[14], 28(rmapI)
		
		lsv	out1l[0], 2(rmapI)
		lsv	out1l[2], 6(rmapI)
		lsv	out1l[4], 10(rmapI)
		lsv	out1l[6], 14(rmapI)
		lsv	out1l[8], 18(rmapI)
		lsv	out1l[10], 22(rmapI)
		lsv	out1l[12], 26(rmapI)
		lsv	out1l[14], 30(rmapI)

		addi	rmapI, rmapI, 32
		
 # Entries 16..23 into out2h / out2l
		lsv	out2h[0], 0(rmapI)
		lsv	out2h[2], 4(rmapI)
		lsv	out2h[4], 8(rmapI)
		lsv	out2h[6], 12(rmapI)
		lsv	out2h[8], 16(rmapI)
		lsv	out2h[10], 20(rmapI)
		lsv	out2h[12], 24(rmapI)
		lsv	out2h[14], 28(rmapI)
		
		lsv	out2l[0], 2(rmapI)
		lsv	out2l[2], 6(rmapI)
		lsv	out2l[4], 10(rmapI)
		lsv	out2l[6], 14(rmapI)
		lsv	out2l[8], 18(rmapI)
		lsv	out2l[10], 22(rmapI)
		lsv	out2l[12], 26(rmapI)
		lsv	out2l[14], 30(rmapI)

		addi	rmapI, rmapI, 32
		
 # Entries 24..31 into out3h / out3l
		lsv	out3h[0], 0(rmapI)
		lsv	out3h[2], 4(rmapI)
		lsv	out3h[4], 8(rmapI)
		lsv	out3h[6], 12(rmapI)
		lsv	out3h[8], 16(rmapI)
		lsv	out3h[10], 20(rmapI)
		lsv	out3h[12], 24(rmapI)
		lsv	out3h[14], 28(rmapI)
		
		lsv	out3l[0], 2(rmapI)
		lsv	out3l[2], 6(rmapI)
		lsv	out3l[4], 10(rmapI)
		lsv	out3l[6], 14(rmapI)
		lsv	out3l[8], 18(rmapI)
		lsv	out3l[10], 22(rmapI)
		lsv	out3l[12], 26(rmapI)
		lsv	out3l[14], 30(rmapI)

 # out[i] = dct[16+i], 0<=i<16
 # Stored before the negation below, so these keep their original sign.
 # NOTE(review): the quad stores presumably need rmapOh and rmapOl to be
 # 16-byte aligned to write all 8 lanes - confirm RSP_OLAIN_OFFSET.
		sqv	out2l[0], 0(rmapOl)
		sqv	out2h[0], 0(rmapOh)
		sqv	out3l[0], 16(rmapOl)
		sqv	out3h[0], 16(rmapOh)

 # Invert the results
 #
 # vzero = vconst xor vconst, which is 0 in every lane whatever vconst
 # holds. Each value is then replaced by 0 - value: vsubc handles the
 # l half and vsub the h half. The order of each vsubc/vsub pair
 # matters, as the borrow out of the l half is expected to feed the
 # h half (see the RSP vector unit documentation).

		vxor	vzero, vconst, vconst
		vsubc	out2l, vzero, out2l
		vsub	out2h, vzero, out2h
		vsubc	out3l, vzero, out3l
		vsub	out3h, vzero, out3h

 # This could be put into the DCT since I only want the -ve
 # (entries 0..15 are only ever stored negated)
		vsubc	out0l, vzero, out0l
		vsub	out0h, vzero, out0h
		vsubc	out1l, vzero, out1l
		vsub	out1h, vzero, out1h

 # out[48+i] = -dct[i], 0<=i<16
 # Byte offset 96 is entry 48, byte offset 112 is entry 56.
		sqv	out0l[0], 96(rmapOl)
		sqv	out0h[0], 96(rmapOh)
		sqv	out1l[0], 112(rmapOl)
		sqv	out1h[0], 112(rmapOh)

 # Now individual writes for the reversed order stuff
 # Each ssv writes one halfword lane; the byte offset is 2 * out index.

 # out[48-i] = -dct[i], 0<=i<16
 # The i = 0 case (byte offset 96) was already written by the quad
 # store above, so the single stores start at lane 1.

		ssv	out0l[2], 94(rmapOl)
		ssv	out0l[4], 92(rmapOl)
		ssv	out0l[6], 90(rmapOl)
		ssv	out0l[8], 88(rmapOl)
		ssv	out0l[10], 86(rmapOl)
		ssv	out0l[12], 84(rmapOl)
		ssv	out0l[14], 82(rmapOl)

		ssv	out1l[0], 80(rmapOl)
		ssv	out1l[2], 78(rmapOl)
		ssv	out1l[4], 76(rmapOl)
		ssv	out1l[6], 74(rmapOl)
		ssv	out1l[8], 72(rmapOl)
		ssv	out1l[10], 70(rmapOl)
		ssv	out1l[12], 68(rmapOl)
		ssv	out1l[14], 66(rmapOl)

		ssv	out0h[2], 94(rmapOh)
		ssv	out0h[4], 92(rmapOh)
		ssv	out0h[6], 90(rmapOh)
		ssv	out0h[8], 88(rmapOh)
		ssv	out0h[10], 86(rmapOh)
		ssv	out0h[12], 84(rmapOh)
		ssv	out0h[14], 82(rmapOh)

		ssv	out1h[0], 80(rmapOh)
		ssv	out1h[2], 78(rmapOh)
		ssv	out1h[4], 76(rmapOh)
		ssv	out1h[6], 74(rmapOh)
		ssv	out1h[8], 72(rmapOh)
		ssv	out1h[10], 70(rmapOh)
		ssv	out1h[12], 68(rmapOh)
		ssv	out1h[14], 66(rmapOh)

 # out[32-i] = -dct[16+i], 0<=i<16
 # out2 and out3 were negated after their forward copies were stored,
 # so these are the negated, reversed copies of entries 16..31.

		ssv	out2l[0], 64(rmapOl)
		ssv	out2l[2], 62(rmapOl)
		ssv	out2l[4], 60(rmapOl)
		ssv	out2l[6], 58(rmapOl)
		ssv	out2l[8], 56(rmapOl)
		ssv	out2l[10], 54(rmapOl)
		ssv	out2l[12], 52(rmapOl)
		ssv	out2l[14], 50(rmapOl)

		ssv	out2h[0], 64(rmapOh)
		ssv	out2h[2], 62(rmapOh)
		ssv	out2h[4], 60(rmapOh)
		ssv	out2h[6], 58(rmapOh)
		ssv	out2h[8], 56(rmapOh)
		ssv	out2h[10], 54(rmapOh)
		ssv	out2h[12], 52(rmapOh)
		ssv	out2h[14], 50(rmapOh)

		ssv	out3l[0], 48(rmapOl)
		ssv	out3l[2], 46(rmapOl)
		ssv	out3l[4], 44(rmapOl)
		ssv	out3l[6], 42(rmapOl)
		ssv	out3l[8], 40(rmapOl)
		ssv	out3l[10], 38(rmapOl)
		ssv	out3l[12], 36(rmapOl)
		ssv	out3l[14], 34(rmapOl)

		ssv	out3h[0], 48(rmapOh)
		ssv	out3h[2], 46(rmapOh)
		ssv	out3h[4], 44(rmapOh)
		ssv	out3h[6], 42(rmapOh)
		ssv	out3h[8], 40(rmapOh)
		ssv	out3h[10], 38(rmapOh)
		ssv	out3h[12], 36(rmapOh)
		ssv	out3h[14], 34(rmapOh)

 # out[16] = 0
 # Byte offset 32 is entry 16; both halves are cleared.
		ssv	vzero[0], 32(rmapOl)
		ssv	vzero[0], 32(rmapOh)
 # Continue with the Overlap-Add stage; the nop fills the delay slot
 # of the jump.
		j	OLA
		nop

 # Release the register names so later code can reuse them
.unname	out0l
.unname	out1l
.unname	out2l
.unname	out3l
.unname	out0h
.unname	out1h
.unname	out2h
.unname	out3h
.unname	vzero
	
.unname	rmapI
.unname	rmapOl
.unname	rmapOh

		.end	remap