 # iquant.s

 ##############################################################
 #
 # Inverse quantization
 #
 # NOTES:
 # ------
 #
 # - The CPU puts the inputs and the required coefficients
 #   (i.e. C[], D[] and scalefactors[]) into the order
 #   required by the DCT.
 #
 # - Code can be interleaved for big improvements in efficiency;
 #   the current arrangement is for debugging only.
 #
 # - Can save stores at the end and make DCT use results from
 #   registers

		.ent	iquant

 # IQuant: inverse-quantize four vectors of input and jump to DCT32.
 #
 # Steps, applied to each of the four vectors in turn:
 #   1. load the input from RSP_IQIN_OFFSET
 #   2. XOR with vconst[5]
 #   3. add the matching D[] vector from RSP_IQD_OFFSET
 #   4. multiply by the matching pair of C[] vectors from
 #      RSP_IQC_OFFSET
 #   5. multiply by the matching pair of scalefactor vectors from
 #      RSP_IQSCALE_OFFSET
 #
 # Results are left in out0h..out3h and out0l..out3l ($v1..$v8);
 # the stores to RSP_DCTIN_OFFSET are currently commented out.
 #
 # Writes scalar registers $1..$5 and vector registers $v0..$v20.
 # $v0 is only used as a scratch destination.

 # Result registers, one h/l pair per input vector
 # (presumably h = high half, l = low half - confirm).
.name	out0h,	$v1
.name	out1h,	$v2
.name	out0l,	$v3
.name	out1l,	$v4
.name	out2h,	$v5
.name	out3h,	$v6
.name	out2l,	$v7
.name	out3l,	$v8

 # Coefficient registers (reused for D[], C[] and scalefactors)
 # and the four input vectors.
.name	c0l,	$v9
.name	c1l,	$v10
.name	c2l, 	$v11
.name	c3l,	$v12
.name	c0h,	$v13
.name	c1h,	$v14
.name	c2h, 	$v15
.name	c3h,	$v16
.name	in0,	$v17
.name	in1,	$v18
.name	in2,	$v19
.name	in3,	$v20

 # Scalar registers holding the base address of each table.
.name	dctIA,	$1
.name	cAddr, 	$2
.name	dAddr,	$3
.name	scAddr,	$4
.name	iAddr,	$5

IQuant:
 # Set up the table base addresses (the offsets are defined
 # elsewhere).
 # NOTE(review): dctIA is only referenced by the disabled stores
 # at the end of this routine; DCT32 may still expect it in $1 -
 # confirm before removing the load.
		addi	iAddr,	zero, RSP_IQIN_OFFSET
		addi	cAddr,	zero, RSP_IQC_OFFSET
		addi	dAddr,	zero, RSP_IQD_OFFSET
		addi	scAddr,	zero, RSP_IQSCALE_OFFSET
		addi	dctIA, 	zero, RSP_DCTIN_OFFSET
		
 # Load the four input vectors, 16 bytes apart.
		lqv	in0[0], 0(iAddr)
		lqv	in1[0], 16(iAddr)
		lqv	in2[0], 32(iAddr)
		lqv	in3[0], 48(iAddr)
		
 # Invert the high bit
 # (presumably vconst[5] holds the high-bit mask; vconst is set up
 # elsewhere - confirm)

		vxor	in0, in0, vconst[5]
		vxor	in1, in1, vconst[5]
		vxor	in2, in2, vconst[5]
		vxor	in3, in3, vconst[5]
		
 # Add D[]
 # c0l..c3l are only temporaries here; they are reloaded with C[]
 # below.
		lqv	c0l[0], 0(dAddr)
		lqv	c1l[0], 16(dAddr)
		lqv	c2l[0], 32(dAddr)
		lqv	c3l[0], 48(dAddr)

		vadd	in0, in0, c0l 
		vadd	in1, in1, c1l 
		vadd	in2, in2, c2l 
		vadd	in3, in3, c3l 

 # Multiply by C[]
 # 16x32 to give 32 multiplies - any way to do this in fewer than
 # 3 instructions?
 #
 # The c?l vectors come from offsets 0..48 of the C[] table and the
 # c?h vectors from offsets 64..112.

		lqv	c0l[0], 0(cAddr)
		lqv	c1l[0], 16(cAddr)
		lqv	c2l[0], 32(cAddr)
		lqv	c3l[0], 48(cAddr)

		lqv	c0h[0], 64(cAddr)
		lqv	c1h[0], 80(cAddr)
		lqv	c2h[0], 96(cAddr)
		lqv	c3h[0], 112(cAddr)
 
 # Each group below is a chain on the vector accumulator: vmudh
 # starts it with in * c?h, vmadm accumulates in * c?l and writes
 # out?l, then vsaw reads an accumulator slice into out?h
 # (presumably element [0] selects the high slice - confirm).
 # The three instructions of a group must stay in this order.
 # NOTE(review): vmadm is understood to write a saturated value, so
 # out?l may be clamped instead of holding the raw middle slice when
 # the product is large - confirm against the expected input range.
		vmudh	$v0, in0, c0h
		vmadm	out0l, in0, c0l
		vsaw	out0h, $v0, out0h[0]

		vmudh	$v0, in1, c1h
		vmadm	out1l, in1, c1l
		vsaw	out1h, $v0, out1h[0]

		vmudh	$v0, in2, c2h
		vmadm	out2l, in2, c2l
		vsaw	out2h, $v0, out2h[0]

		vmudh	$v0, in3, c3h
		vmadm	out3l, in3, c3l
		vsaw	out3h, $v0, out3h[0]

 # Multiply by scalefactors
 #
 # Same table layout as C[]: c?l from offsets 0..48 and c?h from
 # offsets 64..112 of the scalefactor table.

		lqv	c0l[0], 0(scAddr)
		lqv	c1l[0], 16(scAddr)
		lqv	c2l[0], 32(scAddr)
		lqv	c3l[0], 48(scAddr)

		lqv	c0h[0], 64(scAddr)
		lqv	c1h[0], 80(scAddr)
		lqv	c2h[0], 96(scAddr)
		lqv	c3h[0], 112(scAddr)

 # Each group multiplies the out?h/out?l pair by the c?h/c?l pair
 # as four partial products accumulated in order: l*l, h*l, l*h,
 # h*h. The first two only feed the accumulator (their destination
 # is the scratch $v0); vmadn then rewrites out?l and vmadh rewrites
 # out?h. out?l is read before vmadn overwrites it and out?h before
 # vmadh overwrites it, so the order within a group must be kept.
		vmudl	$v0, c0l, out0l
		vmadm	$v0, c0h, out0l
		vmadn	out0l, c0l, out0h
		vmadh	out0h, c0h, out0h
		
		vmudl	$v0, c1l, out1l
		vmadm	$v0, c1h, out1l
		vmadn	out1l, c1l, out1h
		vmadh	out1h, c1h, out1h
		
		vmudl	$v0, c2l, out2l
		vmadm	$v0, c2h, out2l
		vmadn	out2l, c2l, out2h
		vmadh	out2h, c2h, out2h
		
		vmudl	$v0, c3l, out3l
		vmadm	$v0, c3h, out3l
		vmadn	out3l, c3l, out3h
		vmadh	out3h, c3h, out3h
		
 # Disabled stores of the results to the DCT input buffer; with
 # these commented out the results stay in out?h / out?l only.
 #		sqv	out0h, 0(dctIA)
 #		sqv	out1h, 16(dctIA)
 #		sqv	out2h, 32(dctIA)
 #		sqv	out3h, 48(dctIA)
 #
 #		sqv	out0l, 64(dctIA)
 #		sqv	out1l, 80(dctIA)
 #		sqv	out2l, 96(dctIA)
 #		sqv	out3l, 112(dctIA)

 # Continue with the DCT; the nop fills the jump delay slot.
		j	DCT32
		nop
	
 # Release the register aliases defined at the top of the routine.
.unname	c0l
.unname	c1l
.unname	c2l
.unname	c3l
.unname	c0h
.unname	c1h
.unname	c2h
.unname	c3h
.unname	in0
.unname	in1
.unname	in2
.unname	in3

.unname	out0l
.unname	out1l
.unname	out2l
.unname	out3l
.unname	out0h
.unname	out1h
.unname	out2h
.unname	out3h

.unname	dctIA
.unname	cAddr
.unname	dAddr
.unname	scAddr
.unname	iAddr
 # NOTE(review): .ent/.end name iquant while the label is IQuant -
 # confirm the assembler does not require them to match.
		.end	iquant