iquant.s
4.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
/*
* iquant.s Thu Aug 3 18:08:27 PDT 1995
*
*
* inverse quantization for MPEG
* Inputs: iq_in_dat - Input data address
* iq_out_dat - Output data address (can be the same as in_dat)
* iq_in_cbp - Coded Block Pattern
* iq_in_intra - Intra block flag
* iq_in_quant - Quant scale factor address
*
* Outputs:
* data at iq_out_dat - IQ'ed data
*/
/* #define _FIQUANT */
#include "iquant.h"
/*
 * Entry and setup.
 * Clears vconsts, points consts_addr at the constant table, loads the
 * constant vector, then chooses which quantization matrix address to use
 * based on iq_in_intra.  Register and constant names used here are defined
 * outside this file (iquant.h is included above).
 * The nop after each branch/jump sits in the MIPS-style branch delay slot.
 */
iq: nop
nop
iquant_init:
/* XOR of a register with itself: vconsts starts out as all zero bits */
vxor vconsts, vconsts, vconsts
# setup addresses
/* consts_addr = rzero + IQ_CONST_BASE (presumably rzero is the zero register; confirm in iquant.h) */
addi consts_addr, rzero, IQ_CONST_BASE
# load vconsts
/* quadword load from IQ_CONSTS(consts_addr) into vconsts */
lqv vconsts[0], IQ_CONSTS(consts_addr)
/* iq_in_intra != rzero: go to iqmat; otherwise fall into niqmat */
bne iq_in_intra, rzero, iqmat
nop
niqmat:
/* non-intra path: qmat_addr = consts_addr + NONI_QMAT */
addi qmat_addr, consts_addr, NONI_QMAT
j load_qmat
nop
iqmat:
/* intra path: qmat_addr = consts_addr + INTRA_QMAT, then falls through to load_qmat */
addi qmat_addr, consts_addr, INTRA_QMAT
/*
 * load_qmat: reached from both the intra and non-intra paths with qmat_addr
 * already selected.  Loads eight quadword rows at offsets 0, 16, ... 112
 * from qmat_addr into qmat_r0..qmat_r7 (128 bytes in total), then reads the
 * quantizer scale and derives qscale from it.  Execution falls through into
 * iquant_loop.
 */
load_qmat:
lqv qmat_r0[0], 0(qmat_addr)
lqv qmat_r1[0], 16(qmat_addr)
lqv qmat_r2[0], 32(qmat_addr)
lqv qmat_r3[0], 48(qmat_addr)
lqv qmat_r4[0], 64(qmat_addr)
lqv qmat_r5[0], 80(qmat_addr)
lqv qmat_r6[0], 96(qmat_addr)
lqv qmat_r7[0], 112(qmat_addr)
# get qmat scale factor
/* short load from the address held in iq_in_quant into vconsts at element QUANT_LOAD */
lsv vconsts[QUANT_LOAD], 0(iq_in_quant)
### quant scaling ###
/*
 * qscale = vconsts multiplied by the vconsts element selected by QUANT.
 * NOTE(review): the relationship between QUANT_LOAD and QUANT and the
 * fixed-point format of the vmudh result depend on iquant.h and the RSP
 * instruction definition; neither is visible here -- confirm.
 */
vmudh qscale, vconsts, vconsts[QUANT]
/*
 * iquant_loop: consume one bit of the coded block pattern per iteration.
 *   - bit 0 set   -> iquant_do processes the block at iq_in_dat
 *   - bit 0 clear -> the block is skipped: both data pointers advance by
 *                    128 bytes and the loop repeats
 * The instruction following each branch/jump is in its delay slot and is
 * executed whether or not the branch is taken.
 */
iquant_loop:
/* dummy = lowest bit of iq_in_cbp */
andi dummy, iq_in_cbp, 1
bgtz dummy, iquant_do
/* delay slot: the pattern is shifted right on both the taken and not-taken path */
srl iq_in_cbp, iq_in_cbp, 1
/* skip path: leave when the shifted pattern is <= 0 (no bits left) */
blez iq_in_cbp, iquant_done
/* delay slot: step the input pointer past the skipped 128-byte block */
addi iq_in_dat, iq_in_dat, 128
j iquant_loop
/* delay slot: step the output pointer by the same 128 bytes */
addi iq_out_dat, iq_out_dat, 128
/* iquant_done: common exit, jumps through the register named return */
iquant_done:
jr return
nop
/*
 * iquant_do: process one coded block.
 * Loads the block as eight quadword rows (offsets 0..112 from iq_in_dat)
 * into in_r0..in_r7, computes the dc vector from row 0, and forms
 * out_rN = in_rN * qscale[SCALE2] for every row.  The loads of rows 4..7
 * are interleaved with the multiplies of earlier rows.
 */
iquant_do:
lqv in_r0[0], 0(iq_in_dat)
lqv in_r1[0], 16(iq_in_dat)
lqv in_r2[0], 32(iq_in_dat)
lqv in_r3[0], 48(iq_in_dat)
### get DC term ###
/* dc = row 0 times vconsts[SHIFT_L]; only dc[0] is stored later (see the ssv near the end of the file) */
vmudh dc, in_r0, vconsts[SHIFT_L]
row_scale:
lqv in_r4[0], 64(iq_in_dat)
vmudh out_r0, in_r0, qscale[SCALE2]
lqv in_r5[0], 80(iq_in_dat)
vmudh out_r1, in_r1, qscale[SCALE2]
lqv in_r6[0], 96(iq_in_dat)
vmudh out_r2, in_r2, qscale[SCALE2]
lqv in_r7[0], 112(iq_in_dat)
vmudh out_r3, in_r3, qscale[SCALE2]
/* all eight rows have been loaded: advance the input pointer to the following block */
addi iq_in_dat, iq_in_dat, 128
vmudh out_r4, in_r4, qscale[SCALE2]
vmudh out_r5, in_r5, qscale[SCALE2]
vmudh out_r6, in_r6, qscale[SCALE2]
vmudh out_r7, in_r7, qscale[SCALE2]
/* intra blocks (iq_in_intra != rzero) bypass get_sign/add_sign and go straight to iquant */
bne iq_in_intra, rzero, iquant
nop
/*
 * Non-intra only (intra blocks branch around this section).
 * get_sign replaces each in_rN with a value derived from qscale[SCALE] and
 * the old in_rN; add_sign then adds that value to the scaled row out_rN.
 * NOTE(review): per the RSP definition of vabs the result should be
 * qscale[SCALE] negated, zeroed or kept according to the sign of in_rN --
 * confirm against the RSP instruction reference.
 */
### calculate sign (stored in in_r?) ###
get_sign:
vabs in_r0, in_r0, qscale[SCALE]
vabs in_r1, in_r1, qscale[SCALE]
vabs in_r2, in_r2, qscale[SCALE]
vabs in_r3, in_r3, qscale[SCALE]
vabs in_r4, in_r4, qscale[SCALE]
vabs in_r5, in_r5, qscale[SCALE]
vabs in_r6, in_r6, qscale[SCALE]
vabs in_r7, in_r7, qscale[SCALE]
### add sign ###
/*
 * out_rN = in_rN + out_rN for all eight rows.
 * NOTE(review): the interleaved lqv instructions reload in_r0..in_r3 from
 * iq_in_dat, which was already advanced by 128, so they read the first four
 * rows of the following block.  In the code visible here iquant_do reloads
 * the same registers before they are read again, so this looks like an
 * unused prefetch (and a read past the current block) -- confirm intent.
 */
add_sign:
vadd out_r0, in_r0, out_r0
vadd out_r1, in_r1, out_r1
vadd out_r2, in_r2, out_r2
vadd out_r3, in_r3, out_r3
lqv in_r0[0], 0(iq_in_dat)
vadd out_r4, in_r4, out_r4
lqv in_r1[0], 16(iq_in_dat)
vadd out_r5, in_r5, out_r5
lqv in_r2[0], 32(iq_in_dat)
vadd out_r6, in_r6, out_r6
lqv in_r3[0], 48(iq_in_dat)
vadd out_r7, in_r7, out_r7
/*
 * iquant: multiply every row by the matching quantization-matrix row and
 * store the eight result rows (quadwords at offsets 0..112) to iq_out_dat.
 * Stores of finished rows are interleaved with the multiplies of later rows.
 * Two build variants:
 *   _FIQUANT defined : vmulq only
 *   otherwise        : every vmulq is followed by vmacq on the same register
 * NOTE(review): vmulq/vmacq are presumably the RSP MPEG inverse-quantization
 * multiply and its accumulator post-processing step; their exact rounding
 * behaviour is not visible here -- confirm against the RSP reference.
 */
iquant:
#ifdef _FIQUANT
vmulq out_r0, out_r0, qmat_r0
vmulq out_r1, out_r1, qmat_r1
vmulq out_r2, out_r2, qmat_r2
vmulq out_r3, out_r3, qmat_r3
vmulq out_r4, out_r4, qmat_r4
sqv out_r0[0], 0(iq_out_dat)
vmulq out_r5, out_r5, qmat_r5
sqv out_r1[0], 16(iq_out_dat)
vmulq out_r6, out_r6, qmat_r6
sqv out_r2[0], 32(iq_out_dat)
vmulq out_r7, out_r7, qmat_r7
sqv out_r3[0], 48(iq_out_dat)
sqv out_r4[0], 64(iq_out_dat)
#else
vmulq out_r0, out_r0, qmat_r0
vmacq out_r0, vzero, vzero
vmulq out_r1, out_r1, qmat_r1
vmacq out_r1, vzero, vzero
vmulq out_r2, out_r2, qmat_r2
vmacq out_r2, vzero, vzero
sqv out_r0[0], 0(iq_out_dat)
vmulq out_r3, out_r3, qmat_r3
vmacq out_r3, vzero, vzero
sqv out_r1[0], 16(iq_out_dat)
vmulq out_r4, out_r4, qmat_r4
vmacq out_r4, vzero, vzero
sqv out_r2[0], 32(iq_out_dat)
vmulq out_r5, out_r5, qmat_r5
vmacq out_r5, vzero, vzero
sqv out_r3[0], 48(iq_out_dat)
vmulq out_r6, out_r6, qmat_r6
vmacq out_r6, vzero, vzero
sqv out_r4[0], 64(iq_out_dat)
vmulq out_r7, out_r7, qmat_r7
vmacq out_r7, vzero, vzero
#endif
/* rows 5..7 are stored here for both build variants */
sqv out_r5[0], 80(iq_out_dat)
sqv out_r6[0], 96(iq_out_dat)
sqv out_r7[0], 112(iq_out_dat)
/* finish when no coded-block-pattern bits remain */
blez iq_in_cbp, iquant_done
/*
 * delay slot (runs on both the exit and the loop-back path): writes dc[0]
 * as a short at offset 0 of the output block, replacing the first value
 * stored from out_r0 above.
 * NOTE(review): this store is reached on the non-intra path as well as the
 * intra path -- confirm that overwriting the first coefficient is intended
 * for non-intra blocks.
 */
ssv dc[0], 0(iq_out_dat)
j iquant_loop
/* delay slot: advance the output pointer to the following 128-byte block */
addi iq_out_dat, iq_out_dat, 128
#include "iquant_un.h"