iquant.s
3.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
/*
 * iquant.s Thu Aug 3 18:08:27 PDT 1995
 *
 * Inverse quantization for MPEG
 *
 * Inputs:
 *   iq_in_dat   - Input data address
 *   iq_out_dat  - Output data address (can be the same as iq_in_dat)
 *   iq_in_cbp   - Coded Block Pattern
 *   iq_in_intra - Intra block flag
 *   iq_in_quant - Quant scale factor address
 *
 * Outputs:
 *   data at iq_out_dat - inverse-quantized data
 */
#include "iquant.h"
# Entry point and one-time setup, executed before the per-block loop:
# load the constant vector, select and load the eight rows of the
# quantization matrix, then build the qscale vector from the quant
# scale factor found at the address in iq_in_quant.
# NOTE(review): the register names (vconsts, consts_addr, qmat_addr,
# qmat_r0..qmat_r7, qscale, rzero) and the symbols IQ_CONST_BASE,
# IQ_CONSTS, NONI_QMAT, INTRA_QMAT, QUANT_LOAD and QUANT are not defined
# in this file; presumably they come from iquant.h -- confirm.
# NOTE(review): the purpose of the two leading nops is not evident here.
iq: nop
nop
iquant_init:
# zero vconsts before it is loaded
vxor vconsts, vconsts, vconsts
# setup addresses
addi consts_addr, rzero, IQ_CONST_BASE
# load vconsts
lqv vconsts, IQ_CONSTS(consts_addr)
# a non-zero intra flag selects the intra matrix; the nop fills the
# branch delay slot
bne iq_in_intra, rzero, iqmat
nop
# non-intra case: matrix lives at consts_addr + NONI_QMAT
niqmat:
addi qmat_addr, consts_addr, NONI_QMAT
j load_qmat
nop
# intra case: matrix lives at consts_addr + INTRA_QMAT
iqmat:
addi qmat_addr, consts_addr, INTRA_QMAT
# load the selected matrix as eight consecutive 16-byte rows
load_qmat:
lqv qmat_r0, 0(qmat_addr)
lqv qmat_r1, 16(qmat_addr)
lqv qmat_r2, 32(qmat_addr)
lqv qmat_r3, 48(qmat_addr)
lqv qmat_r4, 64(qmat_addr)
lqv qmat_r5, 80(qmat_addr)
lqv qmat_r6, 96(qmat_addr)
lqv qmat_r7, 112(qmat_addr)
# get qmat scale factor
# (one 16-bit value read from the address in iq_in_quant into the
# QUANT_LOAD element of vconsts)
lsv vconsts[QUANT_LOAD], 0(iq_in_quant)
### quant scaling ###
# qscale = vconsts multiplied by its own QUANT element; the SCALE and
# SCALE2 elements of qscale are the ones read later in iquant_do
vmudh qscale, vconsts, vconsts[QUANT]
# Per-block loop driven by the coded block pattern in iq_in_cbp.
# One pattern bit is consumed per iteration, lowest bit first: a set bit
# dispatches to iquant_do, a clear bit skips the block. Either way the
# data pointers advance by one block before the next iteration.
iquant_loop:
nop
nop
# isolate the low bit of the pattern
andi dummy, iq_in_cbp, 1
bgtz dummy, iquant_do
# branch delay slot: the pattern is shifted right whether or not the
# branch above is taken
srl iq_in_cbp, iq_in_cbp, 1
# iquant_do jumps back to this label once a block has been stored
iquant_ret:
# no pattern bits left means no more coded blocks
blez iq_in_cbp, iquant_done
nop
# step both pointers by 128 bytes, matching the eight 16-byte rows that
# iquant_do loads and stores per block
addi iq_in_dat, iq_in_dat, 128
addi iq_out_dat, iq_out_dat, 128
j iquant_loop
nop
# leave the routine through the address held in the return register
iquant_done:
jr return
nop
nop
# Handle one coded block: read its eight rows, capture the DC term, and
# scale every row by the SCALE2 element of qscale. All eight rows are
# read here before the stores in the iquant section run.
# NOTE(review): in_r*, out_r*, dc and the element selectors SHIFT_L and
# SCALE2 are defined outside this file (presumably iquant.h) -- confirm.
iquant_do:
nop
nop
# get in data (8x8 matrix)
lqv in_r0, 0(iq_in_dat)
lqv in_r1, 16(iq_in_dat)
lqv in_r2, 32(iq_in_dat)
lqv in_r3, 48(iq_in_dat)
lqv in_r4, 64(iq_in_dat)
lqv in_r5, 80(iq_in_dat)
lqv in_r6, 96(iq_in_dat)
lqv in_r7, 112(iq_in_dat)
### get DC term ###
# dc = row 0 multiplied by the SHIFT_L element of vconsts; element 0 of
# dc is what the ssv at the end of the iquant section writes out
vmudh dc, in_r0, vconsts[SHIFT_L]
row_scale:
# out_rN = in_rN multiplied by the SCALE2 element of qscale
vmudh out_r0, in_r0, qscale[SCALE2]
vmudh out_r1, in_r1, qscale[SCALE2]
vmudh out_r2, in_r2, qscale[SCALE2]
vmudh out_r3, in_r3, qscale[SCALE2]
vmudh out_r4, in_r4, qscale[SCALE2]
vmudh out_r5, in_r5, qscale[SCALE2]
vmudh out_r6, in_r6, qscale[SCALE2]
vmudh out_r7, in_r7, qscale[SCALE2]
# intra blocks bypass get_sign/add_sign and continue at iquant; the nop
# fills the branch delay slot
bne iq_in_intra, rzero, iquant
nop
### calculate sign (stored in in_r?) ###
# Sign adjustment, reached only when iq_in_intra is zero (the branch just
# before this section skips it otherwise).
# vabs is used here as a sign transfer: each element of in_rN is replaced
# by the SCALE element of qscale carrying the sign of the original
# coefficient (zero stays zero), per the RSP definition of vabs.
get_sign:
vabs in_r0, in_r0, qscale[SCALE]
vabs in_r1, in_r1, qscale[SCALE]
vabs in_r2, in_r2, qscale[SCALE]
vabs in_r3, in_r3, qscale[SCALE]
vabs in_r4, in_r4, qscale[SCALE]
vabs in_r5, in_r5, qscale[SCALE]
vabs in_r6, in_r6, qscale[SCALE]
vabs in_r7, in_r7, qscale[SCALE]
### add sign ###
# out_rN = signed term from get_sign + scaled coefficient from row_scale
add_sign:
vadd out_r0, in_r0, out_r0
vadd out_r1, in_r1, out_r1
vadd out_r2, in_r2, out_r2
vadd out_r3, in_r3, out_r3
vadd out_r4, in_r4, out_r4
vadd out_r5, in_r5, out_r5
vadd out_r6, in_r6, out_r6
vadd out_r7, in_r7, out_r7
# Final stage for one block: multiply each row by the matching row of the
# quantization matrix, align the results, store the eight rows, overwrite
# the first coefficient with the DC term, and return to the block loop.
# Each vmulq is paired with a vmacq on zero operands, which works on the
# accumulator left by the vmulq. NOTE(review): presumably the MPEG
# rounding/oddification step of the RSP vmulq/vmacq pair -- confirm
# against the RSP instruction reference.
iquant: vmulq out_r0, out_r0, qmat_r0
vmacq out_r0, vzero, vzero
vmulq out_r1, out_r1, qmat_r1
vmacq out_r1, vzero, vzero
vmulq out_r2, out_r2, qmat_r2
vmacq out_r2, vzero, vzero
vmulq out_r3, out_r3, qmat_r3
vmacq out_r3, vzero, vzero
vmulq out_r4, out_r4, qmat_r4
vmacq out_r4, vzero, vzero
vmulq out_r5, out_r5, qmat_r5
vmacq out_r5, vzero, vzero
vmulq out_r6, out_r6, qmat_r6
vmacq out_r6, vzero, vzero
vmulq out_r7, out_r7, qmat_r7
vmacq out_r7, vzero, vzero
### align results ###
# multiply every row by the SHIFT_R element of vconsts
# NOTE(review): presumably a fractional multiply acting as a right
# shift -- confirm the value of the SHIFT_R constant
vmudm out_r0, out_r0, vconsts[SHIFT_R]
vmudm out_r1, out_r1, vconsts[SHIFT_R]
vmudm out_r2, out_r2, vconsts[SHIFT_R]
vmudm out_r3, out_r3, vconsts[SHIFT_R]
vmudm out_r4, out_r4, vconsts[SHIFT_R]
vmudm out_r5, out_r5, vconsts[SHIFT_R]
vmudm out_r6, out_r6, vconsts[SHIFT_R]
vmudm out_r7, out_r7, vconsts[SHIFT_R]
### store results ###
sqv out_r0, 0(iq_out_dat)
sqv out_r1, 16(iq_out_dat)
sqv out_r2, 32(iq_out_dat)
sqv out_r3, 48(iq_out_dat)
sqv out_r4, 64(iq_out_dat)
sqv out_r5, 80(iq_out_dat)
sqv out_r6, 96(iq_out_dat)
sqv out_r7, 112(iq_out_dat)
# overwrite the first 16-bit coefficient just stored from out_r0 with
# element 0 of dc
# NOTE(review): this store runs on both the intra and the non-intra
# path -- confirm that replacing the non-intra DC is intended
ssv dc[0], 0(iq_out_dat)
nop
nop
# back to the block loop
j iquant_ret
nop
#include "iquant_un.h"