iquant.s
4.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
/*
* miquant.s Thu Aug 3 18:08:27 PDT 1995
*
*
* inverse quantization for MPEG
*
*/
#include <rsp.h>
#include <rcp.h>
#include <sptask.h>
/*
 * Data-memory layout for the inverse-quantization routine.
 *
 * Byte offsets from DATBASE (the code derives the same offsets with a
 * chain of addi instructions, so the order and sizes here must not change):
 *      0   vconsts   8 halfwords (16 bytes)
 *     16   params    4 words     (16 bytes)
 *     32   qmat      128 bytes
 *    160   in        128 bytes
 *    288   out       128 bytes
 */
/* Build switch: when defined, the code guarded by #ifdef NONINTRA is assembled in. */
#define NONINTRA
#define DATBASE 0x04000000
.data DATBASE
/* ZSHIFT appears only in the comment below; the table itself is written with literals. */
#define ZSHIFT 0
### vconsts[8] = { 1<<ZSHIFT, 2<<ZSHIFT, ~1, -2047, 2048, 0, 0x1000, 8 }
.half 1 /* [0] SCALE */
.half 2 /* [1] SCALE2 */
.half 0xfffe /* [2] MASK    (not referenced by the code in this file) */
.half -2047 /* [3] IQ_MIN  (not referenced by the code in this file) */
.half 2048 /* [4] IQ_MAX  (not referenced by the code in this file) */
.half 0 /* [5] QUANT   placeholder; overwritten at run time by lsv vconsts[10] */
.half 0x1000 /* [6] SHIFT_R multiplier used by the vmudm "align results" step */
.half 8 /* [7] SHIFT_L multiplier applied to in_r0 to form the DC term */
### parameters ###
/*
 * NOTE(review): the code reads only one halfword from each of the two named
 * words (lh at offset 0, lsv at offset 8) -- confirm the producer of these
 * parameters places the values in those halfwords.
 */
.word 0 /* flags */
.word 0
.word 0 /* quant */
.word 0
/* Each buffer below is read or written as eight 16-byte rows (lqv/sqv at strides of 16). */
### qmat ###
.space 128
### in ###
.space 128
### out ###
.space 128
##########################################################################
/* TASKBASE is not defined in this file; presumably it comes from one of the included headers -- TODO confirm. */
.base TASKBASE
### Register Usage ###
### 1. Vector Registers ###
/* $v2-$v9: the eight rows of the quantization matrix, loaded from qmat_addr. */
.name qmat_r0, $v2
.name qmat_r1, $v3
.name qmat_r2, $v4
.name qmat_r3, $v5
.name qmat_r4, $v6
.name qmat_r5, $v7
.name qmat_r6, $v8
.name qmat_r7, $v9
/* $v10-$v17: the eight input rows, loaded from in_addr.  On the non-intra
 * path they are overwritten in place with the per-element sign term. */
.name in_r0, $v10
.name in_r1, $v11
.name in_r2, $v12
.name in_r3, $v13
.name in_r4, $v14
.name in_r5, $v15
.name in_r6, $v16
.name in_r7, $v17
/* $v18-$v25: the eight result rows, stored to out_addr. */
.name out_r0, $v18
.name out_r1, $v19
.name out_r2, $v20
.name out_r3, $v21
.name out_r4, $v22
.name out_r5, $v23
.name out_r6, $v24
.name out_r7, $v25
/* dc holds in_r0 * SHIFT_L; only element 0 is ever stored. */
.name dc, $v1
###################################
/* The four temporaries below are commented out; $v26-$v29 are unused in this file. */
# .name tmp1 $v26
# .name tmp2 $v27
# .name tmp3 $v28
# .name tmp4 $v29
###################################
/* vnull is cleared once at start-up and used as the zero operand of vmacq. */
.name vnull, $v0
/* quants = vconsts * QUANT, recomputed on every loop iteration. */
.name quants, $v30
/* vconsts is loaded from the 8-halfword constant table at consts_addr. */
.name vconsts, $v31
### Scalar Registers ###
/* dev_null aliases $0 and is used as the zero operand when testing intra_flag. */
.name dev_null, $0
.name intra_flag, $10
/* Address registers; each is derived from the previous one by a fixed
 * offset in the set-up code, mirroring the data-section layout. */
.name dat_base, $20
.name parm_addr, $21
.name consts_addr, $22
.name qmat_addr, $23
.name in_addr, $24
.name out_addr, $25
/*
 * Element selectors for the vconsts register, in the same order as the
 * constant table.  Only QUANT, SHIFT_R and SHIFT_L are referenced by the
 * code in this file; SCALE and SCALE2 are reached indirectly as quants[0]
 * and quants[1], and MASK, IQ_MIN and IQ_MAX are currently unused.
 */
#define SCALE vconsts[0]
#define SCALE2 vconsts[1]
#define MASK vconsts[2]
#define IQ_MIN vconsts[3]
#define IQ_MAX vconsts[4]
#define QUANT vconsts[5]
#define SHIFT_R vconsts[6]
#define SHIFT_L vconsts[7]
/*
 * iquant_start -- inverse-quantize one 8x8 block of 16-bit coefficients.
 *
 * Inputs (data memory, addressed relative to DATBASE):
 *   params+0   halfword tested against zero; non-zero skips the NONINTRA
 *              sign-adjustment step
 *   params+8   halfword quantizer scale, loaded into the QUANT slot of vconsts
 *   qmat       8 rows x 16 bytes, quantization matrix
 *   in         8 rows x 16 bytes, quantized coefficients
 * Output:
 *   out        8 rows x 16 bytes; the first halfword is then overwritten
 *              with in[0] * SHIFT_L
 *
 * The routine never returns: after storing the results it jumps back to
 * iquant_loop and processes the same buffers again.
 * NOTE(review): there is no exit or break in the visible code -- confirm the
 * task is stopped externally.
 * NOTE(review): IQ_MIN / IQ_MAX are defined but no clamp to that range is
 * performed anywhere in this routine.
 */
iquant_start:
vxor vnull, vnull, vnull
/* Clearing vconsts here is followed by a full 16-byte load of the same register below. */
vxor vconsts, vconsts, vconsts
# setup addresses
/* NOTE(review): DATBASE (0x04000000) does not fit a 16-bit addi immediate;
 * presumably the assembler truncates it and only the low address bits
 * matter for data-memory accesses -- confirm. */
addi dat_base, $0, DATBASE
addi consts_addr, dat_base, 0
addi parm_addr, consts_addr, 16
addi qmat_addr, parm_addr, 16
addi in_addr, qmat_addr, 128
addi out_addr, in_addr, 128
# load vconsts
lqv vconsts, 0(consts_addr)
iquant_loop:
# get intra_flag
lh intra_flag, 0(parm_addr)
# get qmat scale factor
/* Byte offset 10 within vconsts is halfword element 5, i.e. the QUANT slot. */
lsv vconsts[10], 8(parm_addr)
# get in and qmat data (8x8 matrix each)
lqv qmat_r0, 0(qmat_addr)
lqv qmat_r1, 16(qmat_addr)
lqv qmat_r2, 32(qmat_addr)
lqv qmat_r3, 48(qmat_addr)
lqv qmat_r4, 64(qmat_addr)
lqv qmat_r5, 80(qmat_addr)
lqv qmat_r6, 96(qmat_addr)
lqv qmat_r7, 112(qmat_addr)
lqv in_r0, 0(in_addr)
lqv in_r1, 16(in_addr)
lqv in_r2, 32(in_addr)
lqv in_r3, 48(in_addr)
lqv in_r4, 64(in_addr)
lqv in_r5, 80(in_addr)
lqv in_r6, 96(in_addr)
lqv in_r7, 112(in_addr)
### quant scaling ###
/* Every vconsts element is multiplied by QUANT, so quants[0] = 1*QUANT and
 * quants[1] = 2*QUANT (barring saturation of the 16-bit result). */
vmudh quants, vconsts, QUANT
### get DC term ###
/* dc = in_r0 * SHIFT_L (8); only element 0 is used, by the ssv at the end. */
vmudh dc, in_r0, SHIFT_L
row_scale:
/* out = in * (2 * QUANT) for every coefficient. */
vmudh out_r0, in_r0, quants[1]
vmudh out_r1, in_r1, quants[1]
vmudh out_r2, in_r2, quants[1]
vmudh out_r3, in_r3, quants[1]
vmudh out_r4, in_r4, quants[1]
vmudh out_r5, in_r5, quants[1]
vmudh out_r6, in_r6, quants[1]
vmudh out_r7, in_r7, quants[1]
/* A non-zero intra_flag skips the sign adjustment; the nop fills the branch delay slot. */
bne intra_flag, dev_null, iquant
nop
#ifdef NONINTRA
### calculate sign (stored in in_r?) ###
/* Replaces each input with QUANT carrying that input's sign (vabs with
 * quants[0] as the magnitude) -- see the RSP vabs definition for the
 * handling of zero inputs. */
get_sign:
vabs in_r0, in_r0, quants[0]
vabs in_r1, in_r1, quants[0]
vabs in_r2, in_r2, quants[0]
vabs in_r3, in_r3, quants[0]
vabs in_r4, in_r4, quants[0]
vabs in_r5, in_r5, quants[0]
vabs in_r6, in_r6, quants[0]
vabs in_r7, in_r7, quants[0]
### add sign ###
/* out = 2*QUANT*in + (signed QUANT term computed above). */
add_sign:
vadd out_r0, in_r0, out_r0
vadd out_r1, in_r1, out_r1
vadd out_r2, in_r2, out_r2
vadd out_r3, in_r3, out_r3
vadd out_r4, in_r4, out_r4
vadd out_r5, in_r5, out_r5
vadd out_r6, in_r6, out_r6
vadd out_r7, in_r7, out_r7
#endif
/* Multiply each row by its quantization-matrix row.  Each vmulq is paired
 * with a vmacq that reads the accumulator left by that vmulq, so the pairs
 * must stay adjacent.  The exact rounding performed by vmulq/vmacq is
 * defined by the RSP instruction set, not by this file. */
iquant: vmulq out_r0, out_r0, qmat_r0
vmacq out_r0, vnull, vnull
vmulq out_r1, out_r1, qmat_r1
vmacq out_r1, vnull, vnull
vmulq out_r2, out_r2, qmat_r2
vmacq out_r2, vnull, vnull
vmulq out_r3, out_r3, qmat_r3
vmacq out_r3, vnull, vnull
vmulq out_r4, out_r4, qmat_r4
vmacq out_r4, vnull, vnull
vmulq out_r5, out_r5, qmat_r5
vmacq out_r5, vnull, vnull
vmulq out_r6, out_r6, qmat_r6
vmacq out_r6, vnull, vnull
vmulq out_r7, out_r7, qmat_r7
vmacq out_r7, vnull, vnull
### align results ###
/* Scales every row by SHIFT_R (0x1000); with vmudm's fractional multiply
 * this is presumably a right shift by 4 -- confirm against the ISA manual. */
vmudm out_r0, out_r0, SHIFT_R
vmudm out_r1, out_r1, SHIFT_R
vmudm out_r2, out_r2, SHIFT_R
vmudm out_r3, out_r3, SHIFT_R
vmudm out_r4, out_r4, SHIFT_R
vmudm out_r5, out_r5, SHIFT_R
vmudm out_r6, out_r6, SHIFT_R
vmudm out_r7, out_r7, SHIFT_R
### store results ###
sqv out_r0, 0(out_addr)
sqv out_r1, 16(out_addr)
sqv out_r2, 32(out_addr)
sqv out_r3, 48(out_addr)
sqv out_r4, 64(out_addr)
sqv out_r5, 80(out_addr)
sqv out_r6, 96(out_addr)
sqv out_r7, 112(out_addr)
/* Overwrite the first output halfword with the separately computed DC term.
 * NOTE(review): this store is reached on both the intra and the non-intra
 * path -- confirm that replacing out[0] is intended when intra_flag is zero. */
ssv dc[0], 0(out_addr)
nop
nop
/* Unconditional loop back; the trailing nop fills the jump delay slot. */
j iquant_loop
nop