remap.s
4.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
#########################################################################
#
# Remap the DCT outputs to give 64 values appropriately mirror-imaged
# for the Overlap-Add stage
#
# NOTES:
# ------
#
# - This may be better off coupled into the DCT itself
#
# - This is another case where load/store alternates would help
.ent remap
#
# Remap: reshuffle the 32 DCT outputs into the 64-entry Overlap-Add input.
#
# Input  : 32 entries of 4 bytes each at RSP_DCTOUT_OFFSET.  The halfword
#          at byte 4*i is gathered into a Hi register and the halfword at
#          byte 4*i+2 into a Lo register, eight entries per register.
# Output : two planes of 64 halfwords each.  The Hi plane starts at
#          RSP_OLAIN_OFFSET, the Lo plane 128 bytes after it.
#
# With dct[i] the i-th input entry and out[k] the k-th output slot
# (byte offset 2*k in each plane) the routine produces
#
#     out[0..15]  =  dct[16..31]
#     out[16]     =  0
#     out[17..47] = -dct[31..1]      that is out[48-i] = -dct[i], 1<=i<=31
#     out[48..63] = -dct[0..15]
#
# Registers written: $1, $2, $3 and $v1..$v9.  vconst is only read.
# Control leaves through a jump to OLA, there is no return to a caller.
#
.name   rmLo0,   $v1
.name   rmLo1,   $v2
.name   rmLo2,   $v3
.name   rmLo3,   $v4
.name   rmHi0,   $v5
.name   rmHi1,   $v6
.name   rmHi2,   $v7
.name   rmHi3,   $v8
.name   rmNil,   $v9
.name   rmSrc,   $1
.name   rmDstLo, $2
.name   rmDstHi, $3

Remap:
        addi    rmSrc,   zero,    RSP_DCTOUT_OFFSET
        addi    rmDstHi, zero,    RSP_OLAIN_OFFSET
        addi    rmDstLo, rmDstHi, 128

        # ---- Gather dct[0..7] into rmHi0 / rmLo0 ------------------------
        # Loads walk the source in ascending address order, alternating
        # between the Hi and the Lo halfword of each entry.
        lsv     rmHi0[0],   0(rmSrc)
        lsv     rmLo0[0],   2(rmSrc)
        lsv     rmHi0[2],   4(rmSrc)
        lsv     rmLo0[2],   6(rmSrc)
        lsv     rmHi0[4],   8(rmSrc)
        lsv     rmLo0[4],  10(rmSrc)
        lsv     rmHi0[6],  12(rmSrc)
        lsv     rmLo0[6],  14(rmSrc)
        lsv     rmHi0[8],  16(rmSrc)
        lsv     rmLo0[8],  18(rmSrc)
        lsv     rmHi0[10], 20(rmSrc)
        lsv     rmLo0[10], 22(rmSrc)
        lsv     rmHi0[12], 24(rmSrc)
        lsv     rmLo0[12], 26(rmSrc)
        lsv     rmHi0[14], 28(rmSrc)
        lsv     rmLo0[14], 30(rmSrc)

        # ---- Gather dct[8..15] into rmHi1 / rmLo1 -----------------------
        addi    rmSrc, rmSrc, 32
        lsv     rmHi1[0],   0(rmSrc)
        lsv     rmLo1[0],   2(rmSrc)
        lsv     rmHi1[2],   4(rmSrc)
        lsv     rmLo1[2],   6(rmSrc)
        lsv     rmHi1[4],   8(rmSrc)
        lsv     rmLo1[4],  10(rmSrc)
        lsv     rmHi1[6],  12(rmSrc)
        lsv     rmLo1[6],  14(rmSrc)
        lsv     rmHi1[8],  16(rmSrc)
        lsv     rmLo1[8],  18(rmSrc)
        lsv     rmHi1[10], 20(rmSrc)
        lsv     rmLo1[10], 22(rmSrc)
        lsv     rmHi1[12], 24(rmSrc)
        lsv     rmLo1[12], 26(rmSrc)
        lsv     rmHi1[14], 28(rmSrc)
        lsv     rmLo1[14], 30(rmSrc)

        # ---- Gather dct[16..23] into rmHi2 / rmLo2 ----------------------
        addi    rmSrc, rmSrc, 32
        lsv     rmHi2[0],   0(rmSrc)
        lsv     rmLo2[0],   2(rmSrc)
        lsv     rmHi2[2],   4(rmSrc)
        lsv     rmLo2[2],   6(rmSrc)
        lsv     rmHi2[4],   8(rmSrc)
        lsv     rmLo2[4],  10(rmSrc)
        lsv     rmHi2[6],  12(rmSrc)
        lsv     rmLo2[6],  14(rmSrc)
        lsv     rmHi2[8],  16(rmSrc)
        lsv     rmLo2[8],  18(rmSrc)
        lsv     rmHi2[10], 20(rmSrc)
        lsv     rmLo2[10], 22(rmSrc)
        lsv     rmHi2[12], 24(rmSrc)
        lsv     rmLo2[12], 26(rmSrc)
        lsv     rmHi2[14], 28(rmSrc)
        lsv     rmLo2[14], 30(rmSrc)

        # ---- Gather dct[24..31] into rmHi3 / rmLo3 ----------------------
        addi    rmSrc, rmSrc, 32
        lsv     rmHi3[0],   0(rmSrc)
        lsv     rmLo3[0],   2(rmSrc)
        lsv     rmHi3[2],   4(rmSrc)
        lsv     rmLo3[2],   6(rmSrc)
        lsv     rmHi3[4],   8(rmSrc)
        lsv     rmLo3[4],  10(rmSrc)
        lsv     rmHi3[6],  12(rmSrc)
        lsv     rmLo3[6],  14(rmSrc)
        lsv     rmHi3[8],  16(rmSrc)
        lsv     rmLo3[8],  18(rmSrc)
        lsv     rmHi3[10], 20(rmSrc)
        lsv     rmLo3[10], 22(rmSrc)
        lsv     rmHi3[12], 24(rmSrc)
        lsv     rmLo3[12], 26(rmSrc)
        lsv     rmHi3[14], 28(rmSrc)
        lsv     rmLo3[14], 30(rmSrc)

        # ---- out[0..15] = dct[16..31] -----------------------------------
        # These go out before the negation below, so they keep their sign.
        sqv     rmHi2[0],   0(rmDstHi)
        sqv     rmLo2[0],   0(rmDstLo)
        sqv     rmHi3[0],  16(rmDstHi)
        sqv     rmLo3[0],  16(rmDstLo)

        # ---- Negate all four register pairs -----------------------------
        # rmNil becomes all zero.  Each vsubc / vsub pair computes 0 - x
        # across a Lo / Hi pair: vsubc handles the Lo halves and the vsub
        # that follows consumes its borrow for the Hi halves.  The pairs
        # must stay adjacent and in this order.
        vxor    rmNil, vconst, vconst
        vsubc   rmLo2, rmNil, rmLo2
        vsub    rmHi2, rmNil, rmHi2
        vsubc   rmLo3, rmNil, rmLo3
        vsub    rmHi3, rmNil, rmHi3
        vsubc   rmLo0, rmNil, rmLo0
        vsub    rmHi0, rmNil, rmHi0
        vsubc   rmLo1, rmNil, rmLo1
        vsub    rmHi1, rmNil, rmHi1

        # From here on the stores walk the output in ascending address
        # order.  The mirrored part has to be written one halfword at a
        # time because the element order is reversed.

        # ---- out[16] = 0 ------------------------------------------------
        ssv     rmNil[0],  32(rmDstHi)
        ssv     rmNil[0],  32(rmDstLo)

        # ---- out[17..24] = -dct[31..24] ---------------------------------
        ssv     rmHi3[14], 34(rmDstHi)
        ssv     rmLo3[14], 34(rmDstLo)
        ssv     rmHi3[12], 36(rmDstHi)
        ssv     rmLo3[12], 36(rmDstLo)
        ssv     rmHi3[10], 38(rmDstHi)
        ssv     rmLo3[10], 38(rmDstLo)
        ssv     rmHi3[8],  40(rmDstHi)
        ssv     rmLo3[8],  40(rmDstLo)
        ssv     rmHi3[6],  42(rmDstHi)
        ssv     rmLo3[6],  42(rmDstLo)
        ssv     rmHi3[4],  44(rmDstHi)
        ssv     rmLo3[4],  44(rmDstLo)
        ssv     rmHi3[2],  46(rmDstHi)
        ssv     rmLo3[2],  46(rmDstLo)
        ssv     rmHi3[0],  48(rmDstHi)
        ssv     rmLo3[0],  48(rmDstLo)

        # ---- out[25..32] = -dct[23..16] ---------------------------------
        ssv     rmHi2[14], 50(rmDstHi)
        ssv     rmLo2[14], 50(rmDstLo)
        ssv     rmHi2[12], 52(rmDstHi)
        ssv     rmLo2[12], 52(rmDstLo)
        ssv     rmHi2[10], 54(rmDstHi)
        ssv     rmLo2[10], 54(rmDstLo)
        ssv     rmHi2[8],  56(rmDstHi)
        ssv     rmLo2[8],  56(rmDstLo)
        ssv     rmHi2[6],  58(rmDstHi)
        ssv     rmLo2[6],  58(rmDstLo)
        ssv     rmHi2[4],  60(rmDstHi)
        ssv     rmLo2[4],  60(rmDstLo)
        ssv     rmHi2[2],  62(rmDstHi)
        ssv     rmLo2[2],  62(rmDstLo)
        ssv     rmHi2[0],  64(rmDstHi)
        ssv     rmLo2[0],  64(rmDstLo)

        # ---- out[33..40] = -dct[15..8] ----------------------------------
        ssv     rmHi1[14], 66(rmDstHi)
        ssv     rmLo1[14], 66(rmDstLo)
        ssv     rmHi1[12], 68(rmDstHi)
        ssv     rmLo1[12], 68(rmDstLo)
        ssv     rmHi1[10], 70(rmDstHi)
        ssv     rmLo1[10], 70(rmDstLo)
        ssv     rmHi1[8],  72(rmDstHi)
        ssv     rmLo1[8],  72(rmDstLo)
        ssv     rmHi1[6],  74(rmDstHi)
        ssv     rmLo1[6],  74(rmDstLo)
        ssv     rmHi1[4],  76(rmDstHi)
        ssv     rmLo1[4],  76(rmDstLo)
        ssv     rmHi1[2],  78(rmDstHi)
        ssv     rmLo1[2],  78(rmDstLo)
        ssv     rmHi1[0],  80(rmDstHi)
        ssv     rmLo1[0],  80(rmDstLo)

        # ---- out[41..47] = -dct[7..1] -----------------------------------
        # Element 0 is not written here: -dct[0] lands in out[48] through
        # the quadword store below.
        ssv     rmHi0[14], 82(rmDstHi)
        ssv     rmLo0[14], 82(rmDstLo)
        ssv     rmHi0[12], 84(rmDstHi)
        ssv     rmLo0[12], 84(rmDstLo)
        ssv     rmHi0[10], 86(rmDstHi)
        ssv     rmLo0[10], 86(rmDstLo)
        ssv     rmHi0[8],  88(rmDstHi)
        ssv     rmLo0[8],  88(rmDstLo)
        ssv     rmHi0[6],  90(rmDstHi)
        ssv     rmLo0[6],  90(rmDstLo)
        ssv     rmHi0[4],  92(rmDstHi)
        ssv     rmLo0[4],  92(rmDstLo)
        ssv     rmHi0[2],  94(rmDstHi)
        ssv     rmLo0[2],  94(rmDstLo)

        # ---- out[48..63] = -dct[0..15] ----------------------------------
        sqv     rmHi0[0],  96(rmDstHi)
        sqv     rmLo0[0],  96(rmDstLo)
        sqv     rmHi1[0], 112(rmDstHi)
        sqv     rmLo1[0], 112(rmDstLo)

        # Continue with the Overlap-Add stage.  The nop fills the jump
        # delay slot.
        j       OLA
        nop

.unname rmLo0
.unname rmLo1
.unname rmLo2
.unname rmLo3
.unname rmHi0
.unname rmHi1
.unname rmHi2
.unname rmHi3
.unname rmNil
.unname rmSrc
.unname rmDstLo
.unname rmDstHi
.end remap