src.s
7.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
/**************************************************************************
* *
* Copyright (C) 1994, Silicon Graphics, Inc. *
* *
* These coded instructions, statements, and computer programs contain *
* unpublished proprietary information of Silicon Graphics, Inc., and *
* are protected by Federal copyright law. They may not be disclosed *
* to third parties or copied or duplicated in any form, in whole or *
* in part, without the prior written consent of Silicon Graphics, Inc. *
* *
*************************************************************************/
/*
* File: src.s
* Creator: byron@engr.sgi.com
* Create Date: around 6/94
*
* This file contains the code for resampling and volume scaling
*
*/
#include "../graphics/rsp.h"
.base RSPBOOTBASE
#include "src_regs.h"
#include "src_dmem.h"
# -------------------------------------------------------------------------
init:
#
# Initialisation.  Four DMA reads fill DMEM (constants, channel
# parameters, coefficient table, input sound data); each one is followed
# by a spin on bc0t until the transfer has completed.  Every transfer
# passes the same numeric value as DMEM address (DMA_CACHE) and as DRAM
# address (DMA_DRAM), and programs the length register with
# (size constant - 1).
# After the loads, the constant vectors, the scalar pointers, the loop
# counter and the volume / channel-parameter vectors are set up.
#
# NOTE(review): no instruction in this block writes OutPtr, vico or vfco,
# although later code reads them - confirm they are expected to start at
# zero or are set up elsewhere.
#
#
# first we load constants...
#
addi dmembase, zero, 0
addi $2, zero, 0
addi $3, zero, eval(NUM_OF_CONSTS - 1)
mtc0 dmembase, DMA_CACHE
mtc0 $2, DMA_DRAM
mtc0 $3, DMA_READ_LENGTH
wait1: bc0t wait1 # wait for transfer to complete
nop # branch delay slot
#
# next, load channel parameters...
#
addi dmembase, zero, ICA
addi $2, zero, ICA
addi $3, zero, eval(NUM_OF_CH_PARMS - 1)
mtc0 dmembase, DMA_CACHE
mtc0 $2, DMA_DRAM
mtc0 $3, DMA_READ_LENGTH
wait2: bc0t wait2 # wait for transfer to complete
nop # branch delay slot
#
# next, load coef table
#
addi dmembase, zero, COEF_TAB
addi $2, zero, COEF_TAB
addi $3, zero, eval(COEF_TAB_SIZEB - 1)
mtc0 dmembase, DMA_CACHE
mtc0 $2, DMA_DRAM
mtc0 $3, DMA_READ_LENGTH
wait3: bc0t wait3 # wait for transfer to complete
nop # branch delay slot
#
# next, load input sound data...
#
addi dmembase, zero, INPUT_SND_DATA
addi $2, zero, INPUT_SND_DATA
addi $3, zero, eval(SND_BUFF_SIZEB - 1)
mtc0 dmembase, DMA_CACHE
mtc0 $2, DMA_DRAM
mtc0 $3, DMA_READ_LENGTH
wait4: bc0t wait4 # wait for transfer to complete
nop # branch delay slot
#
# constant vectors, addressed relative to DMEM address 0
#
lqv vMULINC[0], C_MULINC(zero)
lqv vONE[0], C_ONE(zero)
lqv vSHIFT[0], C_SHIFT(zero)
lqv vZEROFOUR[0], C_ZEROFOUR(zero)
lqv vONEFIVE[0], C_ONEFIVE(zero)
lqv vTWOSIX[0], C_TWOSIX(zero)
lqv vTHREESEVEN[0], C_THREESEVEN(zero)
lqv vRAMP, C_RAMP(zero)
#
# scalar pointers: iCAptr / CPHptr address the DMEM slots where stAddr
# and the main loop store vica / vphaddr for the scalar loads to read back
#
addi iCAptr, zero, ICA
addi CPHptr, zero, CPH
addi tVol, zero, VT # address only; initVol overwrites tVol with a flag
#
# loopctl = halfword stored at OUTPUT_COUNT; the main loop subtracts 8
# from it on every pass
#
addi loopctl, zero, OUTPUT_COUNT
lh loopctl, 0(loopctl)
#
# current fractional / integer volume and the channel parameter vector
#
lqv vfcvol, FCVOL(zero)
lqv vicvol, ICVOL(zero)
lqv vchparmz, VT(zero)
# -------------------------------------------------------------------------
#
# to do:
# - design better filter table(s)
# - linear interpolation between filter phases or
# - rounding to filter phase
# - scripted envelope segments
# - lfo's
# - filters
# - double precision dot product
#
# - clean up vector usage
# these guys can share registers:
# vRAMP
# vMULINC
# vica
# vphaddr
# the following can be consolidated if other operations like
# pitch inc/ramps and env scripting for next 8-frame can be
# interleaved with current frame.
# vdata0-3
# vcoef0-3
# vout0-3
#
#
# -------------------------------------------------------------------------
#
# initVol: build the volume values for the first pass of the main loop.
#
# Scalar part: tVol becomes (halfword at VT+2) & 0x4000.  The main loop
# tests it with bgtz to choose between the vcl clamp (atkVol) and the
# vge clamp.
# NOTE(review): vchparmz is loaded from VT, so the halfword at VT+2 is
# presumably the element the vector code uses as vchparmz[1] (the
# multiplier called "rate" below), while the clamp operand is
# vchparmz[0].  The wording "target volume" on the andi may therefore be
# imprecise - confirm against the channel parameter layout.
#
# Vector part, following the per-line comments: volume * rate, shifted
# left by 2, minus the current volume gives a delta; the delta taken from
# element 7 is multiplied by vRAMP and the current volume (element 7) is
# added back, leaving the result in vicvol / vfcvol.
# The vSHIFT[0] operand on the "save" lines is presumably zero, so those
# instructions only read back part of the accumulator - TODO confirm
# against the C_SHIFT table.
#
initVol: # establish the initial volume ramp...
lh tVol, 2(tVol)
andi tVol, tVol, 0x4000 # check integer bit of target volume
vmudn vtemp0, vicvol, vchparmz[1] # scale int vol by rate
vmadl vtemp0, vfcvol, vchparmz[1] # scale frac vol by rate
vmadm vtemp1, vicvol, vSHIFT[0] # save ms vol*rate
vmudn vtemp0, vtemp0, vSHIFT[3] # shift ls left 2
vmadh vtemp1, vtemp1, vSHIFT[3] # shift ms left 2
vsubc vtemp0, vtemp0, vfcvol # get ls delta vol
vsub vtemp1, vtemp1, vicvol # get ms delta vol
vmudl vtemp0, vRAMP, vtemp0[7] # load delta vol*ramp
vmadn vtemp0, vRAMP, vtemp1[7]
vmadm vtemp1, vRAMP, vSHIFT[0]
vmadm vfcvol, vONE, vfcvol[7] # add curr frac vol
vmadh vicvol, vONE, vicvol[7] # add curr int vol
vmadn vfcvol, vONE, vSHIFT[0] # save curr frac vol
# -------------------------------------------------------------------------
#
# incCA / incPH / stAddr: compute the addresses used by the first pass
# of the main loop.
#   incCA  - reload the last offset (element 7 of vico / vfco), step it
#            with vMULINC * vchparmz[4], and form the sample addresses
#            vica = vchparmz[2] + (vico scaled by vSHIFT[2]).
#   incPH  - derive the coefficient addresses vphaddr from the
#            fractional offset vfco, vchparmz[5], vSHIFT[4] and the
#            table base vchparmz[3].
#   stAddr - store both vectors to DMEM (at iCAptr and CPHptr) so the
#            scalar lh instructions in the main loop can read them.
# The same vector sequence is repeated inside the main loop.
#
incCA: vmudm vfco, vONE, vfco[7] # load last inc'd vfco in acc[15-0]
vmadh vfco, vONE, vico[7] # load last inc'd vico in acc[31-16]
vmadm vico, vMULINC, vchparmz[4] # inc CO 8 times
vmadn vfco, vMULINC, vSHIFT[0] # put right value in vfco
vmudn vica, vONE, vchparmz[2] # load sound base address into acc[31-16]
vmadn vica, vico, vSHIFT[2] # base address + (current offset << 2)
incPH:
vmudl vphaddr, vfco, vchparmz[5] # find initial phase addr
vmudn vphaddr, vphaddr, vSHIFT[4]
vmadn vphaddr, vONE, vchparmz[3] # add base coef table addr
stAddr: sqv vica[0], 0(iCAptr)
sqv vphaddr[0], 0(CPHptr)
# -------------------------------------------------------------------------
# note that the address increment stuff (from incCA above) is duplicated
# in the main loop below to take advantage of cp parallelism
#
# Main loop, first part.  One pass handles 8 output samples.
#
# Scalar side: for k = 0..7 the halfword at (2*k)(iCAptr) is a sound-data
# address and the halfword at (2*k)(CPHptr) is a coefficient address.
# Sample k loads into vdata(k mod 4) / vcoef(k mod 4), at element offset
# [0] for k = 0..3 and [8] for k = 4..7.
#
# Vector side, interleaved with the loads: the incCA / incPH sequence is
# repeated to produce the next vico / vfco, vica and vphaddr (stored to
# DMEM later, at the end of the pass), then the volume is updated and
# clamped.
#
# The first lh below is executed only when falling in from stAddr; on
# later passes the copy in the delay slot of "j loop" takes its place.
#
lh CAptr, 0(iCAptr)
loop: vmudm vfco, vONE, vfco[7]
lh PHptr, 0(CPHptr)
vmadh vfco, vONE, vico[7]
ldv vdata0[0], 0(CAptr)
vmadm vico, vMULINC, vchparmz[4]
ldv vcoef0[0], 0(PHptr)
vmadn vfco, vMULINC, vSHIFT[0]
lh CAptr, 8(iCAptr)
vmudn vica, vONE, vchparmz[2]
lh PHptr, 8(CPHptr)
vmadn vica, vico, vSHIFT[2]
ldv vdata0[8], 0(CAptr)
vmudl vphaddr, vfco, vchparmz[5]
ldv vcoef0[8], 0(PHptr)
lh CAptr, 2(iCAptr)
lh PHptr, 2(CPHptr)
ldv vdata1[0], 0(CAptr)
vmudn vphaddr, vphaddr, vSHIFT[4]
ldv vcoef1[0], 0(PHptr)
lh CAptr, 10(iCAptr)
lh PHptr, 10(CPHptr)
vmadn vphaddr, vONE, vchparmz[3]
ldv vdata1[8], 0(CAptr)
ldv vcoef1[8], 0(PHptr)
lh CAptr, 4(iCAptr)
lh PHptr, 4(CPHptr)
ldv vdata2[0], 0(CAptr)
ldv vcoef2[0], 0(PHptr)
lh CAptr, 12(iCAptr)
lh PHptr, 12(CPHptr)
ldv vdata2[8], 0(CAptr)
ldv vcoef2[8], 0(PHptr)
lh CAptr, 6(iCAptr)
#
# vol: start of the volume update - multiply the current volume
# (vfcvol / vicvol) by vchparmz[1]; the result is shifted by vSHIFT[3]
# further down.
#
vol: vmudl vtemp0, vfcvol, vchparmz[1]
lh PHptr, 6(CPHptr)
vmadn vtemp0, vicvol, vchparmz[1]
ldv vdata3[0], 0(CAptr)
ldv vcoef3[0], 0(PHptr)
lh CAptr, 14(iCAptr)
lh PHptr, 14(CPHptr)
ldv vdata3[8], 0(CAptr)
ldv vcoef3[8], 0(PHptr)
# for some reason, I cannot move the following instructions up in parallel
# with the loads without breaking the envelope; should track down some day
vmadm vtemp1, vicvol, vSHIFT[0]
vmudn vfcvol, vtemp0, vSHIFT[3]
#
# tVol is 0 or 0x4000 (set in initVol), so bgtz is taken exactly when
# the tested bit was set.  The vmadh after the branch sits in its delay
# slot and runs on both paths.
#
bgtz tVol, atkVol # if integer bit set, volume is increasing
vmadh vicvol, vtemp1, vSHIFT[3]
#
# not-taken path: clamp with vge against vchparmz[0] (presumably keeps
# a falling volume from passing below the target - TODO confirm), then
# jump over atkVol; the first vmulf is duplicated in the jump delay slot.
#
vge vicvol, vicvol, vchparmz[0]
j blab
vmulf vout0, vdata0, vcoef0 # copy of instruction before blab
#
# taken path: clamp with vcl against vchparmz[0] (presumably limits a
# rising volume at the target - TODO confirm), then fall into blab.
#
atkVol: vcl vicvol, vicvol, vchparmz[0]
vmulf vout0, vdata0, vcoef0
#
# blab: second part of the main loop.
#   1. finish the element-wise products data * coef (vout0 was already
#      computed on both paths leading here).
#   2. fold each product vector with the [1q] and [2h] element selectors
#      so that elements 0 and 4 hold the sum of their group of four.
#   3. gather the 8 sums into vout: voutN[0h] is combined with the
#      constant vector named after the two output positions it feeds
#      (vZEROFOUR, vONEFIVE, vTWOSIX, vTHREESEVEN - presumably selection
#      masks, TODO confirm against the constants table).
#   4. store the addresses computed during this pass for the next pass,
#      scale the output by the integer volume and store it.
#
# OutPtr is advanced by 0x10 before the store, so the first vector is
# written at (initial OutPtr + 0x10).
# NOTE(review): no visible instruction initialises OutPtr - confirm its
# starting value matches OUTPUT_SND_DATA, which the done code writes back.
#
blab: vmulf vout1, vdata1, vcoef1
vmulf vout2, vdata2, vcoef2
vmulf vout3, vdata3, vcoef3
# horizontal sums, step 1: pairs
vadd vout0, vout0, vout0[1q]
vadd vout1, vout1, vout1[1q]
vadd vout2, vout2, vout2[1q]
vadd vout3, vout3, vout3[1q]
# horizontal sums, step 2: groups of four
vadd vout0, vout0, vout0[2h]
vadd vout1, vout1, vout1[2h]
vadd vout2, vout2, vout2[2h]
vadd vout3, vout3, vout3[2h]
# gather the 8 results into one vector
vmudn vout, vZEROFOUR, vout0[0h]
vmadn vout, vONEFIVE, vout1[0h]
vmadn vout, vTWOSIX, vout2[0h]
vmadn vout, vTHREESEVEN, vout3[0h]
addi OutPtr, OutPtr, 0x10
addi loopctl, loopctl, -8 # 8 samples done
# publish the addresses computed during this pass for the next pass
sqv vica[0], 0(iCAptr)
# these two instructions shared one source line; they are kept in the
# same order, one instruction per line
vmulf vout, vout, vicvol
sqv vphaddr[0], 0(CPHptr)
# the sqv below is in the delay slot of blez: the output vector is
# stored on the final pass as well
blez loopctl, done
sqv vout, 0(OutPtr)
j loop
lh CAptr, 0(iCAptr) # copy of beginning of loop
done:
#
# dump output back to dram
#
# Same register setup as the DMA reads in init, but the length goes to
# DMA_WRITE_LENGTH: DMEM address and DRAM address are both
# OUTPUT_SND_DATA and the length value is (SND_BUFF_SIZEB - 1).
# NOTE(review): unlike the four reads, this transfer is not followed by
# a bc0t wait loop; only two nops separate it from the break - confirm
# that stopping here cannot cut the transfer short.
#
addi dmembase, zero, OUTPUT_SND_DATA
addi $2, zero, OUTPUT_SND_DATA
addi $3, zero, eval(SND_BUFF_SIZEB - 1)
mtc0 dmembase, DMA_CACHE
mtc0 $2, DMA_DRAM
mtc0 $3, DMA_WRITE_LENGTH
nop
nop
break