/* gmtx.s -- G_MTX command processing, plus the MatCat, getScaleTrans and getMatrix utility routines. */
############################################################################
#
# Process the G_MTX command.
#
# in_bufp holds pointer to data
#
#define FASTMTX
#ifdef FASTMTX
#
# case_G_MTX -- FASTMTX variant.
#
# Inputs read below:
#   param ($1)  G_MTX_* flag bits (PROJECTION, LOAD and PUSH are tested)
#   in_bufp     pointer to the incoming matrix, read as four quads at
#               byte offsets 0, 16, 32 and 48
#
# Flow:
#   G_MTX_PROJECTION set -> mat_p points at the projection matrix and the
#                           push step is bypassed (mtx_StartProj).
#   G_MTX_PUSH set       -> the current model matrix is DMAed out to the
#                           matrix stack, unless the stack pointer already
#                           equals the stack max.
#   G_MTX_LOAD set       -> the incoming matrix overwrites the current one.
#   G_MTX_LOAD clear     -> MatCat(incoming, current) is built in scratch
#                           and then copied over the current one (mtx_Mul).
#   Last step            -> tail-call MatCat to rebuild the combined matrix
#                           at RSP_CURR_MPMTX_OFFSET; the return register is
#                           loaded from GFXDONE(zero) in the delay slot.
#
# Every branch and jump here has a live delay slot: the instruction that
# follows it executes whether or not the branch is taken.
#
.name param, $1 # G_MTX_* flag bits
.name do_load, $7 # nonzero when G_MTX_LOAD is set, zero selects multiply
.name do_proj, $8 # flag for if we're doing PROJECTION...
.name mstack_sz, $12 # whichever matrix stack size we need...
.name mstack_p, $19 # pointer to whichever matrix stack we need...
.name mat_p, $20 # pointer to whichever mat_p we are using...
.name mtx0, $v29 # part 1/4 of the matrix (offset 0)
.name mtx1, $v28 # part 2/4 of the matrix (offset 16)
.name mtx2, $v27 # part 3/4 of the matrix (offset 32)
.name mtx3, $v26 # part 4/4 of the matrix (offset 48)
case_G_MTX:
sbv vconst[6],RSP_STATE_L_LEN(rsp_state) # hi bit causes light recalc
andi do_proj, param, G_MTX_PROJECTION # doing projection matrix?
bne do_proj, zero, mtx_StartProj # if yes, do projection
andi do_load, param, G_MTX_LOAD # do load or mul? (delay slot, runs on both paths)
### BRANCH OCCURS TO mtx_StartProj: IF DOING PROJECTION MATRIX
.unname do_proj
.name do_push, $8 # $8 is reused now that do_proj is dead
addi mat_p, zero, RSP_CURR_MMTX_OFFSET # point to model matrix
andi do_push, param, G_MTX_PUSH # do push or not?
beq do_push, zero, mtx_PushDone # skip push if not
lqv mtx3[0], 48(in_bufp) # get part 4/4 of matrix (delay slot, runs on both paths)
### BRANCH OCCURS TO mtx_PushDone: IF NOT PUSHING
.unname do_push
.name mstack_max, $8
.unname param
.name mstack_next, $1 # $1 is reused, param is no longer needed past this point
lw mstack_p, RSP_STATE_MMTX_STACK_P(rsp_state) # top of matrix stack
lw mstack_max, RSP_STATE_MMTX_STACK_MAX(rsp_state) # end of matrix stack
addi $17, zero, 1 # do a DMA WRITE
addi mstack_next,mstack_p, 64 # point to next spot on stack
beq mstack_p, mstack_max, mtx_PushDone # skip if stack full
# NOTE(review): the non-FASTMTX variant of this routine places the value 63
# in $18 (mat_sz) before calling DMAproc, while this variant places it in
# $12. Confirm against DMAproc which register carries the DMA length.
addi mstack_sz, zero, 63 # 64 byte matrix
### BRANCH OCCURS TO mtx_PushDone IF STACK FULL
jal DMAproc # dma current matrix to stack
sw mstack_next, RSP_STATE_MMTX_STACK_P(rsp_state) # update stack pointer (delay slot)
### BRANCH OCCURS TO SUBROUTINE DMAproc:
# The delay slot of the jal below is the lqv at mtx_PushDone (assuming the
# .name/.unname directives in between emit no code), so DMAwait returns to
# the beq that follows that lqv rather than to the lqv itself.
jal DMAwait # wait for DMA to finish
.unname mstack_next
.name Mp, $1 ###########################
.name Tp, $2 ## IMPORTANT ##
.name Np, $3 ## These register names ##
.name Ni, $v5 ## should match the ones ##
.name Nf, $v6 ## in the MatCat routine ##
###########################
mtx_PushDone:
lqv mtx1[0], 16(in_bufp) # get part 2/4 of matrix
beq do_load, zero, mtx_Mul # branch to mul if multiplying
lqv mtx2[0], 32(in_bufp) # get part 3/4 of matrix (delay slot, runs on both paths)
### BRANCH OCCURS TO mtx_Mul: IF MULTIPLYING NEW MATRIX
sqv mtx3[0], 48(mat_p) # store part 4/4 of matrix
lqv mtx0[0], 0(in_bufp) # load part 1/4 of matrix
sqv mtx1[0], 16(mat_p) # store part 2/4 of matrix
# mtx_Store is shared by the load path (falls in from above) and the
# multiply path (jumps in from mtx_Mul). Both arrive with parts 2/4 and 4/4
# already stored and parts 1/4 and 3/4 waiting in mtx0 and mtx2.
mtx_Store:
addi $3, zero, RSP_CURR_MPMTX_OFFSET # where to put MP matrix
sqv mtx2[0], 32(mat_p) # store part 3/4 of matrix
sqv mtx0[0], 0(mat_p) # store part 1/4 of matrix
mtx_MxP:
addi $1, zero, RSP_CURR_MMTX_OFFSET # Model matrix ptr for MP mult
addi $2, zero, RSP_CURR_PMTX_OFFSET # Project matrix ptr for MP mult
j MatCat # concatenate M and P matrix (tail call, no link)
lh return, GFXDONE(zero) # return to GfxDone (delay slot: MatCat will jr to this)
mtx_StartProj:
lqv mtx3[0], 48(in_bufp) # get part 4/4 of matrix
j mtx_PushDone # return to load & multiply proj mtx
addi mat_p, zero, RSP_CURR_PMTX_OFFSET # point to projection matrix (delay slot)
### BRANCH OCCURS TO mtx_PushDone:
mtx_Mul:
addiu $3, zero, ((RSP_SCRATCH_OFFSET+15) & 0xfffffff0) # put multiplied mtx here (scratch rounded up to a multiple of 16)
addu $1, zero, in_bufp # ptr to new matrix
jal MatCat # concatenate new and old matrix
addu $2, zero, mat_p # old matrix pointer (delay slot)
### BRANCH OCCURS TO SUBROUTINE MatCat:
# The four instructions below rely on the exit state of the FASTMATCAT
# variant of MatCat: Ni and Nf still hold the second half it stored
# (result offsets 16 and 48) and Np has been advanced by 32 from its entry
# value, so 0(Np) is result offset 32 and -32(Np) is result offset 0.
sqv Nf[0], 48(mat_p) # store part 4/4 of mult'd matrix
sqv Ni[0], 16(mat_p) # store part 2/4 of mult'd matrix
lqv mtx2[0], 0(Np) # get part 3/4 of mult'd matrix
j mtx_Store # continue storing mult'd matrix
lqv mtx0[0], -32(Np) # get part 1/4 of mult'd matrix (delay slot)
### BRANCH OCCURS TO mtx_Store:
.unname do_load
.unname mstack_sz
.unname mstack_p
.unname mat_p
.unname mtx0
.unname mtx1
.unname mtx2
.unname mtx3
.unname mstack_max
.unname Mp
.unname Tp
.unname Np
.unname Ni
.unname Nf
#else /* FASTMTX ***********************************************************/
#
# case_G_MTX -- non-FASTMTX variant (assembled only when FASTMTX is not
# defined).
#
# Reads the G_MTX_* flag bits from $1 and the incoming matrix from in_bufp.
# Steps: optional push of the current model matrix to the matrix stack,
# copy of the incoming matrix either straight into the current matrix
# (LOAD) or into scratch (MUL), multiply via MatCat in the MUL case, then a
# final MatCat to rebuild the matrix at RSP_CURR_MPMTX_OFFSET before
# jumping to GfxDone.
#
# The aliases four and jj are declared below but never referenced.
#
.name param, $5
case_G_MTX:
add param, zero, $1 # move param somewhere safe
.name tmp, $7
.name do_proj, $8 # flag for if we're doing PROJECTION...
.name mmat_p, $9 # pointer to MODELVIEW top of stack in DMEM
.name pmat_p, $10 # pointer to PROJECTION top of stack in DMEM
.name mpmat_p, $11 # pointer to MODELVIEW*PROJECTION in DMEM
.name mstack_sz, $12 # whichever matrix stack size we need...
.name np, $13
.name pp, $14
.name mstack_p, $19 # pointer to whichever matrix stack we need...
.name mat_p, $20 # pointer to whichever mat_p we are using...
.name mtx0, $v29
.name mtx1, $v28
.name mtx2, $v27
.name mtx3, $v26
.ent case_G_MTX
# get stack and matrix pointers...
addi np, rsp_state, RSP_STATE_MMTX_N
addi pp, rsp_state, RSP_STATE_MMTX_STACK_P
addi mmat_p, zero, RSP_CURR_MMTX_OFFSET
addi pmat_p, zero, RSP_CURR_PMTX_OFFSET
addu mat_p, mmat_p, zero
#
# check for PROJECTION matrix, correct pointers for the stacks, etc...
andi do_proj, param, G_MTX_PROJECTION
beq do_proj, zero, mtx_TryPush
andi tmp, param, G_MTX_PUSH # delay slot...
addu mat_p, pmat_p, zero
# THERE IS NO PROJECTION MTX STACK!
# Mon Jan 2 17:00:43 PST 1995
addi tmp, zero, 0 # don't allow PROJECTION push!
mtx_TryPush:
beq tmp, zero, mtx_NoPush
# note delay slot (the lw below also runs when the branch is taken)
lw mstack_p, 0(pp)
lb mstack_sz, 0(np)
# push current matrix before doing anything...
.name ten, $15
.name mat_sz, $18
# check matrix stack depth: the push is skipped when the count equals 10
addi ten, zero, 10
beq ten, mstack_sz, mtx_PushDone
addi mat_sz, zero, 63 # delay slot
# fire off DMA transfer (do we need to wait? YES!)
jal DMAproc
addi $17, zero, 1 # delay slot
addi mstack_p, mstack_p, 64
jal DMAwait # wait for DMA
addi mstack_sz, mstack_sz, 1 # delay slot
# Reached either after a push (pointer and count both bumped) or from the
# depth check above (both values written back unchanged).
mtx_PushDone:
sw mstack_p, 0(pp)
sb mstack_sz, 0(np)
.unname mstack_p
.unname mstack_sz
.unname np
.unname pp
.unname mat_sz
.unname ten
.name out_p, $12
.name four, $13
.name i, $14
.name jj, $15
.name in_p, $16
#.name tmpi, $17
#.name tmpf, $18
.name tmpdata, $v1
mtx_NoPush:
addi mpmat_p, zero, RSP_CURR_MPMTX_OFFSET # MOVED FROM ABOVE
#
# determine where to load the incoming matrix. Either to
# scratch (for MUL case), or directly to the right place (LOAD)...
#
addi i, zero, 15
addiu out_p, zero, ((RSP_SCRATCH_OFFSET+15) & 0xfffffff0)
andi tmp, param, G_MTX_LOAD
beq tmp, zero, p_loop # branch if MUL
addu in_p, in_bufp, zero # in delay slot
addu out_p, mat_p, zero
# load incoming matrix:
# The loop body runs 16 times (i = 15 down to 0). Each pass reads 4 bytes
# from in_p and writes two 2-byte pieces, one at out_p and one 32 bytes
# above it. i is -1 when the loop exits.
p_loop: llv tmpdata[0], 0(in_p) # read int, frac
addi in_p, in_p, 4
ssv tmpdata[0], 0(out_p) # write int
ssv tmpdata[2], 32(out_p) # write frac
addi out_p, out_p, 2
bgtz i, p_loop
addi i, i, -1 # delay slot
sb i,RSP_L_NUM(zero) # set sign bit (for lighting)
bne tmp, zero, mtx_MxP # branch if LOAD
# note delay slot (it is the addu into $3 further down)
.unname tmpdata
#.unname tmpi
#.unname tmpf
#
# this code assumes we can't MUL on a projection matrix.
# a reasonable assumption, but we might remove it later if
# code space permits (ha!).
#
# Order of multiply: C' = MC
# where C is current, and M is the new (incoming) matrix
#
# fill $1, $2, $3 (assumes no save of $31 needed!)
addu $3, mpmat_p, zero # delay slot
addiu $1, zero, ((RSP_SCRATCH_OFFSET+15) & 0xfffffff0)
jal MatCat
addu $2, mat_p, zero # delay slot
# read matrix back (no restore of $31 is actually performed here)
# quad loads require proper alignment...
lqv mtx0[0], 0(mpmat_p) # move to correct DMEM
lqv mtx1[0], 16(mpmat_p) # 2 rows at a time...
lqv mtx2[0], 32(mpmat_p)
lqv mtx3[0], 48(mpmat_p)
sqv mtx0[0], 0(mat_p) # store back to DMEM
sqv mtx1[0], 16(mat_p)
sqv mtx2[0], 32(mat_p)
sqv mtx3[0], 48(mat_p)
.unname out_p
.unname four
.unname i
.unname jj
.unname in_p
# compute MxP, ...
mtx_MxP:
# fill $1, $2, $3 (assumes no save of $31 needed!)
addi $3, zero, RSP_CURR_MPMTX_OFFSET
addi $1, zero, RSP_CURR_MMTX_OFFSET
jal MatCat
addi $2, zero, RSP_CURR_PMTX_OFFSET #delay slot
j GfxDone # all finished.
nop
.end case_G_MTX
.unname tmp
.unname do_proj
.unname mmat_p
.unname pmat_p
.unname mpmat_p
.unname mat_p
.unname mtx0
.unname mtx1
.unname mtx2
.unname mtx3
.unname param
#endif /* FASTMTX */
/******* utility routines *****/
#####################################################################
#
# 4x4 matrix multiply routine: N = MT
# At this point,
# $1 holds DMEM pointer to M (row major)
# $2 holds DMEM pointer to T (row major)
# $3 holds DMEM pointer to N (row major)
#
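# Uses registers: $18, $19 (both variants), $16 (non-FASTMATCAT variant only)
# $v1, $v2, $v3, $v4, $v5, $v6
# The FASTMATCAT variant also advances $1 and $3 by 32 (leaving $2 at its
# entry value) and returns with $v5/$v6 holding the last half it stored.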
#
# WARNING: Does the write while we compute, so don't let
# the destination be the same as one of the sources.
#
# WARNING: Be careful these registers used don't overlap with
# any global registers.
#
# the following implementation is optimized for code space.
#
#define FASTMATCAT
#ifdef FASTMATCAT
#
# MatCat -- FASTMATCAT variant. Computes N = M * T.
#
# Layout used by the loads and stores below: the integer parts of a matrix
# are read at offset 0 from its pointer and the matching fraction parts 32
# bytes above, with 8 bytes per row.
#
# The outer loop (matCatHalf) runs twice and produces 16 bytes of integer
# result plus 16 bytes of fraction result per pass. The inner loop
# (matCatRow) runs four times per pass and accumulates one row of T scaled
# by one element from each of two rows of M.
#
# On exit: Mp and Np are each 32 above their entry values, Tp is back at
# its entry value, and Ni/Nf still hold the second half that was stored.
#
.name Mp, $1 # pointer to M matrix ##########################
.name Tp, $2 # pointer to T matrix ## IMPORTANT ##
.name Np, $3 # pointer to N matrix ## These register names ##
.name Ni, $v5 # N element integer (summed vector) ## must match the names ##
.name Nf, $v6 # N element frac (summed vector) ## in the mtx routine ##
##########################
.name Mdone, $18 # loop ends when this == Mp (4 loops)
.name ALLdone, $19 # function ends when this == Np (2 loops)
.name Mi, $v1 # M row integer (vector)
.name Mf, $v2 # M row frac (vector)
.name Ti, $v3 # T col integer (vector)
.name Tf, $v4 # T col frac (vector)
.ent MatCat
MatCat:
addi ALLdone,Np,16 # did whole thing when Np == Np0 + 16 (2 loops)
matCatHalf:
vmudh Ni,vconst,vconst[0] # zero accumulator (presumably vconst[0] is 0 -- confirm)
addi Mdone,Mp,8 # inner loop ends when Mp == Mp0 + 8 (4 loops)
matCatRow:
ldv Ti[0], 0(Tp) # load row of T mtx (2 times)
ldv Tf[0], 32(Tp) #
lqv Mi[0], 0(Mp) # load same column element of 2 rows of M mtx
lqv Mf[0], 32(Mp) #
ldv Ti[8], 0(Tp) # load another copy of same T mtx row
ldv Tf[8], 32(Tp) #
vmadl Nf, Tf, Mf[0h] # multiply 2 M elements * 2 T rows
addi Mp, Mp, 2 # next Mp element
vmadm Nf, Ti, Mf[0h] #
addi Tp, Tp, 8 # next Tp element
vmadn Nf, Tf, Mi[0h] #
vmadh Ni, Ti, Mi[0h] #
bne Mp, Mdone, matCatRow # Loop over 4 column elements
vmadn Nf, vconst, vconst[0] # needed to obtain sign of frac (delay slot, runs every pass)
### LOOP OCCURS 4 TIMES TO matCatRow:
addi Tp, Tp, -32 # start at top of T matrix again
addi Mp, Mp, 8 # do bottom half of mtx M & mtx N
sqv Ni[0], 0(Np) # store half of completed matrix
sqv Nf[0], 32(Np) # store half of completed matrix
bne Np, ALLdone, matCatHalf # finished after 2nd half done
addi Np, Np, 16 # ready to calculate bottom half of Matrix N (delay slot, also runs on the final pass)
### LOOP OCCURS HERE 2 TIMES TO matCatHalf:
jr return
nop
.end MatCat
.unname Mp
.unname Tp
.unname Np
.unname Mdone
.unname ALLdone
.unname Mi
.unname Mf
.unname Ti
.unname Tf
.unname Ni
.unname Nf
#else /* FASTMATCAT */
#
# MatCat -- non-FASTMATCAT variant (assembled only when FASTMATCAT is not
# defined). Computes N = M * T one output element at a time.
#
# moff steps through the row byte offsets 24, 16, 8, 0 and toff through the
# column byte offsets 6, 4, 2, 0. For each pair, a row of M is multiplied
# element-wise with a gathered column of T, the partial products are summed
# across the vector, and element 0 is stored at Np + moff + toff (integer)
# and 32 bytes above that (fraction).
#
# Mp, Tp and Np are not modified by this variant.
#
.name Mp, $1 # pointer to M matrix
.name Tp, $2 # pointer to T matrix
.name Np, $3 # pointer to N matrix
.name moff, $18 # row offset for M (and N) (also loop counter)
.name toff, $19 # col offset for T (and N) (also loop counter)
.name taddr, $16 # temporary address register
.name Mi, $v1 # M row integer (vector)
.name Mf, $v2 # M row frac (vector)
.name Ti, $v3 # T col integer (vector)
.name Tf, $v4 # T col frac (vector)
.name Ni, $v5 # N element integer (summed vector)
.name Nf, $v6 # N element frac (summed vector)
.ent MatCat
MatCat:
addi moff, zero, 24 # could be programmed for vtx
#
# loop over rows in M: (last to first)
#
rowloop: add taddr, Mp, moff # get M row
ldv Mf, 32(taddr)
ldv Mi, 0(taddr)
addi toff, zero, 6 # counter is byte offset
#
# loop over columns in T: (last to first)
#
colloop: add taddr, Tp, toff # get T column
# gather the column: one element from each row of T, rows are 8 bytes apart
lsv Ti[0], ( 0 + 0)(taddr)
lsv Tf[0], ( 0 + 32)(taddr)
lsv Ti[2], ( 8 + 0)(taddr)
lsv Tf[2], ( 8 + 32)(taddr)
lsv Ti[4], (16 + 0)(taddr)
lsv Tf[4], (16 + 32)(taddr)
lsv Ti[6], (24 + 0)(taddr)
lsv Tf[6], (24 + 32)(taddr)
# multiply vectors (compute N pointer at same time...)
vmudl Nf, Mf, Tf
add taddr, Np, moff
vmadm Nf, Mi, Tf
add taddr, taddr, toff # taddr = Np + moff + toff
vmadn Nf, Mf, Ti
vmadh Ni, Mi, Ti
# sum partial products (and decrement loop count)
addi toff, toff, -2
vaddc Nf, Nf, Nf[1q]
vadd Ni, Ni, Ni[1q]
vaddc Nf, Nf, Nf[2h]
vadd Ni, Ni, Ni[2h]
# store N
ssv Nf[0], 32(taddr)
bgez toff, colloop
ssv Ni[0], 0(taddr) # delay slot
addi moff, moff, -8
bgez moff, rowloop
nop
jr return
nop
.end MatCat
.unname Mp
.unname Tp
.unname Np
.unname toff
.unname taddr
.unname moff
.unname Mi
.unname Mf
.unname Ti
.unname Tf
.unname Ni
.unname Nf
#endif /* FASTMATCAT */
#
# end of matrix multiply...
#
#######################################################################
# ########################### CALLED BY VTX AND CLIP #####################
# This routine loads vpscale and vptrans with the screen scalefactors.
#
# getScaleTrans
#
# Register results on return:
#   vpscale ($v0)  viewport scale, loaded into both halves of the vector
#                  and then multiplied by the vector read from VOPENGL_OFFSET
#   vptrans ($v1)  viewport translate, loaded into both halves of the vector
#   wscl ($v19)    element 0 loaded from RSP_STATE_PERSPNORM
#   gmode ($3)     halfword loaded from RSP_STATE_RENDER
#   $8             left holding RSP_VIEWPORT_OFFSET
# $v3 is used as a temporary. The voutp alias is declared but not used here.
#
.name gmode, $3
.name voutp, $7
.name glscale, $v3
.name vpscale, $v0
.name vptrans, $v1
.name vp_base, $8
.name wscl, $v19
.ent getScaleTrans
getScaleTrans:
# Viewport scale and translate. These values carry 1 bit of fraction,
# which has to be accounted for later on.
addi vp_base, zero, RSP_VIEWPORT_OFFSET # dmembase repl by 0
ldv vpscale[0], RSP_VIEWPORT_SX(vp_base)
ldv vpscale[8], RSP_VIEWPORT_SX(vp_base)
ldv vptrans[0], RSP_VIEWPORT_TX(vp_base)
ldv vptrans[8], RSP_VIEWPORT_TX(vp_base)
# OpenGL scale factors, perspective normalization, render mode
lqv glscale[0], VOPENGL_OFFSET(zero) # dmembase repl by 0
lsv wscl[0], RSP_STATE_PERSPNORM(rsp_state)
lh gmode,(RSP_STATE_RENDER)(rsp_state) # for fog
# The OpenGL correction of vpscale happens in the return delay slot (bogus)
jr return
vmudh vpscale, vpscale, glscale
.end getScaleTrans
.unname wscl
.unname vp_base
.unname gmode
.unname voutp
.unname glscale
.unname vpscale
.unname vptrans
# ########################### CALLED BY VTX AND FLIGHT ###################
# This routine loads the matrix
#
# getMatrix / getMatrix3
#
# Loads the matrix stored at RSP_CURR_MPMTX_OFFSET into vector registers.
# Each 8-byte row is loaded into both halves of its register: integer rows
# 0..3 go to $v8..$v11 and the matching fraction rows (32 bytes above) go
# to $v12..$v15.
#
# getMatrix3 is a second entry point that skips the row 3 loads and the
# base pointer setup, so $8 must already hold the matrix base there.
# NOTE(review): the callers of getMatrix3 are not visible in this file.
#
.name mp_base, $8
.name mtx0, $v8
.name mtx1, $v9
.name mtx2, $v10
.name mtx3, $v11
.name mtf0, $v12
.name mtf1, $v13
.name mtf2, $v14
.name mtf3, $v15
.ent getMatrix
getMatrix:
addi mp_base, zero, RSP_CURR_MPMTX_OFFSET
# row 3, integer then fraction, low half then high half
ldv mtx3[0], 24(mp_base)
ldv mtx3[8], 24(mp_base)
ldv mtf3[0], 56(mp_base)
ldv mtf3[8], 56(mp_base)
getMatrix3:
# row 0
ldv mtx0[0], 0(mp_base)
ldv mtx0[8], 0(mp_base)
ldv mtf0[0], 32(mp_base)
ldv mtf0[8], 32(mp_base)
# row 1
ldv mtx1[0], 8(mp_base)
ldv mtx1[8], 8(mp_base)
ldv mtf1[0], 40(mp_base)
ldv mtf1[8], 40(mp_base)
# row 2 (the last load sits in the return delay slot)
ldv mtx2[0], 16(mp_base)
ldv mtx2[8], 16(mp_base)
ldv mtf2[0], 48(mp_base)
jr return
ldv mtf2[8], 48(mp_base)
.end getMatrix
.unname mp_base
.unname mtx0
.unname mtx1
.unname mtx2
.unname mtx3
.unname mtf0
.unname mtf1
.unname mtf2
.unname mtf3