gtvtx.s
10.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
/*
* Copyright 1995, Silicon Graphics, Inc.
* ALL RIGHTS RESERVED
*
* UNPUBLISHED -- Rights reserved under the copyright laws of the United
* States. Use of a copyright notice is precautionary only and does not
* imply publication or disclosure.
*
* U.S. GOVERNMENT RESTRICTED RIGHTS LEGEND:
* Use, duplication or disclosure by the Government is subject to restrictions
* as set forth in FAR 52.227.19(c)(2) or subparagraph (c)(1)(ii) of the Rights
* in Technical Data and Computer Software clause at DFARS 252.227-7013 and/or
* in similar or successor clauses in the FAR, or the DOD or NASA FAR
* Supplement. Contractor/manufacturer is Silicon Graphics, Inc.,
* 2011 N. Shoreline Blvd. Mountain View, CA 94039-7311.
*
* THE CONTENT OF THIS WORK CONTAINS CONFIDENTIAL AND PROPRIETARY
* INFORMATION OF SILICON GRAPHICS, INC. ANY DUPLICATION, MODIFICATION,
* DISTRIBUTION, OR DISCLOSURE IN ANY FORM, IN WHOLE, OR IN PART, IS STRICTLY
* PROHIBITED WITHOUT THE PRIOR EXPRESS WRITTEN PERMISSION OF SILICON
* GRAPHICS, INC.
*
*/
/*
* File: gtvtx.s
* Creator: hsa@sgi.com
* Create Date: Thu Oct 12 11:05:46 PDT 1995
*
* This file processes the 'vertex list' of the object, in the TURBO 3D
* ucode.
*
*/
###########################################################################
#
# Transform, project, and viewport map the points in the points buffer.
#
# This version does 4 points per loop iteration, 2 per vector register.
#
# WARNING: many of the constants, pipelining, etc. reflect the
# layout of the points buffer, etc. Be careful.
#
# ---- VtxProc register aliases ------------------------------------------
# These names are released again by the .unname list at the end of the
# routine.
#
# Scalar registers:
#   n     - vertex count, loaded from RSP_STATE_VTXCOUNT
#   v0    - index of the first vertex, loaded from RSP_STATE_VTXV0
#   voutp - pointer to where transformed points are stored
#   tmp   - scratch / address temporary
#   i     - number of points still to be stored
.name n, $1
.name v0, $2
.name voutp, $3
.name tmp, $4
.name i, $5
# Vector registers:
#   mtx0..mtx3 - the 8-byte matrix rows at offsets 0..24 of the MP matrix
#   mtf0..mtf3 - the 8-byte matrix rows at offsets 32..56 of the MP matrix
#                (presumably integer and fraction halves respectively,
#                 judging by their use with vmadh / vmadn -- confirm)
#   wscl       - perspective normalization scale (element 0 only)
#   vpscale, vptrans - viewport scale and translate
.name mtx0, $v0
.name mtx1, $v1
.name mtx2, $v2
.name mtx3, $v3
.name mtf0, $v4
.name mtf1, $v5
.name mtf2, $v6
.name mtf3, $v7
.name wscl, $v8
.name vpscale, $v9
.name vptrans, $v10
# Per-point working registers.  The "12" set holds points 1 and 2 of the
# current group (elements 0-3 and 4-7), the "34" set holds points 3 and 4.
# The i / f suffixes presumably denote integer and fraction parts.
.name vin12, $v11
.name vout12i, $v12
.name vout12f, $v13
.name persp12i, $v14
.name persp12f, $v15
.name invW12i, $v16
.name invW12f, $v17
.name scrn12i, $v18
.name scrn12f, $v19
.name vin34, $v20
.name vout34i, $v21
.name vout34f, $v22
.name persp34i, $v23
.name persp34f, $v24
.name invW34i, $v25
.name invW34f, $v26
.name scrn34i, $v27
.name scrn34f, $v28
# VtxProc: transform the points in the points buffer in place, then
# (at xfm_done) optionally write them back out.
# Reads RSP_STATE_VTXCOUNT and RSP_STATE_VTXV0 from rsp_state.
# rsp_state, in_bufp, return, zero and vconst are defined outside this
# section of the file.
VtxProc:
lb n, RSP_STATE_VTXCOUNT(rsp_state)
lb v0, RSP_STATE_VTXV0(rsp_state)
addi in_bufp, zero, RSP_POINTS_OFFSET
# bail out if no vertices:
beq n, zero, xfm_done_done
# (branch delay slot) the return address is saved on both paths;
# xfm_done_done reloads it from this same slot.
sw return, RSP_RETURN_SAVE(zero)
addi i, n, 0 # initialize loop counter
# handle loads where v0 is not zero...
addi voutp, zero, RSP_POINTS_OFFSET
sll tmp, v0, 4 # offset = v0 * sizeof(point_buffer)
add voutp, voutp, tmp # voutp = RSP_POINTS_OFFSET + v0*16
add in_bufp, in_bufp, tmp # in_bufp = RSP_POINTS_OFFSET + v0*16
# voutp and in_bufp now hold the same address: results are stored over
# the points they were computed from.
# load first points to transform
# Each ldv brings in 8 bytes; point 1 goes to the low half of vin12,
# point 2 to the high half, points 3 and 4 likewise into vin34.
# NOTE(review): the strides here are the literals 16 and 32 while the
# loop uses RSP_PTS_LEN -- assumes RSP_PTS_LEN == 16; confirm.
ldv vin12[0], 0(in_bufp)
ldv vin12[8], 16(in_bufp)
ldv vin34[0], (32 + 0)(in_bufp)
ldv vin34[8], (32 + 16)(in_bufp)
# get transformation matrix
# Every row is loaded twice, into the low half ([0]) and the high
# half ([8]) of its register, so that one vector operation applies the
# same row to both points held in a vin register.
addi tmp, zero, RSP_CURR_MPMTX_OFFSET
ldv mtx0[0], 0(tmp)
ldv mtx1[0], 8(tmp)
ldv mtx2[0], 16(tmp)
ldv mtx3[0], 24(tmp)
ldv mtf0[0], 32(tmp)
ldv mtf1[0], 40(tmp)
ldv mtf2[0], 48(tmp)
ldv mtf3[0], 56(tmp)
ldv mtx0[8], 0(tmp)
ldv mtx1[8], 8(tmp)
ldv mtx2[8], 16(tmp)
ldv mtx3[8], 24(tmp)
ldv mtf0[8], 32(tmp)
ldv mtf1[8], 40(tmp)
ldv mtf2[8], 48(tmp)
ldv mtf3[8], 56(tmp)
# vtmp is a short-lived alias, released again right after vpscale has
# been corrected below.
.name vtmp, $v29
# get OpenGL scale:
lqv vtmp[0], VOPENGL_OFFSET(zero)
# load the viewport. Remember that these guys have 1 bit of
# fraction, so we must account for that later...
# As with the matrix, scale and translate are duplicated into both
# halves of their registers.
addi tmp, zero, RSP_VIEWPORT_OFFSET
ldv vpscale[0], RSP_VIEWPORT_SX(tmp)
ldv vptrans[0], RSP_VIEWPORT_TX(tmp)
ldv vpscale[8], RSP_VIEWPORT_SX(tmp)
ldv vptrans[8], RSP_VIEWPORT_TX(tmp)
# get perspective normalization scale:
lsv wscl[0], RSP_STATE_PERSPNORM(rsp_state)
# correct the vpscale to match OpenGL... (bogus)
vmudh vpscale, vpscale, vtmp
.unname vtmp
# xfm_loop: one pass transforms, projects, viewport-maps and stores up to
# four points (vin12 = points 1,2; vin34 = points 3,4).
# On entry vin12/vin34 already hold the points to process; the points for
# the following pass are loaded part-way through this one.
# The statement order is hand-interleaved (scalar loads between vector
# operations, counter updates in branch delay slots) -- do not reorder.
xfm_loop:
# do MP matrix multiplication:
# This is clever. We multiply each ROW of the matrix
# by one of the scalar point coordinates, using the
# accumulator to sum up the matrix columns.
# This is the fastest [1x4][4x4] multiply.
# [0h], [1h], [2h] pick x, y, z of each of the two points in the
# register; the fourth row is multiplied by vconst[1] in place of w.
vmudn vout12f, mtf0, vin12[0h]
vmadh vout12f, mtx0, vin12[0h]
vmadn vout12f, mtf1, vin12[1h]
vmadh vout12f, mtx1, vin12[1h]
vmadn vout12f, mtf2, vin12[2h]
vmadh vout12f, mtx2, vin12[2h]
vmadn vout12f, mtf3, vconst[1] # w = 1.0
vmadh vout12i, mtx3, vconst[1] # w = 1.0
vmudn vout34f, mtf0, vin34[0h]
vmadh vout34f, mtx0, vin34[0h]
vmadn vout34f, mtf1, vin34[1h]
vmadh vout34f, mtx1, vin34[1h]
vmadn vout34f, mtf2, vin34[2h]
vmadh vout34f, mtx2, vin34[2h]
vmadn vout34f, mtf3, vconst[1] # w = 1.0
vmadh vout34i, mtx3, vconst[1] # w = 1.0
# vin12/vin34 are no longer needed; advance the input pointer by four
# points so the loads further down fetch the next group.
addi in_bufp, in_bufp, (4*RSP_PTS_LEN) # next vtx
# scale down w:
# (the whole vector is scaled by wscl[0]; only elements 3 and 7, the
# w of each point, are read by the reciprocal below)
# NOTE(review): the trailing "vmadn ..., vconst, vconst[0]" used here
# and in later steps presumably adds zero (vconst[0] == 0) just to read
# back the low accumulator word -- confirm against the vconst table.
vmudl persp12f, vout12f, wscl[0]
vmadm persp12i, vout12i, wscl[0]
vmadn persp12f, vconst, vconst[0]
vmudl persp34f, vout34f, wscl[0]
vmadm persp34i, vout34i, wscl[0]
vmadn persp34f, vconst, vconst[0]
# calculate 1/w:
# Element 3 is w of the first point in a register, element 7 is w of
# the second.  The vrcph/vrcpl pairs are chained: each vrcph feeds the
# high half in and collects the high half of the previous result.
vrcph invW12i[3], persp12i[3]
vrcpl invW12f[3], persp12f[3]
vrcph invW12i[3], persp12i[7]
vrcpl invW12f[7], persp12f[7]
vrcph invW12i[7], vconst[0]
vrcph invW34i[3], persp34i[3]
vrcpl invW34f[3], persp34f[3]
vrcph invW34i[3], persp34i[7]
vrcpl invW34f[7], persp34f[7]
vrcph invW34i[7], vconst[0]
# rescale the reciprocal by vconst[2]
# (value of vconst[2] is defined elsewhere -- not visible here)
vmudn invW12f, invW12f, vconst[2]
vmadh invW12i, invW12i, vconst[2]
vmadn invW12f, vconst, vconst[0]
vmudn invW34f, invW34f, vconst[2]
vmadh invW34i, invW34i, vconst[2]
vmadn invW34f, vconst, vconst[0]
# no newton's on w-divide?
# project (multiply by 1/w):
# [3h] broadcasts each point's 1/w (elements 3 and 7) across that
# point's four elements.
vmudl persp12f, vout12f, invW12f[3h]
vmadm persp12f, vout12i, invW12f[3h]
vmadn persp12f, vout12f, invW12i[3h]
vmadh persp12i, vout12i, invW12i[3h]
vmudl persp34f, vout34f, invW34f[3h]
vmadm persp34f, vout34i, invW34f[3h]
vmadn persp34f, vout34f, invW34i[3h]
vmadh persp34i, vout34i, invW34i[3h]
# scale down x,y to compensate for prev scaling down of w (DxF -> D)
# vout12i is dead after the projection above, so it is reused here to
# hold the screen clamp values (both halves) for the vlt further down.
vmudl persp12f, persp12f, wscl[0]
ldv vout12i[0], VCONST_SCREENCLAMP(zero)
vmadm persp12i, persp12i, wscl[0]
ldv vout12i[8], VCONST_SCREENCLAMP(zero)
vmadn persp12f, vconst, vconst[0]
vmudl persp34f, persp34f, wscl[0]
vmadm persp34i, persp34i, wscl[0]
vmadn persp34f, vconst, vconst[0]
# image space (viewport scale and translate)
#
# The viewport scale and translate has a built-in multiplier
# of 4.0 which converts screen coords to S11.2. (BOGUS!)
# screen translate:
vmudh scrn12f, vptrans, vconst[1] # use accumulator
# screen scale:
# (load next points while we do this)
# These loads are unconditional: on the final pass they still execute
# and the loaded values are simply never used.
vmadn scrn12f, persp12f, vpscale # adds to translate
ldv vin12[0], 0(in_bufp) # load first vert to transform
vmadh scrn12i, persp12i, vpscale
ldv vin12[8], RSP_PTS_LEN(in_bufp) # load 2nd vert to transform
vmadn scrn12f, vconst, vconst[0]
vmudh scrn34f, vptrans, vconst[1]
vmadn scrn34f, persp34f, vpscale
ldv vin34[0], (32+0)(in_bufp)
vmadh scrn34i, persp34i, vpscale
ldv vin34[8], (32+RSP_PTS_LEN)(in_bufp)
vmadn scrn34f, vconst, vconst[0]
# clamp to screen coordinates:
# Both registers are clamped against vout12i, which now holds the
# VCONST_SCREENCLAMP values loaded above.
vlt scrn12i, scrn12i, vout12i[0q] # clamp xy 0x3fe, z max
# count the first point of this group (it is stored unconditionally)
addi i, i, -1
vlt scrn34i, scrn34i, vout12i[0q]
# round the screen coordinates to nearest integer pixel.
# this helps prevent cracks in the reduced-precision triangle setup.
# (add vconst[2], then mask with vconst[6]; both constants are defined
# elsewhere)
vadd scrn12i, scrn12i, vconst[2]
vadd scrn34i, scrn34i, vconst[2]
vand scrn12i, scrn12i, vconst[6]
vand scrn34i, scrn34i, vconst[6]
# output transformed vertex information:
# (only screen points were modified)
# Per point: 8 bytes of the integer result go to RSP_PTS_XS, and one
# halfword of the fraction register (byte offset 4 within the point's
# half, i.e. the z slot of the x,y,z,w layout) goes to RSP_PTS_ZSF.
sdv scrn12i[0], RSP_PTS_XS(voutp)
ssv scrn12f[4], (0*RSP_PTS_LEN+RSP_PTS_ZSF)(voutp)
# maybe write the 2nd point:
# Each blez tests the count left after the previous store; the addi
# after it sits in the branch delay slot and counts the next point.
blez i, xfm_done
addi i, i, -1
sdv scrn12i[8], (1*RSP_PTS_LEN+RSP_PTS_XS)(voutp)
ssv scrn12f[12], (1*RSP_PTS_LEN+RSP_PTS_ZSF)(voutp)
# maybe write the 3rd point:
blez i, xfm_done
addi i, i, -1
sdv scrn34i[0], (2*RSP_PTS_LEN+RSP_PTS_XS)(voutp)
ssv scrn34f[4], (2*RSP_PTS_LEN+RSP_PTS_ZSF)(voutp)
# maybe write the 4th point:
blez i, xfm_done
addi i, i, -1
sdv scrn34i[8], (3*RSP_PTS_LEN+RSP_PTS_XS)(voutp)
ssv scrn34f[12], (3*RSP_PTS_LEN+RSP_PTS_ZSF)(voutp)
# prepare for the next group of four points:
# (the voutp advance is in the branch delay slot; falling out of the
# loop continues at xfm_done)
bgtz i, xfm_loop # for (i=n_pts; i>0; i--) {
addi voutp, voutp, (4*RSP_PTS_LEN)
# xfm_done: reached when all requested points have been transformed.
# If GT_FLAG_XFM_ONLY is set in RSP_STATE_FLAG, the transformed points
# (n points starting at index v0 of the points buffer) are written back
# out via AddrFixup / DMAproc; otherwise control goes straight to
# xfm_done_done.
# n, v0 and in_bufp were modified or may be stale after the loop, so they
# are rebuilt from rsp_state here.  i is reused as a flag temporary.
xfm_done:
# check state for write-back
lb i, RSP_STATE_FLAG(rsp_state) # prepare for vtx
lb n, RSP_STATE_VTXCOUNT(rsp_state)
lb v0, RSP_STATE_VTXV0(rsp_state)
andi i, i, GT_FLAG_XFM_ONLY
addi in_bufp, zero, RSP_POINTS_OFFSET
# check state, see if we need to write back transformed points
beq i, zero, xfm_done_done
# Write back the transformed points. This assumes that
# triCount was 0, and we can use the triangle pointer to point
# to a buffer to write the points.
# handle stores where v0 is not zero...
# (the sll below sits in the branch delay slot of the beq above; it
# only touches the scratch register tmp, so it is harmless when the
# branch is taken)
sll tmp, v0, 4 # offset = v0 * sizeof(point_buffer)
add in_bufp, in_bufp, tmp # in_bufp = RSP_POINTS_OFFSET + v0*16
# write back transformed vertices to where gtTriN points:
# $17..$20 are presumably the argument registers of AddrFixup/DMAproc
# ($19 = DRAM address, $20 = DMEM address, $18 = length - 1,
# $17 = write flag) -- confirm against those routines.
add $19, zero, gfx2
jal AddrFixup
# (delay slot) copy the DMEM source address.  This must be the
# register form "add": in_bufp is a register alias, not a constant, so
# the immediate form "addi" cannot carry its value.
add $20, zero, in_bufp
sll n, n, 4 # n = byte length of n points (16 bytes each)
addi $18, n, -1
jal DMAproc
addi $17, zero, 1 # (delay slot)
# we don't need to DMAwait here, there is one immediately
# following in the main routine that called us...
# xfm_done_done: common exit of VtxProc.
# Reloads the return address that VtxProc stored at RSP_RETURN_SAVE on
# entry (the jal instructions on the write-back path overwrite the link
# register, presumably the one aliased as "return") and jumps back to the
# caller.
xfm_done_done:
lw return, RSP_RETURN_SAVE(zero)
jr return
nop
# release every register alias that was declared for VtxProc, in the same
# order as the .name list at the top of the routine.
.unname n
.unname v0
.unname voutp
.unname tmp
.unname i
.unname mtx0
.unname mtx1
.unname mtx2
.unname mtx3
.unname mtf0
.unname mtf1
.unname mtf2
.unname mtf3
.unname wscl
.unname vpscale
.unname vptrans
.unname vin12
.unname vout12i
.unname vout12f
.unname persp12i
.unname persp12f
.unname invW12i
.unname invW12f
.unname scrn12i
.unname scrn12f
.unname vin34
.unname vout34i
.unname vout34f
.unname persp34i
.unname persp34f
.unname invW34i
.unname invW34f
.unname scrn34i
.unname scrn34f
#
#
#
############################################################################