gtvtx.s 10.1 KB

/*
 * Copyright 1995, Silicon Graphics, Inc.
 * ALL RIGHTS RESERVED
 *
 * UNPUBLISHED -- Rights reserved under the copyright laws of the United
 * States.   Use of a copyright notice is precautionary only and does not
 * imply publication or disclosure.
 *
 * U.S. GOVERNMENT RESTRICTED RIGHTS LEGEND:	
 * Use, duplication or disclosure by the Government is subject to restrictions
 * as set forth in FAR 52.227.19(c)(2) or subparagraph (c)(1)(ii) of the Rights
 * in Technical Data and Computer Software clause at DFARS 252.227-7013 and/or
 * in similar or successor clauses in the FAR, or the DOD or NASA FAR
 * Supplement.  Contractor/manufacturer is Silicon Graphics, Inc.,
 * 2011 N. Shoreline Blvd. Mountain View, CA 94039-7311.
 *
 * THE CONTENT OF THIS WORK CONTAINS CONFIDENTIAL AND PROPRIETARY
 * INFORMATION OF SILICON GRAPHICS, INC. ANY DUPLICATION, MODIFICATION,
 * DISTRIBUTION, OR DISCLOSURE IN ANY FORM, IN WHOLE, OR IN PART, IS STRICTLY
 * PROHIBITED WITHOUT THE PRIOR EXPRESS WRITTEN PERMISSION OF SILICON
 * GRAPHICS, INC.
 *
 */
	
/*
 * File:		gtvtx.s
 * Creator:		hsa@sgi.com
 * Create Date:		Thu Oct 12 11:05:46 PDT 1995
 *
 * This file processes the 'vertex list' of the object, in the TURBO 3D
 * ucode.
 *
 */

 ###########################################################################
 #
 # Transform, project, and viewport map the points in the points buffer.
 #
 # This version does 4 points per loop iteration, 2 per vector register
 # (the 12 registers hold the first pair, the 34 registers the second).
 #
 # WARNING: many of the constants, pipelining, etc. reflect the
 # layout of the points buffer, etc. Be careful.
 #
 # Register aliases used by VtxProc (released again after the routine).
 #
 # Scalar registers:
 #   n      vertex count, loaded from RSP_STATE_VTXCOUNT
 #   v0     index of the first vertex, loaded from RSP_STATE_VTXV0
 #   voutp  points-buffer address the results are stored to
 #   tmp    scratch (byte offsets, table base addresses)
 #   i      number of vertices still to be written; reused to hold the
 #          state flag once the loop has finished
 #

.name n,	$1
.name v0,	$2
.name voutp,	$3
.name tmp,	$4
.name i,	$5
	
	# Loop-invariant vector registers:
	#   mtx0-mtx3  rows loaded from RSP_CURR_MPMTX_OFFSET + 0..24,
	#              used in the vmadh terms of the matrix multiply
	#   mtf0-mtf3  rows loaded from RSP_CURR_MPMTX_OFFSET + 32..56,
	#              used in the vmudn/vmadn terms (presumably the
	#              fraction half of the matrix - TODO confirm layout)
	#   wscl       element 0 holds RSP_STATE_PERSPNORM
	#   vpscale    viewport scale (RSP_VIEWPORT_SX), both halves
	#   vptrans    viewport translate (RSP_VIEWPORT_TX), both halves
.name mtx0,	$v0
.name mtx1,	$v1
.name mtx2,	$v2
.name mtx3,	$v3
.name mtf0,	$v4
.name mtf1,	$v5
.name mtf2,	$v6
.name mtf3,	$v7
.name wscl,	$v8
.name vpscale,	$v9
.name vptrans,	$v10
	
	# Working set for the first pair of points. The i / f suffixes name
	# the registers that receive the high and low accumulator results
	# (integer and fraction parts) of each stage:
	#   vin    input points
	#   vout   matrix multiply result
	#   persp  result of the w scaling and of the projection
	#   invW   reciprocal of w
	#   scrn   screen coordinates after the viewport mapping
.name vin12,	$v11
.name vout12i,	$v12
.name vout12f,	$v13
.name persp12i,	$v14
.name persp12f,	$v15
.name invW12i,	$v16
.name invW12f,	$v17
.name scrn12i,	$v18
.name scrn12f,	$v19
	
	# Same working set for the second pair of points.
.name vin34,	$v20
.name vout34i,	$v21
.name vout34f,	$v22
.name persp34i,	$v23
.name persp34f,	$v24
.name invW34i,	$v25
.name invW34f,	$v26
.name scrn34i,	$v27
.name scrn34f,	$v28
	
VtxProc:
	# Entry point of the vertex processing routine.
	# Reads the vertex count and first-vertex index from the state
	# block, primes the first four input points, and loads the matrix,
	# viewport and perspective scale into vector registers before
	# falling into xfm_loop. The return address is spilled to
	# RSP_RETURN_SAVE here and reloaded at xfm_done_done (presumably
	# because the jal calls in xfm_done overwrite it).
		lb	n, RSP_STATE_VTXCOUNT(rsp_state)
		lb	v0, RSP_STATE_VTXV0(rsp_state)
 		addi	in_bufp, zero, RSP_POINTS_OFFSET
	
	# bail out if no vertices:
	# (the sw sits in the branch delay slot, so the return address is
	# saved on both paths; xfm_done_done reloads it)
		beq	n, zero, xfm_done_done
		sw	return, RSP_RETURN_SAVE(zero)

		addi	i, n, 0			# initialize loop counter
	
	# handle loads where v0 is not zero...
	# Input and output pointers start at the same address: the results
	# are stored back into the points-buffer entries they were read from.
		addi	voutp, zero, RSP_POINTS_OFFSET
		sll	tmp, v0, 4	# offset = v0 *	sizeof(point_buffer)
		add	voutp, voutp, tmp	# voutp = RSP_POINTS_OFFSET + v0*16
		add	in_bufp, in_bufp, tmp	# in_bufp = RSP_POINTS_OFFSET + v0*16

	# load first points to transform
	# (each ldv fills half a register: bytes 0-7 take one point, bytes
	# 8-15 the point 16 bytes further on; vin34 takes the next two)
		ldv	vin12[0],  0(in_bufp)
		ldv	vin12[8], 16(in_bufp)
		ldv	vin34[0], (32 +  0)(in_bufp)
		ldv	vin34[8], (32 + 16)(in_bufp)
	
	# get transformation matrix
	# Every row is loaded into both halves of its register so that the
	# same matrix is applied to both points held in a vin register.
                addi    tmp, zero, RSP_CURR_MPMTX_OFFSET
                ldv     mtx0[0],  0(tmp)
                ldv     mtx1[0],  8(tmp)
                ldv     mtx2[0], 16(tmp)
                ldv     mtx3[0], 24(tmp)
                ldv     mtf0[0], 32(tmp)
                ldv     mtf1[0], 40(tmp)
                ldv     mtf2[0], 48(tmp)
                ldv     mtf3[0], 56(tmp)
                ldv     mtx0[8],  0(tmp)
                ldv     mtx1[8],  8(tmp)
                ldv     mtx2[8], 16(tmp)
                ldv     mtx3[8], 24(tmp)
                ldv     mtf0[8], 32(tmp)
                ldv     mtf1[8], 40(tmp)
                ldv     mtf2[8], 48(tmp)
                ldv     mtf3[8], 56(tmp)
	
	# vtmp is a short-lived alias; it is released again right after
	# the vpscale correction below.
.name vtmp,	$v29
	# get OpenGL scale:
		lqv     vtmp[0], VOPENGL_OFFSET(zero)

	# load the viewport. Remember that these guys have 1 bit of
	# fraction, so we must account for that later...
	# (scale and translate are duplicated into both register halves,
	# one copy per point)
		addi    tmp, zero, RSP_VIEWPORT_OFFSET
		ldv     vpscale[0], RSP_VIEWPORT_SX(tmp)
		ldv     vptrans[0], RSP_VIEWPORT_TX(tmp)
		ldv     vpscale[8], RSP_VIEWPORT_SX(tmp)
		ldv     vptrans[8], RSP_VIEWPORT_TX(tmp)
	
	# get perspective normalization scale:
	# (only element 0 is loaded; the loop always reads it as wscl[0])
		lsv     wscl[0], RSP_STATE_PERSPNORM(rsp_state)
	
	# correct the vpscale to match OpenGL... (bogus)
	# (done once here, outside the loop)
		vmudh   vpscale, vpscale, vtmp
.unname vtmp
xfm_loop:
	# One pass transforms four points: two in the 12 registers and two
	# in the 34 registers. All four are always computed; only the
	# stores at the bottom are skipped once the counter i runs out.
	# Scalar-unit instructions interleaved with the vector code are
	# indented one level less than the vector instructions.
	#
 	# do MP matrix multiplication:	
	# This is clever. We multiply each ROW of the matrix
	# by one of the scalar point coordinates, using the
	# accumulator to sum up the matrix columns.
	# This is the fastest [1x4][4x4] multiply.
	# ([0h], [1h], [2h] pick coordinate 0, 1 or 2 of each point and
	# apply it across that point's half of the register)
		vmudn	vout12f, mtf0, vin12[0h]
		vmadh	vout12f, mtx0, vin12[0h]
		vmadn	vout12f, mtf1, vin12[1h]
		vmadh	vout12f, mtx1, vin12[1h]
		vmadn	vout12f, mtf2, vin12[2h]
		vmadh	vout12f, mtx2, vin12[2h]
		vmadn	vout12f, mtf3, vconst[1]	# w = 1.0
		vmadh	vout12i, mtx3, vconst[1]	# w = 1.0
	
		vmudn	vout34f, mtf0, vin34[0h]
		vmadh	vout34f, mtx0, vin34[0h]
		vmadn	vout34f, mtf1, vin34[1h]
		vmadh	vout34f, mtx1, vin34[1h]
		vmadn	vout34f, mtf2, vin34[2h]
		vmadh	vout34f, mtx2, vin34[2h]
		vmadn	vout34f, mtf3, vconst[1]	# w = 1.0
		vmadh	vout34i, mtx3, vconst[1]	# w = 1.0
	
	# step the input pointer past the four points of this pass; the
	# loads further down fetch the points for the next pass.
	addi	in_bufp, in_bufp, (4*RSP_PTS_LEN)	# next vtx

        # scale down w:
	# (the whole vector is scaled by wscl[0]; the closing vmadn with
	# vconst[0] reads back the low accumulator half - presumably
	# vconst[0] is 0 so nothing is added, confirm against the vconst
	# table)
		vmudl   persp12f, vout12f, wscl[0]
                vmadm   persp12i, vout12i, wscl[0]
                vmadn   persp12f, vconst, vconst[0]
	
		vmudl   persp34f, vout34f, wscl[0]
                vmadm   persp34i, vout34i, wscl[0]
                vmadn   persp34f, vconst, vconst[0]

	# calculate 1/w:
	# Element 3 is w of the first point in a register, element 7 w of
	# the second. vrcph feeds the integer half of the operand and
	# returns the integer half of the previous result; vrcpl feeds the
	# fraction half and returns the fraction of the new result. The
	# middle vrcph therefore collects the first result while starting
	# the second, and the last one (operand vconst[0]) only collects.
		vrcph	invW12i[3], persp12i[3]
		vrcpl	invW12f[3], persp12f[3]
		vrcph	invW12i[3], persp12i[7]
		vrcpl	invW12f[7], persp12f[7]
		vrcph	invW12i[7], vconst[0]
	
		vrcph	invW34i[3], persp34i[3]
		vrcpl	invW34f[3], persp34f[3]
		vrcph	invW34i[3], persp34i[7]
		vrcpl	invW34f[7], persp34f[7]
		vrcph	invW34i[7], vconst[0]
	
	# rescale the reciprocal by vconst[2] (value not visible in this
	# file - TODO confirm the intended factor)
		vmudn	invW12f, invW12f, vconst[2]
		vmadh	invW12i, invW12i, vconst[2]
		vmadn	invW12f, vconst, vconst[0]

		vmudn	invW34f, invW34f, vconst[2]
		vmadh	invW34i, invW34i, vconst[2]
		vmadn	invW34f, vconst, vconst[0]

	# no newton's on w-divide?

	# project (multiply by 1/w):
	# (the four partial products frac*frac, int*frac, frac*int and
	# int*int of the transformed point and 1/w; [3h] applies each
	# point's own 1/w across its half of the register)
		vmudl	persp12f, vout12f, invW12f[3h]
		vmadm	persp12f, vout12i, invW12f[3h]
		vmadn	persp12f, vout12f, invW12i[3h]
		vmadh	persp12i, vout12i, invW12i[3h]
	
		vmudl	persp34f, vout34f, invW34f[3h]
		vmadm	persp34f, vout34i, invW34f[3h]
		vmadn	persp34f, vout34f, invW34i[3h]
		vmadh	persp34i, vout34i, invW34i[3h]
	
        # scale down x,y to compensate for prev scaling down of w (DxF -> D)
	# vout12i is not read again after the projection above, so it is
	# reused here to hold the screen clamp values (both halves) for
	# the clamp further down.
		vmudl   persp12f, persp12f, wscl[0]
	ldv	vout12i[0], VCONST_SCREENCLAMP(zero)
                vmadm   persp12i, persp12i, wscl[0]
	ldv	vout12i[8], VCONST_SCREENCLAMP(zero)
                vmadn   persp12f, vconst, vconst[0]
	
		vmudl   persp34f, persp34f, wscl[0]
                vmadm   persp34i, persp34i, wscl[0]
                vmadn   persp34f, vconst, vconst[0]

	# image space (viewport scale and translate)
	#
 	# The viewport scale and translate has a built-in multiplier
	# of 4.0 which converts screen coords to S11.2. (BOGUS!)

	# screen translate:
		vmudh	scrn12f, vptrans, vconst[1]	# use accumulator
	
	# screen scale:
	# (load next points while we do this)
	# vin12 and vin34 are no longer needed by this pass, so they are
	# refilled from the already advanced in_bufp for the next pass.
		vmadn	scrn12f, persp12f, vpscale	# adds to translate
	ldv	vin12[0], 0(in_bufp)		# load first vert to transform
		vmadh	scrn12i, persp12i, vpscale
	ldv	vin12[8], RSP_PTS_LEN(in_bufp)	# load 2nd vert to transform
		vmadn	scrn12f, vconst, vconst[0]

		vmudh	scrn34f, vptrans, vconst[1]
		vmadn	scrn34f, persp34f, vpscale
	ldv	vin34[0], (32+0)(in_bufp)
		vmadh	scrn34i, persp34i, vpscale
	ldv	vin34[8], (32+RSP_PTS_LEN)(in_bufp)
		vmadn	scrn34f, vconst, vconst[0]
	
	# clamp to screen coordinates:
	# (vlt keeps the smaller value, so this is an upper bound only.
	# Both pairs use the clamp held in vout12i. The scalar addi
	# counts the first point of this pass.)
		vlt	scrn12i, scrn12i, vout12i[0q]	# clamp xy 0x3fe, z max
	addi	i, i, -1

		vlt	scrn34i, scrn34i, vout12i[0q]
	
	# round the screen coordinates to nearest integer pixel.
	# this helps prevent cracks in the reduced-precision triangle setup.
	# (add vconst[2], then mask with vconst[6] - presumably half a
	# pixel and a mask clearing the fraction bits; confirm against
	# the vconst table)
		vadd	scrn12i, scrn12i, vconst[2]
		vadd	scrn34i, scrn34i, vconst[2]
		vand	scrn12i, scrn12i, vconst[6]
		vand	scrn34i, scrn34i, vconst[6]

	# output transformed vertex information:
	# (only screen points were modified)
	# Per point: sdv stores the four integer screen values at
	# RSP_PTS_XS, ssv stores the fraction of element 2 at RSP_PTS_ZSF.
	# The first point is stored unconditionally; the loop is only
	# entered with at least one vertex left.
 		sdv	scrn12i[0], RSP_PTS_XS(voutp)
 		ssv	scrn12f[4], (0*RSP_PTS_LEN+RSP_PTS_ZSF)(voutp)
	
	# maybe write the 2nd point:
	# (leave when no vertices remain; the addi in the delay slot
	# counts the point about to be written)
		blez	i, xfm_done
		addi	i, i, -1
 		sdv	scrn12i[8], (1*RSP_PTS_LEN+RSP_PTS_XS)(voutp)
 		ssv	scrn12f[12], (1*RSP_PTS_LEN+RSP_PTS_ZSF)(voutp)
			
	# maybe write the 3rd point:
		blez	i, xfm_done
		addi	i, i, -1
 		sdv	scrn34i[0], (2*RSP_PTS_LEN+RSP_PTS_XS)(voutp)
 		ssv	scrn34f[4], (2*RSP_PTS_LEN+RSP_PTS_ZSF)(voutp)
			
	# maybe write the 4th point:
		blez	i, xfm_done
		addi	i, i, -1
 		sdv	scrn34i[8], (3*RSP_PTS_LEN+RSP_PTS_XS)(voutp)
 		ssv	scrn34f[12], (3*RSP_PTS_LEN+RSP_PTS_ZSF)(voutp)
			
	# prepare for the next pair of points:
	# (the voutp advance sits in the delay slot and also runs on
	# fall-through; voutp is not used after the loop)
		bgtz	i, xfm_loop		# for (i=n_pts; i>0; i--) {
		addi	voutp, voutp, (4*RSP_PTS_LEN)
	
xfm_done:
	# All requested vertices have been transformed. When the state
	# flag has GT_FLAG_XFM_ONLY set, DMA the transformed points back
	# out; otherwise go straight to the common exit.
	#
	# check state for write-back
	# (n and v0 are reloaded from the state block, in_bufp is reset to
	# the start of the points buffer, i is reused for the flag)
		lb	i, RSP_STATE_FLAG(rsp_state)	# prepare for vtx
		lb	n, RSP_STATE_VTXCOUNT(rsp_state)
		lb	v0, RSP_STATE_VTXV0(rsp_state)
		andi	i, i, GT_FLAG_XFM_ONLY
 		addi	in_bufp, zero, RSP_POINTS_OFFSET

	# check state, see if we need to write back transformed points
	# NOTE: the sll below occupies the branch delay slot and therefore
	# also executes when the branch is taken. That is harmless, it
	# only writes the scratch register tmp.
		beq	i, zero, xfm_done_done

	# Write back the transformed points. This assumes that
	# triCount was 0, and we can use the triangle pointer to point
	# to a buffer to write the points.

	# handle stores where v0 is not zero...
		sll	tmp, v0, 4	# offset = v0 *	sizeof(point_buffer)
		add	in_bufp, in_bufp, tmp	# in_bufp = RSP_POINTS_OFFSET + v0*16

	# write back transformed vertices to where gtTriN points:
	#   $19 = gfx2, handed to AddrFixup
	#   $20 = address of the first transformed point (in_bufp)
	#   $18 = n*16 - 1
	#   $17 = 1
	# $20 must be a register copy of in_bufp. The previous form,
	# addi with in_bufp in the immediate field, never passed on the
	# address computed above. Like the original instruction, the copy
	# executes in the delay slot of the jal.
		add	$19, zero, gfx2
		jal	AddrFixup
		add	$20, zero, in_bufp
		sll	n, n, 4
 		addi	$18, n, -1
		jal	DMAproc
		addi	$17, zero, 1

	# we don't need to DMAwait here, there is one immediately
	# following in the main routine that called us...

xfm_done_done:
	# Common exit, also reached directly from VtxProc when there are no
	# vertices: reload the return address that was saved at entry and
	# return to the caller. The nop fills the delay slot of the jr.
		lw	return, RSP_RETURN_SAVE(zero)
		jr	return
		nop	
	
	# release the scalar register aliases declared for VtxProc
.unname n
.unname v0
.unname voutp
.unname tmp
.unname i
	
	# release the loop-invariant vector aliases
.unname mtx0
.unname mtx1
.unname mtx2
.unname mtx3
.unname mtf0
.unname mtf1
.unname mtf2
.unname mtf3
.unname wscl
.unname vpscale
.unname vptrans
	
	# release the working set of the first pair of points
.unname vin12
.unname vout12i
.unname vout12f
.unname persp12i
.unname persp12f
.unname invW12i
.unname invW12f
.unname scrn12i
.unname scrn12f

	# release the working set of the second pair of points
.unname vin34
.unname vout34i
.unname vout34f
.unname persp34i
.unname persp34f
.unname invW34i
.unname invW34f
.unname scrn34i
.unname scrn34f
	
 #
 #
 #
 ############################################################################