glxsetup.s 21.4 KB


	/****** NOTE:
	 ******
	 ******	This code is the optimized version for HARDWARE 2!
	 ******
	 ****** It won't run (100%) on hardware 1.
	 ******
	 ******/
	
 ##########################################################################
 #
 # Triangle Setup Routine.
 # When entering this code we have a points buffer full of points,
 # and registers r1, r2, r3 point to the three vertices of a triangle.
 #
 ##########################################################################
	
#ifdef SETUP_ALONE
#include <rsp.h>
#include "mbi.h"

		.text	beginSetup
	
#include "gdmem.h"
#include "gfx_regs.h"
#endif	

 # ########################### CLIP TEST #################################
.name   minp,           $1      # the three vertex pointers (file header: r1, r2, r3)
.name   midp,           $2
.name   maxp,           $3
.name   tmp,            $8      # scratch: clip code of midp
.name   tmp2,           $9      # scratch: CLIPSWITCH byte, then clip code of minp
.name   ccor,           $11     # OR of all points' clip codes
.name   ccand,          $12     # AND of all points' clip codes
.name	rendState,	$13	# named here but first loaded in beginSetup; not un-named below

 #
 # clipAndSetup -- clip test that runs in front of the triangle setup.
 #
 #   Reads the RSP_PTS_CC halfword of each of the three vertices and
 #   forms their AND (ccand) and OR (ccor).
 #     - ccand, after masking, non-zero : branch to GfxDone
 #       (SetupReject when RETGFX1 is defined)  -> triangle rejected
 #     - ccor, after masking, non-zero  : branch to startClip
 #     - otherwise                      : fall through into beginSetup
 #   With CLIPSWITCH defined, a non-negative byte at RSP_STATE_RENDER+1
 #   branches straight to beginSetup before the codes are combined.
 #
                .ent    clipAndSetup
clipAndSetup:
 # ########################### CLIP TEST #################################
#ifdef	CLIPSWITCH
	lb	tmp2, (RSP_STATE_RENDER+1)(rsp_state)
#endif	
        lh      ccor, (RSP_PTS_CC)(maxp)	# or Clip Codes together &
        lh      tmp, (RSP_PTS_CC)(midp)	# and Clip Codes together
#ifdef	CLIPSWITCH
	bgez	tmp2, beginSetup
#endif
        lh      tmp2, (RSP_PTS_CC)(minp)	# clip code of minp (delay slot of the bgez above when CLIPSWITCH)
	and	ccand, ccor, tmp		# ccand = cc(maxp) & cc(midp)
	or      ccor, ccor, tmp                 # ccor  = cc(maxp) | cc(midp)
	and	ccand, ccand, tmp2		# ccand &= cc(minp)
#ifdef NEAR_CLIP_OFF
	andi	ccand, ccand, 0x7030		# only see reject + xyz, - xy
#else /* NEAR_CLIP_OFF */
	andi	ccand, ccand, 0x7070		# only see reject +/- xyz
#endif /* NEAR_CLIP_OFF */
#ifdef	RETGFX1
	/* handling that goes with the return_save => gfx1 change */
	bne	ccand, zero, SetupReject
#else
	bne	ccand, zero, GfxDone		# Trivial rejection ?
#endif
	or      ccor, ccor, tmp2                # delay slot: ccor |= cc(minp)
 ### BRANCH OCCURS TO GfxDone: IF TRIVIALLY REJECTED

 	andi	ccor, ccor, 0x4343		# only see clip/accept +/- xyz
	bne     ccor, zero, startClip           # if ccor is 0, no clipping

 ### JUMP OCCURS to doClip or startClip: IF clipping is necessary
 ### NOTE: delay slot is first instruction of beginSetup:
        
                .end    clipAndSetup

.unname ccor
.unname ccand
.unname minp
.unname midp
.unname maxp
.unname tmp
.unname tmp2

 # ########################### END CLIP TEST #############################

#define	LOWX	0	/* used to index elements of edge vectors */
#define	LOWY	1	/* LOW  pair: filled from vmax - vmid */
#define MIDX	2	/* MID  pair: filled from vmid - vmin */
#define MIDY	3
#define HIGHX	4	/* HIGH pair: filled from vmax - vmin */
#define HIGHY	5

/* scalar registers: */
.name	minp,		$1	# vertex pointers; reloaded in Y order by the sort in beginSetup
.name	midp,		$2
.name	maxp,		$3
.name	miny, 		$9	# RSP_PTS_YS halfword of minp

.name	tmp,		$7	# general scratch
.name	flatp,		$4	# vertex whose colour is used for all three corners when flat shading
.name	rdp_cmd,	$5	# command byte, stored at 0(outp)
.name	rdp_flg,	$6	# flag byte, stored at 1(outp); later reused for the ZBUFF mask test
.name	dscratchp,	$8	# set to RSP_SETUP_TMP_OFFSET; base of the scratch area
.name	midy, 		$10	# RSP_PTS_YS halfword of midp
.name	maxy, 		$11	# RSP_PTS_YS halfword of maxp
.name	negR, 		$12	# scratch for the sort; then the RSP_YSORT_NEG table byte

/* these are "global", used for both edge and attribute setup */
.name	DxXDyi,		$v0	# edge slopes (Ldx/Ldy, Mdx/Mdy, Hdx/Hdy), integer part
.name	DxXDyf,		$v1	# edge slopes, fraction part
.name	yf,		$v2	# NOTE(review): not referenced by any instruction in the visible source
.name	xHighf,		$v3	# NOTE(review): not referenced by any instruction in the visible source
.name	EDel,		$v4	# edge deltas, indexed with LOWX .. HIGHY
.name	invri,		$v27	# read as invri[3] in the attribute gradients; presumably written by NewtonDiv
.name	invrf,		$v26	# read as invrf[3] in the attribute gradients; presumably written by NewtonDiv

/* these registers are dynamic, allocated and released as they are used */
.name	ri,		$v29	# r read back from the accumulator with vsar (integer part by its name)
.name	rf,		$v28	# r read back from the accumulator with vsar (fraction part by its name)
.name	Hd,		$v9	# vmax - vmin
.name	Md,		$v10	# vmid - vmin
.name	td,		$v12	# vmin - vmid
.name	vmin, 		$v13	# element 0 = XS, element 1 = YS of minp
.name	vmid, 		$v14	
.name	vmax, 		$v15	
	
.name	frontrej, $14		# front-face cull test value
.name	backrej,  $15		# back-face cull test value
.name	bsignr,   $17		# element 0 of the cross product result (mfc2 from jnk)
	
.name	jnk,	$v16		# destination of the cross product multiplies
	
.name	allWi,	$v5		# per-vertex W (INVW under NEWCOM) in elements 0, 1, 2
.name	allWf,	$v6
.name   wscl,   $v21		# element 0 loaded from RSP_STATE_PERSPNORM
	
.name	nearWi,	$v19		# W value selected across the three vertices
.name	nearWf,	$v20
	

		.ent	beginSetup

 #
 # beginSetup -- start of triangle setup proper.
 #
 #   In:   minp/midp/maxp point at the three vertices (any order).
 #   This first part
 #     - loads screen X/Y and W (INVW under NEWCOM) of the vertices,
 #     - forms Hd.x*Md.y + td.x*Hd.y in element 0 (td = -Md),
 #     - rejects early when the G_CULL_BACK test hits,
 #     - sorts the three vertex pointers by screen Y using byte tables,
 #     - selects one W across the three vertices into nearWi/nearWf,
 #     - negates r when the sort table says so.
 #   Scalar loads/stores are interleaved with vector ops on purpose;
 #   keep the instruction order when editing.
 #
beginSetup:
	#
	#  vmin[0|1] = [ XS0 | YS0 ]   min
	#  vmid[0|1] = [ XS1 | YS1 ]   mid
	#  vmax[0|1] = [ XS2 | YS2 ]   max
	#  rendState = STATE_RENDER
	#
	#  Load the screen-space XS,YS of each vertex into registers
	#  and fetch the RENDER mode flags.
	#  (The first instruction also sits in the delay slot of the
	#  startClip branch at the end of clipAndSetup.)
	#
	llv	vmin[0], RSP_PTS_XS(minp)
	llv	vmid[0], RSP_PTS_XS(midp)	# element 1 is y
	llv	vmax[0], RSP_PTS_XS(maxp)	# element 0 is x,
	addi	dscratchp, zero, RSP_SETUP_TMP_OFFSET
	lw	rendState, RSP_STATE_RENDER(rsp_state)
	lsv     wscl[0], RSP_STATE_PERSPNORM(rsp_state)
#ifdef	NEWCOM
	lsv	allWi[0], RSP_PTS_INVW_INT (minp)	
			vsub	Md, vmid, vmin
	lsv	allWf[0], RSP_PTS_INVW_FRAC(minp)
			vsub	Hd, vmax, vmin
	lsv	allWi[2], RSP_PTS_INVW_INT (midp)
			vsub	td, vmin, vmid
	lsv	allWf[2], RSP_PTS_INVW_FRAC(midp)
	lsv	allWi[4], RSP_PTS_INVW_INT (maxp)
	lsv	allWf[4], RSP_PTS_INVW_FRAC(maxp)
#else	
	lsv	allWi[0], RSP_PTS_W_INT (minp)	
			vsub	Md, vmid, vmin
	lsv	allWf[0], RSP_PTS_W_FRAC(minp)
			vsub	Hd, vmax, vmin
	lsv	allWi[2], RSP_PTS_W_INT (midp)
			vsub	td, vmin, vmid
	lsv	allWf[2], RSP_PTS_W_FRAC(midp)
	lsv	allWi[4], RSP_PTS_W_INT (maxp)
	lsv	allWf[4], RSP_PTS_W_FRAC(maxp)
#endif
	# compute the partial products...
	# careful with the math here...
	# element 0 ends up as Hd.x*Md.y + td.x*Hd.y, and td = -Md
			vmudh	jnk, Hd, Md[1]
	lh	miny, RSP_PTS_YS(minp)		# get the y's (BEGIN SETUP)
			vmadh	jnk, td, Hd[1]
	lh	midy, RSP_PTS_YS(midp)
			vsar	rf, rf, rf[1]	# read the accumulator back into rf / ri
	lh	maxy, RSP_PTS_YS(maxp)
			vsar	ri, ri, ri[0]
	#
	# Do the Y-sort by table lookup & decide culling as early as possible.
	# For overall balance CULL_BACK gets priority and is handled first.
	#
	sll	backrej, rendState, 18	# move the G_CULL_BACK flag to the top bit
#ifdef	NEWCOM
	mfc2	bsignr, jnk[0]
	sh	minp, RSP_SETUP_TMP_OFFSET+0($0)
#else
	# this branch also multiplies allW by wscl[0]
		                vmudl   allWf, allWf, wscl[0]
	mfc2	bsignr, jnk[0]
		                vmadm   allWi, allWi, wscl[0]
	sh	minp, RSP_SETUP_TMP_OFFSET+0($0)
		                vmadn   allWf, vconst, vconst[0]
#endif
	# the three pointers are parked as halfwords at offsets +0/+2/+4
	# so that the sort below can fetch them back by table index
	sh	midp, RSP_SETUP_TMP_OFFSET+2($0)

	# negative only when both the shifted flag and bsignr have bit 31 set
	and	backrej, bsignr, backrej
	bltz	backrej, BackReject

	sh	maxp, RSP_SETUP_TMP_OFFSET+4($0)	# delay slot
	#
	# tmp = [ maxy<midy | midy<miny | miny<maxy ]
	#
	#  0:	maxy>=midy,midy>=miny,miny>=maxy    miny=midy=maxy
	#  1:	maxy>=midy,midy>=miny,miny< maxy    miny<midy<maxy
	#  2:	maxy>=midy,midy< miny,miny>=maxy    midy<maxy<miny
	#  3:	maxy>=midy,midy< miny,miny< maxy    midy<miny<maxy
	#  4:	maxy< midy,midy>=miny,miny>=maxy    maxy<miny<midy
	#  5:	maxy< midy,midy>=miny,miny< maxy    miny<maxy<midy
	#  6:	maxy< midy,midy< miny,miny>=maxy    maxy<midy<miny
	#  7:	maxy< midy,midy< miny,miny< maxy    --------------
	#
	# The vector ops interleaved here fold elements 1 and 2 of allW
	# into element 0 of nearW (compare, then merge); wscl is only a
	# throw-away destination from this point on.
	# NOTE(review): presumably this keeps the smallest W (vlt), or the
	# largest 1/W under NEWCOM (vge) -- confirm against the RSP manual.
	#
			vsubc	wscl,   allWf, allWf[1]
	slt	tmp,  maxy, midy
#ifdef	NEWCOM
			vge	nearWi, allWi, allWi[1]
#else
			vlt	nearWi, allWi, allWi[1]
#endif
	slt	negR, midy, miny
			vmrg	nearWf, allWf, allWf[1]
	add	tmp,  tmp,  tmp
	add	tmp,  tmp,  negR
	slt	negR, miny, maxy
	add	tmp,  tmp,  tmp
	add	tmp,  tmp,  negR		# tmp = 3-bit index described above
			vsubc	wscl,   nearWf, allWf[2]
	lbu	maxp, RSP_YSORT_MAX(tmp)	# table byte = offset of the saved pointer
#ifdef	NEWCOM
			vge	nearWi, nearWi, allWi[2]
#else
			vlt	nearWi, nearWi, allWi[2]
#endif
	lbu	midp, RSP_YSORT_MID(tmp)	
			vmrg	nearWf, nearWf, allWf[2]	
	lbu	minp, RSP_YSORT_MIN(tmp)	
	lh	maxp, RSP_SETUP_TMP_OFFSET(maxp)	# fetch the sorted pointers back
	lh	midp, RSP_SETUP_TMP_OFFSET(midp)
	lh	minp, RSP_SETUP_TMP_OFFSET(minp)
	lbu	negR, RSP_YSORT_NEG(tmp)	# > 0 : r is negated below
#ifdef	NEWCOM
	# NEWCOM: take the reciprocal of the selected value
			vrcph	jnk[0],    nearWi[0]
	llv	vmax[0], RSP_PTS_XS(maxp)	# element 0 is x,
			vrcpl	nearWf[0], nearWf[0]
	llv	vmid[0], RSP_PTS_XS(midp)	# element 1 is y
			vrcph	nearWi[0], vconst[0]
#else
	# otherwise: multiply the selected value by vconst1[5]
			vmudl	nearWf, nearWf, vconst1[5]
	llv	vmax[0], RSP_PTS_XS(maxp)	# element 0 is x,
			vmadm	nearWi, nearWi, vconst1[5]
	llv	vmid[0], RSP_PTS_XS(midp)	# element 1 is y
			vmadn	nearWf, vconst, vconst[0]		
#endif
	blez	negR, posiR
	llv	vmin[0], RSP_PTS_XS(minp)	# delay slot: reload the sorted min vertex
	# NOTE(review): negation is written as vconst - r, so it relies on
	# the vconst element used being 0 -- confirm
		vsubc	rf, vconst, rf		# negate R
		vsub	ri, vconst, ri
	posiR:
	#
	# From here the vertices are in sorted order.  This part
	#   - rejects when bsignr is zero,
	#   - applies the front-face cull test,
	#   - (DMANOWAIT) waits for the DMA to go idle and writes CMD_END,
	#   - rebuilds the edge deltas into EDel and writes the Y words,
	#   - calls NewtonDiv, then writes the edge coefficients,
	#   - pre-multiplies S,T by the W terms and parks them in scratch,
	#   - picks the colour source (flat or smooth shading).
	# Element selectors on lsv/ssv/llv/sdv/mfc2 are byte offsets in this
	# file (see the "*2" in xi[(LOWX*2)]), so vmax[2] is element 1 = y.
	#
	beq	bsignr, $0, SetupReject		# bsignr == 0 : nothing to draw
		vsub	EDel, vmax, vmid	# delay slot, low deltas
	sll	frontrej, rendState, 19		# move bit 12 of rendState to the top bit
		vsub	Md,   vmid, vmin
	nor	frontrej, frontrej, $0		# invert: top bit is 0 when the flag is set
		vsub	Hd,   vmax, vmin
	or	frontrej, bsignr, frontrej	# stays non-negative only when flag set and bsignr >= 0
		vmov	ri[3], ri[0]
#ifdef	DMANOWAIT
 #-YASU
 #  
 #  Check that the DMA into the FIFO buffer has finished,
 #  then update CMD_END.
 #  
 #-YASU
checkFIFO:
	mfc0	tmp, DMA_BUSY
		vmov	rf[3], rf[0]
	bgez	frontrej, BackReject
		vmov	EDel[MIDX],  Md[0]
	bne	tmp, zero, checkFIFO		# spin while DMA_BUSY reads non-zero
	lw	negR, RSP_STATE_FIFO_OUTP(rsp_state)	# delay slot (negR reused as scratch)
	mtc0	negR, CMD_END
#else
		vmov	rf[3], rf[0]
	bgez	frontrej, BackReject
		vmov	EDel[MIDX],  Md[0]
#ifdef	OUTPUT_DUMP
	jal	OutputOpen
	addi	$18, zero, 176-RSP_OUTPUT_END
#endif
#endif
	# Y of the max, mid and min vertex go to output bytes 2, 4 and 6
	ssv	vmax[2], 2(outp)
		vmov	EDel[MIDY],  Md[1]
	ssv	vmid[2], 4(outp)
		vmov	EDel[HIGHX], Hd[0]
	ssv	vmin[2], 6(outp)
		vmov	EDel[HIGHY], Hd[1]

	#
	# Compute 1/r
	#
	jal	NewtonDiv
	# (original note) bit 7 of tmp receives the sign of ri.
	# The register actually written here is rdp_flg.
	# NOTE(review): byte offset 5 presumably lines bit 7 up with the
	# sign bit of ri element 3 -- confirm
	mfc2	rdp_flg, ri[5]
	
.unname jnk
.unname	td
.unname	vmin
.unname	vmid
.unname	vmax
	
.unname	negR
.unname	frontrej
.unname	backrej
	
.unname	Hd
.unname	Md
	
.unname ri
.unname rf
			
.name	toutp,	$16	# NOTE(review): never used before it is un-named again

.name	invEDeli,	$v7
.name	invEDelf,	$v8
.name	EDeli,		$v10
.name	EDelf,		$v9
.name	invW1f,	$v11
.name	invW1i,	$v12
.name 	xi,	$v13
.name 	xf,	$v14

.name	vtmpi,	$v15
.name	vtmpf,	$v16
	
.name	ptTX2i,	$v22	# these registers hold S, T, 1/W
.name	ptTX2f,	$v29	# for each vertex.
.name	invW2f,	$v24
.name	invW2i,	$v25
	
	# Major-edge determination
	lb	rdp_cmd, RSP_STATE_TRI(rsp_state)

	# Ldx/Ldy, Mdx/Mdy, Hdx/Hdy:
			vmudm	EDeli, EDel,   vconst[4]
	lsv	xi[(LOWX*2)],  RSP_PTS_XS(midp)	
			vmadn	EDelf, vconst, vconst[0]	
	lsv	xi[(MIDX*2)],  RSP_PTS_XS(minp) # same as high
			vrcp	invEDelf[LOWY], EDel[LOWY]
	lsv	xi[(HIGHX*2)], RSP_PTS_XS(minp)
			vrcph	invEDeli[LOWY], vconst[0]
	ori	rdp_cmd, rdp_cmd, G_TRI_FILL
	
	# stick in tile number
	lb	tmp, RSP_STATE_TEX_TILE(rsp_state)
			vrcp	invEDelf[MIDY], EDel[MIDY]	# 1.0/Mdy
	ssv	nearWi[0], 68(dscratchp)	# park the selected W (int) in scratch
			vrcph	invEDeli[MIDY], vconst[0]
	lsv	invW1f[0], RSP_PTS_INVW_FRAC(minp)
			vrcp	invEDelf[HIGHY], EDel[HIGHY]	# 1.0/Hdy
	lsv	invW1f[8], RSP_PTS_INVW_FRAC(midp)
			vrcph	invEDeli[HIGHY], vconst[0]
	
	#
	# We used to shift down the rcp results all the way,
	# then do the multiply. If we don't shift it down all the
	# way, do the mult, then shift some more, we get better
	# precision on the degenerate cases.
	#
	lsv	invW1i[0], RSP_PTS_INVW_INT(minp)
			vmudm	xi, xi, vconst[4]
	lsv	invW1i[8], RSP_PTS_INVW_INT(midp)
			vmadn	xf, vconst, vconst[0]
	andi	rdp_flg, rdp_flg, 0x80		# keep only bit 7 of the mfc2 result
	or	rdp_flg, rdp_flg, tmp	# 3 cycles after load
	 		vmudl	invEDelf, invEDelf, vconst1[4]	# make S15.16
	sb	rdp_cmd, 0(outp)	# output rdp command
	 		vmadm	invEDeli, invEDeli, vconst1[4]
	sb	rdp_flg, 1(outp)	# output poly flag
	  		vmadn	invEDelf, vconst, vconst[0]
	ssv	xi[(LOWX*2)],   8(outp)	# output xLow
			vmudl	allWf, invW1f, nearWf[0]	# allW = invW1 * nearW[0]
	ssv	xf[(LOWX*2)],  10(outp)
			vmadm	allWf, invW1i, nearWf[0]
	ssv	xi[(HIGHX*2)], 16(outp)	# output xHigh
			vmadn	allWf, invW1f, nearWi[0]
	ssv	xf[(HIGHX*2)], 18(outp)
			vmadh	allWi, invW1i, nearWi[0]
 # NOTE(review): doreject is never .name-d in the visible source
.unname	doreject
.unname	bsignr	
	ssv	nearWf[0], 76(dscratchp)	# park the selected W (frac) in scratch
			vmudl	DxXDyf, invEDelf, EDelf[0q]	# Ldx / Ldy
	ssv	xi[(MIDX*2)],  24(outp)	# output xMid
			vmadm	DxXDyf, invEDeli, EDelf[0q]	# Mdx / Mdy
	ssv	xf[(MIDX*2)],  26(outp)
			vmadn	DxXDyf, invEDelf, EDeli[0q]	# Hdx / Hdy
.unname	EDelf
.unname xi
.unname xf
.name	ptTX1i,	$v9	# these registers hold S, T, 1/W
	llv	ptTX1i[0], RSP_PTS_S(minp)	# elements 0-1: S,T of minp
			vmadh	DxXDyi, invEDeli, EDeli[0q]
	llv	ptTX1i[8], RSP_PTS_S(midp)	# elements 4-5: S,T of midp
.unname	EDeli
.name	ptTX1f,	$v10	# for each vertex.
	 		vmudl	invEDelf, invEDelf, vconst[4]
	lsv	ptTX1i[ 4], VCONST1_OFFSET($0)	# = vmov ptTX1i[2],vconst1[0]
			vmadm	invEDeli, invEDeli, vconst[4]
	lsv	ptTX1i[12], VCONST1_OFFSET($0)	# = vmov ptTX1i[6],vconst1[0]
			vmadn	invEDelf, vconst, vconst[0]
	addi	return, gfx1, 0			# set the return address used by OutputClose
	 		vmudl	DxXDyf, DxXDyf, vconst[4]
			vmadm	DxXDyi, DxXDyi, vconst[4]
	lsv	invW2f[0], RSP_PTS_INVW_FRAC(maxp)
			vmadn	DxXDyf, vconst, vconst[0]
	lsv	invW2i[0], RSP_PTS_INVW_INT(maxp)
			vmudm	vtmpf,  ptTX1i, allWf[0h]	# ptTX1 = ptTX1i * allW
	llv	ptTX2i[0], RSP_PTS_S(maxp)	# elements 0-1: S,T of maxp
			vmadh	ptTX1i, ptTX1i, allWi[0h]
	lsv	ptTX2i[ 4], VCONST1_OFFSET($0)	# = vmov ptTX2i[2],vconst1[0]
			vmadn	ptTX1f, vconst, vconst[0]

	# the slope words below are stored relative to the advanced pointer
	addi	outp, outp, 32	# increment output pointer
			vcr	DxXDyi, DxXDyi, vconst1[6]
	ssv	DxXDyf[(LOWY*2)],  14-32(outp)
			vmudh	EDel, EDel, vconst[5]
	ssv	DxXDyf[(HIGHY*2)], 22-32(outp)
			vmudl	vtmpf, invW2f, nearWf[0]	# nearW = invW2 * nearW[0]

.unname miny
.unname midy
.unname maxy
	
.name	stmaxi,	$v17
.name	stmaxf,	$v18
	
	ssv	DxXDyf[(MIDY*2)], 30-32(outp)
			vmadm	nearWf, invW2i, nearWf[0]
 	sdv	ptTX1i[8],  32(dscratchp)	# scratch+32: mid vertex, integer part
			vmadn	nearWf, invW2f, nearWi[0]
	ssv	DxXDyi[(LOWY*2)],  12-32(outp)
			vmadh	nearWi, invW2i, nearWi[0]
	ssv	DxXDyi[(HIGHY*2)], 20-32(outp)
			vabs	allWi,  ptTX1i, ptTX1i
	ssv	DxXDyi[(MIDY*2)],  28-32(outp)
			vxor	allWf,	vconst, vconst	# allWf = 0
 # NOTE(review): xHighi and vtmp are never .name-d in the visible source
.unname xHighi
.unname vtmp

.unname	invW1f
.unname	invW1i
.unname	invW2f
.unname	invW2i
.unname wscl
	
	# tmp != 0 when any of Z-buffer / texture / shade is requested
	andi	tmp, rdp_cmd, (G_RDP_TRI_ZBUFF_MASK|G_RDP_TRI_TXTR_MASK|G_RDP_TRI_SHADE_MASK)
			vmudm	ptTX2f, ptTX2i, nearWf[0]  # Delay slot
	sdv	ptTX1i[0],  16(dscratchp)	# scratch+16: min vertex, integer part
			vmadh	ptTX2i, ptTX2i, nearWi[0]
	sdv	ptTX1f[8],  40(dscratchp)	# scratch+40: mid vertex, fraction part
			vmadn	ptTX2f, vconst, vconst[0]
	llv	nearWf[0],  40(dscratchp)	# reload the mid fraction words just stored
			# delay-V
	# no attributes requested: the edge coefficients are all there is
#ifdef	OUTPUT_DUMP
	blez	tmp, SetupReject
#else
	blez	tmp, OutputClose
#endif
			vmov	nearWi[0], allWi[4]	# delay slot
 	sdv	ptTX2i[0],  48(dscratchp)	# scratch+48: max vertex, integer part
			vmov	nearWi[1], allWi[5]
 	sdv	ptTX1f[0],  24(dscratchp)	# scratch+24: min vertex, fraction part
			vabs	ptTX2i, ptTX2i, ptTX2i
 	sdv	ptTX2f[0],  56(dscratchp)	# scratch+56: max vertex, fraction part

.unname	toutp	
.unname	vtmpi
.unname	vtmpf
.name	aminf,	$v16
.name	amidf,	$v24
.name	amaxf,	$v28
.name	amin,	$v15
.name	amid,	$v23
.name	amax,	$v25

	# the three fraction registers start as allWf (zero) + vconst1[5]
	 		vadd	aminf, allWf, vconst1[5]
	andi	tmp, rendState, G_SHADING_SMOOTH
	 		vadd	amidf, allWf, vconst1[5]
	bne	tmp, zero, smoothShade
	 		vadd	amaxf, allWf, vconst1[5]	# delay slot
	
	# flat shading: all three corners take the colour bytes of flatp
	luv	amin[0], RSP_PTS_R_NX(flatp)
			vge	stmaxi, allWi , ptTX2i
	luv	amid[0], RSP_PTS_R_NX(flatp)
			vmrg	stmaxf, ptTX1f, ptTX2f
	j	flatShade
	luv	amax[0], RSP_PTS_R_NX(flatp)	# delay slot
smoothShade:
	# Smooth shading: each corner takes the colour bytes of its own
	# vertex.  The vge/vmrg pair is the same one the flat path issues:
	# it compares allWi with ptTX2i and merges ptTX1f/ptTX2f accordingly
	# into stmaxi/stmaxf.  Execution then falls through into flatShade.
	luv	amin[0], RSP_PTS_R_NX(minp)
			vge	stmaxi, allWi , ptTX2i
	luv	amid[0], RSP_PTS_R_NX(midp)
			vmrg	stmaxf, ptTX1f, ptTX2f
	luv	amax[0], RSP_PTS_R_NX(maxp)
flatShade:	
	#
	# Common attribute path (both shading modes arrive here).
	#   - builds one attribute vector per vertex (amin/amid/amax and
	#     their fraction halves): elements 0-3 come from the luv colour
	#     load, elements 4-7 are reloaded from the scratch words written
	#     earlier (offsets 16..63), and element 7 (byte offset 14) is
	#     then overwritten with RSP_PTS_ZS / RSP_PTS_ZSF;
	#   - forms the mid-min and max-min differences and combines them
	#     with the edge deltas in EDel;
	#   - multiplies the results by invri/invrf element 3;
	#   - writes the first four elements of every result to the output
	#     buffer, and advances outp by 64 when shading is enabled.
	#
.unname	ptTX1i
.unname	ptTX1f
.unname	ptTX2i
.unname	ptTX2f
.unname	allWi
.unname	allWf
.name	vjunk,	$v5
.name	Hdai,	$v9
.name	Hdaf,	$v10
.name	Mdai,	$v11
.name	Mdaf,	$v12
.name	adei,	$v13
.name	adef,	$v14
.name	tMdai, 	$v21
.name	tMdaf, 	$v22
			# delay-V
	ldv	aminf[8], (16 +  8)(dscratchp)
			# delay-V
	lsv	aminf[14], RSP_PTS_ZSF(minp)
			vmudm	amin, amin, vconst[7]
	ldv	amidf[8], (16 + 24)(dscratchp)
			vmudm	amid, amid, vconst[7]
	lsv	amidf[14], RSP_PTS_ZSF(midp)
			vmudm	amax, amax, vconst[7]	# multiply by 512
	ldv	amin[8],  (16 +  0)(dscratchp)
			vge	stmaxi, stmaxi, nearWi	# fold the mid vertex values into stmax
	ldv	amid[8],  (16 + 16)(dscratchp)
			vmrg	stmaxf, stmaxf, nearWf

	lsv	amin[14], RSP_PTS_ZS(minp)
			# delay-V
	lsv	amid[14], RSP_PTS_ZS(midp)
			# delay-V
	ldv	amaxf[8], (16 + 40)(dscratchp)
			# delay-V
	lsv	amaxf[14], RSP_PTS_ZSF(maxp)
			# delay-V
	ldv	amax[8],  (16 + 32)(dscratchp)
			vsubc	Mdaf,  amidf, aminf			
	lsv	amax[14], RSP_PTS_ZS(maxp)
	 		vsub	Mdai,  amid,  amin	# Mda  = amid - amin
	slv	stmaxi[0], 64(dscratchp)	# scratch+64: stmaxi elements 0-1
			vsubc	tMdaf, aminf, amidf
	slv	stmaxf[0], 72(dscratchp)	# scratch+72: stmaxf elements 0-1
			vsub	tMdai, amin,  amid	# tMda = amin - amid = -Mda
.unname	stmaxi
.unname	stmaxf
.unname	nearWi
.unname	nearWf
.name	tHdai, 	$v19
.name	tHdaf, 	$v20
	sdv	amin [0],  0(outp)	# 0
			vsubc	tHdaf, aminf, amaxf
	sdv	aminf[0], 16(outp)	# 16
			vsub	tHdai, amin,  amax	# tHda = amin - amax = -Hda
	andi	tmp, rdp_cmd, G_RDP_TRI_SHADE_MASK
			vsubc	Hdaf,  amaxf, aminf
	andi	rdp_flg, rdp_cmd, G_RDP_TRI_ZBUFF_MASK	# delay
			vsub	Hdai,  amax,  amin	# Hda  = amax - amin

	# accumulate Mda*EDel[HIGHX] + tHda*EDel[MIDX]; result replaces Mda
			vmudn	vjunk, Mdaf, EDel[HIGHX]
			vmadh	vjunk, Mdai, EDel[HIGHX]
			vmadn	vjunk, tHdaf, EDel[MIDX]
			vmadh	vjunk, tHdai, EDel[MIDX]
			vsar	Mdai, Mdai, Mdai[0]
			vsar	Mdaf, Mdaf, Mdaf[1]
.unname	tHdai
.unname	tHdaf
	# accumulate Hda*EDel[MIDY] + tMda*EDel[HIGHY]; result replaces Hda
	# NOTE(review): from the EDel components used, Hda now looks like
	# the x gradient numerator and Mda the y gradient numerator -- confirm
			vmudn	vjunk, Hdaf, EDel[MIDY]
			vmadh	vjunk, Hdai, EDel[MIDY]
			vmadn	vjunk, tMdaf, EDel[HIGHY]
			vmadh	vjunk, tMdai, EDel[HIGHY]
			vsar	Hdai, Hdai, Hdai[0]
			vsar	Hdaf, Hdaf, Hdaf[1]
.unname	tMdai
.unname	tMdaf
.name vtmpf,	$v19
.name coordMi,	$v20
.name coordMf,	$v21
	# Mda *= invr[3]
			vmudl	vjunk, Mdaf, invrf[3]
	addi	$16, zero, 0x0800
			vmadm	vjunk, Mdai, invrf[3]
	mtc2	$16, vtmpf[0]		# vtmpf[0] = 0x0800, used as a multiplier below
			vmadn	Mdaf,  Mdaf, invri[3]
	ldv	coordMi[8], 64(dscratchp)	# elements 4-7 <- scratch+64..71
			vmadh	Mdai,  Mdai, invri[3]
	ldv	coordMf[8], 72(dscratchp)	# elements 4-7 <- scratch+72..79

	# Hda *= invr[3]
			vmudl	vjunk, Hdaf, invrf[3]
			vmadm	vjunk, Hdai, invrf[3]
			vmadn	Hdaf,  Hdaf, invri[3]
	sdv	Mdaf[0],   56(outp)	
			vmadh	Hdai,  Hdai, invri[3]
	sdv	Mdai[0],   40(outp)	
	# ade = Mda * vconst[1] + Hda * DxXDy[HIGHY]
			vmudn	vjunk, Mdaf, vconst[1]
			vmadh	vjunk, Mdai, vconst[1]
	sdv	Hdaf[0],   24(outp)	
			vmadl	vjunk, Hdaf, DxXDyf[HIGHY]
	sdv	Hdai[0],    8(outp)
			vmadm	vjunk, Hdai, DxXDyf[HIGHY]
			vmadn	adef,  Hdaf, DxXDyi[HIGHY]
			vmadh	adei,  Hdai, DxXDyi[HIGHY]

.unname	invri
.unname	invrf	
.unname	amaxf
.unname	amid
.unname	amidf
.unname	amax
.unname vjunk
.name absdxi,	$v24
.name absdyi,	$v25
.name absdxf,	$v26
.name absdyf,	$v27
			vabs	absdyi, Mdai, Mdai
	                vmudl   coordMf, coordMf, vtmpf[0]
	
	                vmadm   coordMi, coordMi, vtmpf[0]
	sdv	adef[0],  48(outp)
	                vmadn   coordMf, vconst, vconst[0]
	sdv	adei[0],  32(outp)
			vabs	absdxi, Hdai, Hdai
	# the 64 bytes just written are kept only when shading is enabled
	beq	tmp, $0, outputTXTR
	                vmudm   absdyi, absdyi, vtmpf[0]	# delay slot
	addi	outp, outp, 64
  outputTXTR:
	#
	# Texture coefficient output; skipped (branch to outputZBUF) when
	# the TXTR bit of rdp_cmd is clear.
	#   - builds scalei/scalef from coordM, |Mda| and |Hda|, combines
	#     elements 4, 5 and 6 into one value with compare/merge, and
	#     takes its reciprocal into element 0;
	#   - multiplies elements 4-7 of amin, Hda, Mda and ade by that
	#     value and stores them as the 64-byte block just below the
	#     advanced outp (offsets N-64);
	#   - in parallel multiplies element 7 (byte offset 14) of the same
	#     four vectors by vconst1[4] and stores it at 0..14(outp);
	#   - the final 16 bytes are kept (outp += 16) only when rdp_flg,
	#     the ZBUFF bit of rdp_cmd, is non-zero.
	# NOTE(review): exact fixed-point formats are not visible here.
	#
	                vmadn   absdyf, vconst, vconst[0]
	andi	tmp, rdp_cmd, G_RDP_TRI_TXTR_MASK	# delay	
	                vmudm   absdxi, absdxi, vtmpf[0]
	blez	tmp, outputZBUF
	                vmadn   absdxf, vconst, vconst[0]
.name scalei,	$v5
.name scalef,	$v6
	# scale = (coordM + absdy) * vconst[1] + absdx * vconst[2]
			vmudn	vtmpf,  coordMf, vconst[1]
			vmadh	vtmpf,  coordMi, vconst[1]
			vmadn	vtmpf,  absdyf,  vconst[1]
			vmadh	vtmpf,  absdyi,  vconst[1]
			vmadn	scalef, absdxf,  vconst[2]
			vmadh	scalei, absdxi,  vconst[2]	
.unname absdxi
.unname absdyi
.unname absdxf
.unname absdyf
.unname coordMi
.unname coordMf
.name	vres1i,	$v20
.name	vres1f,	$v21
.name	vres2i,	$v22
.name	vres2f,	$v23
				# DELAY-V
				# DELAY-V
	addi	outp, outp, 64	# increment output pointer
	# compare/merge against element 5, then element 6 (below)
				vsubc	vtmpf,  scalef, scalef[5]
				vge	scalei, scalei, scalei[5]
				vmrg	scalef, scalef, scalef[5]
	# vres1 = ade * vconst1[4]; element 7 goes to bytes 8/10
		vmudn	vres1f, adef,   vconst1[4]
		vmadh	vres1i, adei,   vconst1[4]
		vmadn	vres1f, vconst, vconst[0]
				vsubc	vtmpf,  scalef, scalef[6]
				vge	scalei, scalei, scalei[6]
				vmrg	scalef, scalef, scalef[6]
	ssv	vres1i[14],    8(outp)
	# vres2 = amin * vconst1[4]; element 7 goes to bytes 0/2
		vmudn	vres2f, aminf,  vconst1[4]
	ssv	vres1f[14],   10(outp)
		vmadh	vres2i, amin,   vconst1[4]
		vmadn	vres2f, vconst, vconst[0]
		                vmudl   scalef, scalef, vconst1[3]
			        vmadm   scalei, scalei, vconst1[3]
				vmadn   scalef, vconst, vconst[0]
			# DELAY-V
	ssv	vres2i[14],  0(outp)
			# DELAY-V
	ssv	vres2f[14],  2(outp)
	# reciprocal of scale element 4, result in element 0
				vrcph	vtmpf[0],  scalei[4]
				vrcpl	scalef[0], scalef[4]
				vrcph	scalei[0], vconst[0]
				# DELAY-V
				# DELAY-V
				vmudn	scalef, scalef, vconst[2]
				vmadh	scalei, scalei, vconst[2]
	# vres1 = Hda * vconst1[4]; element 7 goes to bytes 4/6
		vmudn	vres1f, Hdaf,   vconst1[4]
		vmadh	vres1i, Hdai,   vconst1[4]
		vmadn	vres1f, vconst, vconst[0]
				vlt	scalei, scalei, vconst[1]
				vmrg	scalef, scalef, vconst[0]
.unname vtmpf
	# vres2 = Mda * vconst1[4]; element 7 goes to bytes 12/14
		vmudn	vres2f, Mdaf,   vconst1[4]
	ssv	vres1i[14],    4(outp)
		vmadh	vres2i, Mdai,   vconst1[4]
	ssv	vres1f[14],    6(outp)	
		vmadn	vres2f, vconst, vconst[0]
	# vres1 = amin * scale[0]
			vmudl	vres1f, aminf, scalef[0]
			vmadm	vres1f, amin,  scalef[0]
			vmadn	vres1f, aminf, scalei[0]
	ssv	vres2i[14],   12(outp)	
	ssv	vres2f[14],   14(outp)
			vmadh	vres1i, amin,  scalei[0]

	# vres2 = Hda * scale[0]
			vmudl	vres2f, Hdaf, scalef[0]
			vmadm	vres2f, Hdai, scalef[0]
			vmadn	vres2f, Hdaf, scalei[0]
	sdv	vres1f[8], 16-64(outp)
			vmadh	vres2i, Hdai, scalei[0]
	sdv	vres1i[8],  0-64(outp)

	# vres1 = Mda * scale[0]
			vmudl	vres1f, Mdaf, scalef[0]
			vmadm	vres1f, Mdai, scalef[0]
			vmadn	vres1f, Mdaf, scalei[0]
	sdv	vres2f[8],   24-64(outp)	
			vmadh	vres1i, Mdai, scalei[0]
	sdv	vres2i[8],    8-64(outp)

	# vres2 = ade * scale[0]
			vmudl	vres2f, adef, scalef[0]
			vmadm	vres2f, adei, scalef[0]
			vmadn	vres2f, adef, scalei[0]
	sdv	vres1f[8],   56-64(outp)
			vmadh	vres2i, adei, scalei[0]
	sdv	vres1i[8],   40-64(outp)
			# DELAY-V
			# DELAY-S
			# DELAY-V
			# DELAY-S
			# DELAY-V
	sdv	vres2f[8],   48-64(outp)	# 48
	# no Z-buffer: finish without keeping the 16 bytes at 0..14(outp)
#ifdef	OUTPUT_DUMP
		beq	rdp_flg, zero, SetupReject
#else
		beq	rdp_flg, zero, OutputClose
#endif
	sdv	vres2i[8],   32-64(outp)	# 32
#ifdef	OUTPUT_DUMP
		jr	gfx1
#else
		j	OutputClose
#endif
	addi	outp, outp, 16	# increment output pointer
	
.unname scalei
.unname scalef
	
  outputZBUF:	
	#
	# Reached when the TXTR bit is clear.  When rdp_flg (the ZBUFF bit of
	# rdp_cmd) is not positive there is nothing more to write.
	# Otherwise ade, amin, Hda and Mda are multiplied by vconst1[4] and
	# element 7 (byte offset 14) of each int/frac pair is stored into
	# the next 16 output bytes:
	#     0/2  amin    4/6  Hda    8/10  ade    12/14  Mda
	# The first vmudn below sits in the branch delay slot, so it also
	# executes when the branch is taken.
	#
#ifdef	OUTPUT_DUMP
	blez	rdp_flg, SetupReject
#else
	blez	rdp_flg, OutputClose
#endif
				vmudn	adef, adef,   vconst1[4]
				vmadh	adei, adei,   vconst1[4]
				vmadn	adef, vconst, vconst[0]
	
				vmudn	aminf, aminf,  vconst1[4]
				vmadh	amin,  amin,   vconst1[4]
				vmadn	aminf, vconst, vconst[0]

	ssv	adei[14],    8(outp)	# output z stuff.
				vmudn	Hdaf, Hdaf,   vconst1[4]
	ssv	adef[14],   10(outp)
				vmadh	Hdai, Hdai,   vconst1[4]
				vmadn	Hdaf, vconst, vconst[0]
	
	ssv	amin [14],   0(outp)
				vmudn	Mdaf, Mdaf,   vconst1[4]
	ssv	aminf[14],   2(outp)
				vmadh	Mdai, Mdai,   vconst1[4]
	# the remaining stores are written relative to the advanced pointer
	addi	outp, outp, 16	# increment output pointer
				vmadn	Mdaf, vconst, vconst[0]
	ssv	Hdai[14],    4-16(outp)
	ssv	Hdaf[14],    6-16(outp)	
	ssv	Mdai[14],   12-16(outp)	
#ifdef	OUTPUT_DUMP
	j	SetupReject
#else
	j	OutputClose
#endif
	ssv	Mdaf[14],   14-16(outp)		# delay slot

		/* handling that goes with the return_save => gfx1 change */
		/* in the BackReject case, return with return = 0 */
		/* BackReject falls through into SetupReject, which jumps */
		/* to the address held in gfx1 without touching return.   */
BackReject:	addi	return, zero, 0
SetupReject:	jr	gfx1
		nop
		.end	beginSetup

/* un-name scalar registers: */
/* Releases the aliases that were still active at the end of beginSetup. */
.unname	minp
.unname	midp
.unname	maxp
.unname	flatp
.unname	rdp_cmd
.unname	rdp_flg
.unname	tmp
.unname	dscratchp
.unname	rendState
	
/* un-name vector registers: */
/* NOTE(review): vres1i, vres1f, vres2i and vres2f ($v20-$v23) are named */
/* in the texture output code and are not un-named anywhere in the       */
/* visible source -- confirm whether that is intended.                   */
.unname	DxXDyi
.unname	DxXDyf
.unname	yf
.unname	xHighf
.unname	EDel
.unname	invEDeli
.unname	invEDelf
	
.unname	Hdai
.unname	Hdaf
.unname	Mdai
.unname	Mdaf
.unname	adei
.unname	adef
.unname	amin
.unname	aminf

#if 0
	# test for thorough register un-naming.
	# Disabled.  When enabled it gives a fresh alias to scalar registers
	# $1-$20 and vector registers $v0-$v29.
	# NOTE(review): presumably the assembler reports a conflict for any
	# register that still carries an alias at this point -- confirm.
.name r1, $1
.name r2, $2
.name r3, $3
.name r4, $4
.name r5, $5
.name r6, $6
.name r7, $7
.name r8, $8
.name r9, $9
.name r10, $10
.name r11, $11
.name r12, $12
.name r13, $13
.name r14, $14
.name r15, $15
.name r16, $16
.name r17, $17
.name r18, $18
.name r19, $19
.name r20, $20

.name vv0, $v0
.name vv1, $v1
.name vv2, $v2
.name vv3, $v3
.name vv4, $v4
.name vv5, $v5
.name vv6, $v6
.name vv7, $v7
.name vv8, $v8
.name vv9, $v9
.name vv10, $v10
.name vv11, $v11
.name vv12, $v12
.name vv13, $v13
.name vv14, $v14
.name vv15, $v15
.name vv16, $v16
.name vv17, $v17
.name vv18, $v18
.name vv19, $v19
.name vv20, $v20
.name vv21, $v21
.name vv22, $v22
.name vv23, $v23
.name vv24, $v24
.name vv25, $v25
.name vv26, $v26
.name vv27, $v27
.name vv28, $v28
.name vv29, $v29
	
#endif