gline.s 20.8 KB

Raw Blame History Permalink

 ##########################################################################
 #
 # Line Setup Routine.
 # When entering this code we have a points buffer full of points,
 # and registers r1, r2 point to the two vertices of a line.
 # r4 points to the vertex with the normal for flat-shaded case.
 #
 # Kevin Luster, kluster@sgi.com
 #
 ##########################################################################

 # pointers to vertices
 # (the y sort in lineSetup swaps minp/maxp so that minp has the smaller y;
 #  $1/$2 are later rebound to sXH/sXL and $4 to adxyHi/cXH)
.name 	minp,		$1
.name	maxp,		$2
.name	flatp,		$4

 # point y coordinates
.name 	miny,		$3
.name	maxy,		$17

 # RDP command byte, major-edge flag byte and a scratch register
 # ($7 is temporarily rebound to rendState during attribute loading)
.name	rdp_cmd,	$5
.name	rdp_flg,	$6
.name	temp,		$7

 # point x coordinates
.name 	minx,		$8
.name	maxx,		$9

 # line extents: Hdx = maxx - minx, Hdy = maxy - miny
.name 	Hdx, 		$10
.name	Hdy,		$11

 # scalar x edge values (vertical lines only), later moved to xH/xM
.name	xHigh,		$12
.name	xMid,		$13

 # y values written to the edge structure (YH, YM, YL)
.name	yHigh,		$14
.name	yMid,		$15
.name	yLow,		$16

 # rdp_cmd masked with the Z-buffer/texture/shade bits; zero means
 # no attribute setup is needed. $18 is briefly rebound to cXL in the
 # horizontal path, so attrmask is stashed in scratch memory there.
.name	attrmask,	$18

 # vector registers

 # vzero is cleared at the top of lineSetup; vtmp is scratch
.name	vzero,		$v0
.name	vtmp,		$v1

 # Hdx as an integer/fraction pair ($v2/$v3 are later rebound to dadxi/dadxf)
.name	vHdxi,		$v2
.name	vHdxf,		$v3

 # attributes of the min vertex (integer, fraction); these are the
 # starting attribute values written to the output
.name	aMini,		$v4
.name	aMinf,		$v5

 # attribute delta (max - min), integer part; fraction is aDelf below
.name	aDeli,		$v6

 # 1/Hdy
.name	iHdyi,		$v8
.name	iHdyf,		$v9

 # high edge slope (DxHDy)
.name	dxyHi,		$v10
.name	dxyHf,		$v11

 # mid edge slope (DxMDy)
.name	dxyMi,		$v12
.name	dxyMf,		$v13

 # XH
.name	xHi,		$v14
.name	xHf,		$v15

 # XM
.name	xMi,		$v16
.name	xMf,		$v17

 # 1/Hdx
.name	iHdxi,		$v20
.name	iHdxf,		$v21

 # attributes of the max vertex (integer, fraction)
.name	aMaxi,		$v22
.name	aMaxf,		$v23

 # attribute delta (max - min), fractional part
.name	aDelf,		$v24

 # attribute slope along the edge
.name	dadei,		$v26
.name	dadef,		$v27

 # attribute slope in y
.name	dadyi,		$v28
.name	dadyf,		$v29


.ent	lineSetup

lineSetup:

	# Entry phase: build the RDP command byte, open the output
	# buffer, sort the two vertices by y, and (when any attribute
	# bit is set) load the per-vertex attributes and compute the
	# scaled attribute delta aDel{i,f} = aMax - aMin.

	# Store what's in RSP_STATE_TRI off into rdp_cmd
	# and OR in the G_TRI_FILL bits
	lb	rdp_cmd, 	RSP_STATE_TRI(rsp_state)
	ori	rdp_cmd, 	rdp_cmd,	G_TRI_FILL

	# set up a zero register for use in various places
	vxor	vzero,	vzero,	vzero

	# open for output, do this here before register 18
	# gets used for output values
	# (presumably $18 carries the requested size to OutputOpen;
	# the same register is attrmask later on - confirm)

#if !(defined(OUTPUT_DRAM)||defined(OUTPUT_FIFO))
	jal	OutputOpen
	addi	$18, zero, 184 	# worst case guess (delay slot)
#endif /* !(OUTPUT_DRAM || OUTPUT_FIFO) */


sort:	# Sort input points along the y direction
	lh	miny,	RSP_PTS_YS(minp)
	lh	maxy, 	RSP_PTS_YS(maxp)

	# temp = 1 when the points are out of order
	slt	temp, 	maxy, 	miny

	# jump to done if points already sorted
	# NOTE(review): no nop follows this branch, so presumably the
	# first swap instruction below runs in the delay slot; it only
	# writes temp, which is not read at sortDone - confirm
	blez	temp, 	sortDone

	# swap y values
	addi	temp, 	miny, 	0
	addi	miny, 	maxy, 	0
	addi	maxy, 	temp, 	0

	# swap point buffer pointers
	addi	temp, 	minp, 	0
	addi	minp, 	maxp, 	0
	addi	maxp, 	temp, 	0

.unname temp
.name rendState,	$7

sortDone:

	# load in point x values
	lh	minx,	RSP_PTS_XS(minp)
	lh	maxx, 	RSP_PTS_XS(maxp)

	# check to see if we need to do any attribute setup
	# (attrmask keeps only the Z-buffer, texture and shade bits)

	andi	attrmask,	rdp_cmd,	(G_RDP_TRI_ZBUFF_MASK | G_RDP_TRI_TXTR_MASK | G_RDP_TRI_SHADE_MASK)
	blez	attrmask,	SetupDone

	# do some preliminary setup for the attribute
	# calculation. Code is here since we rename the
	# minp and maxp registers later

	# load in rgba values, will get trashed if branch
	# not taken..
	# (this load follows the branch directly, so it presumably
	# sits in its delay slot)

	luv	aMini[0], RSP_PTS_R_NX(minp)

	# test the smooth-shading bit of the render state
	lw	rendState, RSP_STATE_RENDER(rsp_state)
	andi	rendState, rendState, G_SHADING_SMOOTH
	bgtz	rendState, smoothShade

	# delay slot, value will get trashed if branch
	# not taken..
	luv	aMaxi[0], RSP_PTS_R_NX(maxp)

	# ok, we're doing flat shading, so load in
	# the flat vertex rgba values instead
	# (both endpoints get the same colour, so the rgba delta is zero)
#ifdef	F3DEX_GBI
	luv	aMini[0], RSP_PTS_R_NX(gfx0)
	luv	aMaxi[0], RSP_PTS_R_NX(gfx0)
#else
	luv	aMini[0], RSP_PTS_R_NX(flatp)
	luv	aMaxi[0], RSP_PTS_R_NX(flatp)
#endif

.unname rendState
.name	temp,		$7

smoothShade:

	# zero out the fractional registers since we need to
	# do some massaging on the rgba values before loading
	# in the Z values

	vadd	aMinf,	vzero,	vzero
	vadd	aMaxf,	vzero,	vzero

	# Now multiply the rgba attribute values by 1/128 to
	# get their real values I x F

	vmudm	aMini,	aMini,	vconst[7]
	vmudm	aMaxi,	aMaxi,	vconst[7]

	# at this point the rgba values are loaded and ready to
	# go. Now need to load in the Z values into the integer
	# and fractional locations
	# (Z lives at byte offset 8 of each attribute register)

	lsv	aMini[8],	RSP_PTS_ZS(minp)
	lsv	aMaxi[8],	RSP_PTS_ZS(maxp)
	lsv	aMinf[8],	RSP_PTS_ZSF(minp)
	lsv	aMaxf[8],	RSP_PTS_ZSF(maxp)

	# load in S and T values for both points
	# there are no fractional values to load
	# (S,T live at byte offset 10 of each attribute register)

	llv	aMini[10],	RSP_PTS_S(minp)
	llv	aMaxi[10],	RSP_PTS_S(maxp)

	# Now that all values are loaded, we do an IF subtract
	# to get what the attribute delta values are
	# (vsubc on the fractions first, then vsub on the integers)

	vsubc	aDelf,	aMaxf,	aMinf
	vsub 	aDeli,	aMaxi,	aMini

	# now multiply aDel down so that we get maximal precision
	# later when computing the attribute slopes IF X F
	# (the closing vmadn multiplies by vconst[0], which this file
	# treats as zero, so it presumably just reads back the low
	# accumulator word - confirm)

	vmudl	aDelf,	aDelf,	vconst1[2]
	vmadm	aDeli,	aDeli,	vconst1[2]
	vmadn	aDelf,	vconst,	vconst[0]

	# the vertex pointers are no longer needed; reuse their
	# registers for the scissor X values

.unname minp
.unname maxp

.name	sXH,		$1
.name	sXL,		$2

SetupDone:

	# Slope phase: load the scissor X limits, compute Hdx, Hdy
	# and their reciprocals, form the edge slope dxyH = Hdx/Hdy and
	# the attribute edge slope dade, clamp overly steep slopes, and
	# branch to the Vertical or Horizontal code.

	# Now load the user specified scissor
	# X coordinates. For vertical lines
	# we use these values, for horizontal
	# lines they may be changed

	lh	sXH,	RSP_STATE_SCISSOR_XH(rsp_state)
	lh	sXL,	RSP_STATE_SCISSOR_XL(rsp_state)

	# now compute Hdy and Hdx
	sub	Hdy,	maxy,	miny
	sub	Hdx,	maxx,	minx

	# now compute Hdx/Hdy

	# load scalar registers into vector registers
	# get rid of this later

	mtc2	Hdy,		vtmp[0]
	mtc2	Hdx,		vHdxi[0]
	vmov	vHdxf[0], 	vconst[0]

	# now compute 1/Hdx

	vrcp	iHdxf[0], vHdxi[0]
	vrcph	iHdxi[0], vconst[0]

 	# jump over all of the below calculations if
	# we have an exactly horizontal line
	# (on that path dxyH{i,f} and dade{i,f} are not computed here.
	# NOTE(review): no nop follows, so the vrcp below presumably
	# runs in the delay slot - confirm)

	beq	Hdy,	zero,	Horizontal

	# now compute 1/Hdy, Hdy is stored in vtmp

	vrcp	iHdyf[0], vtmp[0]
	vrcph	iHdyi[0], vconst[0]

	# shift Hdx right by 15 so that
	# the multiplication later by 1/Hdy
	# lines up correctly. We don't lose
	# any precision in this shift since
	# Hdx was loaded into the integer
	# portion of the register pair.

	vmudl	vHdxf, vHdxf,  vconst[2]
	vmadm	vHdxi, vHdxi,  vconst[2]
	vmadn	vHdxf, vconst, vconst[0]

	# now multiply 1/Hdy by Hdx to get slope.
	# (full IF x IF product; dxyHf receives the fraction and
	# dxyHi the integer part)

	vmudl	dxyHf,	iHdyf, 	vHdxf[0]
	vmadm	dxyHf,	iHdyi,	vHdxf[0]
	vmadn	dxyHf,	iHdyf,	vHdxi[0]
	vmadh	dxyHi,	iHdyi,	vHdxi[0]

	# now do edge walker clear of low slope bits (ick)

	vand	dxyHf,	dxyHf,	vconst1[1]

	# if we don't need to do any attribute goop then
	# jump around it
	# (NOTE(review): no nop follows, so the first vmudl below
	# presumably runs in the delay slot; it only writes dadef - confirm)

	blez	attrmask,	dadeDone

	# Now we set up dade. This code is common
	# to horizontal and vertical lines, so
	# we do it here

	# note that aDelf has been aligned such that
	# we get maximal precision out of this multiply,
	# we don't lose any bits of either operand

	# do an IF x IF on 1/Hdy and aDelf
        vmudl   dadef,	aDelf,	iHdyf[0]
        vmadm   dadef,	aDeli,  iHdyf[0]
        vmadn   dadef,	aDelf,	iHdyi[0]
        vmadh	dadei,	aDeli,  iHdyi[0]

	# now we check to see if the slope is going to cause
	# a problem when we do the subpixel adjustment. We
	# compare the absolute value of the integer part of
	# the slope with 1877 and clamp if it is greater
	# (the limit itself is read from vconst1[12] below)

.unname	flatp
.name	adxyHi,	$4

dadeDone:

	# Absolute value code compliments of alesha@anya.Princeton.EDU
	# (Alexei Lebedev), wins because it needs one fewer register.
	# computes: temp=x>>31; absx = x^temp + (temp & 1);

	mfc2	adxyHi,	dxyHi[0]
	sra 	temp,	adxyHi,	31
	xor 	adxyHi,	temp,	adxyHi
	andi 	temp,	temp,	1
	add 	adxyHi,	adxyHi,	temp

	# at this point adxyHi contains |dxyHi|, now we check it against
	# the hardcoded maximum value allowed and possibly clamp Hdy
	# (adxyHi becomes 1 when |dxyHi| is below the limit)

	mfc2	temp,	vconst1[12]
	slt	adxyHi,	adxyHi,	temp
	bgtz	adxyHi,	SlopeFine
	nop

	# slope is bad, so we zero out Hdy, everything
	# else afterwards should compute out fine
	# (with Hdy == 0 the tests below send any line with Hdx != 0
	# to Horizontal, where it is handled as exactly horizontal)

	xor	Hdy,	Hdy,	Hdy

.unname	adxyHi

	# Now do the decision as to whether we're doing
	# horizontal or vertical code. For now we're just
	# using Rich Webb's scalar branching algorithm.
	# Vertical is chosen when Hdy >= |Hdx|.

SlopeFine:

	# NOTE(review): no nop follows this branch, so the add at
	# HdxNegative presumably runs in the delay slot; harmless
	# because HdxPositive recomputes temp - confirm
	bgtz	Hdx,	HdxPositive

.unname	vHdxf
.unname	vHdxi

HdxNegative:

	# Hdx <= 0: vertical when Hdy + Hdx >= 0
	add	temp,	Hdy,	Hdx
	bgez	temp,	Vertical
	nop
	# NOTE(review): no nop follows this jump, so the sub at
	# HdxPositive presumably runs in the delay slot; Horizontal
	# overwrites temp first thing - confirm
	j	Horizontal

HdxPositive:

	# Hdx > 0: horizontal when Hdy - Hdx < 0
	sub	temp,	Hdy,	Hdx
	bltz	temp,   Horizontal
	nop

	# All the code above this point should be
	# shareable between the horizontal and
	# vertical sections...


Vertical:

	# Vertical (Hdy >= |Hdx|) lines: both x edges follow the line
	# with slope dxyH, offset left/right by the line width, and the
	# y span runs from miny to maxy.

	# stuff away fact that we have a vertical line
	# used later when doing subpixel backups
	# (AttrBackup reads this byte back: 0 = vertical)

	addi	temp,	zero,	0
	sb	temp,	(2+RSP_SCRATCH_OFFSET)(zero)

	# now we set up the attribute values

	# We are going to eventually use the attribute
	# values stored in aMin{i,f} and since those are
	# the ones we want, we don't have to do anything
	# else for setup

	# now set up XHigh and XMid

	# do the line thickness +/- in scalar
	# land first, so that don't have to dork with
	# double precision vector operations

	# hack for line width. See comment in ../gimm.s
	# ($21 holds the halfword read from CLIP_STATE_TABLE and is
	# used as the +/- x offset)
	lh	$21, CLIP_STATE_TABLE(zero)
	sub	xHigh,	minx,	$21
	add	xMid,	minx,	$21
#if 0
	addi	xHigh,	minx,	0xfffd
	addi	xMid,	minx,	0x0003
#endif

	# move both x values into integer/fraction vector pairs
	# with the fraction cleared

	mtc2	xHigh, 	xHi[0]
	vmov	xHf[0],	vconst[0]

	mtc2	xMid, 	xMi[0]
	vmov	xMf[0],	vconst[0]

	# now do the shift to the right by 2
	# to get the subpixel part into the
	# fractional register.
	# accomplish this by doing an IF x F

	vmudl	xHf, 	xHf,   	vconst[4]
	vmadm	xHi,	xHi,	vconst[4]
	vmadn	xHf,	vconst, vconst[0]

	vmudl	xMf, 	xMf,   	vconst[4]
	vmadm	xMi,	xMi,	vconst[4]
	vmadn	xMf,	vconst, vconst[0]

	# back xH, xM (and the attributes) up by the subpixel part
	# of the starting y, which is passed in temp

	jal	AttrBackup

	# delay slot
	addi	temp,	miny,	0

	# hard coded left major triangle

	addi	rdp_flg, zero, 0x80

	# set up yHigh, yMid, yLow here so that we
	# can have a common output routine later
	# (yLow == yMid == maxy)

	addi	yLow,	maxy,	0
	addi	yMid,	maxy,	0
	addi	yHigh,	miny,	0

	# $v2/$v3 held Hdx earlier; from here on they hold dadx

.name	dadxi,	$v2
.name	dadxf,	$v3

	# check to see if we need to do any attribute setup

	blez	attrmask,	VdadxDone

	# delay slot
	# DxMdy = DxHdy
	vadd 	dxyMi,	vzero,	dxyHi
	# fractional part pushed down to
	# delay slot below..

	# set up the other attribute slope registers here

	# in vertical lines, dadx = 0
	vxor	dadxi,	dadxi,	dadxi
	vxor	dadxf,	dadxf,	dadxf

	# in vertical lines, dady = dade

	vadd	dadyi,	vzero,	dadei
	vadd	dadyf,	vzero,	dadef

VdadxDone:

	j 	Exit

	# delay slot
	vadd	dxyMf,	vzero,	dxyHf

Horizontal:

	# Common code for horizontal (|Hdx| > Hdy) lines: compute
	# dadx, tighten the scissor X range to the line's own x extent,
	# pick left/right major, then split into the exactly-horizontal
	# (Hdy == 0) and mostly-horizontal cases.

	# stuff away fact that we have a horizontal line
	# used later when doing subpixel backups
	# (AttrBackup reads this byte back: 1 = horizontal)

	addi	temp,	zero,	1
	sb	temp,	(2+RSP_SCRATCH_OFFSET)(zero)

	# check to see if we need to do any attribute setup
	# (NOTE(review): no nop follows, so the first vmudl below
	# presumably runs in the delay slot; it only writes dadxf - confirm)

	blez	attrmask,	HdadxDone

	# now we set up the attribute values

	# note that dade has already been setup

	# Do it here since code should be the
	# same for both types of Horizontal lines

	# note that aDelf has been aligned such that
	# we get maximal precision out of this multiply,
	# we don't lose any bits of either operand

	# do an IF x IF on 1/Hdx and aDelf
        vmudl   dadxf,	aDelf,	iHdxf[0]
        vmadm   dadxf,	aDeli,  iHdxf[0]
        vmadn   dadxf,	aDelf,	iHdxi[0]
        vmadh	dadxi,	aDeli,  iHdxi[0]

HdadxDone:

	# Now setup the scissor coordinates
	# For now we've got hard coded scissor coordinates
	# These commands will be repeated in the two sections
	# below with correct values

	# temp = 1 when maxx < minx
	slt	temp,	maxx,	minx
	blez	temp,	xSorted

	# store attrmask away since we're going to trash
	# its register. blah! blah! blah!
	# ($18 is rebound to cXL just below)

	# delay slot
	sb	attrmask,	(0+RSP_SCRATCH_OFFSET)(zero)

.unname	attrmask

.name	cXH,	$4
.name	cXL,	$18
.name	d,	$19
.name	s,	$20


xNotSorted:
	# maxx < minx: cXH = smaller x, cXL = larger x
	add	cXH,	zero,	maxx
	add	cXL,	zero,	minx
	j	MiniMaxX
	nop

xSorted:

	# minx <= maxx: cXH = smaller x, cXL = larger x
	add	cXH,	zero,	minx
	add	cXL,	zero,	maxx

MiniMaxX:

	# Now we compute the minimax and maximin of
	# what the real scissor box should be
	# Using Rich Webb's min/max algorithm
	# (branch free: s is the sign mask of the difference d)

	# compute max of sXH, cXH
	sub	d,	sXH,	cXH
	sra	s,	d,	31
	and	d,	s,	d
	sub	sXH,	sXH,	d

	# compute min of sXL, cXL
	sub	d,	sXL,	cXL
	sra	s,	d,	31
	and	d,	s,	d
	add	sXL,	cXL,	d

	# addi	sXH,	cXH,	0
	# addi	sXL,	cXL,	0

	# right major when minx <= maxx, left major otherwise
	# (NOTE(review): no nop follows, so the addi at LeftMajor
	# presumably runs in the delay slot; RightMajor overwrites
	# rdp_flg when the branch is taken - confirm)

	slt	temp,	maxx, 	minx
	blez	temp,	RightMajor

.unname	cXH
.unname	cXL
.unname	d
.unname	s

.name	attrmask,	$18


LeftMajor:	# left major
	addi 	rdp_flg,	zero,	0x80
	j	Done
	nop

RightMajor:	#right major
	addi	rdp_flg,	zero,	0x00

Done:

	# restore stored attrmask to correct register

	lb	attrmask,	(0+RSP_SCRATCH_OFFSET)(zero)

	# Hdy > 0 is a sloped line; otherwise fall through to the
	# exactly horizontal case
	bgtz	Hdy,	MostlyHorizontal
	nop

ExactlyHorizontal:

	# Reached with Hdy == 0: either the two points share a y value
	# or the slope check zeroed Hdy. The line is emitted as a flat
	# span from minx to maxx, one line-width above and below maxy.

	# for exactly horizontal lines, the high and mid
	# slopes are zero
	#
	# This must happen before the attribute test below: the
	# Exit code stores dxyH{i,f} and dxyM{i,f} into the edge
	# coefficients unconditionally, and on this path dxyM has
	# not been written at all while dxyH is either uncomputed
	# or holds the over-limit slope. Zeroing them only when
	# attributes were enabled left stale slopes in the output
	# of fill-only lines.

	vxor	dxyHi,	dxyHi,	dxyHi
	vxor	dxyHf,	dxyHf,	dxyHf
	vxor	dxyMi,	dxyMi,	dxyMi
	vxor	dxyMf,	dxyMf,	dxyMf

	# check to see if we need to do any attribute setup

	blez	attrmask,	ExHattDone

	# delay slot (runs whether or not the branch is taken)
	# special case setting up initial attribute
	# values.
	# NOTE(review): only the integer halves are copied; aMinf
	# keeps the min vertex's fraction (Z frac) - confirm that
	# this is intended

	vadd	aMini,	vzero,	aMaxi

	# zero out dade for exactly horizontal lines

	vxor	dadef,	dadef,	dadef
	vxor	dadei,	dadei,	dadei

ExHattDone:

	# Now set up xHigh and xMid
	# (XH = maxx, XM = minx, fractions cleared)

	mtc2	maxx, 	xHi[0]
	vmov	xHf[0],	vconst[0]

	mtc2	minx, 	xMi[0]
	vmov	xMf[0],	vconst[0]

	# now do the shift to the right by 2
	# to get the subpixel part into the
	# fractional register.
	# accomplish this by doing an IF x F

	vmudl	xHf, 	xHf,   	vconst[4]
	vmadm	xHi,	xHi,	vconst[4]
	vmadn	xHf,	vconst, vconst[0]

	vmudl	xMf, 	xMf,   	vconst[4]
	vmadm	xMi,	xMi,	vconst[4]
	vmadn	xMf,	vconst, vconst[0]

	# Note that no subpixel adjustment is needed
	# in the exactly horizontal case

	# now set up ymax and ymin so that they can be
	# popped into the yHigh->yLow code below. Need
	# to add and subtract the line thickness from each

	# We use maxy because miny==maxy for exactly
	# horizontal lines. This is not true for those
	# lines which have been clamped to being exactly
	# horizontal, but the maximum error in not doing
	# an average is 1/2 of a quarter pixel, which is
	# the same error we'd get in doing an average

	# hack for line width. See comment in ../gimm.s
	# ($21 holds the halfword read from CLIP_STATE_TABLE and is
	# used as the +/- y offset)
	lh	$21, CLIP_STATE_TABLE(zero)
	add	yLow,	maxy,	$21
	add	yMid,	maxy,	$21
	sub	yHigh,	maxy,	$21
#if 0
	addi	yLow,	maxy,	0x0003
	addi	yMid,	maxy,	0x0003
	addi	yHigh,	maxy,	0xfffd
#endif

	j 	AdjustAlpha
	nop

MostlyHorizontal:

	# Sloped horizontal (|Hdx| > Hdy > 0) lines: the high edge
	# follows the line with slope dxyH from yHigh, the mid edge is
	# vertical (slope 0) at the starting x, and the low edge reuses
	# the high slope.

	# initial attributes are already in aMin{i,f} so we
	# don't have to do anything special to set them up

	# Now set up xHigh

	mtc2	minx, 	xHi[0]
	vmov	xHf[0],	vconst[0]

	# now do the shift to the right by 2
	# to get the subpixel part into the
	# fractional register.
	# accomplish this by doing an IF x F

	vmudl	xHf, 	xHf,   	vconst[4]
	vmadm	xHi,	xHi,	vconst[4]
	vmadn	xHf,	vconst, vconst[0]

	# now copy over the xHigh values into
	# the xLow values. The xHigh values
	# are modified below when we do the
	# sub pixel adjustment
	# (the copy lives in xM and is written out as both XM and XL)

	vmov	xMi[0],	xHi[0]
	vmov	xMf[0],	xHf[0]

	# now set up ymax and ymin so that they can be
	# popped into the yHigh->yLow code below. Need
	# to add and subtract the line thickness from each

	# hack for line width. See comment in ../gimm.s
	# ($21 holds the halfword read from CLIP_STATE_TABLE and is
	# used as the +/- y offset)
	lh	$21, CLIP_STATE_TABLE(zero)
	sub	yHigh,	miny,	$21
	add	yMid,	miny,	$21
	add	yLow,	maxy,	$21
#if 0
	addi	yHigh,	miny,	0xfffd
	addi	yMid,	miny,	0x0003
	addi	yLow,	maxy,	0x0003
#endif

	# back xH (and the attributes) up by the subpixel part of
	# yHigh, which is passed in temp; xM is left alone for
	# horizontal lines

	jal	AttrBackup

	# delay slot
	addi	temp,	yHigh,	0

	# for mostly horizontal lines, mid slope is zero
	vxor	dxyMi,	dxyMi,	dxyMi
	vxor	dxyMf,	dxyMf,	dxyMf

	# now do specific output routine
	# (outp is advanced by 8 here so that the offsets below are
	# relative to the edge structure that Exit writes after its
	# 8 byte scissor command, then moved back again)

	addi 	outp, outp, 8		# increment output pointer

	# need to write out XLow values for nearly horizontal lines

	ssv	xMi[0],		 8(outp)	# XL = XM
	ssv	xMf[0],		10(outp)	# XL, frac = XM, frac
	ssv	dxyHi[0],	12(outp)	# DxLDy	      = DxHDy
	ssv	dxyHf[0],	14(outp)	# DxLDy, frac = DxHDy, frac

	# 0xfff8 is -8: undo the increment above
	addi	outp,	outp,	0xfff8

AdjustAlpha:
	nop

	# set up the other attribute slope registers here

	# in horizontal lines, dady = 0
	vxor	dadyi,	dadyi,	dadyi
	vxor	dadyf,	dadyf,	dadyf

	# none of these scalars are referenced by name in the
	# output code below

.unname	miny
.unname	maxy

.unname	minx
.unname	maxx

.unname	Hdx
.unname	Hdy

.unname xHigh
.unname	xMid


Exit:

	# Common output code for all three line types. Writes, in
	# order: an 8 byte G_SETSCISSOR command, the 32 byte edge
	# structure, then the shade (64 bytes), texture (64 bytes) and
	# Z (16 bytes) coefficient blocks selected by rdp_cmd, and
	# finally closes the output and returns.

	# now do output routine, dump out
	# the calculated values above. everybody
	# should already be happy with their location

	addi	temp, 	zero, G_SETSCISSOR
	sb	temp, 	0(outp)	# output rdp command

	# now merge together user Y values with computed
	# X values
	# (first word: XH in bits 31..20, YH in bits 19..8;
	#  second word: XL shifted left by 12, OR'd with YL)

	lh	temp,	RSP_STATE_SCISSOR_YH(rsp_state)
	sll	sXH,	sXH,	20
	sll	temp,	temp,	 8
	or	sXH,	sXH,	temp

	lh	temp,	RSP_STATE_SCISSOR_YL(rsp_state)
	sll	sXL,	sXL,	12
	or	sXL,	sXL,	temp

	# I'm writing out 4 bytes below instead of 3 in each
	# command, so I'm making them overlap and have the
	# second write contain the real data for the overlapped
	# byte

	sw	sXH,	1(outp)		# XH and YH
	sw 	sXL,	4(outp)		# flags, XL and YL

	# now increment outp pointer so that next batch of output
	# is with respect to beginning of the edge structure

	addi 	outp, outp, 8		# increment output pointer

	# addi	rdp_cmd, zero, G_TRI_SHADE_ZBUFF
	sb	rdp_cmd, 0(outp)	# output rdp command

	sb	rdp_flg, 1(outp)	# output poly flag

	sh	yLow,	2(outp)		# YL
	sh	yMid, 	4(outp)		# YM
	sh	yHigh,	6(outp)		# YH

	# bytes 8..15 (XL, DxLDy) are only written by the
	# MostlyHorizontal code; the other paths set yLow == yMid

	# now squat out xhigh values

	ssv	xHi[0],		16(outp)	# XH
	ssv	xHf[0],		18(outp)	# XH, frac
	ssv	dxyHi[0],	20(outp)	# DxHDy
	ssv	dxyHf[0],	22(outp)	# DxHDy, frac

	# now do xmid

	ssv	xMi[0],		24(outp)	# XM
	ssv	xMf[0],		26(outp)	# XM, frac
	ssv	dxyMi[0],	28(outp)	# DxMDy
	ssv	dxyMf[0],	30(outp)	# DxMDy, frac


	# check if we need to write out shade values and slopes

	andi	temp,	rdp_cmd,	G_RDP_TRI_SHADE_MASK
	blez	temp,	SHADEBEDONE


	# now increment outp pointer to point after edge structure

	# delay slot
	# (runs on both paths, so outp is past the edge structure
	# whether or not shade data follows)
	addi 	outp, 	outp, 	32


	# write out attribute slopes and initial values
	# (the initial rgba fraction is written as zero)

	sdv	aMini[0],	 0(outp)	# {rgba}
	sdv	dadxi[0],	 8(outp)	# D{r,g,b,a}Dx
	sdv	vzero[0],	16(outp)	# {rgba}, frac
	sdv	dadxf[0],	24(outp)	# D{r,g,b,a}Dx, frac
	sdv	dadyi[0],	40(outp)	# D{r,g,b,a}Dy
	sdv	dadyf[0],	56(outp)	# D{r,g,b,a}Dy, frac
	sdv	dadei[0],	32(outp)	# D{r,g,b,a}De
	sdv	dadef[0],	48(outp)	# D{r,g,b,a}De, frac

	# now increment outp pointer to point after shade structure

 	addi	outp,	outp,	64


SHADEBEDONE:

	# Check if we need to write out texture values and slopes

	andi	temp,	rdp_cmd,	G_RDP_TRI_TXTR_MASK
	blez	temp,	TEXBEDONE
	nop	# delay slot

	# s,t come from byte offset 10 of each attribute register;
	# w is written as a constant and every w/l slope as zero

	slv	aMini[10],	 0(outp)	# {s,t}
	ssv	vconst[1],	 4(outp)	# w = 1
	ssv	vconst[0],	 6(outp)	# l = 0
	slv	dadxi[10],	 8(outp)	# D{s,t}Dx
	slv	vzero[0],	12(outp)	# D{w,l}Dx = 0
	slv	aMinf[10],	16(outp)	# {s,t}, frac
	slv	vzero[0],	20(outp)	# {w,l}, frac = 0
	slv	dadxf[10],	24(outp)	# D{s,t}Dx, frac
	slv	vzero[0],	28(outp)	# D{w,l}Dx, frac = 0
	slv	dadei[10],	32(outp)	# D{s,t}De
	slv	vzero[0],	36(outp)	# D{w,l}De = 0
	slv	dadyi[10],	40(outp)	# D{s,t}Dy
	slv	vzero[0],	44(outp)	# D{w,l}Dy = 0
	slv	dadef[10],	48(outp)	# D{s,t}De, frac
	slv	vzero[0],	52(outp)	# D{w,l}De, frac = 0
	slv	dadyf[10],	56(outp)	# D{s,t}Dy, frac
	slv	vzero[0],	60(outp)	# D{w,l}Dy, frac = 0


	# now increment outp pointer to point after texture structure

	addi 	outp,	outp,	64


TEXBEDONE:

	# Check if we need to write out z buffer values and slopes

	andi	temp,	rdp_cmd,	G_RDP_TRI_ZBUFF_MASK
	blez	temp, 	ZBEDONE
	nop	# delay slot


	# Scale all the Z related values up to match what's being done
	# in the poly microcode
	# (each integer/fraction pair is multiplied by vconst1[4];
	# all lanes are scaled but only the Z lane is stored below)

	# Scale up Z
	vmudn	aMinf, aMinf, vconst1[4]
	vmadh	aMini, aMini, vconst1[4]
	vmadn	aMinf, vconst, vconst[0]

	# Scale up DzDx
	vmudn	dadxf, dadxf, vconst1[4]
	vmadh	dadxi, dadxi, vconst1[4]
	vmadn	dadxf, vconst, vconst[0]

	# Scale up DzDe
	vmudn	dadef, dadef, vconst1[4]
	vmadh	dadei, dadei, vconst1[4]
	vmadn	dadef, vconst, vconst[0]

	# Scale up DzDy
	vmudn	dadyf, dadyf, vconst1[4]
	vmadh	dadyi, dadyi, vconst1[4]
	vmadn	dadyf, vconst, vconst[0]

	ssv	aMini[8],	 0(outp)	# Z
	ssv	aMinf[8],	 2(outp)	# Z, frac
	ssv	dadxi[8],	 4(outp)	# DzDx
	ssv	dadxf[8],	 6(outp)	# DzDx, frac
	ssv	dadei[8],	 8(outp)	# DzDe
	ssv	dadef[8],	10(outp)	# DzDe, frac
	ssv	dadyi[8],	12(outp)	# DzDy
	ssv	dadyf[8],	14(outp)	# DzDy, frac

	# now increment outp pointer so that next triangle output starts
	# at the correct place.

	addi 	outp, 	outp, 	16		# increment output pointer


ZBEDONE:

	# now clean up the output buffers

	jal	OutputClose

	# reload the return address that got hosed by the output close
	# NOTE(review): this load sits in the jal delay slot, i.e. it
	# runs after jal has written its link value. Assuming `return`
	# names the link register and OutputClose ends with `jr return`,
	# OutputClose goes straight back to the address saved in
	# RSP_L_0 and the jr below is never reached - confirm

	# delay slot
	lw	return,	RSP_L_0(zero)

	# go back to the processing loop

	jr	return
	nop	# delay slot

.end 	lineSetup

.unname	rdp_cmd
.unname	rdp_flg
.unname	temp

.name	temp,		$7
.name 	miny,		$3

.name	Adjf,		$v18
.name	Adji,		$v19


AttrBackup:

	# Subroutine called (jal) from the Vertical and
	# MostlyHorizontal code.
	# In:   temp = starting y value (set in the caller's delay slot)
	#       dxyH{i,f}, dade{i,f}, xH, xM, aMin, attrmask
	#       byte at (2+RSP_SCRATCH_OFFSET): 0 = vertical, 1 = horizontal
	# Out:  xH always adjusted, xM adjusted for vertical lines only,
	#       aMin{i,f} adjusted when attrmask > 0
	# Uses: temp, vtmp, Adjf, Adji

	# Back up the starting X and attribute values

	# now do the subpixel adjustment to the
	# xMid and xHigh values

	# the operation being effected is:
	# xH = xH - dx/dy [(yHigh & 0x03) << 14]
	# xM = xM - dx/dy [(yHigh & 0x03) << 14]

	# first we load in yHigh and then shift
	# left by 14 bits to get the fractional part
	# then we load it into a fractional vector
	# register

	# note that temp has already been loaded with
	# the correct value in the delay slot of the
	# jump that got us here

	# (NOTE(review): every other mtc2 in this file names an
	# element, e.g. vtmp[0]; this one presumably defaults to
	# element 0, which is the element read below - confirm)

	sll	temp,	temp,	14
	mtc2	temp,	vtmp

	# now we do an IF x F on this fractional
	# y shift with the high slope

	vmudl	Adjf,	dxyHf, 	vtmp[0]
	vmadm	Adji,	dxyHi,	vtmp[0]
	vmadn	Adjf,	vconst, vconst[0]

	# now we do a 32 bit subtract to adjust
	# xHigh and xMid to their pixel values

	vsubc	xHf,	xHf,	Adjf
	# rest of subtraction is shoved down to
	# delay slot below. doh.

	# now check if we're doing a horizontal or
	# vertical line. If Horizontal, then jump
	# to end

	lb	temp,	(2+RSP_SCRATCH_OFFSET)(zero)
	bgtz	temp,	XBackupDone

	# delay slot
	vsub	xHi,	xHi,	Adji

	# This is not done for horizontal lines
	vsubc	xMf,	xMf,	Adjf
	vsub	xMi,	xMi,	Adji

XBackupDone:

	blez	attrmask,	AttrBackupDone
	nop

	# do the actual attribute backup in here.

	# now we do an IF x F on this fractional
	# y shift with the attribute edge slopes (dade)

	vmudl	Adjf,	dadef, 	vtmp[0]
	vmadm	Adji,	dadei,	vtmp[0]
	vmadn	Adjf,	vconst, vconst[0]

	# now we do a 32 bit subtract to back up the
	# starting attribute values in aMin{i,f}

	vsubc	aMinf,	aMinf,	Adjf
	vsub	aMini,	aMini,	Adji

AttrBackupDone:

	jr return
	nop

.unname	temp
.unname	miny

.unname	Adjf
.unname Adji

.unname	attrmask