/* glsetup.s */

 # ########################### CLIP TEST #################################
 # clipAndSetup: entry label with an empty body.  No instructions appear
 # between .ent and .end here, so the label only marks a position.
                .ent    clipAndSetup
clipAndSetup:
                .end    clipAndSetup
 # ########################### END CLIP TEST #############################

/*
 * Element indices of the LOW, MID and HIGH edge slots inside an 8-element
 * vector register.  Suffix 1 uses elements 4, 6, 7 and suffix 2 uses
 * elements 0, 2, 3.  Use sites multiply the index by 2 to form the byte
 * element offset for ssv, e.g. DxDyf[(LOW1*2)].
 * NOTE(review): suffix 1 / 2 presumably mean first / second triangle - confirm.
 */
#define	LOW1		4
#define	MID1		6
#define	HIGH1		7
#define	LOW2		0
#define	MID2		2
#define	HIGH2		3

/*
 * X deltas of the three edges, collected in vL at the slots listed above.
 * vL itself is (vmax - vmid); the MID and HIGH slots are filled by vmov
 * from vM[4] and vH[4] in beginSetup.
 * NOTE(review): the inline remarks below name vHM and DY2 / DY1, which does
 * not match the Dy defines that follow - possibly stale, confirm.
 */
#define	Dx1_LOW		vL[4]	# vHM[5] is DY2_LOW
#define	Dx1_MID		vL[6]
#define	Dx1_HIGH	vL[7]
#define	Dx2_LOW		vL[0]	# vHM[1] is DY1_LOW
#define	Dx2_MID		vL[2]
#define	Dx2_HIGH	vL[3]

/* Y deltas of the three edges: element 5 (suffix 1) or 1 (suffix 2) of vL, vM, vH. */
#define	Dy1_LOW		vL[5]
#define	Dy1_MID		vM[5]
#define	Dy1_HIGH	vH[5]
#define	Dy2_LOW		vL[1]
#define	Dy2_MID		vM[1]
#define	Dy2_HIGH	vH[1]

/* Components of the reversed edge vectors (vM_rv = vmin - vmid, vH_rv = vmin - vmax). */
#define	Dx1_MID_rv	vM_rv[4]
#define	Dy1_HIGH_rv	vH_rv[5]
#define	Dx2_MID_rv	vM_rv[0]
#define	Dy2_HIGH_rv	vH_rv[1]
	
/*
 * Reciprocals of the edge Y deltas, stored at the LOW1 / MID1 / HIGH1 slots.
 * The f element is written by vrcp and the i element by the following vrcph.
 */
#define	invDy_LOWi	invDyi[4]
#define	invDy_LOWf	invDyf[4]
#define	invDy_MIDi	invDyi[6]
#define	invDy_MIDf	invDyf[6]
#define	invDy_HIGHi	invDyi[7]
#define	invDy_HIGHf	invDyf[7]
	
/*
 * Symbolic register names used by case_G_TRI2 and beginSetup.
 * Every name bound here is released again by the .unname list after
 * beginSetup.
 */

/* Scalar registers used as base addresses of the vertex records.           */
/* Suffix 1 and suffix 2 select the two triangles handled by beginSetup.     */
/* flat1p / flat2p start as copies of min1p / min2p and are the base used    */
/* for the attribute loads on the non-smooth shading path.                   */
.name	min1p,		$1
.name	mid1p,		$2
.name	max1p,		$3
.name	flat1p,		$4
.name	min2p,		$5
.name	mid2p,		$6
.name	max2p,		$7
.name	flat2p,		$8

/* Per-triangle draw flags: first the fetched cross product, then 0 / 1. */
.name	bsignr1,	$9
.name	bsignr2,	$10

/* Scalar work registers. */
.name	rendState,	$21
.name	rejMask,	$20
.name	tmp,		$19
.name	rdp_cmd,	$18
.name	rdp_flg,	$17
.name	xmin,		$16
.name	xmid,		$15
.name	zpos,		$14
.name	isrej1,		$13
.name	isrej2,		$12
	
/* Vector registers: vertex data, edge vectors and reciprocals. */
.name	vzero,	$v0
.name	vmin,	$v1
.name	vmid,	$v2
.name	vmax,	$v3
.name	vH,	$v4
.name	vM,	$v5
.name	vL,	$v6
.name	vH_rv,	$v7
.name	vM_rv,	$v8
.name	Dxi,	$v9
.name	Dxf,	$v10
.name	invri,	$v11
.name	invrf,	$v12
.name	invWi,	$v13
.name	invWf,	$v14

/* Vertex attribute vectors for the min / mid / max vertex; the f registers */
/* are the companions loaded from the ZSF fields and cleared with vxor.     */
.name	amin,	$v15
.name	aminf,	$v16
.name	amid,	$v17
.name	amidf,	$v18
.name	amax,	$v19
.name	amaxf,	$v20

.name	vtmp,	$v29	/* variables for the y sort */
.name	vminy,	$v28
.name	vmidy,	$v27
.name	vmaxy,	$v26
.name	v2miny,	$v25
.name	v2min,	$v24
.name	v2max,	$v23
.name	vtmp2,	$v22

/*
 * Additional aliases for vector registers $v17-$v28.  They overlap the
 * .name bindings above ($v22-$v28 are the y sort variables, $v17-$v20 are
 * amid, amidf, amax, amaxf) and they overlap each other, so one register
 * carries several names.  DaDyf / DaDyi share $v24 / $v23 with
 * invDyf / invDyi.
 * NOTE(review): each group is presumably live in a different phase of
 * beginSetup - confirm before reordering code that uses them.
 */

/* Texture path: nearWi / nearWf and stwi / stwf work registers. */
#define	nearWi  $v28
#define	nearWf  $v27
#define	stwi	$v26
#define	stwf	$v25

/* Cross product r read back with vsar (r2i / r2f are not referenced in the visible code). */
#define	ri	$v28
#define	rf	$v27
#define	r2i	$v26
#define	r2f	$v25

/* Attribute deltas (DaM*, DaH*), edge reciprocals and slopes, and the */
/* DaDe / DaDx / DaDy results that are stored to the output buffer.     */
#define	DaMf	$v28
#define	DaMi	$v27
#define	DaHf	$v26
#define	DaHi	$v25
#define	invDyf	$v24
#define	invDyi	$v23
#define	DxDyf	$v22
#define	DxDyi	$v21
#define	DaDei	$v20
#define	DaDef	$v19
#define	DaDxf	$v18
#define	DaDxi	$v17
#define	DaDyf	$v24
#define	DaDyi	$v23

		.ent	case_G_TRI2
 #
 # case_G_TRI2: decode the vertex references of a two-triangle command
 # into vertex record pointers.
 #
 #   Reads the 8 packed bytes at dinp-8 (lpv) and single bytes at
 #   dinp-3, dinp-2 (and dinp-1 when PERSPTXTR is not defined).
 #   The packed bytes are scaled with a vmudn / vmadl pair using the two
 #   halfwords loaded from RSP_STATE_VPTR_COEFF; the results are read back
 #   with mfc2 into min2p, mid2p, max2p (and max1p under PERSPTXTR).
 #   The byte indices loaded with lbu are turned into pointers as
 #     PERSPTXTR   : index * 10 + RSP_POINTS_OFFSET
 #     otherwise   : index *  8 + RSP_POINTS_OFFSET
 #   amidf, amaxf and aminf are cleared (vxor of a register with itself)
 #   and vtmp2 is set to all ones (vnxor of a register with itself).
 #
 #   There is no branch at the end: execution continues into beginSetup,
 #   which follows immediately.
 #   dinp, rsp_state, vconst and the RSP_* constants are defined outside
 #   this view.  The numbered markers count the scalar instructions.
 #
		# ----,min2,mid2,max2
		# ----,min1,mid1,max1
case_G_TRI2:
#ifdef	PERSPTXTR
/*1*/	llv	vtmp[0], RSP_STATE_VPTR_COEFF(rsp_state)
			vnxor	vtmp2, vconst, vconst
/*2*/	lpv	vmid[0], 0-8(dinp)
			vxor	amidf, vconst, vconst
/*3*/	lbu	min1p,   0-3(dinp)
			vxor	amaxf, vconst, vconst
/*4*/	lbu	mid1p,   0-2(dinp)
			vxor	aminf, vconst, vconst
/*5*/	sll	tmp,   min1p, 2
			vmudn	vtmp2, vtmp2, vtmp[1]
/*6*/	addu	tmp,   min1p, tmp
			vmadl	vmid,  vmid,  vtmp[0]
/*7*/	sll	min1p, tmp,   1
/*8*/	addiu	min1p, min1p, RSP_POINTS_OFFSET	# avoid a pipeline bubble
/*9*/	sll	tmp,   mid1p, 2
/*10*/	mfc2	min2p, vmid[2]	# vmid computation is finished
/*11*/	mfc2	max2p, vmid[6]
/*12*/	addu	tmp,   mid1p, tmp
/*13*/	sll	mid1p, tmp,   1
/*14*/	mfc2	max1p, vmid[14]
/*15*/	mfc2	mid2p, vmid[4]
/*16*/	addiu	mid1p, mid1p, RSP_POINTS_OFFSET	# avoid a pipeline bubble

#else
/*1*/	llv	vtmp[0], RSP_STATE_VPTR_COEFF(rsp_state)
			vnxor	vtmp2, vconst, vconst
/*2*/	lpv	vmid[0], 0-8(dinp)
			vxor	amidf, vconst, vconst
/*3*/	lbu	min1p, 0-3(dinp)
			vxor	amaxf, vconst, vconst
/*4*/	lbu	mid1p, 0-2(dinp)
			vxor	aminf, vconst, vconst
/*5*/	lbu	max1p, 0-1(dinp)
			vmudn	vtmp2, vtmp2, vtmp[1]
/*6*/	sll	min1p, min1p, 3
			vmadl	vmid,  vmid,  vtmp[0]
/*7V*/	sll	mid1p, mid1p, 3
/*8V*/	sll	max1p, max1p, 3
/*9V*/	addi	min1p, min1p, RSP_POINTS_OFFSET
/*10V*/	mfc2	min2p, vmid[2]	# vmid computation is finished
/*11V*/	mfc2	max2p, vmid[6]
/*12V*/	addi	mid1p, mid1p, RSP_POINTS_OFFSET	# avoid a pipeline bubble
/*13V*/	addi	max1p, max1p, RSP_POINTS_OFFSET	# avoid a pipeline bubble
/*14V*/	mfc2	mid2p, vmid[4]
#endif		
		.end	case_G_TRI2
	
		.ent	beginSetup	
 #
 # beginSetup: triangle setup for up to two triangles.
 #
 #   Reached by fall-through from case_G_TRI2 (other entry points, if any,
 #   are outside this view) with vertex record pointers in min1p, mid1p,
 #   max1p (triangle 1) and min2p, mid2p, max2p (triangle 2).
 #
 #   Outline
 #     1. Load the XS word of all six vertices.  Triangle 2 occupies
 #        elements 0-1 and triangle 1 elements 4-5 of vmin / vmid / vmax;
 #        elements 2 and 6 carry values moved over from vmid (labelled
 #        ptr in the comments below).
 #     2. Compute the cross product times -1 for both triangles at once,
 #        apply the G_CULL_BACK test and reject a triangle when any of its
 #        XS values equals 0x7fff.
 #     3. Sort the three vertices by y in parallel (vlt / vge / vmrg).
 #     4. Draw1stTri: build the triangle command at outp.  Edge slopes are
 #        always written; the attribute block is followed by a further
 #        block when rdp_cmd & G_RDP_TRI_TXTR_MASK is non-zero and by the
 #        Z values when rdp_cmd & G_RDP_TRI_ZBUFF_MASK is non-zero.
 #     5. For the second triangle the lower-half data (elements 0-3) is
 #        copied to the upper half and Draw1stTri runs once more with
 #        bsignr2 cleared, so the next pass leaves through OutputClose.
 #
 #   Leaves through OutputClose or GfxDone.  These labels, and outp,
 #   return, rsp_state, vconst, vconst1, zero and the RSP_* / G_* constants
 #   are defined outside this view.
 #
 #   Scalar and vector instructions are interleaved by hand and several
 #   vector sequences depend on the accumulator (vmudX / vmadX / vsar),
 #   so instruction order must not be changed casually.
 #
beginSetup:
	
 #-YASU
 #  
 #  Load the X0,Y0,X1,Y1 values into elements [0|1|4|5] of the vector
 #  registers
 #  
 #-YASU
					vxor	vzero, vzero, vzero
	llv	vmin[0], RSP_PTS_XS(min2p)
					vmov	vmin[2], vmid[1]
	llv	vmin[8], RSP_PTS_XS(min1p)
					vmov	vmin[6], vmid[5]
	llv	vmax[0], RSP_PTS_XS(max2p)
					vmov	vmax[2], vmid[3]
	llv	vmax[8], RSP_PTS_XS(max1p)
					vmov	vmax[6], vmid[7]
	llv	vmid[0], RSP_PTS_XS(mid2p)
	llv	vmid[8], RSP_PTS_XS(mid1p)
					vadd	vtmp,  vzero, vmin[0h]
			lsv	vtmp[2],  RSP_PTS_XS(mid2p)

	
	
 #-YASU
 #  
 #  Compute the cross product and perform the back-face test
 #  
 #    Note that what is computed here is the cross product multiplied by -1.
 #    Therefore, if the value is positive or 0 the triangle is judged to be
 #    back-facing or to have no drawable area, and it is rejected.
 #    As a result the reject for cross product == 0 is effective only when
 #    G_CULL_BACK is set and is not checked when drawing both faces; that
 #    case is thought to be used less often than G_CULL_BACK, so this
 #    shortcut is taken for speed.
 #  
 #-YASU
	/* Cross product computation (uses clamped values, but the sign test is still possible) */
					vadd	vminy, vzero, vmin[1h]
			lsv	vtmp[4],  RSP_PTS_XS(max2p)
	vsub	vH,    vmax, vmin
			lsv	vtmp[10], RSP_PTS_XS(mid1p)
					vadd	vmidy, vzero, vmid[1h]
			lsv	vtmp[12], RSP_PTS_XS(max1p)
	vsub	vM_rv, vmin, vmid
			lw	rendState, RSP_STATE_RENDER(rsp_state)	
	vsub	vM,    vmid, vmin
			addi	flat1p, min1p, 0
					vadd	vmaxy, vzero, vmax[1h]
			addi	flat2p, min2p, 0
		# Parallel y sort
		vlt	v2miny, vminy , vmidy		# compare vminy with vmidy
			andi	rejMask,   rendState, G_CULL_BACK
		# Reject check: XS=0x7fff marks an off-screen point
		vsubc	vtmp, vtmp, vconst1[0]	# vconst1[0] = 0x7fff
			sll	rejMask,   rejMask, 18
	vmudh	vM_rv, vM_rv, vH[0h]	# - HLx * MLy
		cfc2	tmp,  $vco	# fetch the result
	vmadh	vM_rv, vH,    vM[0h]	# + MLx * HLy  vLM[1|5]=cross product*(-1)
		ctc2	zero, $vco	# restore vco
		vmrg	v2min , vmin  , vmid		# smaller one goes to v2min
	nor	tmp, tmp, zero		# invert the reject test result
		vmrg	vmidy , vmidy , vminy		# larger one goes to v2mid
	sltiu	isrej1, tmp, 0x1000	# >=0x1000 if any of e4-e7 is 0x7fff
		vmrg	vmid  , vmid  , vmin
	andi	isrej2, tmp, 0x0f00
		vge	vmaxy , v2miny, vmaxy		# compare v2miny with vmaxy
	mfc2	bsignr1, vM_rv[10]	# get cross product*(-1) (sign-extended)
		vmrg	v2max , v2min , vmax		# larger one goes to v2max
	mfc2	bsignr2, vM_rv[2]	# back-facing if bsignr is positive
		vmrg	vmin  , vmax  , v2min		# smaller one goes to vmin
	# 
	# rejMask :	if G_CULL_BACK = 0 then rejMask = 0x00000000; 
	#		if G_CULL_BACK = 1 then rejMask = 0x80000000; 
	# With r as the cross product value, sltu result, rejMask, r
	# performs the reject test (result=0 means Reject).
	#
	# G_CULL_BACK = 0x00002000
	#
	# NOTE(review): with rejMask = 0 the sltu gives 0 only when r == 0,
	# so r == 0 looks rejected in both modes; this seems to differ from
	# the remark in the block comment above - confirm.
	#
	/* Back-face test */
	sltiu	isrej2, isrej2, 0x0100	# >=0x0100 if any of e0-e3 is 0x7fff
/* V */
	sltu	bsignr1,   rejMask, bsignr1	# if bsignr is 0, no drawing is needed
		vlt	vmidy , vmidy , vmaxy	# compare v2midy with v2maxy
	sltu	bsignr2,   rejMask, bsignr2
		vmrg	vmax  , v2max , vmid	# larger one goes to vmax
	and	bsignr1,   bsignr1, isrej1	# do not draw if off-screen
		vmrg	vmid  , vmid  , v2max	# smaller one goes to vmid
	/* If the 1st triangle is not drawn, JUMP to the 2nd */
	beq	bsignr1, zero, Draw2ndTri
	and	bsignr2,   bsignr2, isrej2	# do not draw if off-screen (Delay)
	
 #
 # Data prepared up to this point
 #	bsignr1,2		for the reject test
 #	vmin[0|1|2],[4|5|6]	data of vertex L (x,y,ptr)
 #	vmid[0|1|2],[4|5|6]	data of vertex M (x,y,ptr)
 #	vmax[0|1|2],[4|5|6]	data of vertex H (x,y,ptr)
 #			
Draw1stTri:	
	# Get the vectors along the triangle edges
	vsub	vH,    vmax,  vmin
			# fetch the y sort result
			mfc2	min1p, vmin[12]	
	vsub	vM,    vmid,  vmin
			mfc2	max1p, vmax[12]	
	vsub	vM_rv, vmin,  vmid		# [0|1|.|.|4|5|.|.] are the valid values
			mfc2	mid1p, vmid[12]	
	vsub	vH_rv, vmin, vmax
			andi	bsignr1, rendState, G_SHADING_SMOOTH
	vsub	vL,    vmax, vmid		# put DX*_LOW into vL
			bne	bsignr1, zero, smoothShade1
	# Recompute the cross product
	vmudh	vtmp,  vM,    vH[0h]		# + HLx * MLy (Delay slot)
 #----------
	# Non-smooth path: amin, amid and amax all load from flat1p
			luv	amin[0], RSP_PTS_R_NX(flat1p)
	vmadh	vtmp,  vH, vM_rv[0h]		# - MLx * HLy
			luv	amid[0], RSP_PTS_R_NX(flat1p)
	vsar	ri, ri, ri[0]			# r[1|5] = cross product * (-1)
			luv	amax[0], RSP_PTS_R_NX(flat1p)
	vsar	rf, rf, rf[1]
			j	flatShade1
 #----------
smoothShade1:	
	vmov	Dx1_MID,   vM[4]		# one instruction doing two jobs
 #----------
	# Smooth path: each vertex loads from its own record
			luv	amin[0], RSP_PTS_R_NX(min1p)
	vmadh	vtmp,  vH, vM_rv[0h]		# - MLx * HLy
			luv	amid[0], RSP_PTS_R_NX(mid1p)
	vsar	ri, ri, ri[0]			# r[1|5] = cross product * (-1)
			luv	amax[0], RSP_PTS_R_NX(max1p)
	vsar	rf, rf, rf[1]
 #----------
flatShade1:
	andi	tmp, rendState, G_TEXTURE_ENABLE
			vmudm	amin, amin, vconst[7]	# for the values loaded by luv,
	bne	tmp, zero, useTexture
			lsv	invWf[0],  RSP_PTS_INVW_FRAC(min1p) # assist
				# doing the above makes no difference to speed in the noTxtr case
			vmudm	amid, amid, vconst[7]	# align the fixed point
			lsv	amin[14], RSP_PTS_ZS(min1p)
			vmudm	amax, amax, vconst[7]
			lsv	amid[14], RSP_PTS_ZS(mid1p)
	vmov	Dx1_HIGH,  vH[4]
			lsv	amax[14], RSP_PTS_ZS(max1p)
	# Compute 1/r (derive 1/r from r)
	vrcph	invri[5], ri[5]
			lsv	aminf[14], RSP_PTS_ZSF(min1p)
	vrcpl	invrf[5], rf[5]
			lsv	amidf[14], RSP_PTS_ZSF(mid1p)
	vrcph	invri[5], ri[1]
			lsv	amaxf[14], RSP_PTS_ZSF(max1p)
	vmudm	Dxi, vL,     vconst[4]		# make S15.16
			addi	outp, outp, 32
	vmadn	Dxf, vconst, vconst[0]
			lh	return, GFXDONE(zero)
	vmudn	invrf, invrf, vconst[2]
			mfc2	rdp_flg, ri[9]
	vmadh	invri, invri, vconst[2]
			lb	rdp_cmd, RSP_STATE_TRI(rsp_state)
	vrcp	invDy_LOWf,   Dy1_LOW		# 1.0/Ldy (delay slot)
			lb	tmp, RSP_STATE_TEX_TILE(rsp_state)
	vrcph	invDy_LOWi,   vconst[0]
			lw	zpos, RSP_PTS_ZS(min1p)
	vrcp	invDy_MIDf,   Dy1_MID		# 1.0/Mdy
		j	noTexture
	vrcph	invDy_MIDi,   vconst[0]
	
useTexture:	
	lsv	invWf[8],  RSP_PTS_INVW_FRAC(mid1p)
			vmudm	amid, amid, vconst[7]	# align the fixed point
	lsv	invWf[10], RSP_PTS_INVW_FRAC(max1p)
			vmudm	amax, amax, vconst[7]
	lsv	invWi[0],  RSP_PTS_INVW_INT(min1p)
			vmov	Dx1_HIGH, vH[4]
	lsv	invWi[8],  RSP_PTS_INVW_INT(mid1p)
			vadd	stwi, vzero, vconst1[0]
	lsv	invWi[10], RSP_PTS_INVW_INT(max1p)	# [0|4|5] min,mid,max
			vrcph	invri[5], ri[5]
	llv	stwi[0],  RSP_PTS_S(min1p) # min,mid go into stwi
			vrcpl	invrf[5], rf[5]
	llv	stwi[8],  RSP_PTS_S(mid1p)
			vrcph	invri[5], ri[1]
	llv	amax[8],  RSP_PTS_S(max1p)
		vsubc	vtmp,   invWf,  invWf[4]
	lsv	amid[14], RSP_PTS_ZS(mid1p)
		vge	nearWi, invWi,  invWi[4]
	lsv	amax[14], RSP_PTS_ZS(max1p)
		vmrg	nearWf, invWf,  invWf[4]
	lsv	amidf[14], RSP_PTS_ZSF(mid1p)
			vmudm	Dxi, vL,     vconst[4]		# make S15.16
	lsv	amaxf[14], RSP_PTS_ZSF(max1p)
			vmadn	Dxf, vconst, vconst[0]
	/*S or V delay */	vmov	amax[6], vconst1[0]
		vsubc	vtmp,   nearWf, invWf[5]
		vge	nearWi, nearWi, invWi[5]
		vmrg	nearWf, nearWf, invWf[5]
	vmudn	invrf, invrf, vconst[2]
	vmadh	invri, invri, vconst[2]
		vrcph	vtmp[0],  nearWi[0]
		vrcpl	invWf[3], nearWf[0]
		vrcph	invWi[3], vconst[0]	# [0|4|5] each 1/w value  [3] nW value
	vrcp	invDy_LOWf,   Dy1_LOW		# 1.0/Ldy (delay slot)
			lb	rdp_cmd, RSP_STATE_TRI(rsp_state)
	vrcph	invDy_LOWi,   vconst[0]
	vmudl	vtmp,  invWf, invWf[3]
	vmadm	vtmp,  invWi, invWf[3]
	vmadn	invWf, invWf, invWi[3]
		lh	return, GFXDONE(zero)
	vmadh	invWi, invWi, invWi[3]
			mfc2	rdp_flg, invri[9]
	vrcp	invDy_MIDf,   Dy1_MID		# 1.0/Mdy
			lb	tmp, RSP_STATE_TEX_TILE(rsp_state)
	vrcph	invDy_MIDi,   vconst[0]
		addi	xmin, zero, 0x8f
	vmudm	vtmp,   stwi,   invWf[0h]
			lw	zpos, RSP_PTS_ZS(min1p)
	vmadh	stwi,   stwi,   invWi[0h]
		ctc2	xmin, $vcc
	vmadn	stwf,   vconst, vconst[0]
		addi	outp, outp, 32	
	vmudm	vtmp,   amax,   invWf[5]
	vmadh	nearWi, amax,   invWi[5]
		sdv	stwi[0], 0(outp)
	vmadn	nearWf, vconst, vconst[0]	
 #		sdv	stwf[0], 8(outp)	# code does not fit
	vmrg	amid,  amid,  stwi
		ldv	amin[8], 0(outp)
	vmrg	amidf, amidf, stwf
 #		ldv	aminf[8], 8(outp)	# code does not fit
	vmrg	amax,  amax,  nearWi
		lsv	aminf[14], RSP_PTS_ZSF(min1p)
	vmrg	amaxf, amaxf, nearWf
		lsv	amin[14], RSP_PTS_ZS(min1p)
noTexture:	
	# Both shading paths join here
	vrcp	invDy_HIGHf,  Dy1_HIGH		# 1.0/Hdy
			lh	xmin, RSP_PTS_XS(min1p)
	vrcph	invDy_HIGHi,  vconst[0]
			lh	xmid, RSP_PTS_XS(mid1p)
	vsubc	DaMf, amidf, aminf
			ori	rdp_cmd, rdp_cmd, G_TRI_SHADE	
	vsub	DaMi, amid,  amin
			andi	rdp_flg, 0x80
	vsubc	DaHf, amaxf, aminf
			or	rdp_flg, rdp_flg, tmp
	vsub	DaHi, amax,  amin
			sb	rdp_cmd, 0-32(outp)
		vmudl	invDyf, invDyf, vconst1[2]
			sb	rdp_flg, 1-32(outp)
		vmadm	invDyi, invDyi, vconst1[2]
			sll	xmin, xmin, 14	
  		vmadn	invDyf, vconst, vconst[0]
			sll	xmid, xmid, 14	
	vmudh	vM,    vM,    vconst[1h]	# multiply e4-e7 by vconst[5]
			sw	xmid,  8-32(outp)	# output xLow
	vmudh	vH,    vH,    vconst[1h]	# rather reckless code...
			sw	xmin, 16-32(outp)	# output xHigh
	vmudh	vM_rv, vM_rv, vconst[1h]
			sw	xmin, 24-32(outp)	# output xMid
	vmudh	vH_rv, vH_rv, vconst[1h]
			ssv	vmax[10],  2-32(outp)
		
		vmudl	vtmp,  invDyf, Dxf		# Ldx / Ldy
			ssv	vmid[10],  4-32(outp)
		vmadm	vtmp,  invDyi, Dxf		# Mdx / Mdy
			ssv	vmin[10],  6-32(outp)
		vmadn	DxDyf, invDyf, Dxi		# Hdx / Hdy
			sdv	vzero[0], 16(outp)	# clear the fractional part
		vmadh	DxDyi, invDyi, Dxi
			sdv	vzero[0], 40(outp)

	vmudl	vtmp,  DaHf, invDy_HIGHf
			sdv	vzero[0], 56(outp)
	vmadm	vtmp,  DaHi, invDy_HIGHf
			sdv	amin[0],   0(outp)	
	vmadn	DaDef, DaHf, invDy_HIGHi
		addi	flat1p, flat2p, 0		# set the flat pointer
	vmadh	DaDei, DaHi, invDy_HIGHi
			ssv	DxDyf[(LOW1*2)],   14-32(outp)
		vcr	DxDyi, DxDyi, vconst1[6]
			ssv	DxDyf[(HIGH1*2)],  22-32(outp)
	vmudn	vtmp,  DaHf, vM[5]
			ssv	DxDyf[(MID1*2)],   30-32(outp)
	vmadh	vtmp,  DaHi, vM[5]
			sdv	DaDef[0], 48(outp)
	vmadn	vtmp,  DaMf, vH_rv[5]
			sdv	DaDei[0], 32(outp)
	vmadh	vtmp,  DaMi, vH_rv[5]
			ssv	DxDyi[(LOW1*2)],   12-32(outp)
	vsar	DaDxf, vconst, vconst[1]
			ssv	DxDyi[(HIGH1*2)],  20-32(outp)
	vsar	DaDxi, vconst, vconst[0]
			ssv	DxDyi[(MID1*2)],   28-32(outp)

		# Start computing the Z-buffer data (1)
		vmudn	vtmp,  DaMf, vH[4]
			addi	outp, outp, 64	# for the shade parameters
		vmadh	vtmp,  DaMi, vH[4]
			sdv	DaDei[8], 32(outp)
		vmadn	vtmp,  DaHf, vM_rv[4]
			sdv	DaDef[8], 48(outp)
		vmadh	vtmp,  DaHi, vM_rv[4]
#ifdef	PERSPTXTR
		sdv	aminf[0], 16(outp)	# init fracs
#else
			sdv	vzero[0], 16(outp)	# init fracs
#endif
		vsar	DaDyf, vconst, vconst[1]
#ifndef	PERSPTXTR
			sdv	vzero[0], 40(outp)	# DyAtt
#endif
		vsar	DaDyi, vconst, vconst[0]
#ifndef	PERSPTXTR
			sdv	vzero[0], 56(outp)	# DyAtt
#endif
	vmudl	vtmp,   DaDxf, invrf[5]
			sdv	amin[8],   0(outp)
	vmadm	vtmp,   DaDxi, invrf[5]
	# Fetch the data for the next triangle
	# (store elements 0-3 of vmin and reload them into elements 4-7)
	sdv	vmin[0], 8(outp)
	vmadn	DaDxf,  DaDxf, invri[5]
	ldv	vmin[8], 8(outp)
	vmadh	DaDxi,  DaDxi, invri[5]
		# Start computing the Z-buffer data (2)
	andi	tmp,     rdp_cmd, G_RDP_TRI_TXTR_MASK
		vmudn	DaDef, DaDef,  vconst1[4]
	andi	rdp_flg, rdp_cmd, G_RDP_TRI_ZBUFF_MASK
		vmadh	DaDei, DaDei,  vconst1[4]
	sll	zpos, zpos, 5
		vmudl	vtmp,   DaDyf,  invrf[5]
			sdv	DaDxf[0], 24-64(outp)
		vmadm	vtmp,   DaDyi,  invrf[5]
			sdv	DaDxi[0],  8-64(outp)
		vmadn	DaDyf,  DaDyf,  invri[5]
			blez	tmp, outputZBUF
		vmadh	DaDyi,  DaDyi,  invri[5]
		
		# Reached only when the G_RDP_TRI_TXTR_MASK bits of rdp_cmd are set
		addi	outp, outp, 64
		sdv	DaDxf[8], 24-64(outp)
		sdv	DaDxi[8],  8-64(outp)	
#ifdef	PERSPTXTR
		sdv	DaDyf[8], 56-64(outp)
		sdv	DaDyi[8], 40-64(outp)
#endif
outputZBUF:
		vmudn	DaDxf, DaDxf, vconst1[4]
	blez	rdp_flg, End1stTri_NoZ
	vmov	vmax[4], vmax[0]		# (delay)
		vmadh	DaDxi, DaDxi, vconst1[4]
	sw	zpos, 0(outp)			# output Z /YASU
		vmudn	DaDyf, DaDyf, vconst1[4]
	addi	outp, outp, 16			# increment output pointer
		vmadh	DaDyi, DaDyi, vconst1[4]
	ssv	DaDef[14],   (10-16)(outp)
		vmov	vmax[5], vmax[1]
	ssv	DaDei[14],   ( 8-16)(outp)
		vmov	vmax[6], vmax[2]
	ssv	DaDxi[14],   ( 4-16)(outp)
		vmov	vmid[4], vmid[0]
	ssv	DaDxf[14],   ( 6-16)(outp)	
		vmov	vmid[5], vmid[1]
	ssv	DaDyf[14],   (14-16)(outp)	
		vmov	vmid[6], vmid[2]
	beq	bsignr2, zero, OutputClose	# return has already been set to gfxDone
	ssv	DaDyi[14],   (12-16)(outp)
	# Run the setup again for the second triangle; bsignr2 is cleared in
	# the delay slot so that the next pass leaves through OutputClose.
	j	Draw1stTri
		and	bsignr2, zero, zero	# (delay)
	
	# Entered when the first triangle is not drawn (see the beq before
	# Draw1stTri).  Elements 0-2 of vmin / vmid / vmax are copied to
	# elements 4-6; vmin and vmid go through memory at outp.
Draw2ndTri:
	addi	flat1p, flat2p, 0
		vmov	vmax[4], vmax[0]
	beq	bsignr2, zero, GfxDone
		sdv	vmin[0], 0(outp)
		ldv	vmin[8], 0(outp)
End1stTri_NoZ:
		vmov	vmax[5], vmax[1]	# 1
	beq	bsignr2, zero, OutputClose	# return has already been set to gfxDone
		sdv	vmid[0], 0(outp)	# 2
		vmov	vmax[6], vmax[2]	# 3
		ldv	vmid[8], 0(outp)
		j	Draw1stTri		# 4
		and	bsignr2, zero, zero	# 5
		.end	beginSetup

/*
 * Release the register names and register alias macros used by
 * case_G_TRI2 and beginSetup.
 * The element-index macros defined before the .name list (LOW1 ... HIGH2,
 * Dx*, Dy*, *_rv, invDy_*) are not undefined in this list.
 */
.unname	min1p
.unname	mid1p
.unname	max1p
.unname	flat1p
.unname	min2p
.unname	mid2p
.unname	max2p
.unname	flat2p
	
.unname	bsignr1
.unname	bsignr2

.unname	rendState
.unname	rejMask
.unname	tmp
.unname	rdp_cmd
.unname	rdp_flg
.unname	xmin
.unname	xmid
.unname	zpos
.unname	isrej1
.unname	isrej2
	
.unname	vzero
.unname	vmin
.unname	vmid
.unname	vmax
.unname	vH
.unname	vM
.unname	vL
.unname	vH_rv
.unname	vM_rv
.unname	Dxi
.unname	Dxf
.unname	invri
.unname	invrf
.unname	invWi
.unname	invWf

.unname	amin
.unname	aminf
.unname	amid
.unname	amidf
.unname	amax
.unname	amaxf

.unname	vtmp
.unname	vminy
.unname	vmidy
.unname	vmaxy
.unname	v2miny
.unname	v2min
.unname	v2max
.unname	vtmp2

/* Texture path aliases */
#undef	nearWi 
#undef	nearWf 
#undef	stwi
#undef	stwf

/* Cross product aliases */
#undef	ri
#undef	rf
#undef	r2i
#undef	r2f

/* Attribute delta, reciprocal and slope aliases */
#undef	DaMf
#undef	DaMi
#undef	DaHf
#undef	DaHi
#undef	invDyf
#undef	invDyi
#undef	DxDyf
#undef	DxDyi
#undef	DaDei
#undef	DaDef
#undef	DaDxf
#undef	DaDxi
#undef	DaDyf
#undef	DaDyi