/* glxsetup.s */

	/****** NOTE:
	 ******
	 ******	This code is the optimized version for HARDWARE 2!
	 ******
	 ****** It won't run (100%) on hardware 1.
	 ******
	 ******/
	
 ##########################################################################
 #
 # Triangle Setup Routine.
 # When entering this code we have a points buffer full of points,
 # and registers r1, r2, r3 point to the three vertices of a triangle.
 #
 ##########################################################################
	
#ifdef SETUP_ALONE
#include <rsp.h>
#include "mbi.h"

		.text	beginSetup
	
#include "gdmem.h"
#include "gfx_regs.h"
#endif	

 # ########################### CLIP TEST #################################
/*
 * Register names for the clip test.  minp/midp/maxp are the three vertex
 * pointers handed in by the caller (the file header calls them r1, r2, r3).
 */
.name   minp,           $1
.name   midp,           $2
.name   maxp,           $3
.name   tmp,            $8	# scratch: clip code of midp
.name   tmp2,           $9	# scratch: CLIPSWITCH byte, then clip code of minp
.name   ccor,           $11     # OR of all points' clip codes
.name   ccand,          $12     # AND of all points' clip codes
.name	rendState,	$13	# not un-named in this section; loaded and used by beginSetup

                .ent    clipAndSetup
/*
 * clipAndSetup: trivial-reject / clip dispatch in front of triangle setup.
 *
 * In:    minp, midp, maxp = the three vertices (each has a halfword clip
 *        code at RSP_PTS_CC).  rsp_state is read only under CLIPSWITCH.
 * Exits: - CLIPSWITCH only: straight to beginSetup when the signed byte at
 *          RSP_STATE_RENDER+1 is non-negative (clip test skipped).
 *        - SetupReject (RETGFX1) or GfxDone when the AND of the three clip
 *          codes has any bit left after the reject mask.
 *        - startClip when the OR of the three clip codes has any bit
 *          under 0x4343.
 *        - otherwise falls through into beginSetup.
 * Uses:  $8, $9, $11, $12.
 *
 * The final bne has no instruction after it inside this routine, so its
 * delay slot is the first instruction of beginSetup (see the note below).
 */
clipAndSetup:
 # ########################### CLIP TEST #################################
#ifdef	CLIPSWITCH
	lb	tmp2, (RSP_STATE_RENDER+1)(rsp_state)	# signed byte; tested by the bgez below
#endif	
        lh      ccor, (RSP_PTS_CC)(maxp)	# or Clip Codes together &
        lh      tmp, (RSP_PTS_CC)(midp)	# and Clip Codes together
#ifdef	CLIPSWITCH
	bgez	tmp2, beginSetup		# byte >= 0: skip the clip test entirely
#endif
        lh      tmp2, (RSP_PTS_CC)(minp)	# third clip code (delay slot of the bgez under CLIPSWITCH)
	and	ccand, ccor, tmp		# ccand = cc(max) & cc(mid)
	or      ccor, ccor, tmp                 # ccor  = cc(max) | cc(mid)
	and	ccand, ccand, tmp2		# ccand &= cc(min)
#ifdef NEAR_CLIP_OFF
	andi	ccand, ccand, 0x7030		# only see reject + xyz, - xy
#else /* NEAR_CLIP_OFF */
	andi	ccand, ccand, 0x7070		# only see reject +/- xyz
#endif /* NEAR_CLIP_OFF */
#ifdef	RETGFX1
	/* handling that accompanies the change from return_save to gfx1 */
	bne	ccand, zero, SetupReject
#else
	bne	ccand, zero, GfxDone		# Trivial rejection ?
#endif
	or      ccor, ccor, tmp2                # delay slot: ccor |= cc(min)
 ### BRANCH OCCURS TO GfxDone: IF TRIVIALLY REJECTED

 	andi	ccor, ccor, 0x4343		# only see clip/accept +/- xyz
	bne     ccor, zero, startClip           # if ccor is 0, no clipping

 ### JUMP OCCURS to doClip or startClip: IF clipping is necessary
 ### NOTE: delay slot is first instruction of beginSetup:
        
                .end    clipAndSetup

.unname ccor
.unname ccand
.unname minp
.unname midp
.unname maxp
.unname tmp
.unname tmp2

 # ########################### END CLIP TEST #############################

/*
 * Element indices into the edge vectors (EDel, invEDel*, DxXDy*, xi/xf).
 * Three edges, each an (x, y) pair.  In beginSetup EDel is filled as
 * LOW = max - mid, MID = mid - min, HIGH = max - min.
 * Vector ALU ops take these as element numbers; lsv/ssv take byte offsets,
 * which is why the code writes (LOWX*2) etc. for loads and stores.
 */
#define	LOWX	0	/* used to index elements of edge vectors */
#define	LOWY	1
#define MIDX	2
#define MIDY	3
#define HIGHX	4
#define HIGHY	5

/* scalar registers: */
.name	minp,		$1	# vertex pointers; re-assigned by the Y-sort in beginSetup
.name	midp,		$2
.name	maxp,		$3
.name	miny, 		$9	# screen Y of each vertex (lh from RSP_PTS_YS)

.name	tmp,		$7	# general scratch
.name	flatp,		$4	# vertex whose RSP_PTS_R_NX is read for flat shading; set by the caller
.name	rdp_cmd,	$5	# RDP command byte written to 0(outp)
.name	rdp_flg,	$6	# flag byte written to 1(outp); later reused for the ZBUFF test
.name	dscratchp,	$8	# = RSP_SETUP_TMP_OFFSET, base of the scratch area
.name	midy, 		$10
.name	maxy, 		$11
.name	negR, 		$12	# RSP_YSORT_NEG table entry: > 0 means negate r

/* these are "global", used for both edge and attribute setup */
.name	DxXDyi,		$v0	# edge slopes dx/dy, integer part
.name	DxXDyf,		$v1	# edge slopes dx/dy, fraction part
.name	yf,		$v2	# not referenced in the code visible in this file section
.name	xHighf,		$v3	# not referenced in the code visible in this file section
.name	EDel,		$v4	# edge deltas, indexed with LOWX..HIGHY
.name	invri,		$v27	# read as invri[3] after the NewtonDiv call; presumably written there
.name	invrf,		$v26	# read as invrf[3] after the NewtonDiv call; presumably written there

/* these registers are dynamic, allocated and released as they are used */
.name	ri,		$v29	# r read back from the accumulator (vsar), one part
.name	rf,		$v28	# r read back from the accumulator (vsar), other part
.name	Hd,		$v9	# vmax - vmin
.name	Md,		$v10	# vmid - vmin
.name	td,		$v12	# vmin - vmid (negated Md, for the second product term)
.name	vmin, 		$v13	# [x, y] screen coordinates of each vertex
.name	vmid, 		$v14	
.name	vmax, 		$v15	
	
.name	frontrej, $14	# sign bit clear => reject (see bgez frontrej)
.name	backrej,  $15	# sign bit set   => reject (see bltz backrej)
.name	bsignr,   $17	# element 0 of the clamped r product (mfc2 from jnk)
	
.name	jnk,	$v16	# destination for results only needed via the accumulator
	
.name	allWi,	$v5	# W (INVW under NEWCOM) of the three vertices in elements 0..2
.name	allWf,	$v6
.name   wscl,   $v21	# element 0 = RSP_STATE_PERSPNORM; later a throw-away vsubc destination
	
.name	nearWi,	$v19	# W selected across the three vertices by the vlt/vge + vmrg pairs
.name	nearWf,	$v20
	
	

		.ent	beginSetup

/*
 * beginSetup: first stage of triangle setup.
 *
 * In:    minp, midp, maxp = the three vertices (not yet ordered by Y),
 *        rsp_state = state block, outp = output pointer.
 * Does:  - loads screen x/y and W of each vertex,
 *        - forms r = Hd.x*Md.y - Md.x*Hd.y in the accumulator and reads it
 *          back into ri/rf,
 *        - applies the two cull tests from rendState against the sign of r,
 *        - orders the vertices by Y with a table lookup (the table also says
 *          whether r must be negated),
 *        - builds the edge-delta vector EDel and stores the three Y values,
 *        - calls NewtonDiv (code comment: computes 1/r).
 * Exits: BackReject / SetupReject when culled or when r == 0; otherwise
 *        continues into the code after the jal.
 *
 * Scalar and vector instructions are interleaved on purpose; the extra
 * indentation marks the vector stream.  Do not reorder.
 */
beginSetup:
	#
	#  vmin[0|1] = [ XS0 | YS0 ]   min vertex
	#  vmid[0|1] = [ XS1 | YS1 ]   mid vertex
	#  vmax[0|1] = [ XS2 | YS2 ]   max vertex
	#  rendState = STATE_RENDER
	#
	#  Load the screen-space XS, YS into registers.
	#  Fetch the RENDER mode flags.
	#
	llv	vmin[0], RSP_PTS_XS(minp)
	llv	vmid[0], RSP_PTS_XS(midp)	# element 1 is y
	llv	vmax[0], RSP_PTS_XS(maxp)	# element 0 is x,
	addi	dscratchp, zero, RSP_SETUP_TMP_OFFSET
	lw	rendState, RSP_STATE_RENDER(rsp_state)
	lsv     wscl[0], RSP_STATE_PERSPNORM(rsp_state)
	# W of min/mid/max goes to byte offsets 0/2/4 = elements 0/1/2 of allW.
	# NEWCOM reads the stored 1/W fields, otherwise the W fields.
#ifdef	NEWCOM
	lsv	allWi[0], RSP_PTS_INVW_INT (minp)	
			vsub	Md, vmid, vmin
	lsv	allWf[0], RSP_PTS_INVW_FRAC(minp)
			vsub	Hd, vmax, vmin
	lsv	allWi[2], RSP_PTS_INVW_INT (midp)
			vsub	td, vmin, vmid
	lsv	allWf[2], RSP_PTS_INVW_FRAC(midp)
	lsv	allWi[4], RSP_PTS_INVW_INT (maxp)
	lsv	allWf[4], RSP_PTS_INVW_FRAC(maxp)
#else	
	lsv	allWi[0], RSP_PTS_W_INT (minp)	
			vsub	Md, vmid, vmin
	lsv	allWf[0], RSP_PTS_W_FRAC(minp)
			vsub	Hd, vmax, vmin
	lsv	allWi[2], RSP_PTS_W_INT (midp)
			vsub	td, vmin, vmid
	lsv	allWf[2], RSP_PTS_W_FRAC(midp)
	lsv	allWi[4], RSP_PTS_W_INT (maxp)
	lsv	allWf[4], RSP_PTS_W_FRAC(maxp)
#endif
	# compute the partial products...
	# careful with the math here...
	# element 0 of the accumulator becomes Hd.x*Md.y + td.x*Hd.y
	# = Hd.x*Md.y - Md.x*Hd.y (td is the negated Md).
			vmudh	jnk, Hd, Md[1]
	lh	miny, RSP_PTS_YS(minp)		# get the y's (BEGIN SETUP)
			vmadh	jnk, td, Hd[1]
	lh	midy, RSP_PTS_YS(midp)
			vsar	rf, rf, rf[1]		# read one accumulator part into rf
	lh	maxy, RSP_PTS_YS(maxp)
			vsar	ri, ri, ri[0]		# read another accumulator part into ri
	#
	# Do the Y-sort by table lookup & decide culling as early as possible.
	# For overall balance CULL_BACK is given priority and handled first.
	#
	sll	backrej, rendState, 18	# move the G_CULL_BACK flag to the top bit
#ifdef	NEWCOM
	mfc2	bsignr, jnk[0]		# bsignr = clamped r (element 0)
	sh	minp, RSP_SETUP_TMP_OFFSET+0($0)
#else
	# non-NEWCOM: scale W by wscl[0] (PERSPNORM); vmadn with vconst[0]
	# reads the fraction back (vconst[0] is presumably 0 -- confirm).
		                vmudl   allWf, allWf, wscl[0]
	mfc2	bsignr, jnk[0]		# bsignr = clamped r (element 0)
		                vmadm   allWi, allWi, wscl[0]
	sh	minp, RSP_SETUP_TMP_OFFSET+0($0)
		                vmadn   allWf, vconst, vconst[0]
#endif
	# The three incoming pointers are parked at scratch offsets 0/2/4 so
	# that the sort tables can select them by byte offset below.
	sh	midp, RSP_SETUP_TMP_OFFSET+2($0)

	and	backrej, bsignr, backrej	# top bit = flag set AND r negative
	bltz	backrej, BackReject

	sh	maxp, RSP_SETUP_TMP_OFFSET+4($0)	# delay slot of the bltz
	#
	# tmp = [ maxy<midy | midy<miny | miny<maxy ]
	#
	#  0:	maxy>=midy,midy>=miny,miny>=maxy    miny=midy=maxy
	#  1:	maxy>=midy,midy>=miny,miny< maxy    miny<midy<maxy
	#  2:	maxy>=midy,midy< miny,miny>=maxy    midy<maxy<miny
	#  3:	maxy>=midy,midy< miny,miny< maxy    midy<miny<maxy
	#  4:	maxy< midy,midy>=miny,miny>=maxy    maxy<miny<midy
	#  5:	maxy< midy,midy>=miny,miny< maxy    miny<maxy<midy
	#  6:	maxy< midy,midy< miny,miny>=maxy    maxy<midy<miny
	#  7:	maxy< midy,midy< miny,miny< maxy    --------------
	#
	# Vector stream meanwhile: pick one W out of the three vertices into
	# nearW (vlt = smaller, vge under NEWCOM = larger); the vsubc results in
	# wscl are not used, the instruction is there for its carry side effect.
			vsubc	wscl,   allWf, allWf[1]
	slt	tmp,  maxy, midy
#ifdef	NEWCOM
			vge	nearWi, allWi, allWi[1]
#else
			vlt	nearWi, allWi, allWi[1]
#endif
	slt	negR, midy, miny
			vmrg	nearWf, allWf, allWf[1]
	add	tmp,  tmp,  tmp
	add	tmp,  tmp,  negR
	slt	negR, miny, maxy
	add	tmp,  tmp,  tmp
	add	tmp,  tmp,  negR		# tmp = 3-bit case number from the table above
			vsubc	wscl,   nearWf, allWf[2]
	lbu	maxp, RSP_YSORT_MAX(tmp)	# table gives a byte offset ...
#ifdef	NEWCOM
			vge	nearWi, nearWi, allWi[2]
#else
			vlt	nearWi, nearWi, allWi[2]
#endif
	lbu	midp, RSP_YSORT_MID(tmp)	
			vmrg	nearWf, nearWf, allWf[2]	
	lbu	minp, RSP_YSORT_MIN(tmp)	
	lh	maxp, RSP_SETUP_TMP_OFFSET(maxp)	# ... into the parked pointers
	lh	midp, RSP_SETUP_TMP_OFFSET(midp)
	lh	minp, RSP_SETUP_TMP_OFFSET(minp)
	lbu	negR, RSP_YSORT_NEG(tmp)	# > 0: this ordering requires negating r
#ifdef	NEWCOM
	# NEWCOM: reciprocal of the selected W (vrcph/vrcpl/vrcph sequence)
			vrcph	jnk[0],    nearWi[0]
	llv	vmax[0], RSP_PTS_XS(maxp)	# element 0 is x,
			vrcpl	nearWf[0], nearWf[0]
	llv	vmid[0], RSP_PTS_XS(midp)	# element 1 is y
			vrcph	nearWi[0], vconst[0]
#else
	# non-NEWCOM: scale the selected W by vconst1[5]
			vmudl	nearWf, nearWf, vconst1[5]
	llv	vmax[0], RSP_PTS_XS(maxp)	# element 0 is x,
			vmadm	nearWi, nearWi, vconst1[5]
	llv	vmid[0], RSP_PTS_XS(midp)	# element 1 is y
			vmadn	nearWf, vconst, vconst[0]		
#endif
	blez	negR, posiR
	llv	vmin[0], RSP_PTS_XS(minp)	# delay slot: reload sorted min vertex x/y
		vsubc	rf, vconst, rf		# negate R
		vsub	ri, vconst, ri
	posiR:
	beq	bsignr, $0, SetupReject		# r == 0: nothing to draw
		vsub	EDel, vmax, vmid	# delay slot, low deltas
	sll	frontrej, rendState, 19		# next render-state flag to the top bit
		vsub	Md,   vmid, vmin
	nor	frontrej, frontrej, $0		# invert: top bit 0 when the flag is set
		vsub	Hd,   vmax, vmin
	or	frontrej, bsignr, frontrej	# top bit 0 only if flag set and r >= 0
		vmov	ri[3], ri[0]		# copy r into element 3
#ifdef	DMANOWAIT
 #-YASU
 #  
 #  Confirm that the DMA to the FIFO buffer has finished, then update CMD_END
 #  
 #-YASU
checkFIFO:
	mfc0	tmp, DMA_BUSY
		vmov	rf[3], rf[0]
	bgez	frontrej, BackReject		# sits inside the wait loop; same test each pass
		vmov	EDel[MIDX],  Md[0]
	bne	tmp, zero, checkFIFO		# spin while DMA_BUSY is non-zero
	lw	negR, RSP_STATE_FIFO_OUTP(rsp_state)	# delay slot (negR reused as scratch)
	mtc0	negR, CMD_END
#else
		vmov	rf[3], rf[0]
	bgez	frontrej, BackReject
		vmov	EDel[MIDX],  Md[0]
#ifdef	OUTPUT_DUMP
	jal	OutputOpen
	addi	$18, zero, 176-RSP_OUTPUT_END	# delay slot: argument in $18
#endif
#endif
	# Store the y (byte offset 2 = element 1) of max/mid/min at 2/4/6(outp)
	# while the remaining EDel elements are filled in.
	ssv	vmax[2], 2(outp)
		vmov	EDel[MIDY],  Md[1]
	ssv	vmid[2], 4(outp)
		vmov	EDel[HIGHX], Hd[0]
	ssv	vmin[2], 6(outp)
		vmov	EDel[HIGHY], Hd[1]

	#
	# compute 1/r
	#
	jal	NewtonDiv
	mfc2	rdp_flg, ri[5]	# delay slot: bit 7 receives the sign of ri (the original note said tmp; the register is rdp_flg, masked with 0x80 later)
	
/*
 * Edge coefficient stage (runs after NewtonDiv returns).
 *
 * Writes the first 32 bytes of the triangle command at outp:
 *    0  rdp_cmd byte        1  flag byte (sign of r | tile)
 *    8/10  x of LOW edge    12/14  dx/dy of LOW edge
 *   16/18  x of HIGH edge   20/22  dx/dy of HIGH edge
 *   24/26  x of MID edge    28/30  dx/dy of MID edge
 * (int/frac pairs; the y values at 2/4/6 were stored earlier).
 * In parallel it multiplies S, T and the constant in element 2 of each
 * vertex by a W-derived factor and parks the results in the scratch area
 * at 16..63(dscratchp) for the attribute stage.
 * If rdp_cmd has none of the ZBUFF/TXTR/SHADE bits the primitive is
 * finished here (branch to OutputClose, or SetupReject under OUTPUT_DUMP).
 *
 * Registers are released and re-aliased below; the vector numbers overlap
 * on purpose, so the order of .name/.unname relative to the code matters.
 */
.unname jnk
.unname	td
.unname	vmin
.unname	vmid
.unname	vmax
	
.unname	negR
.unname	frontrej
.unname	backrej
	
.unname	Hd
.unname	Md
	
.unname ri
.unname rf
			
.name	toutp,	$16	# named but not referenced by name in the visible code

.name	invEDeli,	$v7	# reciprocals of the edge dy values
.name	invEDelf,	$v8
.name	EDeli,		$v10	# EDel scaled by vconst[4], integer part
.name	EDelf,		$v9	# EDel scaled by vconst[4], fraction part
.name	invW1f,	$v11	# 1/W of minp (element 0) and midp (element 4)
.name	invW1i,	$v12
.name 	xi,	$v13	# edge start x values, scaled by vconst[4]
.name 	xf,	$v14

.name	vtmpi,	$v15
.name	vtmpf,	$v16
	
.name	ptTX2i,	$v22	# these registers hold S, T, 1/W
.name	ptTX2f,	$v29	# for each vertex.
.name	invW2f,	$v24	# 1/W of maxp
.name	invW2i,	$v25
	
	# decide the major edge
	lb	rdp_cmd, RSP_STATE_TRI(rsp_state)

	# Ldx/Ldy, Mdx/Mdy, Hdx/Hdy:
			vmudm	EDeli, EDel,   vconst[4]
	lsv	xi[(LOWX*2)],  RSP_PTS_XS(midp)	
			vmadn	EDelf, vconst, vconst[0]	
	lsv	xi[(MIDX*2)],  RSP_PTS_XS(minp) # same as high
			vrcp	invEDelf[LOWY], EDel[LOWY]	# 1.0/Ldy
	lsv	xi[(HIGHX*2)], RSP_PTS_XS(minp)
			vrcph	invEDeli[LOWY], vconst[0]
	ori	rdp_cmd, rdp_cmd, G_TRI_FILL
	
	# stick in tile number
	lb	tmp, RSP_STATE_TEX_TILE(rsp_state)
			vrcp	invEDelf[MIDY], EDel[MIDY]	# 1.0/Mdy
	ssv	nearWi[0], 68(dscratchp)	# park selected W; re-read later via ldv from 64(dscratchp)
			vrcph	invEDeli[MIDY], vconst[0]
	lsv	invW1f[0], RSP_PTS_INVW_FRAC(minp)
			vrcp	invEDelf[HIGHY], EDel[HIGHY]	# 1.0/Hdy
	lsv	invW1f[8], RSP_PTS_INVW_FRAC(midp)
			vrcph	invEDeli[HIGHY], vconst[0]
	
	#
	# We used to shift down the rcp results all the way,
	# then do the multiply. If we don't shift it down all the
	# way, do the mult, then shift some more, we get better
	# precision on the degenerate cases.
	#
	lsv	invW1i[0], RSP_PTS_INVW_INT(minp)
			vmudm	xi, xi, vconst[4]
	lsv	invW1i[8], RSP_PTS_INVW_INT(midp)
			vmadn	xf, vconst, vconst[0]
	andi	rdp_flg, rdp_flg, 0x80		# keep only bit 7 (sign of r, fetched before NewtonDiv)
	or	rdp_flg, rdp_flg, tmp	# 3 cycles after load
	 		vmudl	invEDelf, invEDelf, vconst1[4]	# make S15.16
	sb	rdp_cmd, 0(outp)	# output rdp command
	 		vmadm	invEDeli, invEDeli, vconst1[4]
	sb	rdp_flg, 1(outp)	# output poly flag
	  		vmadn	invEDelf, vconst, vconst[0]
	ssv	xi[(LOWX*2)],   8(outp)	# output xLow
			vmudl	allWf, invW1f, nearWf[0]	# allW = invW1 * nearW (int/frac x int/frac)
	ssv	xf[(LOWX*2)],  10(outp)
			vmadm	allWf, invW1i, nearWf[0]
	ssv	xi[(HIGHX*2)], 16(outp)	# output xHigh
			vmadn	allWf, invW1f, nearWi[0]
	ssv	xf[(HIGHX*2)], 18(outp)
			vmadh	allWi, invW1i, nearWi[0]
	# NOTE(review): doreject has no matching .name in the visible source.
.unname	doreject
.unname	bsignr	
	ssv	nearWf[0], 76(dscratchp)	# fraction of the selected W; re-read later via ldv from 72(dscratchp)
			vmudl	DxXDyf, invEDelf, EDelf[0q]	# Ldx / Ldy
	ssv	xi[(MIDX*2)],  24(outp)	# output xMid
			vmadm	DxXDyf, invEDeli, EDelf[0q]	# Mdx / Mdy
	ssv	xf[(MIDX*2)],  26(outp)
			vmadn	DxXDyf, invEDelf, EDeli[0q]	# Hdx / Hdy
.unname	EDelf
.unname xi
.unname xf
.name	ptTX1i,	$v9	# these registers hold S, T, 1/W
	llv	ptTX1i[0], RSP_PTS_S(minp)	# elements 0,1 = S,T of minp
			vmadh	DxXDyi, invEDeli, EDeli[0q]
	llv	ptTX1i[8], RSP_PTS_S(midp)	# elements 4,5 = S,T of midp
.unname	EDeli
.name	ptTX1f,	$v10	# for each vertex.
	 		vmudl	invEDelf, invEDelf, vconst[4]
	lsv	ptTX1i[ 4], VCONST1_OFFSET($0)	# = vmov ptTX1i[2],vconst1[0]
			vmadm	invEDeli, invEDeli, vconst[4]
	lsv	ptTX1i[12], VCONST1_OFFSET($0)	# = vmov ptTX1i[6],vconst1[0]
			vmadn	invEDelf, vconst, vconst[0]
	addi	return, gfx1, 0			# set the return address used by OutputClose
	 		vmudl	DxXDyf, DxXDyf, vconst[4]
			vmadm	DxXDyi, DxXDyi, vconst[4]
	lsv	invW2f[0], RSP_PTS_INVW_FRAC(maxp)
			vmadn	DxXDyf, vconst, vconst[0]
	lsv	invW2i[0], RSP_PTS_INVW_INT(maxp)
			vmudm	vtmpf,  ptTX1i, allWf[0h]	# ptTX1 = ptTX1i * allW, per half (min / mid)
	llv	ptTX2i[0], RSP_PTS_S(maxp)	# elements 0,1 = S,T of maxp
			vmadh	ptTX1i, ptTX1i, allWi[0h]
	lsv	ptTX2i[ 4], VCONST1_OFFSET($0)	# = vmov ptTX2i[2],vconst1[0]
			vmadn	ptTX1f, vconst, vconst[0]

	addi	outp, outp, 32	# increment output pointer
			vcr	DxXDyi, DxXDyi, vconst1[6]	# clamp the integer slopes against vconst1[6]
	# the -32 below compensates for the outp increment just above
	ssv	DxXDyf[(LOWY*2)],  14-32(outp)
			vmudh	EDel, EDel, vconst[5]
	ssv	DxXDyf[(HIGHY*2)], 22-32(outp)
			vmudl	vtmpf, invW2f, nearWf[0]	# nearW = invW2 * nearW (factor for maxp)

.unname miny
.unname midy
.unname maxy
	
.name	stmaxi,	$v17	# running compare/merge result over the S,T magnitudes
.name	stmaxf,	$v18
	
	ssv	DxXDyf[(MIDY*2)], 30-32(outp)
			vmadm	nearWf, invW2i, nearWf[0]
 	sdv	ptTX1i[8],  32(dscratchp)	# mid vertex S,T,W integer part
			vmadn	nearWf, invW2f, nearWi[0]
	ssv	DxXDyi[(LOWY*2)],  12-32(outp)
			vmadh	nearWi, invW2i, nearWi[0]
	ssv	DxXDyi[(HIGHY*2)], 20-32(outp)
			vabs	allWi,  ptTX1i, ptTX1i	# allWi reused: |ptTX1i|
	ssv	DxXDyi[(MIDY*2)],  28-32(outp)
			vxor	allWf,	vconst, vconst	# allWf reused: all zero
	# NOTE(review): xHighi and vtmp have no matching .name in the visible source.
.unname xHighi
.unname vtmp

.unname	invW1f
.unname	invW1i
.unname	invW2f
.unname	invW2i
.unname wscl
	
	# tmp != 0 if any per-vertex attribute (Z, texture, shade) is requested
	andi	tmp, rdp_cmd, (G_RDP_TRI_ZBUFF_MASK|G_RDP_TRI_TXTR_MASK|G_RDP_TRI_SHADE_MASK)
			vmudm	ptTX2f, ptTX2i, nearWf[0]  # Delay slot
	sdv	ptTX1i[0],  16(dscratchp)	# min vertex S,T,W integer part
			vmadh	ptTX2i, ptTX2i, nearWi[0]
	sdv	ptTX1f[8],  40(dscratchp)	# mid vertex S,T,W fraction part
			vmadn	ptTX2f, vconst, vconst[0]
	llv	nearWf[0],  40(dscratchp)	# nearWf[0,1] = the mid fractions just stored
			# delay-V
	# andi leaves tmp >= 0, so blez here means "no attribute bits set"
#ifdef	OUTPUT_DUMP
	blez	tmp, SetupReject
#else
	blez	tmp, OutputClose
#endif
			vmov	nearWi[0], allWi[4]	# delay slot; nearWi[0,1] = |mid S|, |mid T|
 	sdv	ptTX2i[0],  48(dscratchp)	# max vertex S,T,W integer part
			vmov	nearWi[1], allWi[5]
 	sdv	ptTX1f[0],  24(dscratchp)	# min vertex S,T,W fraction part
			vabs	ptTX2i, ptTX2i, ptTX2i
 	sdv	ptTX2f[0],  56(dscratchp)	# max vertex S,T,W fraction part

/*
 * Shade source selection.
 *
 * Loads the 8-byte block at RSP_PTS_R_NX into amin/amid/amax.
 *   - G_SHADING_SMOOTH set in rendState: each vertex supplies its own block
 *     (smoothShade).
 *   - otherwise: all three come from flatp.  With FOG4FLAT, element 3 is
 *     taken from the individual vertex and the rest from flatp ($vcc is
 *     loaded with 0x08 so the vmrg picks only that element).
 * aminf/amidf/amaxf start as allWf + vconst1[5]; allWf was zeroed by the
 * vxor in the preceding stage, so they start as a copy of vconst1[5].
 * The vge/vmrg pair continues the S,T magnitude selection into stmaxi/f.
 * All paths meet at flatShade.
 */
.unname	toutp	
.unname	vtmpi
.unname	vtmpf
.name	aminf,	$v16	# attribute fraction parts of the min / mid / max vertex
.name	amidf,	$v24
.name	amaxf,	$v28
.name	amin,	$v15	# attribute integer parts of the min / mid / max vertex
.name	amid,	$v23
.name	amax,	$v25

	 		vadd	aminf, allWf, vconst1[5]
	andi	tmp, rendState, G_SHADING_SMOOTH
	 		vadd	amidf, allWf, vconst1[5]
	bne	tmp, zero, smoothShade
	 		vadd	amaxf, allWf, vconst1[5]	# delay slot
#ifdef	FOG4FLAT
.name	vflat, $v11
			vge	stmaxi, allWi , ptTX2i
	luv	vflat[0], RSP_PTS_R_NX(flatp)
			vmrg	stmaxf, ptTX1f, ptTX2f
	luv	amin[0], RSP_PTS_R_NX(minp)
			addi	tmp, zero, 0x08		# elem 3 = 1 
	luv	amid[0], RSP_PTS_R_NX(midp)
			ctc2	tmp, $vcc
	luv	amax[0], RSP_PTS_R_NX(maxp)
			vmrg	amin, amin, vflat	# mixed A value[elem 3]
			vmrg	amid, amid, vflat
	j	flatShade
			vmrg	amax, amax, vflat	# delay slot
.unname	vflat
#else
	luv	amin[0], RSP_PTS_R_NX(flatp)
			vge	stmaxi, allWi , ptTX2i
	luv	amid[0], RSP_PTS_R_NX(flatp)
			vmrg	stmaxf, ptTX1f, ptTX2f
	j	flatShade
	luv	amax[0], RSP_PTS_R_NX(flatp)	# delay slot
#endif
smoothShade:
	luv	amin[0], RSP_PTS_R_NX(minp)
			vge	stmaxi, allWi , ptTX2i
	luv	amid[0], RSP_PTS_R_NX(midp)
			vmrg	stmaxf, ptTX1f, ptTX2f
	luv	amax[0], RSP_PTS_R_NX(maxp)
/*
 * flatShade: common attribute-gradient stage (both shading paths land here).
 *
 * Each attribute vector is completed to 8 elements:
 *   0..3  the luv block from RSP_PTS_R_NX (scaled by vconst[7]),
 *   4..6  S, T and the W term parked at 16..63(dscratchp),
 *   7     RSP_PTS_ZS / RSP_PTS_ZSF.
 * For all 8 elements at once it then forms
 *   Mda = ((amid-amin)*Hdx - (amax-amin)*Mdx) * invr
 *   Hda = ((amax-amin)*Mdy - (amid-amin)*Hdy) * invr
 *   ade = Mda*vconst[1] + Hda * DxXDy[HIGHY]
 * and stores elements 0..3 of amin/Hda/ade/Mda (int and frac) at
 * 0/8/32/40 and 16/24/48/56(outp).
 * NOTE(review): that layout looks like the RDP shade coefficient block
 * (value, d/dx, d/de, d/dy) -- confirm against the RDP command format.
 * outp advances by 64 only when G_RDP_TRI_SHADE_MASK is set in rdp_cmd, so
 * without shading the next stage overwrites these 64 bytes.
 */
flatShade:	
.unname	ptTX1i
.unname	ptTX1f
.unname	ptTX2i
.unname	ptTX2f
.unname	allWi
.unname	allWf
.name	vjunk,	$v5	# throw-away destination; results are taken from the accumulator
.name	Hdai,	$v9	# amax - amin, then the Hda gradient
.name	Hdaf,	$v10
.name	Mdai,	$v11	# amid - amin, then the Mda gradient
.name	Mdaf,	$v12
.name	adei,	$v13	# gradient along the HIGH edge
.name	adef,	$v14
.name	tMdai, 	$v21	# amin - amid (negated Mda)
.name	tMdaf, 	$v22
			# delay-V
	ldv	aminf[8], (16 +  8)(dscratchp)	# 24: min S,T,W fraction
			# delay-V
	lsv	aminf[14], RSP_PTS_ZSF(minp)
			vmudm	amin, amin, vconst[7]
	ldv	amidf[8], (16 + 24)(dscratchp)	# 40: mid S,T,W fraction
			vmudm	amid, amid, vconst[7]
	lsv	amidf[14], RSP_PTS_ZSF(midp)
			vmudm	amax, amax, vconst[7]	# multiply by 512
	ldv	amin[8],  (16 +  0)(dscratchp)	# 16: min S,T,W integer
			vge	stmaxi, stmaxi, nearWi
	ldv	amid[8],  (16 + 16)(dscratchp)	# 32: mid S,T,W integer
			vmrg	stmaxf, stmaxf, nearWf

	lsv	amin[14], RSP_PTS_ZS(minp)
			# delay-V
	lsv	amid[14], RSP_PTS_ZS(midp)
			# delay-V
	ldv	amaxf[8], (16 + 40)(dscratchp)	# 56: max S,T,W fraction
			# delay-V
	lsv	amaxf[14], RSP_PTS_ZSF(maxp)
			# delay-V
	ldv	amax[8],  (16 + 32)(dscratchp)	# 48: max S,T,W integer
			vsubc	Mdaf,  amidf, aminf			
	lsv	amax[14], RSP_PTS_ZS(maxp)
	 		vsub	Mdai,  amid,  amin
	slv	stmaxi[0], 64(dscratchp)	# park the S,T selection next to the W parked at 68/76
			vsubc	tMdaf, aminf, amidf
	slv	stmaxf[0], 72(dscratchp)
			vsub	tMdai, amin,  amid
.unname	stmaxi
.unname	stmaxf
.unname	nearWi
.unname	nearWf
.name	tHdai, 	$v19	# amin - amax (negated Hda)
.name	tHdaf, 	$v20
	sdv	amin [0],  0(outp)	# 0
			vsubc	tHdaf, aminf, amaxf
	sdv	aminf[0], 16(outp)	# 16
			vsub	tHdai, amin,  amax
	andi	tmp, rdp_cmd, G_RDP_TRI_SHADE_MASK	# tested by the beq at the end of this stage
			vsubc	Hdaf,  amaxf, aminf
	andi	rdp_flg, rdp_cmd, G_RDP_TRI_ZBUFF_MASK	# delay
			vsub	Hdai,  amax,  amin

	# Mda = (amid-amin)*Hdx + (amin-amax)*Mdx, read back with vsar
			vmudn	vjunk, Mdaf, EDel[HIGHX]
			vmadh	vjunk, Mdai, EDel[HIGHX]
			vmadn	vjunk, tHdaf, EDel[MIDX]
			vmadh	vjunk, tHdai, EDel[MIDX]
			vsar	Mdai, Mdai, Mdai[0]
			vsar	Mdaf, Mdaf, Mdaf[1]
.unname	tHdai
.unname	tHdaf
	# Hda = (amax-amin)*Mdy + (amin-amid)*Hdy, read back with vsar
			vmudn	vjunk, Hdaf, EDel[MIDY]
			vmadh	vjunk, Hdai, EDel[MIDY]
			vmadn	vjunk, tMdaf, EDel[HIGHY]
			vmadh	vjunk, tMdai, EDel[HIGHY]
			vsar	Hdai, Hdai, Hdai[0]
			vsar	Hdaf, Hdaf, Hdaf[1]
.unname	tMdai
.unname	tMdaf
.name vtmpf,	$v19	# element 0 = 0x0800, a scale factor
.name coordMi,	$v20	# elements 4.. = values parked at 64(dscratchp)
.name coordMf,	$v21	# elements 4.. = values parked at 72(dscratchp)
	# Mda *= invr[3]  (invr as left by NewtonDiv)
			vmudl	vjunk, Mdaf, invrf[3]
	addi	$16, zero, 0x0800
			vmadm	vjunk, Mdai, invrf[3]
	mtc2	$16, vtmpf[0]
			vmadn	Mdaf,  Mdaf, invri[3]
	ldv	coordMi[8], 64(dscratchp)
			vmadh	Mdai,  Mdai, invri[3]
	ldv	coordMf[8], 72(dscratchp)

	# Hda *= invr[3]
			vmudl	vjunk, Hdaf, invrf[3]
			vmadm	vjunk, Hdai, invrf[3]
			vmadn	Hdaf,  Hdaf, invri[3]
	sdv	Mdaf[0],   56(outp)	
			vmadh	Hdai,  Hdai, invri[3]
	sdv	Mdai[0],   40(outp)	
	# ade = Mda*vconst[1] + Hda*DxXDy[HIGHY]
			vmudn	vjunk, Mdaf, vconst[1]
			vmadh	vjunk, Mdai, vconst[1]
	sdv	Hdaf[0],   24(outp)	
			vmadl	vjunk, Hdaf, DxXDyf[HIGHY]
	sdv	Hdai[0],    8(outp)
			vmadm	vjunk, Hdai, DxXDyf[HIGHY]
			vmadn	adef,  Hdaf, DxXDyi[HIGHY]
			vmadh	adei,  Hdai, DxXDyi[HIGHY]

.unname	invri
.unname	invrf	
.unname	amaxf
.unname	amid
.unname	amidf
.unname	amax
.unname vjunk
.name absdxi,	$v24	# |Hdai| scaled by vtmpf[0]
.name absdyi,	$v25	# |Mdai| scaled by vtmpf[0]
.name absdxf,	$v26
.name absdyf,	$v27
	# Start of the texture scale estimate; completed in outputTXTR.
			vabs	absdyi, Mdai, Mdai
	                vmudl   coordMf, coordMf, vtmpf[0]
	
	                vmadm   coordMi, coordMi, vtmpf[0]
	sdv	adef[0],  48(outp)
	                vmadn   coordMf, vconst, vconst[0]
	sdv	adei[0],  32(outp)
			vabs	absdxi, Hdai, Hdai
	beq	tmp, $0, outputTXTR		# no shade bit: leave outp where it is
	                vmudm   absdyi, absdyi, vtmpf[0]	# delay slot
	addi	outp, outp, 64			# shade bit set: keep the 64 bytes just written
  outputTXTR:
	#
	# Texture coefficient stage.  Skipped (branch to outputZBUF) when
	# rdp_cmd has no G_RDP_TRI_TXTR_MASK bit.
	#
	# A common scale factor is derived from the S,T,W magnitudes and
	# their gradients (coordM, absdx, absdy): sum, take the larger over
	# elements 4/5/6, take a reciprocal and limit it against vconst[1].
	# Elements 4..7 of amin, Hda, ade and Mda are multiplied by it and
	# stored as the 64-byte texture block at 0..63 relative to the
	# pre-increment outp (hence the "-64" in the store offsets).
	#
	# Interleaved with that, element 7 (the Z term) of amin/Hda/ade/Mda is
	# scaled by vconst1[4] and written to the 16 bytes after the texture
	# block; outp is advanced over them only when the ZBUFF bit is set
	# (rdp_flg != 0).
	#
	# Leaves through OutputClose (or gfx1 / SetupReject with OUTPUT_DUMP).
	#
	                vmadn   absdyf, vconst, vconst[0]
	andi	tmp, rdp_cmd, G_RDP_TRI_TXTR_MASK	# delay	
	                vmudm   absdxi, absdxi, vtmpf[0]
	blez	tmp, outputZBUF
	                vmadn   absdxf, vconst, vconst[0]	# delay slot
.name scalei,	$v5
.name scalef,	$v6
	# scale = coordM*vconst[1] + absdy*vconst[1] + absdx*vconst[2]
			vmudn	vtmpf,  coordMf, vconst[1]
			vmadh	vtmpf,  coordMi, vconst[1]
			vmadn	vtmpf,  absdyf,  vconst[1]
			vmadh	vtmpf,  absdyi,  vconst[1]
			vmadn	scalef, absdxf,  vconst[2]
			vmadh	scalei, absdxi,  vconst[2]	
.unname absdxi
.unname absdyi
.unname absdxf
.unname absdyf
.unname coordMi
.unname coordMf
.name	vres1i,	$v20	# two result pairs used alternately for scaled outputs
.name	vres1f,	$v21
.name	vres2i,	$v22
.name	vres2f,	$v23
				# DELAY-V
				# DELAY-V
	addi	outp, outp, 64	# increment output pointer
	# keep the larger of scale and scale[5] in every element (vge + vmrg)
				vsubc	vtmpf,  scalef, scalef[5]
				vge	scalei, scalei, scalei[5]
				vmrg	scalef, scalef, scalef[5]
	# Z: ade scaled by vconst1[4]
		vmudn	vres1f, adef,   vconst1[4]
		vmadh	vres1i, adei,   vconst1[4]
		vmadn	vres1f, vconst, vconst[0]
	# ... and the larger of that and scale[6]
				vsubc	vtmpf,  scalef, scalef[6]
				vge	scalei, scalei, scalei[6]
				vmrg	scalef, scalef, scalef[6]
	ssv	vres1i[14],    8(outp)	# Z block +8/+10: ade, element 7
		vmudn	vres2f, aminf,  vconst1[4]
	ssv	vres1f[14],   10(outp)
		vmadh	vres2i, amin,   vconst1[4]
		vmadn	vres2f, vconst, vconst[0]
		                vmudl   scalef, scalef, vconst1[3]
			        vmadm   scalei, scalei, vconst1[3]
				vmadn   scalef, vconst, vconst[0]
			# DELAY-V
	ssv	vres2i[14],  0(outp)	# Z block +0/+2: amin, element 7
			# DELAY-V
	ssv	vres2f[14],  2(outp)
	# reciprocal of element 4 of the scale into element 0
				vrcph	vtmpf[0],  scalei[4]
				vrcpl	scalef[0], scalef[4]
				vrcph	scalei[0], vconst[0]
				# DELAY-V
				# DELAY-V
				vmudn	scalef, scalef, vconst[2]
				vmadh	scalei, scalei, vconst[2]
		vmudn	vres1f, Hdaf,   vconst1[4]
		vmadh	vres1i, Hdai,   vconst1[4]
		vmadn	vres1f, vconst, vconst[0]
	# limit: integer part kept below vconst[1], fraction merged with vconst[0]
				vlt	scalei, scalei, vconst[1]
				vmrg	scalef, scalef, vconst[0]
.unname vtmpf
		vmudn	vres2f, Mdaf,   vconst1[4]
	ssv	vres1i[14],    4(outp)	# Z block +4/+6: Hda, element 7
		vmadh	vres2i, Mdai,   vconst1[4]
	ssv	vres1f[14],    6(outp)	
		vmadn	vres2f, vconst, vconst[0]
	# texture: amin * scale
			vmudl	vres1f, aminf, scalef[0]
			vmadm	vres1f, amin,  scalef[0]
			vmadn	vres1f, aminf, scalei[0]
	ssv	vres2i[14],   12(outp)	# Z block +12/+14: Mda, element 7
	ssv	vres2f[14],   14(outp)
			vmadh	vres1i, amin,  scalei[0]

	# texture: Hda * scale
			vmudl	vres2f, Hdaf, scalef[0]
			vmadm	vres2f, Hdai, scalef[0]
			vmadn	vres2f, Hdaf, scalei[0]
	sdv	vres1f[8], 16-64(outp)	# texture block +16: amin fraction (elements 4..7)
			vmadh	vres2i, Hdai, scalei[0]
	sdv	vres1i[8],  0-64(outp)	# texture block +0:  amin integer

	# texture: Mda * scale
			vmudl	vres1f, Mdaf, scalef[0]
			vmadm	vres1f, Mdai, scalef[0]
			vmadn	vres1f, Mdaf, scalei[0]
	sdv	vres2f[8],   24-64(outp)	# texture block +24: Hda fraction
			vmadh	vres1i, Mdai, scalei[0]
	sdv	vres2i[8],    8-64(outp)	# texture block +8:  Hda integer

	# texture: ade * scale
			vmudl	vres2f, adef, scalef[0]
			vmadm	vres2f, adei, scalef[0]
			vmadn	vres2f, adef, scalei[0]
	sdv	vres1f[8],   56-64(outp)	# texture block +56: Mda fraction
			vmadh	vres2i, adei, scalei[0]
	sdv	vres1i[8],   40-64(outp)	# texture block +40: Mda integer
			# DELAY-V
			# DELAY-S
			# DELAY-V
			# DELAY-S
			# DELAY-V
	sdv	vres2f[8],   48-64(outp)	# 48
	# rdp_flg == 0: no Z requested, finish without stepping over the Z words
#ifdef	OUTPUT_DUMP
		beq	rdp_flg, zero, SetupReject
#else
		beq	rdp_flg, zero, OutputClose
#endif
	sdv	vres2i[8],   32-64(outp)	# 32
#ifdef	OUTPUT_DUMP
		jr	gfx1
#else
		j	OutputClose
#endif
	addi	outp, outp, 16	# increment output pointer
	
.unname scalei
.unname scalef
	
	
  outputZBUF:	
	#
	# Z-only coefficient stage (reached when texturing is off).
	# If rdp_flg (= rdp_cmd & G_RDP_TRI_ZBUFF_MASK) is zero, nothing more
	# is emitted.  Otherwise ade, amin, Hda and Mda are scaled by
	# vconst1[4] and element 7 of each (byte offset 14, the Z slot) is
	# written as int/frac halfwords:
	#    +0/+2 amin   +4/+6 Hda   +8/+10 ade   +12/+14 Mda
	# outp advances by 16.  Leaves through OutputClose (SetupReject
	# under OUTPUT_DUMP).
	#
#ifdef	OUTPUT_DUMP
	blez	rdp_flg, SetupReject
#else
	blez	rdp_flg, OutputClose
#endif
				vmudn	adef, adef,   vconst1[4]	# delay slot of the blez
				vmadh	adei, adei,   vconst1[4]
				vmadn	adef, vconst, vconst[0]
	
				vmudn	aminf, aminf,  vconst1[4]
				vmadh	amin,  amin,   vconst1[4]
				vmadn	aminf, vconst, vconst[0]

	ssv	adei[14],    8(outp)	# output z stuff.
				vmudn	Hdaf, Hdaf,   vconst1[4]
	ssv	adef[14],   10(outp)
				vmadh	Hdai, Hdai,   vconst1[4]
				vmadn	Hdaf, vconst, vconst[0]
	
	ssv	amin [14],   0(outp)
				vmudn	Mdaf, Mdaf,   vconst1[4]
	ssv	aminf[14],   2(outp)
				vmadh	Mdai, Mdai,   vconst1[4]
	addi	outp, outp, 16	# increment output pointer
				vmadn	Mdaf, vconst, vconst[0]
	# the -16 below compensates for the outp increment just above
	ssv	Hdai[14],    4-16(outp)
	ssv	Hdaf[14],    6-16(outp)	
	ssv	Mdai[14],   12-16(outp)	
#ifdef	OUTPUT_DUMP
	j	SetupReject
#else
	j	OutputClose
#endif
	ssv	Mdaf[14],   14-16(outp)	# delay slot of the jump

		/* handling that accompanies the change from return_save to gfx1 */
		/* in the BackReject case, return with return = 0 */
		/*
		 * Common reject exits for clipAndSetup / beginSetup.
		 * BackReject (both cull tests branch here) clears the
		 * `return` register and falls into SetupReject.
		 * SetupReject jumps to the address in gfx1 and leaves
		 * `return` untouched.
		 */
BackReject:	addi	return, zero, 0
SetupReject:	jr	gfx1
		nop			# branch delay slot
		.end	beginSetup

/*
 * Release every register alias that is still live at the end of setup, so
 * that code assembled after this file can reuse the names.  The list must
 * stay in step with the .name directives above.
 */
/* un-name scalar registers: */
.unname	minp
.unname	midp
.unname	maxp
.unname	flatp
.unname	rdp_cmd
.unname	rdp_flg
.unname	tmp
.unname	dscratchp
.unname	rendState
	
/* un-name vector registers: */
.unname	DxXDyi
.unname	DxXDyf
.unname	yf
.unname	xHighf
.unname	EDel
.unname	invEDeli
.unname	invEDelf
	
.unname	Hdai
.unname	Hdaf
.unname	Mdai
.unname	Mdaf
.unname	adei
.unname	adef
.unname	amin
.unname	aminf

#if 0
	# test for thorough register un-naming.
	# Disabled by default.  When enabled it gives a fresh name to scalar
	# registers $1..$20 and vector registers $v0..$v29.
	# NOTE(review): presumably the assembler reports an error for any
	# register that still has a name from the code above -- confirm.
.name r1, $1
.name r2, $2
.name r3, $3
.name r4, $4
.name r5, $5
.name r6, $6
.name r7, $7
.name r8, $8
.name r9, $9
.name r10, $10
.name r11, $11
.name r12, $12
.name r13, $13
.name r14, $14
.name r15, $15
.name r16, $16
.name r17, $17
.name r18, $18
.name r19, $19
.name r20, $20

.name vv0, $v0
.name vv1, $v1
.name vv2, $v2
.name vv3, $v3
.name vv4, $v4
.name vv5, $v5
.name vv6, $v6
.name vv7, $v7
.name vv8, $v8
.name vv9, $v9
.name vv10, $v10
.name vv11, $v11
.name vv12, $v12
.name vv13, $v13
.name vv14, $v14
.name vv15, $v15
.name vv16, $v16
.name vv17, $v17
.name vv18, $v18
.name vv19, $v19
.name vv20, $v20
.name vv21, $v21
.name vv22, $v22
.name vv23, $v23
.name vv24, $v24
.name vv25, $v25
.name vv26, $v26
.name vv27, $v27
.name vv28, $v28
.name vv29, $v29
	
#endif