gimm.s 28.1 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199

/**************************************************************************
 *								          *
 *               Copyright (C) 1994, Silicon Graphics, Inc.       	  *
 *								          *
 *  These coded instructions, statements, and computer programs  contain  *
 *  unpublished  proprietary  information of Silicon Graphics, Inc., and  *
 *  are protected by Federal copyright  law.  They  may not be disclosed  *
 *  to  third  parties  or copied or duplicated in any form, in whole or  *
 *  in part, without the prior written consent of Silicon Graphics, Inc.  *
 *								          *
 *************************************************************************/

/*
 * File:		gimm.s
 * Creator:		hsa@sgi.com
 * Create Date:		Fri Jun 24 13:55:27 PDT 1994
 *
 * This file holds the top-level of the IMM command processing, and
 * related routines.
 *
 */

/*
 * Select VTXID_SHIFT for the GBI variant being built:
 *   F3DLP_GBI with CLIP_OFF        : 3
 *   F3DLP_GBI without CLIP_OFF     : 4
 *   F3DEX_GBI (F3DLP_GBI not set)  : 3
 *   neither                        : 2
 * NOTE(review): VTXID_SHIFT is not referenced in the part of the file
 * reviewed here; presumably it is used by other sources - confirm.
 */
#ifdef F3DLP_GBI
#  ifdef CLIP_OFF
#    define VTXID_SHIFT		3
#  else
#    define VTXID_SHIFT		4
#  endif
#else
#  ifdef F3DEX_GBI
#    define VTXID_SHIFT		3
#  else
#    define VTXID_SHIFT		2
#  endif
#endif

 ############################################################################	
 #
 # The following code processes the IMM type display list commands.
 # Registers on input:	
 #	gfx0	- first word of display list command
 #	gfx1	- second word of display list command
 #	dinp	- points to *next* DL cmd, so back up for this one.
 #	
		.ent	doIMM
 #
 # doIMM: dispatch an IMM display list command through the halfword jump
 # table at IMM_JMP_ADD. The table offset arrives in $2.
 #
doIMM:
	# $2 is shifted in the delay slot of the branch that 
	# brought us here...
		
	# 'switch' to correct IMM command:
		andi	$2, $2, 0xfe		# shifted up 1 for offset
		lh	$2,(IMM_JMP_ADD)($2)	# 16-bit handler address from the table
		jr	$2			# jump to the selected handler
	# consolidate some of the similar decoding...
	# (delay slot: runs before the handler; $1 = byte at dinp-1, which is
	#  the last byte of the current command since dinp points past it)
		lbu	$1, (0-1)(dinp)	# pick off first field

		.end	doIMM
 #
 #
 #
 ############################################################################	


#ifdef	LINE3D
# ifdef	F3DEX_GBI
.name v0,	$1
.name v1,	$2
.name wd,	$3
 #
 # G_LINE3D (LINE3D microcode, F3DEX GBI): draw one line.
 # The index bytes at dinp-3 and dinp-2 are translated through
 # RSP_VADDR_TABLE into v0 and v1. The low byte of gfx1 plus 3 is stored as
 # a halfword at CLIP_STATE_TABLE (the non-F3DEX handler in this file
 # describes that value as the line width, 0 meaning minimum width).
 #
		.ent 	case_G_LINE3D			
  case_G_LINE3D:
 		lbu	v0, (0-3)(dinp)		# first vertex index byte
		lbu	v1, (0-2)(dinp)		# second vertex index byte
 		andi	wd, gfx1, 0xff		# low byte of second command word
		lhu	v0, RSP_VADDR_TABLE(v0)	# index -> halfword table entry
		lhu	v1, RSP_VADDR_TABLE(v1)
		addi	wd, wd, 3
		sh	wd, CLIP_STATE_TABLE(zero)
		addi	gfx0, v0, 0		# copy v0 into gfx0
		j	doClip
		lhu	return, GFXDONE(zero)	# delay slot: return address (GfxDone)
		
		.end	case_G_LINE3D
.unname v0
.unname v1
.unname wd

.name v0,	$1
.name v1,	$2
.name wd,	$3
		.ent	case_G_TRI2
 #
 # G_TRI2 (LINE3D microcode, F3DEX GBI).
 # Draws the three edges between the vertices whose index bytes sit at
 # dinp-7, dinp-6 and dinp-5, with the value 3 stored at CLIP_STATE_TABLE.
 # gfx0 holds the table entry of the dinp-7 vertex and the halfword at
 # (-2+RSP_OUTPUT_OFFSET) holds the entry of the dinp-6 vertex; both are
 # read back after doClip calls.
 # NOTE(review): nothing jumps away after the third jal doClip, so execution
 # continues with the next assembled instruction, which in this listing is
 # the start of case_G_TRI1 (vertices at dinp-3..dinp-1). Presumably this
 # fall-through is how the second triangle gets drawn - confirm.
 #
  case_G_TRI2:	
		lbu	v0, (0-7)(dinp)		# -7,-6
		lbu	v1, (0-6)(dinp)
		addi	wd, zero, 3
		lhu	v0, RSP_VADDR_TABLE(v0)
		sh	wd, CLIP_STATE_TABLE(zero)
		lhu	v1, RSP_VADDR_TABLE(v1)
		addi	gfx0, v0, 0		# -7
		jal	doClip			# draw first line
		sh	v1, (-2+RSP_OUTPUT_OFFSET)(zero) # -6
		
		lbu	v1, (0-5)(dinp)		# -6,-5
		lhu	v0, (-2+RSP_OUTPUT_OFFSET)(zero)	# saved -6 entry
		jal	doClip			# draw second line
		lhu	v1, RSP_VADDR_TABLE(v1)	# delay slot: finish v1 lookup

		lbu	v0, (0-5)(dinp)		# -5,-7
		addi	v1, gfx0, 0		# -7 entry saved in gfx0
		jal	doClip			# draw third line
		lhu	v0, RSP_VADDR_TABLE(v0)	# delay slot: finish v0 lookup
		
		.end	case_G_TRI2
.unname v0
.unname v1
.unname wd

.name v0,	$1
.name v1,	$2
.name wd,	$3
		.ent	case_G_TRI1
 #
 # G_TRI1 (LINE3D microcode, F3DEX GBI).
 # Draws the three edges between the vertices whose indexes are the bytes
 # at dinp-3 and dinp-2 and the low byte of gfx1 (the byte at dinp-1).
 # The value 3 is stored at CLIP_STATE_TABLE. gfx0 holds the table entry of
 # the dinp-3 vertex and is read back for the third edge. The third edge is
 # reached with a plain jump and return loaded from GFXDONE.
 #
  case_G_TRI1:	
		lbu	v0, (0-3)(dinp)		# -3,-2
		lbu	v1, (0-2)(dinp)
		addi	wd, zero, 3
		sh	wd, CLIP_STATE_TABLE(zero)
		lhu	v0, RSP_VADDR_TABLE(v0)
		lhu	v1, RSP_VADDR_TABLE(v1)
		jal	doClip			# draw first line
		addi	gfx0, v0, 0		# delay slot: save -3 entry in gfx0

		lbu	v0, (0-2)(dinp)		# -2,-1
		andi	v1, gfx1, 0xff
		lhu	v1, RSP_VADDR_TABLE(v1)
		jal	doClip			# draw second line
		lhu	v0, RSP_VADDR_TABLE(v0)	# delay slot: finish v0 lookup

		andi	v1, gfx1, 0xff		# -1,-3
		lhu	v1, RSP_VADDR_TABLE(v1)
		addi	v0, gfx0, 0		# -3 entry saved in gfx0
		j	doClip			# draw third line
		lhu	return, GFXDONE(zero)	# delay slot: return address (GfxDone)

		.end	case_G_TRI1
.unname v0
.unname v1
.unname wd

		
# else
 #############################################################################
 #
 # This code handles G_LINE3D. It is basically modified from the triangle
 # code above. It picks off the two vertex indices and calls the line code.
 #
 #
.name v0,	$1
.name v1,	$2
.name wd,	$3
.name vn,	$4
.name n,	$5

		.ent 	case_G_LINE3D
			
 #
 # G_LINE3D (LINE3D microcode, non-F3DEX GBI): draw one line.
 # Command bytes, relative to dinp (which already points at the next command):
 #	dinp-4	flag; flag*4 indexes the scratch words written below
 #		(presumably 0 or 1, selecting v0 or v1 - confirm)
 #	dinp-3	first vertex index  (pre-multiplied by 10, see below)
 #	dinp-2	second vertex index (pre-multiplied by 10, see below)
 #	dinp-1	line width; 3 is added before it is stored
 # After doClip returns, control goes to GfxDone.
 #
  case_G_LINE3D:
	# pick off flag field
		lbu	n, (0-4)(dinp)	# which normal?	
	# dinp points to next dl cmd, so back up to get line indices
 		lbu	v0, (0-3)(dinp)
		lbu	v1, (0-2)(dinp)
	# which normal?
		sll	n, n, 2			# word-size offset

	# this is a hack. we get the width, add 3 (0 means 'min' width)
	# and store it back to an unused DMEM location. We'll retrieve
	# it later and use it...
 		lbu	wd, (0-1)(dinp)
		addi	wd, wd, 3
		sh	wd, CLIP_STATE_TABLE(zero)

	# translate indices into DMEM offsets. Point buffer entries are
	# 40 bytes (yuk!) each... The interface (mbi.h) pre-multiplies
	# the indices by 10, so we just have to multiply by 4.
		sll	v0, v0, 2
		sll	v1, v1, 2

		
		addi	v0, v0, RSP_POINTS_OFFSET
		addi	v1, v1, RSP_POINTS_OFFSET
	
	# park both vertex addresses in scratch, then reload the one
	# selected by the flag offset into vn
		sw	v0, (0+RSP_SCRATCH_OFFSET)(zero)
		sw	v1, (4+RSP_SCRATCH_OFFSET)(zero)
		lw	vn, RSP_SCRATCH_OFFSET(n)
	
		jal	doClip
		nop   	# delay slot, might do something useful here later
		j	GfxDone
		nop	# delay slot, might do something useful here later
		
		.end	case_G_LINE3D
	
.unname v0
.unname v1
.unname wd
.unname vn
.unname n
 #
 #
 #
 #############################################################################

 #############################################################################
 #
 # This code handles G_TRI1 in the line microcode.  It draws 3 lines (the edges
 # of the triangle).

.name v0,	$1
.name v1,	$2
.name wd,	$3
.name vn,	$4
.name n,	$5

		.ent	case_G_TRI1
 #
 # G_TRI1 (LINE3D microcode, non-F3DEX GBI): draw the triangle as 3 lines.
 # Each line is produced by the same sequence:
 #	Tri1_getnormal	reload the raw index bytes (v0 = dinp-3, v1 = dinp-2)
 #			and compute vn
 #	Tri1_presetup	store width 3 and turn v0/v1 into point addresses;
 #			the instruction in its jal delay slot may first
 #			replace v0 or v1 with the third index (dinp-1)
 #	doClip		draw the line
 # Resulting vertex pairs: (-3,-2), (-1,-2), (-3,-1).
 #
  case_G_TRI1:	
		jal	Tri1_getnormal
		lbu	n, (0-4)(dinp)	# which normal?

		jal	Tri1_presetup
		nop
		jal	doClip		# draw first line
		nop

		jal	Tri1_getnormal
		lbu	n, (0-4)(dinp)	# which normal?

		jal	Tri1_presetup
		lbu	v0, (0-1)(dinp)	# delay slot: v0 = third index
		jal	doClip		# draw second line
		nop
		
		jal	Tri1_getnormal
		lbu	n, (0-4)(dinp)	# which normal?

		jal	Tri1_presetup
		lbu	v1, (0-1)(dinp)	# delay slot: v1 = third index
		jal	doClip		# draw third line
		nop
		
	# NOTE(review): no explicit delay-slot instruction follows this jump;
	# the first instruction of Tri1_presetup (addi wd, zero, 3) occupies
	# the slot. It only writes wd - confirm that this is intended.
		j	GfxDone
	

 #
 # Tri1_presetup: store the minimum width and convert the indexes in v0 and
 # v1 into point buffer addresses. Returns through the return register.
 #
  Tri1_presetup:
	# Use min width (0 + 3)
		addi	wd, zero, 3
		sh	wd, CLIP_STATE_TABLE(zero)

	# translate indices into DMEM offsets. Point buffer entries are
	# 40 bytes (yuk!) each... The interface (mbi.h) pre-multiplies
	# the indices by 10, so we just have to multiply by 4.
		sll	v0, v0, 2
		sll	v1, v1, 2
		
		addi	v0, v0, RSP_POINTS_OFFSET
		jr	return
		addi	v1, v1, RSP_POINTS_OFFSET	# delay slot

 #
 # Tri1_getnormal: load the three raw index bytes, store them as words in
 # the scratch area and compute vn. Expects the flag byte in n.
 # NOTE(review): the word loaded into vn from RSP_SCRATCH_OFFSET(n) is
 # overwritten by the sll that follows, so vn always ends up as
 # v0*4 + RSP_POINTS_OFFSET whatever n is - confirm that this is intended.
 #
  Tri1_getnormal:
	# pick off flag field
	# dinp points to next dl cmd, so back up to get tri indices
		lbu	v0, (0-3)(dinp)
		lbu	v1, (0-2)(dinp)
		lbu	vn, (0-1)(dinp)
	# which normal?
		sll	n, n, 2			# word-size offset

		sw	v0, (0+RSP_SCRATCH_OFFSET)(zero)
		sw	v1, (4+RSP_SCRATCH_OFFSET)(zero)
		sw	vn, (8+RSP_SCRATCH_OFFSET)(zero)
		lw	vn, RSP_SCRATCH_OFFSET(n)

		sll	vn, v0, 2
		jr	return
		addi	vn, vn, RSP_POINTS_OFFSET	# delay slot


		.end	case_G_TRI1

.unname v0
.unname v1
.unname wd
.unname vn
.unname n
 #
 #
 #
 #############################################################################


# endif /* F3DEX_GBI */
#endif /* LINE3D */

#ifdef FAST3D	
#  ifdef F3DLP_GBI
#    include	"gltri.s"
#  else	 /* F3DLP_GBI */	
#    ifdef  F3DEX_GBI

 #############################################################################
 #
 # This code handles G_TRI1. It picks off the three vertex indices and
 # calls the triangle setup code. 
 #
 #
.name v0,		$1
.name v1,		$2
.name v2,		$3
.name vn,		$4
.name n,		$5
.name tmp,		$6

		.ent	case_G_TRI1
case_G_TRI1:	

 #-YASU
 #  
 #  F3DEX-TRI1
 #
 #  Set up the return location.
 #  Done this way so that part of the code is shared with G_TRI2.
 #  The flag field is not supported; the first vertex is always used as
 #  the flag vertex.
 #  
 #  return_save cannot be used because it is destroyed when the Clip code
 #  is overlaid, so gfx1 is used instead.
 #
 #  A conversion table is referenced to cut down the instruction count.
 #  v0, v1 and v2 must be supplied as doubled (index*2) values.
 #
 #-YASU
 #
 #  Entry points used from other handlers in this file:
 #    TriStart   expects gfx1 already set; loads return from CLIPANDSETUP
 #    TriStart1  expects return already set; loads the index bytes
 #    TriStart2  expects v0,v1,v2 to hold index bytes; does the table lookup
 #
		lh	gfx1,   GFXDONE(zero)	# return to GfxDone:
TriStart:	
		lh	return, CLIPANDSETUP(zero)
TriStart1:	
		lbu	v0, (0-3)(dinp)
		lbu	v1, (0-2)(dinp)
		lbu	v2, (0-1)(dinp)
TriStart2:
		lhu	v0, RSP_VADDR_TABLE(v0)
		lhu	v1, RSP_VADDR_TABLE(v1)
		lhu	v2, RSP_VADDR_TABLE(v2)

		jr	return
		addi	vn, v0, 0	# flag fixed to 0 (vn = v0)

		.end	case_G_TRI1
.unname v0
.unname v1
.unname v2
.unname vn
.unname n
.unname tmp

		.ent	case_G_TRI2
case_G_TRI2:	

 #-YASU
 #  
 #  F3DEX-TRI2
 #
 #  Draw the first triangle (vb0, vb1, vb2)
 #
 #  return_save cannot be used because it is destroyed when the Clip code
 #  is overlaid, so gfx1 is used instead.
 #
 #-YASU
		jal	TriStart
		addi	gfx1, return, 0		# delay slot: gfx1 = address of Tri2_2ndTri
 #-YASU
 #  
 #  Draw the second triangle (va0, va1, va2)
 #  
 #-YASU
.name v0,		$1
.name v1,		$2
.name v2,		$3
Tri2_2ndTri:	
		lbu	v0, (0-7)(dinp)			
		lbu	v1, (0-6)(dinp)		
		lbu	v2, (0-5)(dinp)		
		lh	return, CLIPANDSETUP(zero)
		j	TriStart2
		lh	gfx1, GFXDONE(zero)	# return to GfxDone:
.unname v0
.unname v1
.unname v2
		.end	case_G_TRI2
#ifdef	QUAD
.name v0,		$1
.name v1,		$2
.name v2,		$3
.name v3,		$7
.name y0,		$8
.name y1,		$9
.name y2,		$10
.name y3,		$11	# registers are spent freely here to favor readability
.name d02,		$6
.name d13,		$12
.name vn,		$4	# flatp
.name n,		$5
		.ent	case_G_QUAD3D
case_G_QUAD3D:	

 #-YASU
 #
 #  F3DEX-QUAD
 #
 # 7/25
 #  Adapted to the vertex cache size change made by the VTX optimization.
 #  
 #  RSP_SCRATCH_OFFSET may be used by the CLIP code, so the values are
 #  saved somewhere different from G_TRI1. Although 160 bytes are reserved
 #  for CLIP_TMP, only 40 bytes are in use, so the remainder is used.
 #
 #  flag is unsupported. Instead, flag is fixed to v1.
 #  
 #  return_save cannot be used because it is destroyed when the Clip code
 #  is overlaid, so gfx1 is used instead.
 #
 #  On back-face rejection, 0 is assigned to return.
 #  
 #  The NewCom version of LX does not support dynamic division.
 #
 #-YASU
#ifndef STATICQDIV	/* normal Quad */
	# dinp points to next dl cmd, so back up to get tri indices
		jal	TriStart1	# fetch v0,v1,v2
		lbu	v3, (0-4)(dinp)
		lhu	v3, RSP_VADDR_TABLE(v3)
		
		lh	y0, RSP_PTS_YS(v0)
		lh	y2, RSP_PTS_YS(v2)
		lh	y1, RSP_PTS_YS(v1)
		lh	y3, RSP_PTS_YS(v3)
		
	# the sh instructions in the branch delay slots below save v0, v2 and
	# v3 for the second triangle; they execute whether or not the branch
	# is taken
		sub	d02, y0, y2	# d02 = |y0 - y2|		
		bgez	d02, Abs_1	# d13 = |y1 - y3|
		sh	v0, ( 0-6+RSP_OUTPUT_OFFSET)(zero)
		sub	d02, y2, y0
Abs_1:		sub	d13, y1, y3
		bgez	d13, Abs_2
		sh	v2, ( 2-6+RSP_OUTPUT_OFFSET)(zero)
		sub	d13, y3, y1
Abs_2:		sub	d02, d02, d13
		blez	d02, Devide02	# split along 02 when d02 < d13
		sh	v3, ( 4-6+RSP_OUTPUT_OFFSET)(zero)
		
Devide13:	addi	v2, v3, 0
		sh	v1, ( 0-6+RSP_OUTPUT_OFFSET)(zero)

Devide02:	jal	clipAndSetup
		addi	gfx1, return, 0		# return to Quad2ndTri:

Quad_2ndTri:	beq	return, $0, GfxDone	# done when the 1st TRI is back-facing
		lh	v0, ( 2-6+RSP_OUTPUT_OFFSET)(zero)	# v2
		lh	v1, ( 4-6+RSP_OUTPUT_OFFSET)(zero)	# v3
		lh	v2, ( 0-6+RSP_OUTPUT_OFFSET)(zero)	# v0
		j	clipAndSetup
		lh	gfx1, GFXDONE(zero)	# return to GfxDone:

#else	/* STATICQDIV */	/* custom version for NewCom */
	#
	#  Draw the Quad as 2 Tris
	#
		# first TRI
		jal	TriStart
		addi	gfx1, return, 0

		# second TRI
		lbu	v0, (0-3)(dinp)			
		lbu	v1, (0-1)(dinp)		
		lbu	v2, (0-4)(dinp)		
		lh	return, CLIPANDSETUP(zero)
		j	TriStart2
		lh	gfx1, GFXDONE(zero)	# return to GfxDone:
#endif  /* STATICQDIV */
.unname v0
.unname v1
.unname v2
.unname v3
.unname vn
.unname n
.unname y0
.unname y1
.unname y2
.unname y3
.unname d02
.unname d13
		.end	case_G_QUAD3D
#endif	/* QUAD */
#    else  /* F3DEX_GBI */

 #############################################################################
 #
 # This code handles G_TRI1. It picks off the three vertex indices and
 # calls the triangle setup code. 
 #
 #
.name v0,		$1
.name v1,		$2
.name v2,		$3
.name vn,		$4
.name n,		$5
.name tmp,		$6

		.ent	case_G_TRI1
	
 #
 # G_TRI1 (FAST3D, neither F3DLP nor F3DEX GBI).
 # Command bytes relative to dinp: -4 flag, -3/-2/-1 the three vertex
 # indexes. Computes the point buffer addresses v0,v1,v2, writes them to
 # three consecutive scratch words, reloads the word at offset flag*4 into
 # vn and jumps to clipAndSetup with the halfword at GFXDONE held in gfx1
 # (RETGFX1) or return_save.
 #
case_G_TRI1:	
	# pick off flag field
		lbu	n, (0-4)(dinp)	# which normal?
	# dinp points to next dl cmd, so back up to get tri indices
		lbu	v0, (0-3)(dinp)
		lbu	v1, (0-2)(dinp)
		lbu	v2, (0-1)(dinp)
	# which normal?
		sll	n, n, 2			# word-size offset

	# translate indices into DMEM offsets. Point buffer entries are
	# 40 bytes (yuk!) each... The interface (mbi.h) pre-multiplies
	# the indices by 10, so we just have to multiply by 4.
		sll	v0, v0, 2
		sll	v1, v1, 2
		sll	v2, v2, 2
	
		addi	v0, v0, RSP_POINTS_OFFSET
		addi	v1, v1, RSP_POINTS_OFFSET
		addi	v2, v2, RSP_POINTS_OFFSET
	
		sw	v0, (0+RSP_SCRATCH_OFFSET)(zero)
		sw	v1, (4+RSP_SCRATCH_OFFSET)(zero)
		sw	v2, (8+RSP_SCRATCH_OFFSET)(zero)
		lw	vn, RSP_SCRATCH_OFFSET(n)
	
	# the load selected by the conditional below sits in the delay slot
		j	clipAndSetup
#ifdef	RETGFX1
		lh	gfx1,GFXDONE(zero)		# return to GfxDone:
#else
		lh	return_save,GFXDONE(zero)	# return to GfxDone:
#endif
		.end	case_G_TRI1

.unname v0
.unname v1
.unname v2
.unname vn
.unname n
.unname tmp
 #
 #
 #
 #############################################################################
#    endif  /* F3DEX_GBI */
#  endif  /* F3DLP_GBI */
#endif /* FAST3D */

	
 #############################################################################
 #
 # This code handles G_POPMTX. It checks the stack depth, backs up the
 # stack pointer, then DMA's the matrix into DMEM, updates the state,
 # and loads the registers.
 # 
 #
.name param,		$1
.name mstack_p,		$19
#ifdef	MTXNOLMT
.name mstack_min,	$3
#else
.name mstack_max,	$3
#endif
.name mat_sz,		$18
.name mat_p,		$20
	
		.ent	case_G_POPMTX
	
  case_G_POPMTX:	
	# 'param' already filled in but not used

	# we can only pop the MODELVIEW stack

	# get pointer and stack size
 #-YASU
 #  
 #  Modified to reference the stack data held inside the TASK structure
 #  
 #-YASU	
		sbv     vconst[6],RSP_STATE_L_LEN(rsp_state)            # hi bit = light recalc
		lw	mstack_p,  RSP_STATE_MMTX_STACK_P(rsp_state)	# stack ptr
#ifdef	MTXNOLMT
		lw	mstack_min, (RSP_TASK_OFFSET+OS_TASK_OFF_STACK)($0)
#else
		lw	mstack_max, RSP_STATE_MMTX_STACK_MAX(rsp_state)	# end of stack
#endif
		addi	mat_p, zero, RSP_CURR_MMTX_OFFSET		# where to DMA matrix
#ifdef	MTXNOLMT
		sub	mstack_min, mstack_min, mstack_p		# size of stack
	
	# check matrix stack depth, bail if == 0
		bgez	mstack_min, GfxDone				# anything on stack?
#else
		sub	mstack_max, mstack_max, mstack_p		# size of stack
		addi	mstack_max, mstack_max, (-10*64)		#  ... - max size of stck
	
	# check matrix stack depth, bail if == 0
		bgez	mstack_max, GfxDone				# anything on stack?
#endif
	# delay slot of the bgez above: runs on both paths, but the new pointer
	# is only written back to the state at the end of this handler
		addi	mstack_p, mstack_p, -64				# stack is 1 mtx smaller
 ### BRANCH OCCURS TO GfxDone: IF NOTHING ON STACK


		jal	DMAread						# DMA matrix from stack
		addi	mat_sz, zero, 63	# DMA expects sz-1	# DMA length -1
	
	# the delay slot below reuses $3, the register that was aliased as
	# mstack_min / mstack_max above
		jal	DMAwait						# wait for DMA to finish
		addi    $3, zero, RSP_CURR_MPMTX_OFFSET                 # where to put MP matrix
	
	# update state, then jump to pre-multiply MxP
		j	mtx_MxP						# mult model * proj mtx
		sw	mstack_p,  RSP_STATE_MMTX_STACK_P(rsp_state)	# store new stack pointer

	
		.end	case_G_POPMTX
.unname param
.unname mstack_p
#ifdef	MTXNOLMT
.unname mstack_min
#else
.unname mstack_max
#endif
.unname mat_sz
.unname mat_p
 #
 #
 #
 #############################################################################
	
 #############################################################################
 #
 # this handles the G_MOVEWORD command, moving 1 word into dmem
 #
 #	
.name mw_index,		$1
.name mw_dest,		$5
.name mw_offset,	$2

		.ent	case_G_MOVEWORD

 #
 # G_MOVEWORD: write the second command word (gfx1) to DMEM.
 # The byte at dinp-5 indexes the halfword table MOVEWORD_TBL, which yields
 # a base address; the halfword at dinp-7 is added to it as an offset.
 #
  case_G_MOVEWORD:
		lbu	mw_index, (0-5)(dinp)			# table index
		lhu	mw_offset, (0-7)(dinp)			# offset from base
		lh	mw_dest, (MOVEWORD_TBL)(mw_index)	# base address
		add	mw_dest, mw_dest, mw_offset		# base + offset
		j	GfxDone
		sw	gfx1, 0(mw_dest)			# delay slot: store the word

		.end	case_G_MOVEWORD

.unname mw_index
.unname mw_dest
.unname mw_offset
 #
 #
 #############################################################################
	
	
 #############################################################################
 #
 # This code handles the G_TEXTURE.
 #
 #
.name	rmode, 		$2
.name	mask, 		$3
	
		.ent	case_G_TEXTURE
	
 #
 # G_TEXTURE: record the texture command and switch texturing on or off.
 # Both command words are saved in the state (gfx0 at RSP_STATE_TEX_CMD,
 # gfx1 at RSP_STATE_TEX_SCALE_S). Bit 0 of gfx0 is the on bit; it is moved
 # to bit 1 and replaces bit 1 of the halfword at RSP_STATE_RENDER_L.
 # Only the aliases this handler actually uses are declared.
 #
  case_G_TEXTURE:	

	# turn texture on or off:
		sw	gfx0, RSP_STATE_TEX_CMD(rsp_state)
		sw	gfx1, RSP_STATE_TEX_SCALE_S(rsp_state)
		lh	rmode, RSP_STATE_RENDER_L(rsp_state)
		andi	rmode, rmode, 0xfffd	# clear texture state
		andi	mask, gfx0, 0x01	# on bit
		sll	mask, mask, 1		# line it up with bit 1
		or	rmode, rmode, mask	# set texture on (maybe)
 		j	GfxDone
		sh	rmode, RSP_STATE_RENDER_L(rsp_state)	# delay slot: write back
	
		.end	case_G_TEXTURE
	
.unname	rmode
.unname	mask
	
 #############################################################################
 #
 # This code handles G_SETOTHERMODE_*.
 #
 #
.name mode_word,	$3
.name keep_mask,	$2
.name fld_len,		$5
.name fld_shift,	$6
.name state_addr,	$7

		.ent	case_G_OTHERMODE

 #
 # G_SETOTHERMODE_H / G_SETOTHERMODE_L: replace a bit field in one of the
 # two othermode state words, then send both words to the RDP.
 # The two entry points differ only in which state word state_addr selects.
 # Field length is the byte at dinp-5, field shift the byte at dinp-6, and
 # the new field bits arrive already positioned in gfx1.
 #
  case_G_SETOTHERMODE_H:
		j	doOtherMode
		addi	state_addr, rsp_state, RSP_STATE_OTHER_H	# delay slot

  case_G_SETOTHERMODE_L:
		addi	state_addr, rsp_state, RSP_STATE_OTHER_L

	# shared by both OTHERMODE commands from here on
 	doOtherMode:
		lw	mode_word, 0(state_addr)
		lbu	fld_len, (0-5)(dinp)
		lbu	fld_shift, (0-6)(dinp)

	# keep_mask = ~(((1 << len) - 1) << shift)
		addi	keep_mask, zero, 0x01
		sllv	keep_mask, keep_mask, fld_len
		addi	keep_mask, keep_mask, -1
		sllv	keep_mask, keep_mask, fld_shift
		nor	keep_mask, keep_mask, zero		# rd = ~(rs|rt)

	# clear the field in the old word, merge the new bits, write back
		and	keep_mask, keep_mask, mode_word
		or	mode_word, keep_mask, gfx1
		sw	mode_word, 0(state_addr)

	# output to RDP: 64 bits at once, command byte already in place,
	# through the regular RDP send routine
		lw	gfx0, RSP_STATE_OTHER_H(rsp_state)
		j	doRDPSend
		lw	gfx1, RSP_STATE_OTHER_L(rsp_state)	# delay slot

		.end	case_G_OTHERMODE

.unname mode_word
.unname keep_mask
.unname fld_len
.unname fld_shift
.unname state_addr
 #
 #
 #
 #############################################################################


 #############################################################################
 #
 # This code handles G_CULLDL.
 #
 # Ends display list if vertices n through m are mutually trivially rejected
 # (ie the volume described by these vertices is completely outside of the
 # trivial reject volume).
 #
.name cc,	$2
.name tmp,	$3

 # To achieve GBI compatibility, the DL is changed as follows
 #
 #   31         24         16          8          0
 #    | G_CULLDL |        0 |  start vertex number*2 | 
 #    |        0 |        0 |  end vertex number*2   |
 #  
 # Every build variant below walks the vertices from gfx0 to gfx1 in steps
 # of RSP_PTS_LEN. No jump closes the handler: when it does not branch to
 # GfxDone, execution continues with the code assembled next.
 #
		.ent	case_G_CULLDL
case_G_CULLDL:
#ifndef	NO_CULLDL
#ifdef	F3DLP_GBI
#ifndef	DIRECTZCMP
#ifdef	PERSPTXTR
		/* CULLDL processing for LX.Rej */
		sll	tmp, gfx0, 2		# *8
		add	tmp, tmp, gfx0		# 2+8
		sll	gfx0, tmp, 1		# 10*2
		sll	tmp, gfx1, 2		# *8
		add	tmp, tmp, gfx1		# 2+8
		sll	gfx1, tmp, 1		# 10*2
#else
		/* CULLDL processing for LP.Rej */
		sll	gfx0, gfx0, 3
		sll	gfx1, gfx1, 3
#endif
		andi	gfx0, gfx0, 0xffff	
#else	/* DIRECTZCMP */
		lhu	gfx0, RSP_VADDR_TABLE(gfx0)
		lhu	gfx1, RSP_VADDR_TABLE(gfx1)
#endif	/* DIRECTZCMP */
		addi	cc, zero, 0x7fff
VolCulLoop:
#ifndef	DIRECTZCMP
		lhu	tmp, (RSP_POINTS_OFFSET+RSP_PTS_XS)(gfx0)
#else	/* DIRECTZCMP */
		lhu	tmp, RSP_PTS_XS(gfx0)	
#endif	/* DIRECTZCMP */
		bne	tmp, cc, GfxDone	# done when not 0x7fff
		nop
		bne	gfx0, gfx1, VolCulLoop
		addi	gfx0, gfx0, RSP_PTS_LEN	
#else
#ifdef F3DEX_GBI
		lhu	gfx0, RSP_VADDR_TABLE(gfx0)	# vertex to start on
		lhu	gfx1, RSP_VADDR_TABLE(gfx1)	# vertex to end   on
#else
		andi	gfx0, gfx0, 0x03ff
#endif
#ifdef NEAR_CLIP_OFF
		ori	cc, zero, 0x7030		# initialize cc
#else /* NEAR_CLIP_OFF */
		ori	cc, zero, 0x7070		# initialize cc
#endif /* NEAR_CLIP_OFF */

VolCulLoop:
#ifdef F3DEX_GBI
		lh	tmp, RSP_PTS_CC(gfx0)
#else
		lh	tmp, (RSP_POINTS_OFFSET+RSP_PTS_CC)(gfx0)
#endif
		and	cc, cc, tmp			# is this vtx clipped?
		bne	gfx0, gfx1, VolCulLoop		# loop through vtx's
		addi	gfx0, gfx0, RSP_PTS_LEN
 ### LOOP OCCURS if not all points have been checked

	# the delay slot of this beq is the next assembled instruction
		beq	cc, zero, GfxDone		# continue if not culled
 # NOTE Delay Slot!!
#endif
#endif
		.end	case_G_CULLDL
.unname cc
.unname tmp
 #
 #
 #
 #############################################################################

 ### IMPORTANT!!!!  Do not place any code between case_G_CULLDL and case_G_ENDDL (case_G_CULLDL falls through into case_G_ENDDL)

 #############################################################################
 #
 # This code handles G_ENDDL.
 #
 # Causes a 'pop' of the display list stack. If we pop an empty 
 # display list stack, that's an error and we end.
 #
.name dl_depth,	$2
.name dl_slot,	$3

		.ent	case_G_ENDDL
 #
 # G_ENDDL: pop one entry off the display list stack.
 # The byte at RSP_STATE_DL_N is lowered by 4; a negative result means the
 # stack was empty and the task ends at TaskDone. Otherwise the saved
 # display list pointer is reloaded into inp and the new byte is stored.
 #
  case_G_ENDDL:
		lb	dl_depth, RSP_STATE_DL_N(rsp_state)	# current stack byte
		addi	dl_depth, dl_depth, -4			# one word less
		bltz	dl_depth, TaskDone			# nothing left to pop
		addi	dl_slot, dl_depth, RSP_DLSTACK_OFFSET	# delay slot: slot address
		lw	inp, 0(dl_slot)				# saved DL pointer
		sb	dl_depth, RSP_STATE_DL_N(rsp_state)
		j	GfxDone
	# delay slot: one of the two dlcount resets below
#ifdef	SMARTDLCOUNT
		addi	dlcount, dinp, 0
#else
		addi	dlcount, zero, 0
#endif
		.end	case_G_ENDDL

.unname dl_depth
.unname dl_slot
 #
 #
 #
 #############################################################################

#ifdef		RSP_PAUSE
 #############################################################################
 #
 # This code handles G_RSP_PAUSE
 #
 #   31          24           16            8            0
 #   +------------+------------+------------+------------+
 #   |G_RSP_PAUSE |                                      |
 #   +------------+------------+------------+------------+
 #   |                      Signal                       |
 #   +------------+------------+------+-----+------------+
 #
 #  Keeps looping until the value the CPU wrote to address 0x04000ffc is
 #  greater than or equal to the signal value.
 #
 #  The handler is entered at case_G_RSP_PAUSE. The load at that label also
 #  serves as the delay slot of the bgez, so each pass reloads the word,
 #  subtracts gfx1 in the delay slot of the j, and tests the difference.
 #
 		.ent	case_G_RSP_PAUSE
case_G_RSP_PAUSE_Loop:	
		bgez	gfx0, GfxDone		# leave once (word - signal) >= 0
case_G_RSP_PAUSE:
		lw	gfx0, (RSP_TASK_OFFSET+OS_TASK_OFF_YIELD_SZ)(zero)
		j	case_G_RSP_PAUSE_Loop
		sub	gfx0, gfx0, gfx1	# delay slot: word - signal
 		.end	case_G_RSP_PAUSE
 #
 #
 #
 #############################################################################
#endif
	
#ifdef		BRANCH_Z
 #############################################################################
 #
 # This code handles G_BRANCH_Z
 #
 #   31          24           16            8            0
 #   +------------+------------+------------+------------+
 #   |G_RDPHALF_1 |                                      |
 #   +------------+------------+------------+------------+
 #   |                   branch addrs                    |
 #   +------------+------------+------+-----+------------+
 #   |G_BRANCH_Z  |       Vtx*5       | 0000|    Vtx*2   |
 #   +------------+------------+------+-----+------------+
 #   |                    Z Value                        |
 #   +------------+------------+------------+------------+
 #
 #  Branches when the depth value (here W) of the specified vertex is less
 #  than or equal to the specified value.
 #  The parameters do not all fit in one command, so G_RDPHALF_1 is used to
 #  split them across two commands.
 #
 		.ent	case_G_BRANCH_Z
    case_G_BRANCH_Z:

 # -----------------------------------
 #	Comparison using screen Z
 # -----------------------------------
#ifndef	DIRECTZCMP
# ifdef F3DEX_GBI
		lhu	gfx0, RSP_VADDR_TABLE(gfx0)       # look the vertex up in the table
		lw	gfx0, RSP_PTS_ZS(gfx0)		  # fetch the depth value
# elif  defined(F3DLP_GBI)
#  ifdef PERSPTXTR
		srl	gfx0, gfx0, 10			  # make Vtx * 20
#  else
		sll	gfx0, gfx0, 3			  # make Vtx * 16
#  endif
		lw	gfx0, (RSP_PTS_ZS+RSP_POINTS_OFFSET)(gfx0)
# endif
#else
 # -----------------------------------
 #	Comparison using the W int value
 # -----------------------------------	
# if	(defined(F3DEX_GBI)||defined(F3DLP_GBI))
		lhu	gfx0, RSP_VADDR_TABLE(gfx0)       # look the vertex up in the table
		lh	gfx0, RSP_PTS_W_INT(gfx0)	  # fetch the depth value
# endif
#endif
		sub	gfx0, gfx0, gfx1		  # compare
		bgtz	gfx0, GfxDone			  # done when greater
		lw	gfx1, RSP_STATE_RDPHALF(rsp_state) # delay slot: word saved by G_RDPHALF_1
	# NOTE(review): no explicit delay-slot instruction follows this jump,
	# so the next assembled instruction occupies the slot - confirm that
	# this is intended.
		j	G_DL_noPush			  # perform the branch
		.end	case_G_BRANCH_Z
 #
 #
 #
 #############################################################################
#endif
	
 #############################################################################
 #
 # This code handles G_SETGEOMETRYMODE
 #
 # Any bit set 'on' in the incoming command is 'set' in the state.
 # Assumes gfx1 is all 0's, except some of the lower 16 bits.
 #
.name	geom_bits,	$2

		.ent	case_G_SETGEOMETRYMODE

 #
 # G_SETGEOMETRYMODE: OR the second command word (gfx1) into the word at
 # RSP_STATE_RENDER, so every bit set in the command becomes set in the state.
 #
  case_G_SETGEOMETRYMODE:
		lw	geom_bits, RSP_STATE_RENDER(rsp_state)	# current state word
		or	geom_bits, geom_bits, gfx1		# turn the requested bits on
		j	GfxDone
		sw	geom_bits, RSP_STATE_RENDER(rsp_state)	# delay slot: write back

		.end	case_G_SETGEOMETRYMODE

.unname	geom_bits
 #
 #
 #
 #############################################################################
	
	
 #############################################################################
 #
 # This code handles G_CLEARGEOMETRYMODE
 #
 # Any bit set 'on' in the incoming command is 'cleared' in the state.
 # Assumes gfx1 is all 0's, except some of the lower 16 bits.
 #
.name	geom_bits,	$2
.name	keep_bits,	$3

		.ent	case_G_CLEARGEOMETRYMODE

 #
 # G_CLEARGEOMETRYMODE: AND the word at RSP_STATE_RENDER with the complement
 # of the second command word (gfx1), so every bit set in the command
 # becomes clear in the state.
 #
  case_G_CLEARGEOMETRYMODE:
		lw	geom_bits, RSP_STATE_RENDER(rsp_state)	# current state word
		nor	keep_bits, gfx1, zero			# keep_bits = ~gfx1
		and	geom_bits, geom_bits, keep_bits		# drop the requested bits
		j	GfxDone
		sw	geom_bits, RSP_STATE_RENDER(rsp_state)	# delay slot: write back

		.end	case_G_CLEARGEOMETRYMODE

.unname	geom_bits
.unname	keep_bits
 #
 #
 #
 #############################################################################

#ifdef	F3DEX_GBI
 #############################################################################
 #
 # For ModifyVertex compatibility, separate handler routines are provided
 # for EX/LX and for LX.Rej/LP.Rej.
 #
 #   31         24          16           8           0
 #   +-----------+-----------+-----------+-----------+
 #   |G_MODIFYVTX|   offset  |         vtx * 2       |
 #   +-----------+-----------+-----------+-----------+
 #   |                      data                     |
 #   +-----------+-----------+-----------+-----------+
 #
 # F3DEX version: the vertex address comes from RSP_VADDR_TABLE, the offset
 # byte (dinp-7) is added to it, and the data word gfx1 is stored there
 # (16 bytes lower when NEWCOM is defined).
 #
 #############################################################################
		.name	offset, $2
		.name	addrs,  $3
 		.ent	case_G_MODIFYVTX
	
    case_G_MODIFYVTX:	
		lbu	offset, -7(dinp)		# offset byte of the command
		lhu	addrs,  RSP_VADDR_TABLE(gfx0)	# vertex address from the table
		add	addrs, addrs, offset
 		j	GfxDone
	# delay slot: the store selected by the conditional below
#ifdef	NEWCOM
		sw	gfx1, -16(addrs)
#else
		sw	gfx1, 0(addrs)
#endif
 		.end	case_G_MODIFYVTX
		.unname	offset
		.unname	addrs
	
#elif	defined(F3DLP_GBI)
		.name	offset, $2
		.name	addrs,  $3
 		.ent	case_G_MODIFYVTX	

 #
 # G_MODIFYVTX, F3DLP version: store the data word gfx1 into a vertex.
 # The vertex base is computed from gfx0 (vtx*2 in the command word):
 #	no DIRECTZCMP, PERSPTXTR	gfx0 * 10, relative to RSP_POINTS_OFFSET
 #	no DIRECTZCMP, no PERSPTXTR	gfx0 * 8,  relative to RSP_POINTS_OFFSET
 #	DIRECTZCMP			address from RSP_VADDR_TABLE
 # The offset byte (dinp-7) is XORed with 0x18 before it is added.
 # NOTE(review): presumably the XOR remaps offsets between vertex layouts -
 # confirm.
 # The condition is written with defined() so it matches the other
 # F3DLP_GBI tests in this file and stays valid when the macro has an
 # empty definition.
 #
    case_G_MODIFYVTX:
		lbu	offset, -7(dinp)
# ifndef DIRECTZCMP
#  ifdef PERSPTXTR
		sll	addrs, gfx0, 2
		add	addrs, addrs, gfx0
		sll	addrs, addrs, 1
#  else
		sll	addrs, gfx0, 3
#  endif
# else
		lhu	addrs,  RSP_VADDR_TABLE(gfx0)		
# endif
		xori	offset, offset, 0x18
		add	addrs, addrs, offset		
 		j	GfxDone
	# delay slot: the store selected by the conditional below
#ifndef	DIRECTZCMP
		sw	gfx1, RSP_POINTS_OFFSET(addrs)
#else
		sw	gfx1, 0(addrs)
#endif
 		.end	case_G_MODIFYVTX
		.unname	offset
		.unname	addrs
#else
 #############################################################################
 #
 # This code handles G_PERSPNORM
 #
 # This magic number is needed to fix the transformation and clip
 # math, extracting the most precision. Grab the scale from gfx1
 # and save it for later.
 #
 		.ent	case_G_PERSPNORM
	
	# Save the low halfword of gfx1 at RSP_STATE_PERSPNORM; the store sits
	# in the delay slot of the jump back to GfxDone.
    case_G_PERSPNORM:
 		j	GfxDone
		sh	gfx1, RSP_STATE_PERSPNORM(rsp_state) # delay slot
	
 		.end	case_G_PERSPNORM
 #
 #
 #
 #############################################################################
#endif
	
 #############################################################################
 #
 # This code handles G_RDPHALF_1
 #
 # This received the 3rd quarter of a texrect or texrectflip command
 #
 		.ent	case_G_RDPHALF_1
	
	# Save gfx1 at RSP_STATE_RDPHALF (store in the delay slot) and leave
	# through noYield instead of GfxDone.
    case_G_RDPHALF_1:
 		j	noYield			   # don't yield mid cmd
		sw	gfx1, RSP_STATE_RDPHALF(rsp_state) # save for later...
	
 		.end	case_G_RDPHALF_1
 #
 #
 #
 #############################################################################

 #############################################################################
 #
 # This code handles G_RDPHALF_CONT
 #
 # This received the 2nd 32 bits of a 64 bit string to send to the RDP.
 # It sends it (using the G_RDPHALF_2 code below) AND disables yield
 # so the data will not get interrupted by a yield.  This should be used
 # only to send data which will be followed by more data from a G_RDPHALF_1
 # and G_RDPHALF_2 pair (or a G_RDPHALF_1 and G_RDPHALF_CONT pair).
 #
#if	!(defined(F3DEX_GBI)||defined(F3DLP_GBI))
 		.ent	case_G_RDPHALF_CONT
	
	# Clears $2 and then, with no jump of its own, continues into the
	# code assembled next (the G_RDPHALF_2 handler in this listing).
    case_G_RDPHALF_CONT:
		ori	$2, zero, 0		# disable yield

 		.end	case_G_RDPHALF_CONT
#endif
 #
 #
 #
 #############################################################################

 #############################################################################
 #
 # This code handles G_RDPHALF_2
 #
 # This receives the 4th quarter of a texrect or texrectflip command
 # and sends it and the 3rd quarter (ie the 2nd half) to the rdp.
 #
 		.ent	case_G_RDPHALF_2
	
	# Reload the word saved at RSP_STATE_RDPHALF into gfx0 (delay slot) and
	# hand gfx0/gfx1 to the shared RDP send routine.
    case_G_RDPHALF_2:
 		j	doRDPSend		   # jmp to send routine
		lw	gfx0, RSP_STATE_RDPHALF(rsp_state) # retrieve 3rd qtr
	
 		.end	case_G_RDPHALF_2
 #
 #
 #
 #############################################################################

#ifdef	TXLOAD4b
 #############################################################################
 #
 # Pulls in gtxtr4b.s when TXLOAD4b is defined.
 #
 # NOTE(review): the text that stood here was a verbatim copy of the
 # G_RDPHALF_2 description; see gtxtr4b.s for what the included code handles.
 #
#include	"gtxtr4b.s"
 #
 #
 #
 #############################################################################
#endif