/* gimm.s -- (repository web-viewer header text was captured here; it is not part of the source) */


/**************************************************************************
 *								          *
 *               Copyright (C) 1994, Silicon Graphics, Inc.       	  *
 *								          *
 *  These coded instructions, statements, and computer programs  contain  *
 *  unpublished  proprietary  information of Silicon Graphics, Inc., and  *
 *  are protected by Federal copyright  law.  They  may not be disclosed  *
 *  to  third  parties  or copied or duplicated in any form, in whole or  *
 *  in part, without the prior written consent of Silicon Graphics, Inc.  *
 *								          *
 *************************************************************************/

/*
 * File:		gimm.s
 * Creator:		hsa@sgi.com
 * Create Date:		Fri Jun 24 13:55:27 PDT 1994
 *
 * This file holds the top-level of the IMM command processing, and
 * related routines.
 *
 */

/*
 * VTXID_SHIFT selection, by GBI flavour:
 *
 *   F3DLP_GBI with CLIP_OFF ...... 3
 *   F3DLP_GBI without CLIP_OFF ... 4
 *   F3DEX_GBI (no F3DLP_GBI) ..... 3
 *   neither ...................... 2
 *
 * F3DLP_GBI takes precedence when both GBI macros are defined.
 */
#if defined(F3DLP_GBI) && defined(CLIP_OFF)
#  define VTXID_SHIFT		3
#elif defined(F3DLP_GBI)
#  define VTXID_SHIFT		4
#elif defined(F3DEX_GBI)
#  define VTXID_SHIFT		3
#else
#  define VTXID_SHIFT		2
#endif

 ############################################################################
 #
 # The following code processes the IMM type display list commands.
 # Registers on input:
 #	gfx0	- first word of display list command
 #	gfx1	- second word of display list command
 #	dinp	- points to *next* DL cmd, so back up for this one.
 #
		.ent	doIMM
doIMM:
	# $2 is shifted in the delay slot of the branch that
	# brought us here...

	# 'switch' to correct IMM command:
	# $2 becomes an even byte offset (bits 1..7 kept), which indexes a
	# table of halfword handler addresses starting at IMM_JMP_ADD.
		andi	$2, $2, 0xfe		# shifted up 1 for offset
		lh	$2,(IMM_JMP_ADD)($2)	# handler address from the jump table
		jr	$2			# dispatch to the case_G_* handler
	# consolidate some of the similar decoding...
	# (delay slot: runs before the handler; $1 = byte at dinp-1, the
	# last byte in front of the next command. Handlers may overwrite $1.)
		lbu	$1, (0-1)(dinp)	# pick off first field

		.end	doIMM
 #
 #
 #
 ############################################################################


#ifdef	LINE3D
# ifdef	F3DEX_GBI
 # F3DEX line microcode: G_LINE3D.
 # Reads two vertex index bytes from the command, converts each through
 # RSP_VADDR_TABLE, records the width, and tail-jumps to doClip.
.name v0,	$1
.name v1,	$2
.name wd,	$3
		.ent 	case_G_LINE3D
  case_G_LINE3D:
 		lbu	v0, (0-3)(dinp)		# first vertex index byte
		lbu	v1, (0-2)(dinp)		# second vertex index byte
 		andi	wd, gfx1, 0xff		# width = low byte of gfx1
		lhu	v0, RSP_VADDR_TABLE(v0)	# halfword table lookup, indexed by the byte
		lhu	v1, RSP_VADDR_TABLE(v1)
		addi	wd, wd, 3		# stored width = command width + 3
		sh	wd, CLIP_STATE_TABLE(zero)	# parked here for later use by the drawing code
		addi	gfx0, v0, 0		# copy of the first vertex value
		j	doClip
		lhu	return, GFXDONE(zero)	# delay slot: return = halfword at GFXDONE (presumably GfxDone's address)

		.end	case_G_LINE3D
.unname v0
.unname v1
.unname wd

 # F3DEX line microcode: G_TRI2.
 # Draws the three edges of the triangle whose index bytes sit at dinp-7,
 # dinp-6 and dinp-5.  No jump follows the third doClip call, so control
 # falls through into case_G_TRI1 below, which then draws the edges of
 # the triangle stored at dinp-3, dinp-2 and the low byte of gfx1.
 # NOTE(review): this relies on doClip preserving gfx0 and dinp -- confirm.
.name v0,	$1
.name v1,	$2
.name wd,	$3
		.ent	case_G_TRI2
  case_G_TRI2:
		lbu	v0, (0-7)(dinp)		# -7,-6
		lbu	v1, (0-6)(dinp)
		addi	wd, zero, 3		# width value 3 (no width field is read here)
		lhu	v0, RSP_VADDR_TABLE(v0)
		sh	wd, CLIP_STATE_TABLE(zero)
		lhu	v1, RSP_VADDR_TABLE(v1)
		addi	gfx0, v0, 0		# -7
		jal	doClip			# draw first line
		sh	v1, (-2+RSP_OUTPUT_OFFSET)(zero) # -6	(delay slot: park it for the second line)

		lbu	v1, (0-5)(dinp)		# -6,-5
		lhu	v0, (-2+RSP_OUTPUT_OFFSET)(zero)	# reload the value parked above
		jal	doClip			# draw second line
		lhu	v1, RSP_VADDR_TABLE(v1)	# delay slot

		lbu	v0, (0-5)(dinp)		# -5,-7
		addi	v1, gfx0, 0		# first vertex value kept in gfx0
		jal	doClip			# draw third line
		lhu	v0, RSP_VADDR_TABLE(v0)	# delay slot
	# doClip returns here and execution continues into case_G_TRI1.

		.end	case_G_TRI2
.unname v0
.unname v1
.unname wd

 # F3DEX line microcode: G_TRI1.
 # Draws the three edges of one triangle with three doClip calls.  Two
 # index bytes are read at dinp-3 and dinp-2; the third index is taken
 # from the low byte of gfx1.  case_G_TRI2 above falls through into this
 # entry point.
.name v0,	$1
.name v1,	$2
.name wd,	$3
		.ent	case_G_TRI1
  case_G_TRI1:
		lbu	v0, (0-3)(dinp)		# -3,-2
		lbu	v1, (0-2)(dinp)
		addi	wd, zero, 3		# width value 3 (no width field is read here)
		sh	wd, CLIP_STATE_TABLE(zero)
		lhu	v0, RSP_VADDR_TABLE(v0)
		lhu	v1, RSP_VADDR_TABLE(v1)
		jal	doClip			# draw first line
		addi	gfx0, v0, 0		# delay slot: keep first vertex for the third line

		lbu	v0, (0-2)(dinp)		# -2,-1
		andi	v1, gfx1, 0xff
		lhu	v1, RSP_VADDR_TABLE(v1)
		jal	doClip			# draw second line
		lhu	v0, RSP_VADDR_TABLE(v0)	# delay slot

		andi	v1, gfx1, 0xff		# -1,-3
		lhu	v1, RSP_VADDR_TABLE(v1)
		addi	v0, gfx0, 0		# first vertex value saved before the first doClip
		j	doClip			# draw third line
		lhu	return, GFXDONE(zero)	# delay slot: return = halfword at GFXDONE (presumably GfxDone's address)

		.end	case_G_TRI1
.unname v0
.unname v1
.unname wd


# else
 #############################################################################
 #
 # This code handles G_LINE3D. It's basically modified from Steve's triangle
 # code above. It picks off the two vertex indices and calls the line code
 #
 #
.name v0,	$1
.name v1,	$2
.name wd,	$3
.name vn,	$4
.name n,	$5

		.ent 	case_G_LINE3D

  case_G_LINE3D:
	# pick off flag field
		lbu	n, (0-4)(dinp)	# which normal?
	# dinp points to next dl cmd, so back up to get line indices
 		lbu	v0, (0-3)(dinp)
		lbu	v1, (0-2)(dinp)
	# which normal?
		sll	n, n, 2			# word-size offset

	# this is a hack. we get the width, add 3 (0 means 'min' width)
	# and store it back to an unused DMEM location. We'll retrieve
	# it later and use it...
 		lbu	wd, (0-1)(dinp)
		addi	wd, wd, 3
		sh	wd, CLIP_STATE_TABLE(zero)

	# translate indices into DMEM offsets. Point buffer entries are
	# 40 bytes (yuk!) each... The interface (mbi.h) pre-multiplies
	# the indices by 10, so we just have to multiply by 4.
		sll	v0, v0, 2
		sll	v1, v1, 2


		addi	v0, v0, RSP_POINTS_OFFSET
		addi	v1, v1, RSP_POINTS_OFFSET

	# Both vertex addresses are written to scratch, then the word at
	# byte offset n (flag * 4) is read back, so vn ends up equal to
	# whichever of the two addresses the flag selects.
		sw	v0, (0+RSP_SCRATCH_OFFSET)(zero)
		sw	v1, (4+RSP_SCRATCH_OFFSET)(zero)
		lw	vn, RSP_SCRATCH_OFFSET(n)

		jal	doClip
		nop   	# delay slot, might do something useful here later
		j	GfxDone
		nop	# delay slot, might do something useful here later

		.end	case_G_LINE3D

.unname v0
.unname v1
.unname wd
.unname vn
.unname n
 #
 #
 #
 #############################################################################

 #############################################################################
 #
 # This code handles G_TRI1 in the line microcode.  It draws 3 lines (the edges
 # of the triangle).

.name v0,	$1
.name v1,	$2
.name wd,	$3
.name vn,	$4
.name n,	$5

		.ent	case_G_TRI1
  case_G_TRI1:
		jal	Tri1_getnormal
		lbu	n, (0-4)(dinp)	# which normal?

		jal	Tri1_presetup
		nop
		jal	doClip		# draw first line
		nop

		jal	Tri1_getnormal
		lbu	n, (0-4)(dinp)	# which normal?

		jal	Tri1_presetup
		lbu	v0, (0-1)(dinp)
		jal	doClip		# draw first line
		nop

		jal	Tri1_getnormal
		lbu	n, (0-4)(dinp)	# which normal?

		jal	Tri1_presetup
		lbu	v1, (0-1)(dinp)
		jal	doClip		# draw first line
		nop

		j	GfxDone


  Tri1_presetup:
	# Use min width (0 + 3)
		addi	wd, zero, 3
		sh	wd, CLIP_STATE_TABLE(zero)

	# translate indices into DMEM offsets. Point buffer entries are
	# 40 bytes (yuk!) each... The interface (mbi.h) pre-multiplies
	# the indices by 10, so we just have to multiply by 4.
		sll	v0, v0, 2
		sll	v1, v1, 2

		addi	v0, v0, RSP_POINTS_OFFSET
		jr	return
		addi	v1, v1, RSP_POINTS_OFFSET

  Tri1_getnormal:
	# pick off flag field
	# dinp points to next dl cmd, so back up to get tri indices
		lbu	v0, (0-3)(dinp)
		lbu	v1, (0-2)(dinp)
		lbu	vn, (0-1)(dinp)
	# which normal?
		sll	n, n, 2			# word-size offset

		sw	v0, (0+RSP_SCRATCH_OFFSET)(zero)
		sw	v1, (4+RSP_SCRATCH_OFFSET)(zero)
		sw	vn, (8+RSP_SCRATCH_OFFSET)(zero)
		lw	vn, RSP_SCRATCH_OFFSET(n)

		sll	vn, v0, 2
		jr	return
		addi	vn, vn, RSP_POINTS_OFFSET


		.end	case_G_TRI1

.unname v0
.unname v1
.unname wd
.unname vn
.unname n
 #
 #
 #
 #############################################################################


# endif /* F3DEX_GBI */
#endif /* LINE3D */

#ifdef FAST3D
#  ifdef F3DLP_GBI
#    include	"gltri.s"
#  else	 /* F3DLP_GBI */
#    ifdef  F3DEX_GBI

 #############################################################################
 #
 # This code handles G_TRI1. It picks off the three vertex indices and
 # calls the triangle setup code.
 #
 #
.name v0,		$1
.name v1,		$2
.name v2,		$3
.name vn,		$4
.name n,		$5
.name tmp,		$6

		.ent	case_G_TRI1
case_G_TRI1:

 #-YASU
 #
 #  F3DEX-TRI1
 #
 #  Sets up the return location.
 #  It is done this way so part of the code can be shared with G_TRI2.
 #  The flag field is not supported; the first vertex is always the flag.
 #
 #  return_save is destroyed when the Clip code is overlaid, so it cannot
 #  be used; gfx1 is used in its place.
 #
 #  A conversion table is referenced to cut the instruction count.  The
 #  values supplied for v0,v1,v2 must be the doubled (x2) indices.
 #
 #-YASU
 #  Entry points (TriStart* are also entered from case_G_TRI2 and
 #  case_G_QUAD3D in this file):
 #    case_G_TRI1  sets gfx1, return, reads the 3 index bytes, converts
 #    TriStart     caller supplies gfx1
 #    TriStart1    caller supplies gfx1 and return
 #    TriStart2    caller supplies gfx1, return and raw v0,v1,v2
		lh	gfx1,   GFXDONE(zero)	# return to GfxDone:
TriStart:
		lh	return, CLIPANDSETUP(zero)	# 'jr return' below goes to the address stored at CLIPANDSETUP
TriStart1:
		lbu	v0, (0-3)(dinp)
		lbu	v1, (0-2)(dinp)
		lbu	v2, (0-1)(dinp)
TriStart2:
		lhu	v0, RSP_VADDR_TABLE(v0)	# table lookup, one halfword per index value
		lhu	v1, RSP_VADDR_TABLE(v1)
		lhu	v2, RSP_VADDR_TABLE(v2)

		jr	return
		addi	vn, v0, 0	# delay slot: flag fixed to vertex 0 (vn = v0)

		.end	case_G_TRI1
.unname v0
.unname v1
.unname v2
.unname vn
.unname n
.unname tmp

		.ent	case_G_TRI2
case_G_TRI2:

 #-YASU
 #
 #  F3DEX-TRI2
 #
 #  Draw the 1st triangle (vb0, vb1, vb2)
 #
 #  return_save is destroyed when the Clip code is overlaid, so it cannot
 #  be used; gfx1 is used in its place.
 #
 #-YASU
 #  The jal puts the address of Tri2_2ndTri in 'return'; the delay slot
 #  copies it to gfx1 before TriStart overwrites 'return'.  TriStart reads
 #  the index bytes at dinp-3..dinp-1.
		jal	TriStart
		addi	gfx1, return, 0
 #-YASU
 #
 #  Draw the 2nd triangle (va0, va1, va2)
 #
 #-YASU
.name v0,		$1
.name v1,		$2
.name v2,		$3
Tri2_2ndTri:
		lbu	v0, (0-7)(dinp)
		lbu	v1, (0-6)(dinp)
		lbu	v2, (0-5)(dinp)
		lh	return, CLIPANDSETUP(zero)	# TriStart2 leaves through 'return'
		j	TriStart2			# shared table lookup + dispatch
		lh	gfx1, GFXDONE(zero)	# return to GfxDone:
.unname v0
.unname v1
.unname v2
		.end	case_G_TRI2
#ifdef	QUAD
.name v0,		$1
.name v1,		$2
.name v2,		$3
.name v3,		$7
.name y0,		$8
.name y1,		$9
.name y2,		$10
.name y3,		$11	# registers are used liberally here, favoring clarity
.name d02,		$6
.name d13,		$12
.name vn,		$4	# flatp
.name n,		$5
		.ent	case_G_QUAD3D
case_G_QUAD3D:

 #-YASU
 #
 #  F3DEX-QUAD
 #
 # (7/25)
 #  VTX optimize changed the size of the vertex cache; this code was
 #  adapted to match.
 #
 #  RSP_SCRATCH_OFFSET may be used by the CLIP code, so values are saved
 #  somewhere different from G_TRI1.  CLIP_TMP has 160 bytes reserved but
 #  only 40 bytes are used, so the remainder is used.
 #
 #  The flag is unsupported.  Instead the flag is fixed to v1.
 #  NOTE(review): the code reached through TriStart1 sets vn from v0, not
 #  v1 -- confirm which is intended.
 #
 #  return_save is destroyed when the Clip code is overlaid, so it cannot
 #  be used; gfx1 is used in its place.
 #
 #  On back-face rejection, 0 is assigned to return.
 #
 #  The NewCom version of LX does not support dynamic splitting.
 #
 #-YASU
#ifndef STATICQDIV	/* normal Quad */
	# dinp points to next dl cmd, so back up to get tri indices
		jal	TriStart1	# fetch v0,v1,v2 (returns here: 'return' is set by this jal)
		lbu	v3, (0-4)(dinp)	# delay slot: fourth index byte
		lhu	v3, RSP_VADDR_TABLE(v3)

		lh	y0, RSP_PTS_YS(v0)
		lh	y2, RSP_PTS_YS(v2)
		lh	y1, RSP_PTS_YS(v1)
		lh	y3, RSP_PTS_YS(v3)

	# Compare |y0 - y2| with |y1 - y3| to choose the splitting diagonal.
	# The three sh instructions sit in branch delay slots, so they run
	# whether or not the branch is taken.
		sub	d02, y0, y2	# d02 = y0 - y2, made absolute below
		bgez	d02, Abs_1	# already non-negative?
		sh	v0, ( 0-6+RSP_OUTPUT_OFFSET)(zero)	# delay slot: save v0
		sub	d02, y2, y0	# d02 = |y0 - y2|
Abs_1:		sub	d13, y1, y3	# d13 = y1 - y3, made absolute below
		bgez	d13, Abs_2
		sh	v2, ( 2-6+RSP_OUTPUT_OFFSET)(zero)	# delay slot: save v2
		sub	d13, y3, y1	# d13 = |y1 - y3|
Abs_2:		sub	d02, d02, d13
		blez	d02, Devide02	# split along 0-2 when |y0-y2| <= |y1-y3|
		sh	v3, ( 4-6+RSP_OUTPUT_OFFSET)(zero)	# delay slot: save v3

	# Split along 1-3: first triangle becomes (v0,v1,v3), and the saved
	# slot that held v0 is overwritten with v1 for the second triangle.
Devide13:	addi	v2, v3, 0
		sh	v1, ( 0-6+RSP_OUTPUT_OFFSET)(zero)

Devide02:	jal	clipAndSetup
		addi	gfx1, return, 0		# return to Quad2ndTri:

	# Second triangle is rebuilt from the three saved values.
Quad_2ndTri:	beq	return, $0, GfxDone	# finished when the 1st TRI was back-facing
		lh	v0, ( 2-6+RSP_OUTPUT_OFFSET)(zero)	# v2
		lh	v1, ( 4-6+RSP_OUTPUT_OFFSET)(zero)	# v3
		lh	v2, ( 0-6+RSP_OUTPUT_OFFSET)(zero)	# v0
		j	clipAndSetup
		lh	gfx1, GFXDONE(zero)	# return to GfxDone:

#else	/* STATICQDIV */	/* custom version for NewCom */
	#
	#  Draw the Quad as 2 Tris
	#
		# first TRI (index bytes at dinp-3, dinp-2, dinp-1)
		jal	TriStart
		addi	gfx1, return, 0

		# second TRI (index bytes at dinp-3, dinp-1, dinp-4)
		lbu	v0, (0-3)(dinp)
		lbu	v1, (0-1)(dinp)
		lbu	v2, (0-4)(dinp)
		lh	return, CLIPANDSETUP(zero)
		j	TriStart2
		lh	gfx1, GFXDONE(zero)	# return to GfxDone:
#endif  /* STATICQDIV */
.unname v0
.unname v1
.unname v2
.unname v3
.unname vn
.unname n
.unname y0
.unname y1
.unname y2
.unname y3
.unname d02
.unname d13
		.end	case_G_QUAD3D
#endif	/* QUAD */
#    else  /* F3DEX_GBI */

 #############################################################################
 #
 # This code handles G_TRI1. It picks off the three vertex indices and
 # calls the triangle setup code.
 #
 #
.name v0,		$1
.name v1,		$2
.name v2,		$3
.name vn,		$4
.name n,		$5
.name tmp,		$6

		.ent	case_G_TRI1

case_G_TRI1:
	# pick off flag field
		lbu	n, (0-4)(dinp)	# which normal?
	# dinp points to next dl cmd, so back up to get tri indices
		lbu	v0, (0-3)(dinp)
		lbu	v1, (0-2)(dinp)
		lbu	v2, (0-1)(dinp)
	# which normal?
		sll	n, n, 2			# word-size offset

	# translate indices into DMEM offsets. Point buffer entries are
	# 40 bytes (yuk!) each... The interface (mbi.h) pre-multiplies
	# the indices by 10, so we just have to multiply by 4.
		sll	v0, v0, 2
		sll	v1, v1, 2
		sll	v2, v2, 2

		addi	v0, v0, RSP_POINTS_OFFSET
		addi	v1, v1, RSP_POINTS_OFFSET
		addi	v2, v2, RSP_POINTS_OFFSET

	# The three vertex addresses are written to scratch and the word at
	# byte offset n (flag * 4) is read back, so vn is the address of the
	# vertex chosen by the flag field.
		sw	v0, (0+RSP_SCRATCH_OFFSET)(zero)
		sw	v1, (4+RSP_SCRATCH_OFFSET)(zero)
		sw	v2, (8+RSP_SCRATCH_OFFSET)(zero)
		lw	vn, RSP_SCRATCH_OFFSET(n)

	# Exactly one of the two loads below is assembled; it fills the delay
	# slot of the jump and selects which register carries the GFXDONE value.
		j	clipAndSetup
#ifdef	RETGFX1
		lh	gfx1,GFXDONE(zero)		# return to GfxDone:
#else
		lh	return_save,GFXDONE(zero)	# return to GfxDone:
#endif
		.end	case_G_TRI1

.unname v0
.unname v1
.unname v2
.unname vn
.unname n
.unname tmp
 #
 #
 #
 #############################################################################
#    endif  /* F3DEX_GBI */
#  endif  /* F3DLP_GBI */
#endif /* FAST3D */


 #############################################################################
 #
 # This code handles G_POPMTX. It checks the stack depth, backs up the
 # stack pointer, then DMA's the matrix into DMEM, updates the state,
 # and loads the registers.
 #
 #
.name param,		$1
.name mstack_p,		$19
#ifdef	MTXNOLMT
.name mstack_min,	$3
#else
.name mstack_max,	$3
#endif
.name mat_sz,		$18
.name mat_p,		$20

		.ent	case_G_POPMTX

  case_G_POPMTX:
	# 'param' already filled in but not used

	# we can only pop the MODELVIEW stack

	# get pointer and stack size
 #-YASU
 #
 #  Modified to reference the stack data held inside the TASK structure
 #
 #-YASU
		sbv     vconst[6],RSP_STATE_L_LEN(rsp_state)            # hi bit = light recalc
		lw	mstack_p,  RSP_STATE_MMTX_STACK_P(rsp_state)	# stack ptr
#ifdef	MTXNOLMT
		lw	mstack_min, (RSP_TASK_OFFSET+OS_TASK_OFF_STACK)($0)	# stack base taken from the task header
#else
		lw	mstack_max, RSP_STATE_MMTX_STACK_MAX(rsp_state)	# end of stack
#endif
		addi	mat_p, zero, RSP_CURR_MMTX_OFFSET		# where to DMA matrix
#ifdef	MTXNOLMT
		sub	mstack_min, mstack_min, mstack_p		# base - current pointer

	# check matrix stack depth, bail when nothing has been pushed
	# (base - pointer >= 0)
		bgez	mstack_min, GfxDone				# anything on stack?
#else
		sub	mstack_max, mstack_max, mstack_p		# end - current pointer
		addi	mstack_max, mstack_max, (-10*64)		#  ... - max size of stck

	# check matrix stack depth, bail when nothing has been pushed
	# (end - pointer - 10*64 >= 0)
		bgez	mstack_max, GfxDone				# anything on stack?
#endif
	# Delay slot: runs even when the branch is taken, but the new pointer
	# is only written back to the state at the end of this handler.
		addi	mstack_p, mstack_p, -64				# stack is 1 mtx smaller
 ### BRANCH OCCURS TO GfxDone: IF NOTHING ON STACK


		jal	DMAread						# DMA matrix from stack
		addi	mat_sz, zero, 63	# DMA expects sz-1	# DMA length -1

		jal	DMAwait						# wait for DMA to finish
		addi    $3, zero, RSP_CURR_MPMTX_OFFSET                 # where to put MP matrix ($3 is free again here)

	# update state, then jump to pre-multiply MxP
		j	mtx_MxP						# mult model * proj mtx
		sw	mstack_p,  RSP_STATE_MMTX_STACK_P(rsp_state)	# delay slot: store new stack pointer


		.end	case_G_POPMTX
.unname param
.unname mstack_p
#ifdef	MTXNOLMT
.unname mstack_min
#else
.unname mstack_max
#endif
.unname mat_sz
.unname mat_p
 #
 #
 #
 #############################################################################

 #############################################################################
 #
 # this handles the G_MOVEWORD command, moving 1 word into dmem
 #
 #
.name target,	$1
.name outptr,	$5
.name offset,	$2

		.ent	case_G_MOVEWORD

  case_G_MOVEWORD:
 #	lbu	target, (0-6)(dinp)		  # index to address
 #	lbu	offset, (0-5)(dinp)		  # offset from address
		lbu	target, (0-5)(dinp)		  # index to address
		lhu	offset, (0-7)(dinp)		  # offset from address
		lh	outptr,(MOVEWORD_TBL)(target) # actual address
		add	outptr,outptr,offset		  #   ...plus offset
		j	GfxDone				  #
		sw	gfx1, 0(outptr)			  # store @ addr + off

		.end	case_G_MOVEWORD

.unname target
.unname outptr
.unname offset
 #
 #
 #############################################################################


 #############################################################################
 #
 # This code handles the G_TEXTURE.
 #
 #
.name	rmode, 		$2
.name	mask, 		$3
.name	sscale,		$4
.name	tscale,		$5
.name	tile,		$6

		.ent	case_G_TEXTURE

  case_G_TEXTURE:

	# turn texture on or off:
		sw	gfx0, RSP_STATE_TEX_CMD(rsp_state)
		sw	gfx1, RSP_STATE_TEX_SCALE_S(rsp_state)
		lh	rmode, RSP_STATE_RENDER_L(rsp_state)
		andi	rmode, rmode, 0xfffd	# clear texture state
		andi	mask, gfx0, 0x01	# on bit
		sll	mask, mask, 1
		or	rmode, rmode, mask	# set texture on (maybe)
 		j	GfxDone
		sh	rmode, RSP_STATE_RENDER_L(rsp_state)

		.end	case_G_TEXTURE

.unname	rmode
.unname	mask
.unname	sscale
.unname	tscale
.unname	tile

 #############################################################################
 #
 # This code handles G_SETOTHERMODE_*.
 #
 #
.name modewd,	$3
 #		.name mask,	$4
.name mask,	$2
.name lenth,	$5
.name shft,	$6
.name waddr,	$7
.name minus1,	$8

		.ent	case_G_OTHERMODE

  case_G_SETOTHERMODE_H:
		j	doOtherMode
		addi	waddr, rsp_state, RSP_STATE_OTHER_H # delay slot

  case_G_SETOTHERMODE_L:
		addi	waddr, rsp_state, RSP_STATE_OTHER_L

		# this code is the same for both OTHERMODE commands...
 	doOtherMode:
		lw	modewd, 0(waddr)
#if 0
		addi	minus1, zero, -1
#endif
		lbu	lenth, (0-5)(dinp)
		lbu	shft,  (0-6)(dinp)

		addi	mask, zero, 0x01
		sllv	mask, mask, lenth
		addi	mask, mask, -1
		sllv	mask, mask, shft
#if 0
		xor	mask, mask, minus1
#else
		nor	mask, mask, zero	# rd = ~(rs|rt)
#endif
		and	mask, mask, modewd
		or	modewd, mask, gfx1
		sw	modewd, 0(waddr)

	# output to RDP
	# writes 64-bits at once. cmd byte already there.
	# use the regular RDP output routine, sharing code.
		lw	gfx0, RSP_STATE_OTHER_H(rsp_state)
		j	doRDPSend
		lw	gfx1, RSP_STATE_OTHER_L(rsp_state)

		.end	case_G_OTHERMODE

.unname modewd
.unname mask
.unname lenth
.unname shft
.unname waddr
.unname minus1
 #
 #
 #
 #############################################################################


 #############################################################################
 #
 # This code handles G_CULLDL.
 #
 # Ends display list if vertices n through m are mutually trivially rejected
 # (ie the volume described by these vertices is completely outside of the
 # trivial reject volume).
 #
.name cc,	$2
.name tmp,	$3

 # To stay GBI compatible the DL command is laid out as follows
 #
 #   31         24         16          8          0
 #    | G_CULLDL |        0 |  first vertex index*2 |
 #    |        0 |        0 |  last vertex index*2  |
 #
 # Every variant walks gfx0 from the first vertex up to gfx1 in steps of
 # RSP_PTS_LEN.  Reaching the end of this block means the volume was
 # culled, and execution continues into case_G_ENDDL, which follows
 # directly.  When NO_CULLDL is defined no instructions are assembled
 # here and the label lands on case_G_ENDDL.
 #
		.ent	case_G_CULLDL
case_G_CULLDL:
#ifndef	NO_CULLDL
#ifdef	F3DLP_GBI
#ifndef	DIRECTZCMP
#ifdef	PERSPTXTR
		/* CULLDL handling for LX.Rej */
		sll	tmp, gfx0, 2		# *8
		add	tmp, tmp, gfx0		# 2+8
		sll	gfx0, tmp, 1		# 10*2
		sll	tmp, gfx1, 2		# *8
		add	tmp, tmp, gfx1		# 2+8
		sll	gfx1, tmp, 1		# 10*2
#else
		/* CULLDL handling for LP.Rej */
		sll	gfx0, gfx0, 3
		sll	gfx1, gfx1, 3
#endif
		andi	gfx0, gfx0, 0xffff	# drop everything above the low halfword
#else	/* DIRECTZCMP */
		lhu	gfx0, RSP_VADDR_TABLE(gfx0)
		lhu	gfx1, RSP_VADDR_TABLE(gfx1)
#endif	/* DIRECTZCMP */
		addi	cc, zero, 0x7fff	# value compared against each vertex halfword
VolCulLoop:
#ifndef	DIRECTZCMP
		lhu	tmp, (RSP_POINTS_OFFSET+RSP_PTS_XS)(gfx0)
#else	/* DIRECTZCMP */
		lhu	tmp, RSP_PTS_XS(gfx0)
#endif	/* DIRECTZCMP */
		bne	tmp, cc, GfxDone	# finished unless the value is 0x7fff
		nop
		bne	gfx0, gfx1, VolCulLoop	# more vertices to check?
		addi	gfx0, gfx0, RSP_PTS_LEN	# delay slot: step to the next vertex
#else
#ifdef F3DEX_GBI
		lhu	gfx0, RSP_VADDR_TABLE(gfx0)	# vertex to start on
		lhu	gfx1, RSP_VADDR_TABLE(gfx1)	# vertex to end   on
#else
		andi	gfx0, gfx0, 0x03ff		# keep the low 10 bits of the start value
#endif
#ifdef NEAR_CLIP_OFF
		ori	cc, zero, 0x7030		# initialize cc
#else /* NEAR_CLIP_OFF */
		ori	cc, zero, 0x7070		# initialize cc
#endif /* NEAR_CLIP_OFF */

	# cc accumulates the AND of every vertex's clip halfword, so a bit
	# stays set only when it is set for all of the vertices.
VolCulLoop:
#ifdef F3DEX_GBI
		lh	tmp, RSP_PTS_CC(gfx0)
#else
		lh	tmp, (RSP_POINTS_OFFSET+RSP_PTS_CC)(gfx0)
#endif
		and	cc, cc, tmp			# is this vtx clipped?
		bne	gfx0, gfx1, VolCulLoop		# loop through vtx's
		addi	gfx0, gfx0, RSP_PTS_LEN		# delay slot: step to the next vertex
 ### LOOP OCCURS if not all points have been checked

		beq	cc, zero, GfxDone		# continue if not culled
 # NOTE Delay Slot!!
 # (the delay slot of the beq above is the first instruction of
 # case_G_ENDDL, which loads into $2, the register aliased as cc here)
#endif
#endif
		.end	case_G_CULLDL
.unname cc
.unname tmp
 #
 #
 #
 #
 #############################################################################

 ### IMPORTANT!!!!  Do not place any code between case_G_CULLDL and G_ENDDL
 ### (case_G_CULLDL falls through into case_G_ENDDL when the volume is culled.)

 #############################################################################
 #
 # This code handles G_ENDDL.
 #
 # Causes a 'pop' of the display list stack. If we pop an empty
 # display list stack, that's an error and we end.
 #
.name stack_sz,	$2
.name stack_p,	$3

		.ent	case_G_ENDDL
  case_G_ENDDL:
	# pop display list
		lb	stack_sz, RSP_STATE_DL_N(rsp_state)
		addi	stack_sz, stack_sz, -4
		bltz	stack_sz, TaskDone		# empty stack
		addi	stack_p, stack_sz, RSP_DLSTACK_OFFSET
		lw	inp,     0(stack_p)	# pointer of DL
		sb	stack_sz, RSP_STATE_DL_N(rsp_state)
 		j	GfxDone
#ifdef	SMARTDLCOUNT
		addi	dlcount, dinp, 0
#else
		addi	dlcount, zero, 0
#endif
		.end	case_G_ENDDL

.unname stack_sz
.unname stack_p
 #
 #
 #
 #############################################################################

#ifdef		RSP_PAUSE
 #############################################################################
 #
 # This code handles G_RSP_PAUSE
 #
 #   31          24           16            8            0
 #   +------------+------------+------------+------------+
 #   |G_RSP_PAUSE |                                      |
 #   +------------+------------+------------+------------+
 #   |                      Signal                       |
 #   +------------+------------+------+-----+------------+
 #
 #  Keeps looping until the value the CPU wrote to address 0x04000ffc
 #  is greater than or equal to the signal value.
 #
 #  Entry is at case_G_RSP_PAUSE, which is also the delay slot of the
 #  bgez at case_G_RSP_PAUSE_Loop.  Each pass reloads the word, subtracts
 #  the signal (gfx1) and branches to GfxDone once the result is >= 0.
 #
 		.ent	case_G_RSP_PAUSE
case_G_RSP_PAUSE_Loop:
		bgez	gfx0, GfxDone	# leave once (value - signal) >= 0
case_G_RSP_PAUSE:
		lw	gfx0, (RSP_TASK_OFFSET+OS_TASK_OFF_YIELD_SZ)(zero)	# current value written by the CPU
		j	case_G_RSP_PAUSE_Loop
		sub	gfx0, gfx0, gfx1	# delay slot: gfx0 = value - signal
 		.end	case_G_RSP_PAUSE
 #
 #
 #
 #############################################################################
#endif

#ifdef		BRANCH_Z
 #############################################################################
 #
 # This code handles G_BRANCH_Z
 #
 #   31          24           16            8            0
 #   +------------+------------+------------+------------+
 #   |G_RDPHALF_1 |                                      |
 #   +------------+------------+------------+------------+
 #   |                   branch addrs                    |
 #   +------------+------------+------+-----+------------+
 #   |G_BRANCH_Z  |       Vtx*5       | 0000|    Vtx*2   |
 #   +------------+------------+------+-----+------------+
 #   |                    Z Value                        |
 #   +------------+------------+------------+------------+
 #
 #  Branches when the Depth value (W here) of the given vertex is less
 #  than or equal to the given value.  The parameters do not fit in one
 #  command, so G_RDPHALF_1 is used to split them across two.
 #
 		.ent	case_G_BRANCH_Z
    case_G_BRANCH_Z:

 # -----------------------------------
 #	Comparison using Screen Z
 # -----------------------------------
#ifndef	DIRECTZCMP
# ifdef F3DEX_GBI
		lhu	gfx0, RSP_VADDR_TABLE(gfx0)       # vertex table lookup
		lw	gfx0, RSP_PTS_ZS(gfx0)		  # fetch the depth value
# elif  defined(F3DLP_GBI)
#  ifdef PERSPTXTR
		srl	gfx0, gfx0, 10			  # bring the Vtx field down to Vtx x20
#  else
		sll	gfx0, gfx0, 3			  # scale the Vtx field to Vtx x16
#  endif
		lw	gfx0, (RSP_PTS_ZS+RSP_POINTS_OFFSET)(gfx0)
# endif
#else
 # -----------------------------------
 #	Comparison using the W int value
 # -----------------------------------
# if	(defined(F3DEX_GBI)||defined(F3DLP_GBI))
		lhu	gfx0, RSP_VADDR_TABLE(gfx0)       # vertex table lookup
		lh	gfx0, RSP_PTS_W_INT(gfx0)	  # fetch the depth value
# endif
#endif
		sub	gfx0, gfx0, gfx1		  # compare: depth - given value
		bgtz	gfx0, GfxDone			  # finished when the depth is greater
		lw	gfx1, RSP_STATE_RDPHALF(rsp_state) # delay slot: word saved by G_RDPHALF_1 (the branch address)
		j	G_DL_noPush			  # take the branch
 # NOTE(review): no instruction follows the jump inside this block, so
 # its delay slot is whatever instruction is assembled next in the file.
 # Confirm that instruction is harmless on the way to G_DL_noPush.
		.end	case_G_BRANCH_Z
 #
 #
 #
 #############################################################################
#endif

 #############################################################################
 #
 # This code handles G_SETGEOMETRYMODE
 #
 # Any bit set 'on' in the incoming command is 'set' in the state.
 # Assumes gfx1 is all 0's, except some of the lower 16 bits.
 #
.name	rmode, 		$2

		.ent	case_G_SETGEOMETRYMODE

  case_G_SETGEOMETRYMODE:
		lw	rmode, RSP_STATE_RENDER(rsp_state)
		or	rmode, rmode, gfx1
 		j	GfxDone
		sw	rmode, RSP_STATE_RENDER(rsp_state) # delay slot

		.end	case_G_SETGEOMETRYMODE

.unname	rmode
 #
 #
 #
 #############################################################################


 #############################################################################
 #
 # This code handles G_CLEARGEOMETRYMODE
 #
 # Any bit set 'on' in the incoming command is 'cleared' in the state.
 # Assumes gfx1 is all 0's, except some of the lower 16 bits.
 #
.name	rmode, 		$2
.name	mask, 		$3

		.ent	case_G_CLEARGEOMETRYMODE

  case_G_CLEARGEOMETRYMODE:
		lw	rmode, RSP_STATE_RENDER(rsp_state)
#if 0
		addi	mask, zero, -1
		xor	mask, mask, gfx1
#else
		nor	mask, gfx1, zero
#endif
		and	rmode, rmode, mask
 		j	GfxDone
		sw	rmode, RSP_STATE_RENDER(rsp_state) # delay slot

		.end	case_G_CLEARGEOMETRYMODE

.unname	rmode
.unname	mask
 #
 #
 #
 #############################################################################

#ifdef	F3DEX_GBI
 #############################################################################
 #
 # For ModifyVertex compatibility, separate handler routines are provided
 # for EX/LX and for LX.Rej/LP.Rej
 #
 #   31         24          16           8           0
 #   +-----------+-----------+-----------+-----------+
 #   |G_MODIFYVTX|   offset  |         vtx * 2       |
 #   +-----------+-----------+-----------+-----------+
 #   |                      data                     |
 #   +-----------+-----------+-----------+-----------+
 #
 #############################################################################
 # F3DEX variant: store gfx1 at (vertex address + offset).
		.name	offset, $2
		.name	addrs,  $3
 		.ent	case_G_MODIFYVTX

    case_G_MODIFYVTX:
		lbu	offset, -7(dinp)		# offset byte from the command
		lhu	addrs,  RSP_VADDR_TABLE(gfx0)	# vertex address via the table
		add	addrs, addrs, offset
 		j	GfxDone
 # delay slot of the jump: exactly one of the two stores is assembled
#ifdef	NEWCOM
		sw	gfx1, -16(addrs)
#else
		sw	gfx1, 0(addrs)
#endif
 		.end	case_G_MODIFYVTX
		.unname	offset
		.unname	addrs

#elif	F3DLP_GBI
 # F3DLP variant: store gfx1 at (vertex address + (offset XOR 0x18)).
		.name	offset, $2
		.name	addrs,  $3
 		.ent	case_G_MODIFYVTX

    case_G_MODIFYVTX:
		lbu	offset, -7(dinp)		# offset byte from the command
# ifndef DIRECTZCMP
#  ifdef PERSPTXTR
		sll	addrs, gfx0, 2			# gfx0 * 4
		add	addrs, addrs, gfx0		# gfx0 * 5
		sll	addrs, addrs, 1			# gfx0 * 10
#  else
		sll	addrs, gfx0, 3			# gfx0 * 8
#  endif
# else
		lhu	addrs,  RSP_VADDR_TABLE(gfx0)	# vertex address via the table
# endif
		xori	offset, offset, 0x18		# remap the offset for this vertex layout (presumably; confirm against the F3DLP point format)
		add	addrs, addrs, offset
 		j	GfxDone
 # delay slot of the jump: exactly one of the two stores is assembled
#ifndef	DIRECTZCMP
		sw	gfx1, RSP_POINTS_OFFSET(addrs)
#else
		sw	gfx1, 0(addrs)
#endif
 		.end	case_G_MODIFYVTX
		.unname	offset
		.unname	addrs
#else
 #############################################################################
 #
 # This code handles G_PERSPNORM
 #
 # This magic number is needed to fix the transformation and clip
 # math, extracting the most precision. Grab the scale from gfx1
 # and save it for later.
 #
 		.ent	case_G_PERSPNORM

 # Only the low halfword of gfx1 is kept.
    case_G_PERSPNORM:
 		j	GfxDone
		sh	gfx1, RSP_STATE_PERSPNORM(rsp_state) # delay slot

 		.end	case_G_PERSPNORM
 #
 #
 #
 #############################################################################
#endif

 #############################################################################
 #
 # This code handles G_RDPHALF_1
 #
 # This received the 3rd quarter of a texrect or texrectflip command
 #
 		.ent	case_G_RDPHALF_1

 # Saves gfx1 in the state and leaves through noYield instead of GfxDone.
    case_G_RDPHALF_1:
 		j	noYield			   # don't yield mid cmd
		sw	gfx1, RSP_STATE_RDPHALF(rsp_state) # save for later...	(delay slot)

 		.end	case_G_RDPHALF_1
 #
 #
 #
 #############################################################################

 #############################################################################
 #
 # This code handles G_RDPHALF_CONT
 #
 # This received the 2nd 32 bits of a 64 bit string to send to the RDP.
 # It sends it (using the G_RDPHALF_2 code below) AND disables yield
 # so the data will not get interrupted by a yield.  This should be used
 # only to send data which will be followed by more data from a G_RDPHALF_1
 # and G_RDPHALF_2 pair (or a G_RDPHALF_1 and G_RDPHALF_CONT pair).
 #
#if	!(defined(F3DEX_GBI)||defined(F3DLP_GBI))
 		.ent	case_G_RDPHALF_CONT

 # Clears $2 and then continues straight into the G_RDPHALF_2 handler;
 # there is no jump at the end of this block.
    case_G_RDPHALF_CONT:
		ori	$2, zero, 0		# disable yield

 		.end	case_G_RDPHALF_CONT
#endif
 #
 #
 #
 #############################################################################

 #############################################################################
 #
 # This code handles G_RDPHALF_2
 #
 # This receives the 4th quarter of a texrect or texrectflip command
 # and sends it and the 3rd quarter (ie the 2nd half) to the rdp.
 #
 		.ent	case_G_RDPHALF_2

 # gfx0 is replaced with the word saved by G_RDPHALF_1; gfx1 is left as
 # it arrived, and the pair goes to the shared RDP send routine.
    case_G_RDPHALF_2:
 		j	doRDPSend		   # jmp to send routine
		lw	gfx0, RSP_STATE_RDPHALF(rsp_state) # retrieve 3rd qtr	(delay slot)

 		.end	case_G_RDPHALF_2
 #
 #
 #
 #############################################################################

#ifdef	TXLOAD4b
 #############################################################################
 #
 # TXLOAD4b builds pull in gtxtr4b.s at this point.
 #
 # (This header previously repeated the G_RDPHALF_2 description, which
 # does not describe the included file; see gtxtr4b.s for what it adds.)
 #
#include	"gtxtr4b.s"
 #
 #
 #
 #############################################################################
#endif