// tc_sort.v 8.67 KB
/**************************************************************************
 *                                                                        *
 *               Copyright (C) 1994, Silicon Graphics, Inc.               *
 *                                                                        *
 *  These coded instructions, statements, and computer programs  contain  *
 *  unpublished  proprietary  information of Silicon Graphics, Inc., and  *
 *  are protected by Federal copyright  law.  They  may not be disclosed  *
 *  to  third  parties  or copied or duplicated in any form, in whole or  *
 *  in part, without the prior written consent of Silicon Graphics, Inc.  *
 *                                                                        *
 *************************************************************************/

// $Id: tc_sort.v,v 1.2 2002/11/22 00:34:20 rws Exp $

////////////////////////////////////////////////////////////////////////
//
// Project Reality
//
// module:	tc_sort
// description:	Texture coordinate address sort module. Expands TMEM 
//              address into 8 sorted bank addresses.
//
// designer:	Tony DeLaurier
// date:	7/8/94
//
////////////////////////////////////////////////////////////////////////

module tc_sort (clk, start_gclk, a, b_adder, c_adder, d_adder, shift, odd_t, flip,
		tlut_en, load_10d, clr_ind_a, clr_ind_b, clr_ind_c, clr_ind_d,
		adrs_bnk0l, adrs_bnk1l, adrs_bnk2l, adrs_bnk3l,
		adrs_bnk0h, adrs_bnk1h, adrs_bnk2h, adrs_bnk3h, adrs_a_1d,
		adrs_b_ba_1d, adrs_c_1d, adrs_d_ba_1d, adrs_b_rg_1d,
		adrs_d_rg_1d);

  // ------------------------------------------------------------------
  // Summary
  //
  // On every rising edge of clk for which start_gclk is high:
  //   1. Five further 13-bit addresses (b_ba, c, d_ba, b_rg, d_rg) are
  //      derived from a and the three adder inputs.
  //   2. Bit 3 of each of the six addresses is XORed with odd_t
  //      (a, b_ba, b_rg) or with odd_t ^ flip (c, d_ba, d_rg).
  //   3. Each bank output N registers bits [11:4] of whichever address
  //      has bits [3:2] equal to N.  Low banks choose among
  //      a / b_rg / c / d_rg, high banks among a / b_ba / c / d_ba.
  //      When more than one address selects the same bank the values
  //      are ORed together; when none does the result is 0.
  //   4. While tlut_en is high and load_11d is low, the high banks
  //      register clr_ind_a..d instead of a sorted address.
  //   5. Bits {12, 3:0} of each address are registered on the *_1d
  //      outputs.
  // Nothing is updated on a clock edge where start_gclk is low.
  // ------------------------------------------------------------------

  // ---- inputs ----
  input        clk;             // clock (original comment: RDP gated clock)
  input        start_gclk;      // update enable, sampled on posedge clk
  input [12:0] a;               // address a before interleave
  input [12:0] b_adder;         // address b adder input
  input [12:0] c_adder;         // address c adder input
  input [12:0] d_adder;         // address d adder input
  input        shift;           // shift b_rg address delta left by one
  input        odd_t;           // odd line
  input        flip;            // flip c and d interleave bit
  input        tlut_en;         // enable texture lookup table
  input        load_10d;        // load delayed 10 cycles
  input [7:0]  clr_ind_a;       // color index texel a
  input [7:0]  clr_ind_b;       // color index texel b
  input [7:0]  clr_ind_c;       // color index texel c
  input [7:0]  clr_ind_d;       // color index texel d

  // ---- registered outputs: sorted address to low banks 0..3 ----
  output [7:0] adrs_bnk0l, adrs_bnk1l, adrs_bnk2l, adrs_bnk3l;
  reg    [7:0] adrs_bnk0l, adrs_bnk1l, adrs_bnk2l, adrs_bnk3l;

  // ---- registered outputs: sorted address to high banks 0..3 ----
  output [7:0] adrs_bnk0h, adrs_bnk1h, adrs_bnk2h, adrs_bnk3h;
  reg    [7:0] adrs_bnk0h, adrs_bnk1h, adrs_bnk2h, adrs_bnk3h;

  // ---- registered outputs: address bits <12,3:0> of each texel, delayed 1 ----
  output [4:0] adrs_a_1d, adrs_b_ba_1d, adrs_c_1d;
  output [4:0] adrs_d_ba_1d, adrs_b_rg_1d, adrs_d_rg_1d;
  reg    [4:0] adrs_a_1d, adrs_b_ba_1d, adrs_c_1d;
  reg    [4:0] adrs_d_ba_1d, adrs_b_rg_1d, adrs_d_rg_1d;

  // ---- same-cycle intermediates (written with blocking assignments) ----
  reg [12:0] b_ba, c, d_ba, b_rg, d_rg;        // addresses before interleave
  reg [12:0] adrs_a, adrs_c;                   // texel a / c after interleave
  reg [12:0] adrs_b_ba, adrs_d_ba;             // texel b / d, ba banks
  reg [12:0] adrs_b_rg, adrs_d_rg;             // texel b / d, rg banks

  reg load_11d;                 // load delayed 11 cycles

  // AND-OR select for one bank: returns bits [11:4] of every candidate
  // whose bits [3:2] equal `bank`, ORed together (0 when none match).
  function [7:0] bank_adrs;
    input [1:0]  bank;
    input [12:0] t0;
    input [12:0] t1;
    input [12:0] t2;
    input [12:0] t3;
    begin
      bank_adrs = ({8{(t0[3:2] == bank)}} & t0[11:4]) |
                  ({8{(t1[3:2] == bank)}} & t1[11:4]) |
                  ({8{(t2[3:2] == bank)}} & t2[11:4]) |
                  ({8{(t3[3:2] == bank)}} & t3[11:4]);
    end
  endfunction

  always @(posedge clk)
    if (start_gclk) begin

      // ---- derive the other five addresses ----
      // Blocking assignments: d_ba and d_rg consume the b_ba / b_rg
      // values computed just above, in this same clock edge.
      b_ba = a + b_adder;
      c    = a + c_adder;
      d_ba = b_ba + d_adder;
      b_rg = a + (b_adder << shift);
      d_rg = b_rg + d_adder;

      // ---- interleave: XOR bit 3 of each address ----
      // a and both b addresses use odd_t; c and both d addresses use
      // odd_t ^ flip.  All other bits pass through unchanged.
      adrs_a    = {a[12:4],    a[3]    ^ odd_t,          a[2:0]};
      adrs_b_ba = {b_ba[12:4], b_ba[3] ^ odd_t,          b_ba[2:0]};
      adrs_b_rg = {b_rg[12:4], b_rg[3] ^ odd_t,          b_rg[2:0]};
      adrs_c    = {c[12:4],    c[3]    ^ (odd_t ^ flip), c[2:0]};
      adrs_d_ba = {d_ba[12:4], d_ba[3] ^ (odd_t ^ flip), d_ba[2:0]};
      adrs_d_rg = {d_rg[12:4], d_rg[3] ^ (odd_t ^ flip), d_rg[2:0]};

      // ---- low banks: sort a / b_rg / c / d_rg on bits [3:2] ----
      adrs_bnk0l <= bank_adrs(2'b00, adrs_a, adrs_b_rg, adrs_c, adrs_d_rg);
      adrs_bnk1l <= bank_adrs(2'b01, adrs_a, adrs_b_rg, adrs_c, adrs_d_rg);
      adrs_bnk2l <= bank_adrs(2'b10, adrs_a, adrs_b_rg, adrs_c, adrs_d_rg);
      adrs_bnk3l <= bank_adrs(2'b11, adrs_a, adrs_b_ba == adrs_b_ba ? adrs_b_rg : adrs_b_rg, adrs_c, adrs_d_rg);

      // ---- high banks: color index when tlut_en && !load_11d,
      //      otherwise sort a / b_ba / c / d_ba on bits [3:2] ----
      // load_11d is read here before its nonblocking update below takes
      // effect, so this sees the value registered on an earlier edge.
      adrs_bnk0h <= (tlut_en && !load_11d) ? clr_ind_a :
                    bank_adrs(2'b00, adrs_a, adrs_b_ba, adrs_c, adrs_d_ba);
      adrs_bnk1h <= (tlut_en && !load_11d) ? clr_ind_b :
                    bank_adrs(2'b01, adrs_a, adrs_b_ba, adrs_c, adrs_d_ba);
      adrs_bnk2h <= (tlut_en && !load_11d) ? clr_ind_c :
                    bank_adrs(2'b10, adrs_a, adrs_b_ba, adrs_c, adrs_d_ba);
      adrs_bnk3h <= (tlut_en && !load_11d) ? clr_ind_d :
                    bank_adrs(2'b11, adrs_a, adrs_b_ba, adrs_c, adrs_d_ba);

      // ---- register address bits <12,3:0> of every texel ----
      adrs_a_1d    <= {adrs_a[12],    adrs_a[3:0]};
      adrs_b_ba_1d <= {adrs_b_ba[12], adrs_b_ba[3:0]};
      adrs_c_1d    <= {adrs_c[12],    adrs_c[3:0]};
      adrs_d_ba_1d <= {adrs_d_ba[12], adrs_d_ba[3:0]};
      adrs_b_rg_1d <= {adrs_b_rg[12], adrs_b_rg[3:0]};
      adrs_d_rg_1d <= {adrs_d_rg[12], adrs_d_rg[3:0]};

      // ---- delay load one more cycle ----
      load_11d <= load_10d;

    end // if (start_gclk)

endmodule // tc_sort