// tc_lod.v 11.3 KB
/**************************************************************************
 *                                                                        *
 *               Copyright (C) 1994, Silicon Graphics, Inc.               *
 *                                                                        *
 *  These coded instructions, statements, and computer programs  contain  *
 *  unpublished  proprietary  information of Silicon Graphics, Inc., and  *
 *  are protected by Federal copyright  law.  They  may not be disclosed  *
 *  to  third  parties  or copied or duplicated in any form, in whole or  *
 *  in part, without the prior written consent of Silicon Graphics, Inc.  *
 *                                                                        *
 *************************************************************************/

// $Id: tc_lod.v,v 1.1 2002/03/28 00:26:13 berndt Exp $

////////////////////////////////////////////////////////////////////////
//
// Project Reality
//
// module:	tc_lod
// description:	Level of detail module. Transforms LOD into tile memory
//		addresses and into LOD fraction for 3rd-axis mip-map
//		interpolation.
//
// designer:	Tony DeLaurier
// date:	6/21/94
//
////////////////////////////////////////////////////////////////////////

module tc_lod (gclk, s, t, st_span, ncyc, min_level, max_level, detail_en, 
	       sharp_en, lod_en, prim_tile, load, tlut_en, l_frac_out, l_tile, 
	       load_8d, load_9d, s_clamped, t_clamped);

  // Overview (every register in this module is clocked on posedge gclk):
  //
  //  * s and t are captured, delayed, saturated to 16 bits and driven out
  //    as s_clamped / t_clamped.
  //  * tc_max_abs_dif (defined outside this file) produces max_delta from
  //    the current and next s/t values; max_delta is registered as lod.
  //  * lod is clamped, then encoded into a 3-bit index (position of the
  //    leading one in lod_clamp[12:5]) and an 8-bit fraction.
  //  * l_tile is the (optionally masked) clamped index plus prim_tile plus
  //    the inc / inc2 offsets.  l_frac_out is the fraction after a delay
  //    line whose tap depends on the delayed tlut_en.
  //  * load is delayed through a shift register and exported as load_8d
  //    and load_9d.

  input gclk;			// RDP gated clock
  input [18:0] s, t;		// over, under, s,11.5
  input st_span;                // start span
  input ncyc;                   // 0: 1-cycle mode, 1: 2-cycle mode
  input [4:0] min_level;	// minimum lod fraction clamp
  input [2:0] max_level;	// number of mipmaps -1 or -2
  input detail_en;		// enable detailed texture
  input sharp_en;		// enable sharpened texture
  input lod_en;			// use lod in tile indices determination
  input [2:0] prim_tile;	// base tile index of primitive
  input load;			// load_tile, load_block or load_tlut
  input tlut_en;                // enable texture lookup table

  output [8:0] l_frac_out;	// l_frac out
  reg [8:0] l_frac_out;		// l_frac out
  output [2:0] l_tile;		// tile index into tile memory
  reg [2:0] l_tile;		// tile index into tile memory
  output load_8d;		// load delayed 8 cycles
  reg load_8d;			// load delayed 8 cycles
  output load_9d;		// load delayed 9 cycles
  reg load_9d;			// load delayed 9 cycles
  output [15:0] s_clamped;      // clamped text. coord.
  reg [15:0] s_clamped;      	// clamped text. coord.
  output [15:0] t_clamped;      // clamped text. coord.
  reg [15:0] t_clamped;      	// clamped text. coord.


  // The next group (lod_clamp .. inc2, plus force_cycle) is only ever
  // written with blocking assignments inside the main clocked block, and
  // each is written before it is read there, so these act as same-clock
  // intermediate values rather than pipeline stages.

  reg [14:0] lod_clamp;		// clamped lod
  reg min;			// below finest level of detail
  reg max;			// equal or above coarsest level of detail
  reg [2:0] lod_index;		// encoded lod
  reg [2:0] index_clamp;	// clamped encoded lod
  reg [7:0] fraction;		// lod fraction
  reg [7:0] fract_clamp;	// clamped lod fraction
  reg inc;			// increment tile address by one
  reg inc2;			// increment tile address by two
  reg load_1d;			// load delayed 1 cycle
  reg load_2d;			// load delayed 2 cycles
  reg load_3d;			// load delayed 3 cycles
  reg load_4d;			// load delayed 4 cycles
  reg load_5d;			// load delayed 5 cycles
  reg load_6d;			// load delayed 6 cycles
  reg load_7d;			// load delayed 7 cycles
  reg force_cycle;		// force cycle 
  reg cycle;			// cycle 0 or cycle 1
  reg cycle_1d;			// cycle delayed 1 cycle
  reg cycle_2d;			// cycle delayed 2 cycles
  reg cycle_3d;			// cycle delayed 3 cycles
  reg cycle_4d;			// cycle delayed 4 cycles
  reg cycle_5d;			// cycle delayed 5 cycles

  reg [8:0] l_frac;		// lod fraction for 3rd-axis interpolation
  reg [8:0] l_frac_1d;		// l_frac delayed 1
  reg [8:0] l_frac_2d;		// l_frac delayed 2
  reg [8:0] l_frac_3d;		// l_frac delayed 3
  reg [8:0] l_frac_4d;		// l_frac delayed 4
  reg [8:0] l_frac_5d;		// l_frac delayed 5
  reg [8:0] l_frac_6d;		// l_frac delayed 6
  reg [8:0] l_frac_7d;		// l_frac delayed 7
  reg [8:0] l_frac_8d;		// l_frac delayed 8

  reg tlut_en_1d;		// tlut_en delayed 1
  reg tlut_en_2d;		// tlut_en delayed 2

  reg st_span_1d;		// st_span delayed 1
  reg st_span_2d;		// st_span delayed 2
  reg st_span_3d;		// st_span delayed 3

  reg [18:0] s_curr;		// s captured from the port (held while cycle_2d)
  reg [18:0] s_curr_1d;		// s_curr delayed 1
  reg [18:0] s_prev;		// s_curr delayed 2
  reg [15:0] s_tmp;		// s_prev saturated to 16 bits

  reg [18:0] t_curr;		// t captured from the port (held while cycle_2d)
  reg [18:0] t_curr_1d;		// t_curr delayed 1
  reg [18:0] t_prev;		// t_curr delayed 2
  reg [15:0] t_tmp;		// t_prev saturated to 16 bits

  wire [18:0] s_next;		// output of s_mux (selects between s and s_prev)
  wire [18:0] t_next;		// output of t_mux (selects between t and t_prev)

  wire [14:0] max_delta;	// .max output of tc_max_abs_dif
  wire [14:0] last_max;		// delta_y_max gated by cycle_3d
  reg [14:0] delta_y_max;	// max_delta delayed 1
  reg [14:0] lod;		// max_delta, captured only while cycle_4d is low

  wire s_clamp;			// bit 18 or 17 set on s_next or s_curr
  wire t_clamp;			// bit 18 or 17 set on t_next or t_curr
  wire clamp;			// s_clamp, t_clamp or last_clamp
  wire last_clamp;		// y_clamp gated by cycle_3d
  reg y_clamp;			// clamp delayed 1
  reg clamp_max_lod;		// clamp, captured only while cycle_4d is low

  // generate clamped coordinates
  //
  // Register chain: s -> s_curr -> s_curr_1d -> s_prev -> s_tmp -> s_clamped
  // (one register per arrow).  s_curr keeps its old value while cycle_2d is
  // set, otherwise it samples the s input.

  always @(posedge gclk)
  begin

    s_curr <= (cycle_2d) ? s_curr : s;
    s_curr_1d <= s_curr;
    s_prev <= s_curr_1d;

    // Saturate s_prev to 16 bits, highest priority first.  Per the port
    // comment ("over, under, s,11.5") bit 18 is presumably the overflow flag
    // and bit 17 the underflow flag -- confirm against the driver of s.
    // If bits 16 and 15 differ, the 17-bit value in [16:0] does not fit in
    // a signed 16-bit result, so it is pinned to 16'h7fff or 16'h8000.

    if (s_prev[18])
      s_tmp <= 16'h7fff;
    else if (s_prev[17])
      s_tmp <= 16'h8000;
    else if (s_prev[16:15] == 2'b01)
      s_tmp <= 16'h7fff;
    else if (s_prev[16:15] == 2'b10)
      s_tmp <= 16'h8000;
    else
      s_tmp <= s_prev[15:0];

    s_clamped <= s_tmp;

  end // always

  // Same pipeline and saturation as the s path above, applied to t.

  always @(posedge gclk)
  begin

    t_curr <= (cycle_2d) ? t_curr : t;
    t_curr_1d <= t_curr;
    t_prev <= t_curr_1d;

    if (t_prev[18])
      t_tmp <= 16'h7fff;
    else if (t_prev[17])
      t_tmp <= 16'h8000;
    else if (t_prev[16:15] == 2'b01)
      t_tmp <= 16'h7fff;
    else if (t_prev[16:15] == 2'b10)
      t_tmp <= 16'h8000;
    else
      t_tmp <= t_prev[15:0];

    t_clamped <= t_tmp;

  end // always


  // special case for back-to-back spans
  //
  // With st_span_3d set the "next" coordinate comes from s_prev / t_prev
  // instead of the live s / t inputs (assuming the mux cell selects i_one
  // when sel is high -- the cell itself is defined outside this file).

  mx2_1_19bit s_mux (.sel(st_span_3d), .i_zero(s), .i_one(s_prev), .out(s_next));
  mx2_1_19bit t_mux (.sel(st_span_3d), .i_zero(t), .i_one(t_prev), .out(t_next));


  // instantiate max absolute difference circuit
  //
  // tc_max_abs_dif is defined elsewhere.  Only bits [16:0] of each
  // coordinate are passed in; bits 18:17 are handled separately by the
  // clamp logic below.  Judging by its name and ports it presumably returns
  // the largest of |s_next - s_curr|, |t_next - t_curr| and last_max --
  // TODO confirm against the tc_max_abs_dif source.
  tc_max_abs_dif max_abs_dif (.s_curr(s_curr[16:0]), .s_next(s_next[16:0]), 
			      .t_curr(t_curr[16:0]), .t_next(t_next[16:0]), 
			      .last_max(last_max), .max(max_delta));

  
  // register lod every other cycle
  //
  // delta_y_max captures max_delta on every clock; lod only updates while
  // cycle_4d is low and holds its value otherwise.
  always @(posedge gclk)
  begin
    delta_y_max <= max_delta;
    lod <= (cycle_4d) ? lod : max_delta;
  end // always

  // Feed the previous clock's max_delta back into the max circuit only
  // while cycle_3d is high; otherwise last_max is forced to zero.
  assign last_max = {15{cycle_3d}} & delta_y_max;


  // determine clamp_max_lod
  //
  // clamp is raised when bit 18 or 17 is set on any of the current / next
  // coordinates, or when the previous clock's clamp (y_clamp) is carried
  // over while cycle_3d is high.  clamp_max_lod follows the same
  // update-or-hold scheme as lod, so the two registers stay aligned.
  
  assign s_clamp = s_next[18] || s_next[17] || s_curr[18] || s_curr[17];
  assign t_clamp = t_next[18] || t_next[17] || t_curr[18] || t_curr[17];
  assign clamp = s_clamp || t_clamp || last_clamp;
  assign last_clamp = cycle_3d && y_clamp;

  always @(posedge gclk)
  begin
    y_clamp <= clamp;
    clamp_max_lod <= (cycle_4d) ? clamp_max_lod : clamp;
  end // always


  // convert lod to tile index
  //
  // NOTE: this block deliberately mixes blocking (=) and non-blocking (<=)
  // assignments.  Blocking targets (lod_clamp, min, lod_index, max,
  // index_clamp, fraction, fract_clamp, force_cycle, inc, inc2) are
  // intermediate values computed and consumed within the same clock edge,
  // so the order of statements matters.  Non-blocking targets are the real
  // pipeline registers; any read of such a register inside this block sees
  // its value from before the clock edge, even when the read appears after
  // the corresponding <= statement.

  always @(posedge gclk)
  begin

    // clamp lod
    //
    // Priority: (1) clamp_max_lod or lod[14] set -> all ones;
    // (2) lod[14:5] is zero and lod[4:0] is below min_level -> raise the
    // low five bits to min_level; (3) otherwise pass lod through.

    if (clamp_max_lod || lod[14])
    begin
      lod_clamp[14:0] = 15'h7fff;
    end 
    else if ((lod[14:5] == 0) && (lod[4:0] < min_level[4:0]))
    begin
      lod_clamp[14:5] = 0;
      lod_clamp[4:0] = min_level[4:0];
    end 
    else
      lod_clamp[14:0] = lod[14:0];

    // delay tlut_en by 2 cycles

    tlut_en_1d <= tlut_en;
    tlut_en_2d <= tlut_en_1d;

    // determine if below finest lod
    // (true when every bit above the low five bits of lod_clamp is zero)
    
    min = (lod_clamp[14:5] == 0);

    // determine lod_index
    //
    // Priority encode the leading one of lod_clamp[12:5]: bit 12 -> 7 down
    // to bit 6 -> 1.  Bit 5 alone and all-zero both give index 0.  The
    // listed patterns cover every 0/1 combination, so the default arm is
    // only reachable when the selector contains unknown bits.

    casez (lod_clamp[12:5])
      8'b1???????: lod_index[2:0] = 3'h7;
      8'b01??????: lod_index[2:0] = 3'h6;
      8'b001?????: lod_index[2:0] = 3'h5;
      8'b0001????: lod_index[2:0] = 3'h4;
      8'b00001???: lod_index[2:0] = 3'h3;
      8'b000001??: lod_index[2:0] = 3'h2;
      8'b0000001?: lod_index[2:0] = 3'h1;
      8'b00000001: lod_index[2:0] = 3'h0;
      8'b00000000: lod_index[2:0] = 3'h0;
      default: lod_index[2:0] = 3'bx;
    endcase

    // determine if greater than or equal to coarsest lod
    // (lod_clamp[14:13] lie above the range seen by the encoder, so any bit
    // set there also counts as max)

    max = ((lod_clamp[14:13] != 0) || (lod_index[2:0] >= max_level[2:0]));

    // clamp lod_index

    index_clamp[2:0] = max ? max_level[2:0] : lod_index[2:0];

    // determine lod fraction
    //
    // Take the 8 bits (zero padded on the right for indices 0..2) located
    // directly below the leading-one position that corresponds to
    // lod_index.  For index 0 this is lod_clamp[4:0], which is also used
    // when lod_clamp[12:5] is all zero.
    
    case (lod_index[2:0])
      3'h0: fraction[7:0] = {lod_clamp[4:0], 3'b0};
      3'h1: fraction[7:0] = {lod_clamp[5:0], 2'b0};
      3'h2: fraction[7:0] = {lod_clamp[6:0], 1'b0};
      3'h3: fraction[7:0] = {lod_clamp[7:0]};
      3'h4: fraction[7:0] = {lod_clamp[8:1]};
      3'h5: fraction[7:0] = {lod_clamp[9:2]};
      3'h6: fraction[7:0] = {lod_clamp[10:3]};
      3'h7: fraction[7:0] = {lod_clamp[11:4]};
      default: fraction[7:0] = 8'hx;
    endcase

    // clamp fraction
    //
    // Only when neither sharpen nor detail is enabled: force 8'hff at or
    // above the coarsest level (max takes priority), 8'h00 below the finest
    // level; otherwise the fraction passes through unchanged.

    fract_clamp[7:0] = fraction[7:0];
    if (~sharp_en && ~detail_en)
    begin
      if (max) 
        fract_clamp[7:0] = 8'hff;
      else if (min)
	fract_clamp[7:0] = 8'h00;
    end
    
    // for now leave out detail texture tlu

    // negate based on sharpen (register)
    //
    // Bit 8 of l_frac is set only when min and sharp_en are both true;
    // bits 7:0 carry fract_clamp.  l_frac then runs through an 8-stage
    // delay line.
    
    l_frac[8:0] <= {(min && sharp_en), fract_clamp[7:0]};
    l_frac_1d <= l_frac;
    l_frac_2d <= l_frac_1d;
    l_frac_3d <= l_frac_2d;
    l_frac_4d <= l_frac_3d;
    l_frac_5d <= l_frac_4d;
    l_frac_6d <= l_frac_5d;
    l_frac_7d <= l_frac_6d;
    l_frac_8d <= l_frac_7d;

    // Output tap: two stages later (l_frac_8d) when tlut_en_2d is set,
    // l_frac_6d otherwise.

    l_frac_out <= tlut_en_2d ? l_frac_8d : l_frac_6d;

    // determine which cycle
    //
    // cycle goes high on the next clock only if it is currently low,
    // st_span is low, ncyc is high (2-cycle mode) and load is low; in every
    // other case it goes low.  With ncyc high and st_span / load low it
    // therefore alternates 0, 1, 0, 1 ...

    force_cycle = st_span || cycle;
    cycle <= !force_cycle && ncyc && !load;
    cycle_1d <= cycle;
    cycle_2d <= cycle_1d;
    cycle_3d <= cycle_2d;
    cycle_4d <= cycle_3d;
    cycle_5d <= cycle_4d;

    st_span_1d <= st_span;
    st_span_2d <= st_span_1d;
    st_span_3d <= st_span_2d;

    // determine inc and inc2
    //
    // cycle_5d below is the register value from before this clock edge.
    // inc is set when:
    //   - detail enabled, cycle_5d low,  and not min
    //   - detail enabled, cycle_5d high, and min or max
    //   - detail disabled, cycle_5d high, not max, and (not min or sharpen)

    inc = (detail_en && ~cycle_5d && ~min) ||
          (detail_en && cycle_5d && (min || max)) ||
          (~detail_en && cycle_5d && (~min || sharp_en) && ~max);

    // delay load 

    load_1d <= load;
    load_2d <= load_1d;
    load_3d <= load_2d;
    load_4d <= load_3d;
    load_5d <= load_4d;
    load_6d <= load_5d;
    load_7d <= load_6d;
    load_8d <= load_7d;
    load_9d <= load_8d;

    // Overrides (load_6d here is its value from before this clock edge):
    // a load in flight suppresses inc entirely; with lod disabled inc just
    // follows cycle_5d.

    if (load_6d)
      inc = 0;
    else if (~lod_en)
      inc = cycle_5d;

    // inc2 is set with detail enabled and cycle_5d high when neither min
    // nor max holds; it is suppressed during a load or with lod disabled.

    inc2 = detail_en && cycle_5d && ~(min || max);

    if (load_6d || ~lod_en)
      inc2 = 0;

    // add (register)
    //
    // The clamped index is masked to zero when lod is disabled or a load is
    // in flight, then prim_tile, inc and 2*inc2 are added.  All operands
    // and the target are at most 3 bits wide, so the sum wraps modulo 8.

    l_tile[2:0] <= (index_clamp[2:0] & {3{lod_en}} & {3{~load_6d}}) + 
		   prim_tile[2:0] + inc + (inc2 << 1);

  end // always

endmodule // tc_lod


module mx2_1_19bit (sel, i_zero, i_one, out);

  // 19-bit wide 2-to-1 multiplexer built bit-by-bit from mx21d1h cells.
  // One select line is shared by all 19 cells.  mx21d1h is a library cell
  // defined outside this file; presumably z = s ? i1 : i0, which would give
  // out = sel ? i_one : i_zero -- confirm against the cell library.

  input         sel;		// common select, wired to .s of every cell
  input  [18:0] i_zero;		// wired bitwise to the .i0 pins
  input  [18:0] i_one;		// wired bitwise to the .i1 pins
  output [18:0] out;		// driven bitwise by the .z pins

  // One cell per bit, listed LSB first.  Instance mxN handles bit N.
  mx21d1h mx0  (.s(sel), .i0(i_zero[0]),  .i1(i_one[0]),  .z(out[0]));
  mx21d1h mx1  (.s(sel), .i0(i_zero[1]),  .i1(i_one[1]),  .z(out[1]));
  mx21d1h mx2  (.s(sel), .i0(i_zero[2]),  .i1(i_one[2]),  .z(out[2]));
  mx21d1h mx3  (.s(sel), .i0(i_zero[3]),  .i1(i_one[3]),  .z(out[3]));
  mx21d1h mx4  (.s(sel), .i0(i_zero[4]),  .i1(i_one[4]),  .z(out[4]));
  mx21d1h mx5  (.s(sel), .i0(i_zero[5]),  .i1(i_one[5]),  .z(out[5]));
  mx21d1h mx6  (.s(sel), .i0(i_zero[6]),  .i1(i_one[6]),  .z(out[6]));
  mx21d1h mx7  (.s(sel), .i0(i_zero[7]),  .i1(i_one[7]),  .z(out[7]));
  mx21d1h mx8  (.s(sel), .i0(i_zero[8]),  .i1(i_one[8]),  .z(out[8]));
  mx21d1h mx9  (.s(sel), .i0(i_zero[9]),  .i1(i_one[9]),  .z(out[9]));
  mx21d1h mx10 (.s(sel), .i0(i_zero[10]), .i1(i_one[10]), .z(out[10]));
  mx21d1h mx11 (.s(sel), .i0(i_zero[11]), .i1(i_one[11]), .z(out[11]));
  mx21d1h mx12 (.s(sel), .i0(i_zero[12]), .i1(i_one[12]), .z(out[12]));
  mx21d1h mx13 (.s(sel), .i0(i_zero[13]), .i1(i_one[13]), .z(out[13]));
  mx21d1h mx14 (.s(sel), .i0(i_zero[14]), .i1(i_one[14]), .z(out[14]));
  mx21d1h mx15 (.s(sel), .i0(i_zero[15]), .i1(i_one[15]), .z(out[15]));
  mx21d1h mx16 (.s(sel), .i0(i_zero[16]), .i1(i_one[16]), .z(out[16]));
  mx21d1h mx17 (.s(sel), .i0(i_zero[17]), .i1(i_one[17]), .z(out[17]));
  mx21d1h mx18 (.s(sel), .i0(i_zero[18]), .i1(i_one[18]), .z(out[18]));

endmodule // mx2_1_19bit