/* st.c */
/**************************************************************************
 *                                                                        *
 *               Copyright (C) 1994, Silicon Graphics, Inc.               *
 *                                                                        *
 *  These coded instructions, statements, and computer programs  contain  *
 *  unpublished  proprietary  information of Silicon Graphics, Inc., and  *
 *  are protected by Federal copyright law.  They  may  not be disclosed  *
 *  to  third  parties  or copied or duplicated in any form, in whole or  *
 *  in part, without the prior written consent of Silicon Graphics, Inc.  *
 *                                                                        *
 **************************************************************************/

/*
 *  Texture/Color/Z Stepper Unit
 *
 *
 *
 *
 */
#include <stdio.h>
#include "st.h"


/*
 * Clock edge tests.  Both macros expand to an expression over variables
 * named save_clk and save_clk_old, so they can only be used where those two
 * names are in scope.  POSEDGE is true when save_clk is non-zero and
 * save_clk_old is zero; NEGEDGE is the opposite transition.
 */
#define POSEDGE         (save_clk && !save_clk_old)
#define NEGEDGE         (!save_clk && save_clk_old)


/*
 * SIGN_EXTEND_n(x): treat bit n-1 of x as a sign bit.  When that bit is set,
 * ones are ORed into bit n-1 and every bit above it; when it is clear, x is
 * returned unchanged (any bits already set above bit n-1 are NOT cleared).
 * The argument is expanded more than once, so it should be free of side
 * effects.
 */
#define SIGN_EXTEND_27(x)		(((x) & 0x4000000) ? ((x) | ~0x3ffffff) : (x))
#define SIGN_EXTEND_22(x)		(((x) & 0x200000) ? ((x) | ~0x1fffff) : (x))
#define SIGN_EXTEND_13(x)		(((x) & 0x1000) ? ((x) | ~0xfff) : (x))
#define SIGN_EXTEND_16(x)		(((x) & 0x8000) ? ((x) | ~0x7fff) : (x))
#define SIGN_EXTEND_17(x)               (((x) & 0x10000) ? ((x) | ~0xffff) : (x))

/* Low 21 bits set. */
#define ACCUM_MASK			0x1fffff /* 10.11, unsigned */


/*----------------------------------------------------------------------------
 *  rgba_offset - offset attributes R/G/B/A to nearest subpixel
 *
 *  Computes  ((att_data << 2) + dx * x_off + dy * y_off) >> 4,  where dx and
 *  dy are first sign-extended from 13 bits.  The attribute is scaled up by
 *  four and the products are used unshifted "to reflect instantiated 10.4
 *  multipliers" (original author's note); an earlier version shifted each
 *  product right by 2 and the final sum right by 2.
 *
 *  formats (as stated by the original author):
 *    att_data: 10.2
 *    dx:       s,10.2
 *    dy:       s,10.2
 *    x_off:    .2
 *    y_off:    .2
 *
 *  returns:
 *    9.0
 *----------------------------------------------------------------------------
 */
int
  rgba_offset(int att_data, int dx, int dy, int x_off, int y_off)
{
  /* slopes arrive as 13-bit two's complement fields */
  const int slope_x = SIGN_EXTEND_13(dx);
  const int slope_y = SIGN_EXTEND_13(dy);

  /* subpixel corrections; each product carries two extra fraction bits */
  const int x_term = slope_x * x_off;
  const int y_term = slope_y * y_off;

  /* line the attribute up with the products before summing */
  const int total = (att_data << 2) + x_term + y_term;

  return total >> 4;
}

/*----------------------------------------------------------------------------
 *  z_offset - offset attribute Z to nearest subpixel
 *
 *  Computes  ((att_data << 2) + dx * x_off + dy * y_off) >> 5,  where dx and
 *  dy are first sign-extended from 22 bits.  The attribute is scaled up by
 *  four and the products are used unshifted "to reflect instantiated s15.8
 *  multipliers" (original author's note); an earlier version shifted each
 *  product right by 2 and the final sum right by 3.
 *
 *  formats (the original author marked these as unverified):
 *    att_data: 14.4  Check these?
 *    dx:       14.4  Check these?
 *    dy:       14.4  Check these?
 *    x_off:    .2
 *    y_off:    .2
 *
 *  returns:
 *    ?.?  (not documented by the original author)
 *----------------------------------------------------------------------------
 */
int
  z_offset(int att_data, int dx, int dy, int x_off, int y_off)
{
  /* slopes arrive as 22-bit two's complement fields */
  const int slope_x = SIGN_EXTEND_22(dx);
  const int slope_y = SIGN_EXTEND_22(dy);

  /* subpixel corrections; each product carries two extra fraction bits */
  const int x_term = slope_x * x_off;
  const int y_term = slope_y * y_off;

  /* line the attribute up with the products before summing */
  const int total = (att_data << 2) + x_term + y_term;

  return total >> 5;
}

/*----------------------------------------------------------------------------
 -  clr_clamp() - overflow/underflow clamp for a color component
 -  color input as 9.0 number.  color output as unsigned 8 bit number
 -
 -  based on Akeley method:   Carry one extra bit of precision through all
 -  computations and never clamp until the last computation is done (e.g.
 -  never clamp intermediate values, especially those that continue to be
 -  interpolated).  When done, clamp the result if the MSB (extra bit, 0x100)
 -  is one.  In this case, clamp to all zeros if the next most significant
 -  bit (0x80) is 1, and to all ones if it is zero.  (To undo the wrap.)
 -  When the extra bit is clear the low eight bits are returned as they are.
 -----------------------------------------------------------------------------*/
static int
  clr_clamp(int clr)
{
  const int extra_bit = clr & 0x100;
  const int next_bit  = clr & 0x80;

  /* extra bit clear: value is in range, pass the low byte through */
  if(!extra_bit)
    return(clr & 0xff);

  /* value wrapped: next bit set means underflow (0), clear means overflow */
  return(next_bit ? 0 : 0xff);
}

/*----------------------------------------------------------------------------
 -  z_clamp() - overflow/underflow clamp for Z
 -  Z input as s,15.3 number.  Z output as unsigned 15.3 bit number
 -
 -  based on Akeley method:   Carry one extra bit of precision through all
 -  computations and never clamp until the last computation is done (e.g.
 -  never clamp intermediate values, especially those that continue to be
 -  interpolated).  When done, clamp the result if the MSB (extra bit,
 -  0x40000) is one.  In this case, clamp to all zeros if the next most
 -  significant bit (0x20000) is 1, and to all ones if it is zero.  (To undo
 -  the wrap.)  When the extra bit is clear the low 18 bits are returned as
 -  they are.
 -----------------------------------------------------------------------------*/
static int
  z_clamp(int z)
{
  /* decide on the extra bit and the bit just below it */
  switch(z & 0x60000)
  {
    case 0x60000:  /* extra bit and next bit both set: clamp to 0 */
      return(0);

    case 0x40000:  /* extra bit set, next bit clear: clamp to 0.99 */
      return(0x3ffff);

    default:       /* extra bit clear: in range */
      return(z & 0x3ffff);
  }
}



/*----------------------------------------------------------------------------
 -  st() - interface routine for Texture/Color/Z steppers
 -
 -  Samples the clock from (*pp0)->gclk and the previously seen clock from
 -  (*pp1)->gclk_old.  On a rising edge (POSEDGE) the two state pointers are
 -  exchanged through pp0/pp1 and one step is evaluated: every value is read
 -  from p1 (the structure that was *pp0 on entry) and written to p0 (the
 -  structure that was *pp1 on entry), except where noted below.
 -
 -  Per rising edge the routine updates, for each of R, G, B, A, Z, S, T, W:
 -    - a comp_new flag and an accumulator, and
 -    - the stepped outputs st_cc_r/g/b/a, st_bl_z, st_tc_s/t/w and the
 -      st_tc_s_dy/t_dy/w_dy values.
 -
 -  On every call, edge or not, the sampled clock is stored into gclk_old of
 -  both structures.
 -
 -  pp0, pp1:  addresses of the two state pointers; swapped in place on a
 -             rising edge.
 -
 -  NOTE(review): field widths and types of st_t come from st.h, which is not
 -  visible here; the (int) casts below suggest the fields are not plain int.
 -----------------------------------------------------------------------------*/
void
  st(st_t **pp0, st_t **pp1)
{
  /* pointers to memory structure */
  st_t *p0, *p1;
  int save_clk;       /* clock level sampled on this call (used by POSEDGE) */
  int save_clk_old;   /* clock level recorded by the previous call */

  /* temporary signals */
  /* NOTE(review): sr, sg, sb, sa are declared but never referenced below. */
  int sr, sg, sb, sa;
  int t;              /* start value selected for the T accumulator */

  /*
   *  Get pointers, clocks
   */
  p0 = *pp0;
  p1 = *pp1;
  save_clk = p0->gclk;
  save_clk_old = p1->gclk_old;

  if(POSEDGE)
  {
    /* transfer all next-clock register values to register outputs. */
    *pp0 = p1; /* swap */
    *pp1 = p0;
    p0 = *pp0; /* fix pointers */
    p1 = *pp1;
 
 
    /*
     *             Accumulators
     *
     *  Every channel follows the same pattern:
     *    comp_new:  at span start it becomes !ncyc; otherwise it toggles
     *               when ncyc is set and holds when ncyc is clear.
     *    accum:     at span start it is loaded from the ep_st_* value shifted
     *               left; when the previous comp_new is clear it holds;
     *               otherwise the dx value is added (left) or subtracted
     *               (right major).
     */



    /*
     *  Red
     */
    p0->comp_new_r = p1->startspan_r ? !p1->ncyc_r : 
                          (p1->ncyc_r ? !p1->comp_new_r : p1->comp_new_r);
    if(p1->startspan_r)
      p0->accumr = p1->ep_st_r << 5; /* pad lsbs with zeros */
    else if(!p1->comp_new_r)
      p0->accumr = p1->accumr; 
    else if(p1->left_r)
      p0->accumr = (int)p1->accumr + SIGN_EXTEND_22(p1->at_st_dxr);
    else /* right major */
      p0->accumr = (int)p1->accumr - SIGN_EXTEND_22(p1->at_st_dxr);

    /*
     *  Green
     */
    p0->comp_new_g = p1->startspan_g ? !p1->ncyc_g : 
                          (p1->ncyc_g ? !p1->comp_new_g : p1->comp_new_g);
    if(p1->startspan_g)
      p0->accumg = p1->ep_st_g << 5; /* pad lsbs with zeros */
    else if(!p1->comp_new_g)
      p0->accumg = p1->accumg; 
    else if(p1->left_g)
      p0->accumg = (int)p1->accumg + SIGN_EXTEND_22(p1->at_st_dxg);
    else /* right major */
      p0->accumg = (int)p1->accumg - SIGN_EXTEND_22(p1->at_st_dxg);

    /*
     *  Blue
     */
    p0->comp_new_b = p1->startspan_b ? !p1->ncyc_b : 
                          (p1->ncyc_b ? !p1->comp_new_b : p1->comp_new_b);
    if(p1->startspan_b)
      p0->accumb = p1->ep_st_b << 5; /* pad lsbs with zeros */
    else if(!p1->comp_new_b)
      p0->accumb = p1->accumb; 
    else if(p1->left_b)
      p0->accumb = (int)p1->accumb + SIGN_EXTEND_22(p1->at_st_dxb);
    else /* right major */
      p0->accumb = (int)p1->accumb - SIGN_EXTEND_22(p1->at_st_dxb);

    /*
     *  Alpha
     */
    p0->comp_new_a = p1->startspan_a ? !p1->ncyc_a : 
                          (p1->ncyc_a ? !p1->comp_new_a : p1->comp_new_a);
    if(p1->startspan_a)
      p0->accuma = p1->ep_st_a << 5; /* pad lsbs with zeros */
    else if(!p1->comp_new_a)
      p0->accuma = p1->accuma; 
    else if(p1->left_a)
      p0->accuma = (int)p1->accuma + SIGN_EXTEND_22(p1->at_st_dxa);
    else /* right major */
      p0->accuma = (int)p1->accuma - SIGN_EXTEND_22(p1->at_st_dxa);

    /*
     *  Z
     *
     *  Differs from the color channels: the start value is shifted left by
     *  10 rather than 5, and at_st_dxz is used without SIGN_EXTEND_22.
     */
    p0->comp_new_z = p1->startspan_z ? !p1->ncyc_z : 
                          (p1->ncyc_z ? !p1->comp_new_z : p1->comp_new_z);
    if(p1->startspan_z)
      p0->accumz = p1->ep_st_z << 10; /* pad lsbs with zeros */
    else if(!p1->comp_new_z)
      p0->accumz = p1->accumz; 
    else if(p1->left_z)
      p0->accumz = (int)p1->accumz + (int)p1->at_st_dxz;
    else /* right major */
      p0->accumz = (int)p1->accumz - (int)p1->at_st_dxz;

    /*
     *  S
     *
     *  S, T and W keep the toggle in comp_new_p? and derive comp_new_? from
     *  the freshly written p0 toggle ANDed with ncyc.
     */
    p0->comp_new_ps = p1->startspan_s ? !p1->ncyc_s : 
                          (p1->ncyc_s ? !p1->comp_new_ps : p1->comp_new_ps);

    /* reads the p0 value assigned just above, so statement order matters */
    p0->comp_new_s = p0->comp_new_ps && p1->ncyc_s;

    if(p1->startspan_s)
      p0->accums = p1->ep_st_s << 5; /* pad lsbs with zeros */
    else if(!p1->comp_new_ps)
      p0->accums = p1->accums; 
    else if(p1->left_s)
      p0->accums = (int)p1->accums + (int)p1->at_st_dxs;
    else /* right major */
      p0->accums = (int)p1->accums - (int)p1->at_st_dxs;

    /* new dy adder for LOD fix */
    /*
     * When the previous toggle is clear: accumulator plus the dy value
     * (sign-extended from 17 bits, shifted left 10).  Otherwise: accumulator
     * plus or minus dx, as in the accumulator step above.  Either way the
     * result is shifted right 11 and masked to 16 bits.
     */
    if(!p1->comp_new_ps)
      p0->st_tc_s_dy = (((int)SIGN_EXTEND_27(p1->accums) + 
			(int)(SIGN_EXTEND_17(p1->at_st_dys) << 10)) >> 11) & 0xffff;
    else if(p1->left_s)
      p0->st_tc_s_dy = (((int)p1->accums + (int)p1->at_st_dxs) >> 11) & 0xffff;
    else
      p0->st_tc_s_dy = (((int)p1->accums - (int)p1->at_st_dxs) >> 11) & 0xffff;

    /*
     *  T
     */
    p0->comp_new_pt = p1->startspan_t ? !p1->ncyc_t : 
                          (p1->ncyc_t ? !p1->comp_new_pt : p1->comp_new_pt);

    /* reads the p0 value assigned just above, so statement order matters */
    p0->comp_new_t = p0->comp_new_pt && p1->ncyc_t;

    /* this mux would normally be in the edge walker pipe module,
	but the Csim doesn't like zero delay signals in a module so we
	put the mux here instead. */
    /* tc_load_d1 receives the previous tc_load; the start value for T is
       ew_d when tlut_en is set and the previous tc_load_d1 is clear,
       otherwise ep_st_t. */
    p0->tc_load_d1 = p1->tc_load;
    t = (p1->tlut_en && !p1->tc_load_d1) ? p1->ew_d : p1->ep_st_t;

    if(p1->startspan_t)
      p0->accumt = t << 5;  /* pad lsbs with zeros */
    else if(!p1->comp_new_pt)
      p0->accumt = p1->accumt;  /* hold */
    else if(p1->left_t)
      p0->accumt = (int)p1->accumt + (int)p1->at_st_dxt;
    else /* right major */
      p0->accumt = (int)p1->accumt - (int)p1->at_st_dxt;

    /* new dy adder for LOD fix */
    /* same scheme as the S dy adder above */
    if(!p1->comp_new_pt)
      p0->st_tc_t_dy = (((int)SIGN_EXTEND_27(p1->accumt) + 
			(int)(SIGN_EXTEND_17(p1->at_st_dyt) << 10)) >> 11) & 0xffff;
    else if(p1->left_t)
      p0->st_tc_t_dy = (((int)p1->accumt + (int)p1->at_st_dxt) >> 11) & 0xffff;
    else
      p0->st_tc_t_dy = (((int)p1->accumt - (int)p1->at_st_dxt) >> 11) & 0xffff;

    /*
     *  W
     */
    p0->comp_new_pw = p1->startspan_w ? !p1->ncyc_w : 
                          (p1->ncyc_w ? !p1->comp_new_pw : p1->comp_new_pw);

    /* reads the p0 value assigned just above, so statement order matters */
    p0->comp_new_w = p0->comp_new_pw && p1->ncyc_w;

    if(p1->startspan_w)
      p0->accumw = p1->ep_st_w << 5; /* pad lsbs with zeros */
    else if(!p1->comp_new_pw)
      p0->accumw = p1->accumw; 
    else if(p1->left_w)
      p0->accumw = (int)p1->accumw + (int)p1->at_st_dxw;
    else /* right major */
      p0->accumw = (int)p1->accumw - (int)p1->at_st_dxw;

    /* new dy adder for LOD fix */
    /* same scheme as the S dy adder above */
    if(!p1->comp_new_pw)
      p0->st_tc_w_dy = (((int)SIGN_EXTEND_27(p1->accumw) + 
			(int)(SIGN_EXTEND_17(p1->at_st_dyw) << 10)) >> 11) & 0xffff;
    else if(p1->left_w)
      p0->st_tc_w_dy = (((int)p1->accumw + (int)p1->at_st_dxw) >> 11) & 0xffff;
    else
      p0->st_tc_w_dy = (((int)p1->accumw - (int)p1->at_st_dxw) >> 11) & 0xffff;

    /*
     *  Output Stepped Attributes
     *
     *  Color and Z outputs are built from the p1 accumulators (the values
     *  before this edge's update): accumulator and dx are shifted right
     *  (9 for color, 10 for Z), offset to the subpixel, then clamped.
     *  S/T/W outputs are taken from the p0 accumulators computed above,
     *  shifted right 11 and masked to 16 bits.
     */

    p0->st_cc_r = clr_clamp(rgba_offset(p1->accumr >> 9, p1->at_st_dxr >> 9, 
		      p1->at_st_dyr, p1->x_offsetr, p1->y_offsetr));
 
    p0->st_cc_g = clr_clamp(rgba_offset(p1->accumg >> 9, p1->at_st_dxg >> 9, 
		      p1->at_st_dyg, p1->x_offsetg, p1->y_offsetg));

    p0->st_cc_b = clr_clamp(rgba_offset(p1->accumb >> 9, p1->at_st_dxb >> 9, 
		      p1->at_st_dyb, p1->x_offsetb, p1->y_offsetb));

    p0->st_cc_a = clr_clamp(rgba_offset(p1->accuma >> 9, p1->at_st_dxa >> 9, 
		      p1->at_st_dya, p1->x_offseta, p1->y_offseta));

    p0->st_bl_z = z_clamp(z_offset(p1->accumz >> 10, p1->at_st_dxz >> 10, 
		      p1->at_st_dyz, p1->x_offsetz, p1->y_offsetz));

    p0->st_tc_s = (p0->accums >> 11) & 0xffff;
    p0->st_tc_t = (p0->accumt >> 11) & 0xffff;
    p0->st_tc_w = (p0->accumw >> 11) & 0xffff;

  } /* posedge */

  /* save last clock state */
  /* written to both structures, so it is found whichever one is p1 next call */
   p0->gclk_old = p1->gclk_old = save_clk;
}


/*----------------------------------------------------------------------------
 -  st_init() - reset the clock bookkeeping of both state structures
 -
 -  Sets gclk and gclk_old to 0 in p0 and in p1.  No other field is touched.
 -  With gclk_old at 0, st() treats the first call that sees a non-zero gclk
 -  as a rising edge.
 -----------------------------------------------------------------------------*/
void
  st_init(st_t *p0, st_t *p1)
{
    /* current clock level: low */
    p0->gclk = 0;
    p1->gclk = 0;

    /* previously seen clock level: low */
    p0->gclk_old = 0;
    p1->gclk_old = 0;
}