// vudp_nonrf.v 14.4 KB

// Raw Blame History Permalink

/*
*************************************************************************
*									*
*               Copyright (C) 1994, Silicon Graphics, Inc.		*
*									*
*  These coded instructions, statements, and computer programs  contain	*
*  unpublished  proprietary  information of Silicon Graphics, Inc., and	*
*  are protected by Federal copyright  law.  They  may not be disclosed	*
*  to  third  parties  or copied or duplicated in any form, in whole or	*
*  in part, without the prior written consent of Silicon Graphics, Inc.	*
*									*
*************************************************************************
*/

/*
*************************************************************************
*									*
*  Project Reality							*
*									*
*  module:	vudp_nonrf.v						*
*  description:	This module contains the standard cell functionality	*
*		for the vector unit datapath except for the register	*
*		file and scalar muxes, which are in a separate module.	*
*									*
*									*
*  designer:	Brian Ferguson						*
*  date:	3/20/95							*
*									*
*************************************************************************
*/

// $Id: vudp_nonrf.v,v 1.1 2002/03/28 00:26:14 berndt Exp $

/*
*	vudp_nonrf
*
*	Structural description of the vector unit datapath, excluding the
*	register file and scalar muxes.  Signal name suffixes follow the
*	pipeline stage they belong to:
*
*	  mu stage:  zero detect on vs/vt, conditional complement of vt,
*		     16 bit ALU (adder + logicals), and the four registered
*		     muxes that capture data for the ac stage.
*	  ac stage:  operand muxes, 32 bit carry save adder, 32 bit carry
*		     propagate adder, 16 bit incrementer/decrementer, and the
*		     three registered muxes holding the 48 bit accumulator
*		     {upper, middle, lower}.
*	  wb stage:  accumulator sign/zero/ones flags and the result mux
*		     that drives vdp_rslt_data_wb.
*
*	All dp_* cells are defined outside this file; comments on their
*	behavior below are limited to what the port names and connections show.
*/
module vudp_nonrf (
			clk,
			reset_l,
			vrf_vsdata_mu, vrf_vtdata_mu,
			vdpsumuprmu,
			vdpcompvtmu, vdpsumlwrmu, vdpalumu_ovr, vdpalumu_cout,
			vdp_aluctl_vt_mu,
			vdpalumu_cin, vdpalumu, vdpcoutlwrmu, vdpzerodetvtmu_z,
			vdpcoutuprmu,
			vdponedetalumu_z, vdpzerodetalumu_z, vdpzerodetvsmu_z,
			vdpaccumlwrac, vdpadderlwrac_cin,
			vdpcsaac_cout, vdpcsaac_cin, vdpcsalwrcinac, vdpcsalwrbinac,
			vdpcsalwrainac, vdpslctmxwb,
			vdpaccummidac, vdpcsauprainac,
			vdpcsauprbinac, vdpcsauprcinnandac, vdpincremac_dwn, vdpincremac_cout,
			vdpincremac_cin,
			vdpadderlwrac_ovr, vdpadderlwrac_cout,
			vdpadderuprac_ovr, vdpadderuprac_cout,
			vdpincremxac, vdpaccuprmxac, vdpacchighonewb_z,
			vdpacchighzerowb_z, vdpaccmidzerowb_z, vdpdivrsltwb,
			vumsumlowermu, vumcarrylowermu, vumsumuppermu, vumcarryuppermu,
			vdprundvluac, vdpaccsign15wb, vdpaccsign31wb,
			vdpaccsign21wb, vdpaccsign47wb, vdpclprsltwb,
			vdp_rslt_data_wb
	      );


input	clk;				// clock for every registered mux (dp_regmx16 / dp_regmx16hp) below

// NOTE(review): reset_l is declared but not referenced anywhere in this module body.
input	reset_l;			// active low reset signal

input	[15:0]	vrf_vsdata_mu;		// vs port read data from register file
input	[15:0]	vrf_vtdata_mu;		// vt port read data from register file

input   vdpalumu_cin;			// carry in to the mu stage ALU adder (alu_adder_i)
input   [2:0] vdpalumu;			// select for the ALU result mux (vdpalumu_i)
input	vdp_aluctl_vt_mu;		// 1 = invert vt before the ALU adder (XOR, see vdp_alucmp_vt_mu)
input   vdpcompvtmu;			// 0 = invert vt on the path to vdpsumlwrmu_i (XNOR, see vdp_xnor_vt_mu)

input   [1:0] vdpsumlwrmu;		// select for registered mux vdpsumlwrmu_i
input   [1:0] vdpsumuprmu;		// select for registered mux vdpsumuprmu_i
input   [1:0] vdpcoutlwrmu;		// select for registered mux vdpcoutlwrmu_i
input   [1:0] vdpcoutuprmu;		// select for registered mux vdpcoutuprmu_i
input   [15:0] vumsumlowermu;		// data input0 of vdpsumlwrmu_i
input   [15:0] vumcarrylowermu;		// data input0 of vdpcoutlwrmu_i
input   [15:0] vumsumuppermu;		// data input0 of vdpsumuprmu_i
input   [15:0] vumcarryuppermu;		// data input0 of vdpcoutuprmu_i

input   vdpadderlwrac_cin;		// carry in to the 32 bit adder (vdpadderac_i)
input   vdpcsaac_cin;			// carry in to the carry save adder (vdpcsaac_i)
input   vdpcsalwrcinac;			// select for lower csa c input mux (vdpcsalwrcinac_i)
input   [1:0] vdpcsalwrbinac;		// select for lower csa b input mux (vdpcsalwrbinac_i)
input   [1:0] vdpcsalwrainac;		// select for lower csa a input mux (vdpcsalwrainac_i)
input   [1:0] vdpcsauprbinac;		// select for upper csa b input mux (vdpcsauprbinac_i)
input   [1:0] vdpcsauprainac;		// select for upper csa a input mux (vdpcsauprainac_i)
input   vdpcsauprcinnandac;		// AND mask on upper csa c input: 0 forces it to all zeros
input   vdpincremac_dwn;		// decrement control of the incrementer/decrementer (vdpincremac_i)
input   vdpincremac_cin;		// carry in of the incrementer/decrementer (vdpincremac_i)
input   [3:0] vdprundvluac;		// compressed round value, expanded into vdp_rndlwr_in_ac / vdp_rndupr_in_ac
input   [1:0] vdpaccumlwrac;		// select for lower accumulator register mux (vdpaccumlwrac_i)
input   [1:0] vdpaccummidac;		// select for middle accumulator register mux (vdpaccummidac_i)
input   [1:0] vdpaccuprmxac;		// select for upper accumulator register mux (vdpaccumuprac_i)
input   vdpincremxac;			// select for the incrementer mux (vdpincremxac_i)

input   [2:0] vdpslctmxwb;		// select for the wb stage result mux (vdpslctmxwb_i)
input   [15:0] vdpdivrsltwb;		// data input6 of the result mux
input   [2:0] vdpclprsltwb;		// compressed clamp result, expanded into vdp_exclp_rslt_wb


output  vdpalumu_ovr;			// carryout14 port of the ALU adder
output  vdpalumu_cout;			// carryout port of the ALU adder
output  vdpzerodetvtmu_z;		// dp_zerodet output for the vt operand
output  vdponedetalumu_z;		// dp_onedet output for the ALU adder result
output  vdpzerodetalumu_z;		// dp_zerodet output for the ALU adder result
output  vdpzerodetvsmu_z;		// dp_zerodet output for the vs operand

output  vdpcsaac_cout;			// carryout port of the carry save adder
output  vdpincremac_cout;		// carryout port of the incrementer/decrementer
output  vdpadderlwrac_ovr;		// carryout14 port of the 32 bit adder
output  vdpadderlwrac_cout;		// carryout15 port of the 32 bit adder
output  vdpadderuprac_ovr;		// carryout30 port of the 32 bit adder
output  vdpadderuprac_cout;		// carryout31 port of the 32 bit adder

output  vdpacchighonewb_z;		// dp_onedet output for the upper accumulator register
output  vdpacchighzerowb_z;		// dp_zerodet output for the upper accumulator register
output  vdpaccmidzerowb_z;		// 1 when bits [15:5] of the middle accumulator register are all zero
output  vdpaccsign15wb;			// bit 15 of the lower accumulator register
output  vdpaccsign31wb;			// bit 15 of the middle accumulator register
output  vdpaccsign21wb;			// bit 5 of the middle accumulator register
output  vdpaccsign47wb;			// bit 15 of the upper accumulator register

output	[15:0]	vdp_rslt_data_wb;	// buffered output of the wb stage result mux


/*
*	Zero detect on vs and vt operands in the mu pipeline stage.
*/

dp_zerodet vdpzerodetvtmu_i (
				.input_data		(vrf_vtdata_mu),
				.out			(vdpzerodetvtmu_z)
			    ) ;

dp_zerodet vdpzerodetvsmu_i (
				.input_data		(vrf_vsdata_mu),
				.out			(vdpzerodetvsmu_z)
			    ) ;

/*
*	assign	vdpzerodetvtmu_z =	( vrf_vtdata_mu == 16'h0000 ) ;
*
*	assign	vdpzerodetvsmu_z =	( vrf_vsdata_mu == 16'h0000 ) ;
*/

/*
*	 Conditional complementing of vt operand in the mu pipeline stage.
*
*	 The operator is XNOR against the replicated control bit, so vt
*	 passes through unchanged when vdpcompvtmu is 1 and is inverted
*	 when vdpcompvtmu is 0.
*/

	wire	[15:0]	vdp_xnor_vt_mu;		// conditionally complemented version of vt

	assign	vdp_xnor_vt_mu =	( {16{vdpcompvtmu}} ~^ vrf_vtdata_mu ) ;


/*
*	ALU functionality in the mu pipeline stage.
*/

	wire	[15:0]	vdp_adder_rslt_mu;	// adder result in mu pipeline stage
	wire	[15:0]	vdp_and_rslt_mu;	// logical AND result in mu pipeline stage
	wire	[15:0]	vdp_nand_rslt_mu;	// logical NAND result in mu pipeline stage
	wire	[15:0]	vdp_or_rslt_mu;		// logical OR result in mu pipeline stage
	wire	[15:0]	vdp_nor_rslt_mu;	// logical NOR result in mu pipeline stage
	wire	[15:0]	vdp_xor_rslt_mu;	// logical XOR result in mu pipeline stage
	wire	[15:0]	vdp_xnor_rslt_mu;	// logical XNOR result in mu pipeline stage

	wire	[15:0]	vdp_alu_rslt_mu;	// final alu result in mu pipeline stage


	wire	[15:0]	vdp_alucmp_vt_mu;	// conditionally complemented version of vt for alu

	// XOR against the replicated control bit: vt is inverted when
	// vdp_aluctl_vt_mu is 1 and unchanged when it is 0 (opposite polarity
	// to vdp_xnor_vt_mu above).
	assign	vdp_alucmp_vt_mu =	( {16{vdp_aluctl_vt_mu}} ^ vrf_vtdata_mu ) ;


// 16 bit ALU adder: vs + (conditionally inverted vt) + carry in.
// carryout14 is exported as vdpalumu_ovr; presumably control combines it
// with carryout for overflow detection -- TODO confirm against the caller.
dp_adder16 alu_adder_i (
				.input1			(vrf_vsdata_mu),
				.input2			(vdp_alucmp_vt_mu),
				.carryin		(vdpalumu_cin),
				.sumout			(vdp_adder_rslt_mu),
				.carryout14		(vdpalumu_ovr),
				.carryout		(vdpalumu_cout)
		       ) ;


// Zero and all ones detection on the adder result (not on the final
// ALU mux output).
dp_zerodet vdpzerodetalumu_i (
				.input_data		(vdp_adder_rslt_mu),
				.out			(vdpzerodetalumu_z)
			    ) ;

dp_onedet vdponedetalumu_i (
				.input_data		(vdp_adder_rslt_mu),
				.out			(vdponedetalumu_z)
			    ) ;

/*
*	assign	vdpzerodetalumu_z =	( vdp_adder_rslt_mu == 16'h0000 ) ;
*
*	assign	vdponedetalumu_z =	( vdp_adder_rslt_mu == 16'hffff ) ;
*/

	// The logical results use the raw vt operand, not the conditionally
	// complemented versions.
	assign	vdp_and_rslt_mu =	( vrf_vsdata_mu & vrf_vtdata_mu ) ;

	assign	vdp_nand_rslt_mu =	~( vrf_vsdata_mu & vrf_vtdata_mu ) ;

	assign	vdp_or_rslt_mu =	( vrf_vsdata_mu | vrf_vtdata_mu ) ;

	assign	vdp_nor_rslt_mu =	~( vrf_vsdata_mu | vrf_vtdata_mu ) ;

	assign	vdp_xor_rslt_mu =	( vrf_vsdata_mu ^ vrf_vtdata_mu ) ;

	assign	vdp_xnor_rslt_mu =	( vrf_vsdata_mu ~^ vrf_vtdata_mu ) ;


// ALU result mux.  Data inputs in port order: adder, AND, NAND, OR, NOR,
// XOR, XNOR, and vt passed through unmodified.  Presumably select value n
// routes input<n> -- confirm against dp_8to1mx16.
dp_8to1mx16 vdpalumu_i (
				.input0		(vdp_adder_rslt_mu),
				.input1		(vdp_and_rslt_mu),
				.input2		(vdp_nand_rslt_mu),
				.input3		(vdp_or_rslt_mu),
				.input4		(vdp_nor_rslt_mu),
				.input5		(vdp_xor_rslt_mu),
				.input6		(vdp_xnor_rslt_mu),
				.input7		(vrf_vtdata_mu),
				.select		(vdpalumu),
				.output_data	(vdp_alu_rslt_mu)
		         ) ;


/*
*	registers at the end of the mu pipeline stage.
*
*	Each dp_regmx16 has two 16 bit data inputs and a 2 bit select; the
*	select encoding is defined in dp_regmx16 (not visible in this file).
*	In every case input0 is the data from the vum* port and input1 is a
*	locally generated operand.
*/

	wire	[15:0]	vdp_couupr_reg_ac;  // multiply cout upper 16 bits at beginning of ac stage

dp_regmx16 vdpcoutuprmu_i (
				.clk		(clk),
				.input0		(vumcarryuppermu),
				.input1		(vdp_alu_rslt_mu),
				.select		(vdpcoutuprmu),
				.output_data	(vdp_couupr_reg_ac)
		           ) ;


	wire	[15:0]	vdp_sumupr_reg_ac;  // multiply sum upper 16 bits at beginning of ac stage

dp_regmx16 vdpsumuprmu_i (
				.clk		(clk),
				.input0		(vumsumuppermu),
				.input1		(vrf_vsdata_mu),
				.select		(vdpsumuprmu),
				.output_data	(vdp_sumupr_reg_ac)
		          ) ;


	wire	[15:0]	vdp_coulwr_reg_ac;  // multiply cout lwr 16 bits at beginning of ac stage

dp_regmx16 vdpcoutlwrmu_i (
				.clk		(clk),
				.input0		(vumcarrylowermu),
				.input1		(vdp_alu_rslt_mu),
				.select		(vdpcoutlwrmu),
				.output_data	(vdp_coulwr_reg_ac)
		           ) ;


	wire	[15:0]	vdp_sumlwr_reg_ac;	// multiply sum lwr 16 bits at beginning of ac pipeline stage

dp_regmx16 vdpsumlwrmu_i (
				.clk		(clk),
				.input0		(vumsumlowermu),
				.input1		(vdp_xnor_vt_mu),
				.select		(vdpsumlwrmu),
				.output_data	(vdp_sumlwr_reg_ac)
		          ) ;


/*
*	Muxes for input operands to lower 16 bits of the carry save adder.
*/

	wire	[15:0]	vdp_csalwra_in_ac;  // input a to lower csa in AC pipeline stage
	wire	[15:0]	vdp_csalwrb_in_ac;  // input b to lower csa in AC pipeline stage
	wire	[15:0]	vdp_csalwrc_in_ac;  // input c to lower csa in AC pipeline stage
	wire	[15:0]	vdp_rndlwr_in_ac;   // round data input to lower csa in AC pipeline stage


	// Expand the 4 bit round control into 16 bits:
	//   bit  [15]   = vdprundvluac[3]
	//   bits [14:6] = vdprundvluac[2]
	//   bit  [5]    = vdprundvluac[1]
	//   bits [4:0]  = vdprundvluac[0]
	assign vdp_rndlwr_in_ac =	{ vdprundvluac[3], {9{vdprundvluac[2]}},
					  vdprundvluac[1], {5{vdprundvluac[0]}}
					} ;


dp_2to1mx16 vdpcsalwrcinac_i (
				.input0		(vdp_sumlwr_reg_ac),
				.input1		(vdp_sumupr_reg_ac),
				.select		(vdpcsalwrcinac),
				.output_data	(vdp_csalwrc_in_ac)
			       ) ;


dp_4to1mx16 vdpcsalwrbinac_i (
				.input0		(16'h0000),
				.input1		(vdp_coulwr_reg_ac),
				.input2		(vdp_rndlwr_in_ac),
				.input3		(vdp_couupr_reg_ac),
				.select		(vdpcsalwrbinac),
				.output_data	(vdp_csalwrb_in_ac)
			       ) ;


	// Accumulator register outputs are declared here, ahead of the
	// registers themselves, because they feed back into the csa input muxes.
	wire	[15:0]	vdp_acmid_reg_wb;  // middle 16 bits of accumulator at start of wb stage

	wire	[15:0]	vdp_aclwr_reg_wb;  // lower 16 bits of accumulator at start of wb stage

dp_4to1mx16 vdpcsalwrainac_i (
				.input0		(16'h0000),
				.input1		(vdp_aclwr_reg_wb),
				.input2		(vdp_acmid_reg_wb),
				.input3		(vdp_rndlwr_in_ac),
				.select		(vdpcsalwrainac),
				.output_data	(vdp_csalwra_in_ac)
			       ) ;

/*
*	Muxes for input operands to upper 16 bits of the carry save adder.
*/

	wire	[15:0]	vdp_csaupra_in_ac;  // input a to upper csa in AC pipeline stage
	wire	[15:0]	vdp_csauprb_in_ac;  // input b to upper csa in AC pipeline stage
	wire	[15:0]	vdp_csauprc_in_ac;  // input c to upper csa in AC pipeline stage
	wire	[15:0]	vdp_rndupr_in_ac;   // round data input to upper csa in AC pipeline stage


	// Upper round word is vdprundvluac[3] replicated across all 16 bits,
	// i.e. the top bit of vdp_rndlwr_in_ac extended upward.
	assign vdp_rndupr_in_ac =	{ {16{vdprundvluac[3]}} } ;

	// Bitwise AND with the replicated control bit: passes the upper sum
	// register when vdpcsauprcinnandac is 1, all zeros when it is 0.
	assign vdp_csauprc_in_ac =	( {16{vdpcsauprcinnandac}} & vdp_sumupr_reg_ac ) ;

dp_4to1mx16 vdpcsauprbinac_i (
				.input0		(16'h0000),
				.input1		(vdp_couupr_reg_ac),
				.input2		(vdp_rndupr_in_ac),
				.input3		(16'h0000),
				.select		(vdpcsauprbinac),
				.output_data	(vdp_csauprb_in_ac)
			       ) ;


	wire	[15:0]	vdp_acupr_reg_wb;	// upper 16 bits of accumulator at start of wb stage

dp_4to1mx16 vdpcsauprainac_i (
				.input0		(16'h0000),
				.input1		(vdp_acmid_reg_wb),
				.input2		(vdp_acupr_reg_wb),
				.input3		(16'h0000),
				.select		(vdpcsauprainac),
				.output_data	(vdp_csaupra_in_ac)
			       ) ;
/*
*	Three input carry save adder for 32 bits of accumulation.
*/

	wire	[31:0]	vdp_csa_ina_ac;  // input a to CSA in AC pipeline stage
	wire	[31:0]	vdp_csa_inb_ac;  // input b to CSA in AC pipeline stage
	wire	[31:0]	vdp_csa_inc_ac;  // input c to CSA in AC pipeline stage
	wire	[31:0]	vdp_add_ina_ac;  // input a to adder in AC pipeline stage
	wire	[31:0]	vdp_add_inb_ac;  // input b to adder in AC pipeline stage
	wire	[31:0]	vdp_add_out_ac;  // result from adder in AC pipeline stage

	// Each 32 bit csa operand is { upper 16 bit mux output, lower 16 bit mux output }.
	assign	vdp_csa_ina_ac =	{ vdp_csaupra_in_ac, vdp_csalwra_in_ac } ;

	assign	vdp_csa_inb_ac =	{ vdp_csauprb_in_ac, vdp_csalwrb_in_ac } ;

	assign	vdp_csa_inc_ac =	{ vdp_csauprc_in_ac, vdp_csalwrc_in_ac } ;


// Note the port order: operand c is wired to input2 and operand b to input3.
dp_csa32 vdpcsaac_i	(
				.carryin	(vdpcsaac_cin),
				.input1		(vdp_csa_ina_ac),
				.input2		(vdp_csa_inc_ac),
				.input3		(vdp_csa_inb_ac),
				.partial_sum	(vdp_add_ina_ac),
				.partial_carry	(vdp_add_inb_ac),
				.carryout	(vdpcsaac_cout)
			) ;


/*
*	Two input carry propagate adder for 32 bits of accumulation.
*	It sums the partial_sum and partial_carry outputs of the csa.
*/

	// NOTE(review): vdp_add_lwr_ac is declared but not referenced anywhere
	// else in this module.
	wire	[15:0]	vdp_add_lwr_ac;		// adder output for lower 16 bits in ac pipeline stage

// Carry outs of bits 14, 15, 30 and 31 (per the port names) are exported
// as the lower/upper "ovr" and "cout" outputs of the module.
dp_adder32 vdpadderac_i (
				.input1		(vdp_add_ina_ac),
				.input2		(vdp_add_inb_ac),
				.carryin	(vdpadderlwrac_cin),
				.sumout		(vdp_add_out_ac),
				.carryout14	(vdpadderlwrac_ovr),
				.carryout15	(vdpadderlwrac_cout),
				.carryout30	(vdpadderuprac_ovr),
				.carryout31	(vdpadderuprac_cout)
			   ) ;


/*
*	Upper 16 bits of accumulation is implemented with incrementer/decrementer since
*	we are really adding one 48 bit number (accumulator) to two 32 bit numbers
*	partial_carry and partial_sum.
*/

	wire	[15:0]	vdp_incr_data_ac;	// output of incrementer/decrementer in ac stage
	wire	[15:0]	vdp_addupr_sg_ac;	// sign extended output of upper adder in ac stage


	// Bit 31 of the 32 bit adder result replicated across 16 bits.
	assign vdp_addupr_sg_ac =	{ 16{vdp_add_out_ac[31]} } ;


// Operates on the current upper accumulator register value.
dp_incdec16 vdpincremac_i (
				.input_data		(vdp_acupr_reg_wb),
				.decrement		(vdpincremac_dwn),
				.carryin		(vdpincremac_cin),
				.output_data		(vdp_incr_data_ac),
				.carryout		(vdpincremac_cout)
		       	    ) ;


	wire	[15:0]	vdp_incr_mx_ac;		// output of incrementer mux in ac pipeline stage

// Chooses between the sign extension of the adder result (input0) and
// the incrementer/decrementer output (input1).
dp_2to1mx16 vdpincremxac_i (
				.input0			(vdp_addupr_sg_ac),
				.input1			(vdp_incr_data_ac),
				.select			(vdpincremxac),
				.output_data		(vdp_incr_mx_ac)
			   ) ;

/*
*	registers at the end of the ac pipeline stage.
*
*	These three registered muxes hold the accumulator as
*	{ vdp_acupr_reg_wb, vdp_acmid_reg_wb, vdp_aclwr_reg_wb }.  The middle
*	and lower registers can each load either half of the 32 bit adder
*	result; the upper register loads the incrementer mux output or the
*	upper half of the adder result.  The upper register uses the
*	dp_regmx16hp cell rather than dp_regmx16.
*/

dp_regmx16hp vdpaccumuprac_i (
				.clk		(clk),
				.input0		(vdp_incr_mx_ac),
				.input1		(vdp_add_out_ac[31:16]),
				.select		(vdpaccuprmxac),
				.output_data	(vdp_acupr_reg_wb)
		            ) ;


dp_regmx16 vdpaccummidac_i (
				.clk		(clk),
				.input0		(vdp_add_out_ac[31:16]),
				.input1		(vdp_add_out_ac[15:0]),
				.select		(vdpaccummidac),
				.output_data	(vdp_acmid_reg_wb)
		            ) ;


dp_regmx16 vdpaccumlwrac_i (
				.clk		(clk),
				.input0		(vdp_add_out_ac[15:0]),
				.input1		(vdp_add_out_ac[31:16]),
				.select		(vdpaccumlwrac),
				.output_data	(vdp_aclwr_reg_wb)
		            ) ;

	// Sign taps.  With the accumulator viewed as { upper, middle, lower },
	// these are accumulator bits 15, 21, 31 and 47 respectively.
	assign	vdpaccsign15wb =	vdp_aclwr_reg_wb[15] ;

	assign	vdpaccsign21wb =	vdp_acmid_reg_wb[5] ;

	assign	vdpaccsign31wb =	vdp_acmid_reg_wb[15] ;

	assign	vdpaccsign47wb =	vdp_acupr_reg_wb[15] ;


/*
*	Zero and all ones detection on upper portion of accumulator.
*/


dp_zerodet vdpacchighzerowb_i (
				.input_data		(vdp_acupr_reg_wb),
				.out			(vdpacchighzerowb_z)
			      ) ;

dp_onedet vdpacchighonewb_i (
				.input_data		(vdp_acupr_reg_wb),
				.out			(vdpacchighonewb_z)
			    ) ;
/*
*	assign	vdpacchighzerowb_z =	( vdp_acupr_reg_wb == 16'h0000 ) ;
*
*	assign	vdpacchighonewb_z =	( vdp_acupr_reg_wb == 16'hffff ) ;
*/


/*
*	Zero detection on bits [15:5] of the middle portion of accumulator
*	(accumulator bits 31:21).  There is no all ones detection here.
*/

	assign	vdpaccmidzerowb_z =	( vdp_acmid_reg_wb[15:5] == 11'h000 ) ;


/*
*	Mux for selecting the result for writing to the register file.
*/

	wire	[15:0]	vdp_exclp_rslt_wb;	// expanded 16 bits clamp result

	// Expand the 3 bit clamp control into 16 bits:
	//   bit  [15]   = vdpclprsltwb[2]
	//   bits [14:4] = vdpclprsltwb[1]
	//   bits [3:0]  = vdpclprsltwb[0]
	assign	vdp_exclp_rslt_wb =	{ 	vdpclprsltwb[2],
						{11{vdpclprsltwb[1]}},
						{4{vdpclprsltwb[0]}}
					} ;

	wire	[15:0]	vdp_mulq_rslt_wb;	// 16 bit mulq result

	// Bit 0 of the upper register followed by bits [15:5] of the middle
	// register (accumulator bits 32:21), with the low 4 result bits zero.
	assign	vdp_mulq_rslt_wb =	{ 	vdp_acupr_reg_wb[0],
						vdp_acmid_reg_wb[15:5],
						4'h0
					} ;

	wire	[15:0]	vdp_slct_data_wb;	// result mux output, before the output buffer

// Result mux.  Data inputs in port order: zero, accumulator upper,
// accumulator middle, accumulator lower, mulq result, expanded clamp
// result, divide result, zero.
dp_8to1mx16 vdpslctmxwb_i (
				.input0		(16'h0000),
				.input1		(vdp_acupr_reg_wb),
				.input2		(vdp_acmid_reg_wb),
				.input3		(vdp_aclwr_reg_wb),
				.input4		(vdp_mulq_rslt_wb),
				.input5		(vdp_exclp_rslt_wb),
				.input6		(vdpdivrsltwb),
				.input7		(16'h0000),
				.select		(vdpslctmxwb),
				.output_data	(vdp_slct_data_wb)
		     ) ;


// Output buffer cell between the result mux and the module output.
dp_buf16 buf_wb_rslt (
			.input_data		(vdp_slct_data_wb),
			.output_data		(vdp_rslt_data_wb)
		      ) ;


endmodule  // vudp_nonrf