// lsdp.v 12.7 KB
// Module instances modified by /home/rws/workarea/rf/sw/bbplayer/tools/necprimfix 
//
//    16 instances of mx41d2 changed to j_mx41.
//    6 instances of ni01d5 changed to j_ni01.
//

/**************************************************************************
 *                                                                        *
 *               Copyright (C) 1994, Silicon Graphics, Inc.               *
 *                                                                        *
 *  These coded instructions, statements, and computer programs  contain  *
 *  unpublished  proprietary  information of Silicon Graphics, Inc., and  *
 *  are protected by Federal copyright  law.  They  may not be disclosed  *
 *  to  third  parties  or copied or duplicated in any form, in whole or  *
 *  in part, without the prior written consent of Silicon Graphics, Inc.  *
 *                                                                        *
 *************************************************************************/
// $Id: lsdp.v,v 1.4 2002/11/13 02:11:41 rws Exp $

// lsdp.v		muxes and rotators for RSP SU and VU loads and stores

`timescale 1ns / 10ps

// lsdp: load/store datapath muxes and rotators.
//
//   Store path : ls_data -> compaction mux (vu_ex_st_dec[9:6]) -> word rotate
//                (ex_rot[3:2]) -> byte rotate (ex_rot[1:0]) -> final mux with
//                DMA write data / cp0_data -> register -> df_datain.
//   Load path  : dmem_dataout -> word rotate (wb_rot[3:2]) -> byte rotate
//                (wb_rot[1:0]) -> expansion / sign-extension / pass-through
//                select (load_data_sl) -> gated onto ls_ls_data by ls_drive_ls.
//   DMA paths  : 64-bit halves are staged through one register in each
//                direction (dmem_rd_data, dma_data_to_dmem).
//   CP0 path   : cp0_data_out carries either the pc (when halt) or
//                ls_data[127:96], gated by cp0_write || halt.
//
// NOTE(review): reset_l is declared as an input but is not referenced anywhere
// in this module. Only bits [9:6] of vu_ex_st_dec are used here.
// NOTE(review): asdff and the j_* cells are defined outside this file; asdff
// appears to be a register with ports (q, d, clk, enable) and parameters
// (width, reset value) -- TODO confirm against its definition.
module lsdp (clk, reset_l, halt, pc, ex_dma_wen_swap, ex_dma_wen_noswap, 
	vu_ex_st_dec, ex_su_byte_ls, ex_su_half_ls, 
	ex_mfc0, cp0_write, cp0_data, cp0_data_out, 
	mem_write_data, ex_rot, wb_rot, wb_pass_thru, 
	wb_mfc2, wb_cfc2, wb_su_uns_ls, wb_su_load,
	dmem_dataout, vu_wb_ld_dec, ls_drive_ls, wb_dma_dm_to_rd, 
	ls_data, ls_ls_data, df_datain, dmem_rd_data);

    input		clk;
    input		reset_l;		// not referenced in this module
    input		halt;			// selects pc as the cp0_data_out source
						// IF Stage Inputs
    input	[11:2]	pc;
						// EX Stage Inputs
    input	[11:0]	vu_ex_st_dec;		// only bits [9:6] are used below
    input		ex_su_byte_ls;
    input		ex_su_half_ls;
    input		ex_mfc0;		// selects cp0_data in the final store mux
    input		cp0_write;
    input		ex_dma_wen_swap;	// DMA write data, 64-bit halves swapped
    input		ex_dma_wen_noswap;	// DMA write data, halves in order
    input	[63:0]	mem_write_data;
    input	[3:0]	ex_rot;			// store rotate: [3:2] words, [1:0] bytes
    input	[3:0]	wb_rot;			// load rotate:  [3:2] words, [1:0] bytes

						// WB Stage Inputs
    input		wb_pass_thru;
    input		wb_mfc2;
    input		wb_cfc2;
    input		wb_su_uns_ls;		// suppresses sign extension on byte/half loads
    input		wb_su_load;
    input	[127:0]	dmem_dataout;
    input	[9:6]	vu_wb_ld_dec;
    input		ls_drive_ls;		// gates load_data onto ls_ls_data
    input		wb_dma_dm_to_rd;

    input	[127:0]	ls_data;
    output	[127:0]	ls_ls_data;
    input	[31:0]	cp0_data;
    output	[31:0]	cp0_data_out;

    output	[127:0]	df_datain;		// final store data to dmem
    output 	[63:0] 	dmem_rd_data;

    wire		df_su_byte_ls;
    wire		df_su_half_ls;
    wire		wb_su_byte_ls;
    wire		wb_su_half_ls;
    wire	[127:0]	dma_data_to_dmem;
    wire	[127:0]	dp_to_dmem_0th;		// compaction/expansion output
    wire	[127:0]	dp_to_dmem_1st;		// word rotation
    wire	[127:0]	dp_to_dmem_2nd;		// byte rotation
    wire	[127:0]	dp_to_dmem_3rd;		// mux in dma data
    wire	[127:0]	wb_datain;	// pass-through data (MT, MF, CT, CF)
    wire	[127:0]	wb_datain_sxt;
    wire	[127:0]	dmem_to_dp_1st;	    	// word-rot'd dmem data 
    wire	[127:0]	dmem_to_dp_2nd;		// byte rotation output
    wire	[127:0]	load_data;		// load expansion output
    wire	[63:0] 	sec_rd_data;
    wire	[63:0] 	secondary_write_data;
    wire	[31:0]	cp0_source;
    wire		cp0_data_enable;

// CP0 output: while halted the 10-bit pc[11:2] is zero-extended to 32 bits,
// otherwise the top word of ls_data is forwarded.
assign cp0_source = halt ? {22'b0, pc} : ls_data[127:96];
assign cp0_data_enable = cp0_write || halt;
// The driver instance below is commented out; cp0_data_out is instead forced
// to zero whenever cp0_data_enable is low.
//cp0_driver cp0_driver_ls(cp0_source, cp0_data_enable, cp0_data_out);
wire [31:0] cp0_data_out = cp0_data_enable ? cp0_source : 32'b0;


// Datapaths to DMem

wire [127:0] dp_to_dmem_0th_reg;
wire [1:0] dp_to_dmem_0th_sl;    

assign dp_to_dmem_0th = dp_to_dmem_0th_reg;

// Select encoding for the compaction mux (decode bits assumed one-hot):
//   none of [9:6] -> 2'b00 (i0)      vu_ex_st_dec[8] -> 2'b10 (i2)
//   [6] or [7]    -> 2'b01 (i1)      vu_ex_st_dec[9] -> 2'b11 (i3)
// mutual exclusion assumed
// assign dp_to_dmem_0th_sl[0] = ex_mfc0;
assign dp_to_dmem_0th_sl[0] = vu_ex_st_dec[6] || vu_ex_st_dec[7] || vu_ex_st_dec[9] ; 
assign dp_to_dmem_0th_sl[1] = vu_ex_st_dec[8] || vu_ex_st_dec[9] ;

// i0: ls_data unchanged.
// i1: upper 64 bits = bits [15:8] of each of the eight 16-bit lanes of ls_data;
//     lower 64 bits = bits [14:7] of each lane.  Both vu_ex_st_dec[6] and [7]
//     select this input; which half reaches memory is not decided here
//     (presumably by the rotate amount / write enables -- TODO confirm).
// i2: ls_data shifted left by one bit, zero filled.
// i3: four 32-bit groups, each holding bits [14:7] of two lanes followed by
//     16 zero bits.
lsdp_mux4x1_128 ls_dp_to_dmem_0th(.z(dp_to_dmem_0th_reg),
				  .s(dp_to_dmem_0th_sl),
				  .i0(ls_data),
				  .i1( {ls_data[127:120], ls_data[111:104], ls_data[95:88],  
					ls_data[79:72],   ls_data[63:56],   ls_data[47:40],
					ls_data[31:24],   ls_data[15:8],
					ls_data[126:119], ls_data[110:103], ls_data[94:87],
					ls_data[78:71],   ls_data[62:55],   ls_data[46:39],
					ls_data[30:23],   ls_data[14:7]
				       }
				     ),
				  .i2( {ls_data[126:0], 1'b0}),		// half
				  .i3( {ls_data[126:119], ls_data[30:23], 16'b0,
					ls_data[110:103], ls_data[14:7],  16'b0,
					ls_data[94:87],   ls_data[62:55], 16'b0,
					ls_data[78:71],   ls_data[46:39], 16'b0
				       }
				     )					//fourth
				 );


// ex_rot[3:2] = 0/1/2/3 rotates the 128-bit value right by 0/32/64/96 bits.
lsdp_mux4x1_128 ls_dp_to_dmem_1st(.z(dp_to_dmem_1st),			// word rotation
				 .s(ex_rot[3:2]),
				 .i0(dp_to_dmem_0th),
				 .i1({dp_to_dmem_0th[31:0], dp_to_dmem_0th[127:32]}),
				 .i2({dp_to_dmem_0th[63:0], dp_to_dmem_0th[127:64]}),
				 .i3({dp_to_dmem_0th[95:0], dp_to_dmem_0th[127:96]})
				);

// ex_rot[1:0] = 0/1/2/3 rotates the 128-bit value right by 0/8/16/24 bits.
lsdp_mux4x1_128 ls_dp_to_dmem_2nd(.z(dp_to_dmem_2nd),			// byte rotation
				        .s(ex_rot[1:0]),
				        .i0(dp_to_dmem_1st),
				        .i1({dp_to_dmem_1st[7:0],dp_to_dmem_1st[127:8]}),
				        .i2({dp_to_dmem_1st[15:0],dp_to_dmem_1st[127:16]}),
				        .i3({dp_to_dmem_1st[23:0],dp_to_dmem_1st[127:24]})
				       );
// Sneak path for dma to dmem to avoid collision with potential load in WB:
// *** Probably no longer an issue with duplicated rotators.

// Final store mux select (the three controls are assumed mutually exclusive):
//   2'b00 rotated store data      2'b10 DMA data with 64-bit halves swapped
//   2'b01 DMA data, not swapped   2'b11 cp0_data in the top word (ex_mfc0)
wire [1:0] dp_to_dmem_3rd_sl;
assign dp_to_dmem_3rd_sl[0] = ex_dma_wen_noswap || ex_mfc0;
assign dp_to_dmem_3rd_sl[1] = ex_dma_wen_swap   || ex_mfc0;

lsdp_mux4x1_128 ls_dp_to_dmem_3rd(.z(dp_to_dmem_3rd),
				 .s(dp_to_dmem_3rd_sl[1:0]),
				 .i0(dp_to_dmem_2nd),
				 .i1(dma_data_to_dmem),
				 .i2({dma_data_to_dmem [63:0],dma_data_to_dmem [127:64]}),
				 .i3({cp0_data,96'h0})
				);

// First register stage: ex_* controls and the muxed store data become df_*.
asdff #(1, 0) vu_ed_byte_ff (df_su_byte_ls, ex_su_byte_ls, clk, 1'b1);
asdff #(1, 0) vu_ed_half_ff (df_su_half_ls, ex_su_half_ls, clk, 1'b1);
asdff #(128, 0) vu_ed_datain_ff (df_datain, dp_to_dmem_3rd, clk, 1'b1);


// Second register stage: df_* becomes wb_*; wb_datain is the registered copy
// of df_datain used for the pass-through load select below.
asdff #(1, 0) vu_dw_byte_ff (wb_su_byte_ls, df_su_byte_ls, clk, 1'b1);
asdff #(1, 0) vu_dw_half_ff (wb_su_half_ls, df_su_half_ls, clk, 1'b1);
asdff #(128, 0) vu_dw_dp_to_dm_ff (wb_datain, df_datain, clk, 1'b1);

/* ******************************************************************** */ 
// DMem to Datapaths

// Same rotators as the store path, driven by wb_rot: [3:2] rotates right by
// 0/32/64/96 bits, [1:0] rotates right by a further 0/8/16/24 bits.
lsdp_mux4x1_128 ls_dmem_to_dp_1st(.z(dmem_to_dp_1st),
				 .s(wb_rot[3:2]),
				 .i0(dmem_dataout),
				 .i1({dmem_dataout[31:0], dmem_dataout[127:32]}),
				 .i2({dmem_dataout[63:0], dmem_dataout[127:64]}),
				 .i3({dmem_dataout[95:0], dmem_dataout[127:96]})
				);
lsdp_mux4x1_128 ls_dmem_to_dp_2nd(.z(dmem_to_dp_2nd),
				        .s(wb_rot[1:0]),
				        .i0(dmem_to_dp_1st),
				        .i1({dmem_to_dp_1st[7:0],dmem_to_dp_1st[127:8]}),
				        .i2({dmem_to_dp_1st[15:0],dmem_to_dp_1st[127:16]}),
				        .i3({dmem_to_dp_1st[23:0],dmem_to_dp_1st[127:24]})
				       );

// mutual exclusion assumed
wire wb_sxt_cp2;	// *** move all this to df then just latch for wb.
wire wb_sxt_half;	// *** move all this to df then just latch for wb.
wire wb_sxt_byte;	// *** move all this to df then just latch for wb.
wire [127:112] wb_datain_high;

assign wb_sxt_cp2 =  wb_cfc2 || wb_mfc2;
assign wb_sxt_half = wb_su_load && wb_su_half_ls;
assign wb_sxt_byte = wb_su_load && wb_su_byte_ls;
// For cfc2/mfc2 the 16-bit field wb_datain[111:96] is sign-extended into
// [127:112]; otherwise wb_datain passes through unchanged.
assign wb_datain_high = wb_sxt_cp2 ? {16{wb_datain[111]}} : wb_datain[127:112];
assign wb_datain_sxt = {wb_datain_high, wb_datain[111:0]};

reg [127:0] load_data_reg;
wire [7:0] load_data_sl;

assign load_data = load_data_reg;

// load_data_sl is the select vector for the load result:
//   [3:0] expansion formats from vu_wb_ld_dec[9:6]
//   [4]   plain rotated dmem data (no other source requested)
//   [5]   pass-through of wb_datain_sxt
//   [6]   halfword load, [7] byte load (sign- or zero-extended)
// mutual exclusion assumed
assign load_data_sl[0] = vu_wb_ld_dec[6];
assign load_data_sl[1] = vu_wb_ld_dec[7];
assign load_data_sl[2] = vu_wb_ld_dec[8];
assign load_data_sl[3] = vu_wb_ld_dec[9];
assign load_data_sl[4] = (vu_wb_ld_dec[9:6]==4'h0) && !wb_pass_thru && 
	!wb_sxt_half && !wb_sxt_byte;
assign load_data_sl[5] = wb_pass_thru;
assign load_data_sl[6] = wb_sxt_half;
assign load_data_sl[7] = wb_sxt_byte;

// Combinational load-format mux.  The default assignment keeps the block
// latch-free: load_data_reg is zero when no select bit is set.
always @(load_data_sl or dmem_to_dp_2nd or wb_datain_sxt or wb_su_uns_ls)
begin

  load_data_reg = 128'h0;
  // case (1'b1): the first select bit found set, in the order listed, wins.
  case (1'b1) 			//
   // Bytes [127:64] each placed in bits [15:8] of a 16-bit lane, low byte zero.
   load_data_sl[0]: load_data_reg =				// pack 
		{dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[119:112], 8'b0,
       		dmem_to_dp_2nd[111:104], 8'b0, dmem_to_dp_2nd[103:96],  8'b0,
       		dmem_to_dp_2nd[95:88],   8'b0, dmem_to_dp_2nd[87:80],   8'b0,
       		dmem_to_dp_2nd[79:72],   8'b0, dmem_to_dp_2nd[71:64],   8'b0};
   // Same bytes as "pack" but shifted right one bit: each lands in lane bits [14:7].
   load_data_sl[1]: load_data_reg = {1'b0,                     // unsigned pack
       		dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[119:112], 8'b0,
       		dmem_to_dp_2nd[111:104], 8'b0, dmem_to_dp_2nd[103:96],  8'b0,
       		dmem_to_dp_2nd[95:88],   8'b0, dmem_to_dp_2nd[87:80],   8'b0,
       		dmem_to_dp_2nd[79:72],   8'b0, dmem_to_dp_2nd[71:64],   7'b0}; 
   // Every second byte of the full 128 bits, each placed in lane bits [14:7].
   load_data_sl[2]: load_data_reg = {1'b0,                     // half
       		dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[111:104], 8'b0,
       		dmem_to_dp_2nd[95:88],   8'b0, dmem_to_dp_2nd[79:72],   8'b0,
       		dmem_to_dp_2nd[63:56],   8'b0, dmem_to_dp_2nd[47:40],   8'b0,
           	dmem_to_dp_2nd[31:24],   8'b0, dmem_to_dp_2nd[15:8],    7'b0};
   // Every fourth byte, each in lane bits [14:7]; the lower four lanes reuse
   // the same four source bytes in the order [63:56],[31:24],[127:120],[95:88].
   // NOTE(review): the repeated/reordered bytes look deliberate -- confirm
   // against the instruction definition before changing.
   load_data_sl[3]: load_data_reg = {1'b0,                     // fourth
       		dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[95:88], 8'b0,
       		dmem_to_dp_2nd[63:56],   8'b0, dmem_to_dp_2nd[31:24], 8'b0,
       		dmem_to_dp_2nd[63:56],   8'b0, dmem_to_dp_2nd[31:24], 8'b0,
       		dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[95:88], 7'b0};
   load_data_sl[4]: load_data_reg = dmem_to_dp_2nd;	// rotated data, unmodified
   load_data_sl[5]: load_data_reg = wb_datain_sxt;	// pass-through data
   // Halfword: [127:112] filled with bit 111, or with zeros when wb_su_uns_ls.
   load_data_sl[6]: load_data_reg = {{16{(dmem_to_dp_2nd[111] && !wb_su_uns_ls)}}, dmem_to_dp_2nd[111:0]};
   // Byte: [127:104] filled with bit 103, or with zeros when wb_su_uns_ls.
   load_data_sl[7]: load_data_reg = {{24{(dmem_to_dp_2nd[103] && !wb_su_uns_ls)}}, dmem_to_dp_2nd[103:0]};
  endcase	    
end 

// The driver instance below is commented out; ls_ls_data is instead forced to
// zero whenever ls_drive_ls is low.
//ls_data_driver ls_data_driver (
//	.in(load_data), 
//	.out(ls_data_out), 
//	.enable(ls_drive_ls));

wire [127:0] ls_ls_data = ls_drive_ls ? load_data : 128'b0;

// DMA DMem to RDRAM
// The lower 64 bits of the word-rotated dmem data are registered; the output
// is the upper 64 bits directly when wb_dma_dm_to_rd is set, otherwise the
// registered lower half.
asdff #(64,0) dma_sec_rd_ff (sec_rd_data, dmem_to_dp_1st[63:0], clk, 1'b1);
assign dmem_rd_data = 
	wb_dma_dm_to_rd  ? dmem_to_dp_1st[127:64] : sec_rd_data;

// DMA RDRAM to DMem
// The 128-bit DMA write value is the registered copy of mem_write_data (upper
// 64 bits) concatenated with the current mem_write_data (lower 64 bits).
asdff #(64,0) dma_sec_wr_ff (secondary_write_data, mem_write_data, clk, 1'b1);
assign dma_data_to_dmem = {secondary_write_data, mem_write_data};

endmodule

// lsdp_mux4x1_16: 16-bit wide 4-to-1 multiplexer built from one j_mx41 cell
// per output bit.
//   z        16-bit selected output
//   i0..i3   16-bit data inputs
//   s        2-bit select; s[0] drives the s0 pins, s[1] drives the s1 pins
// Each select bit goes through two separate j_ni01 cells: one copy feeds the
// cells for bits [7:0], the other feeds the cells for bits [15:8].
// NOTE(review): j_ni01 and j_mx41 are library cells defined outside this
// file; j_ni01 is presumably a non-inverting buffer and j_mx41 presumably
// selects i<N> for {s1,s0} == N -- confirm against the cell library.
module lsdp_mux4x1_16(z, i0, i1, i2, i3, s);
    output [15:0] z;
    input  [15:0] i0, i1, i2, i3;
    input  [1:0]  s;

    // Select copies for the low byte (bits 7:0) and the high byte (bits 15:8).
    wire sel0_lo, sel1_lo;
    wire sel0_hi, sel1_hi;

    j_ni01 u_s0x (.i(s[0]), .z(sel0_lo));
    j_ni01 u_s1x (.i(s[1]), .z(sel1_lo));
    j_ni01 u_s0y (.i(s[0]), .z(sel0_hi));
    j_ni01 u_s1y (.i(s[1]), .z(sel1_hi));

    // Bits 7:0 -- driven by the "lo" select copies.
    j_mx41 u_00 (.s0(sel0_lo), .s1(sel1_lo), .i0(i0[0]),  .i1(i1[0]),  .i2(i2[0]),  .i3(i3[0]),  .z(z[0]));
    j_mx41 u_01 (.s0(sel0_lo), .s1(sel1_lo), .i0(i0[1]),  .i1(i1[1]),  .i2(i2[1]),  .i3(i3[1]),  .z(z[1]));
    j_mx41 u_02 (.s0(sel0_lo), .s1(sel1_lo), .i0(i0[2]),  .i1(i1[2]),  .i2(i2[2]),  .i3(i3[2]),  .z(z[2]));
    j_mx41 u_03 (.s0(sel0_lo), .s1(sel1_lo), .i0(i0[3]),  .i1(i1[3]),  .i2(i2[3]),  .i3(i3[3]),  .z(z[3]));
    j_mx41 u_04 (.s0(sel0_lo), .s1(sel1_lo), .i0(i0[4]),  .i1(i1[4]),  .i2(i2[4]),  .i3(i3[4]),  .z(z[4]));
    j_mx41 u_05 (.s0(sel0_lo), .s1(sel1_lo), .i0(i0[5]),  .i1(i1[5]),  .i2(i2[5]),  .i3(i3[5]),  .z(z[5]));
    j_mx41 u_06 (.s0(sel0_lo), .s1(sel1_lo), .i0(i0[6]),  .i1(i1[6]),  .i2(i2[6]),  .i3(i3[6]),  .z(z[6]));
    j_mx41 u_07 (.s0(sel0_lo), .s1(sel1_lo), .i0(i0[7]),  .i1(i1[7]),  .i2(i2[7]),  .i3(i3[7]),  .z(z[7]));

    // Bits 15:8 -- driven by the "hi" select copies.
    j_mx41 u_08 (.s0(sel0_hi), .s1(sel1_hi), .i0(i0[8]),  .i1(i1[8]),  .i2(i2[8]),  .i3(i3[8]),  .z(z[8]));
    j_mx41 u_09 (.s0(sel0_hi), .s1(sel1_hi), .i0(i0[9]),  .i1(i1[9]),  .i2(i2[9]),  .i3(i3[9]),  .z(z[9]));
    j_mx41 u_10 (.s0(sel0_hi), .s1(sel1_hi), .i0(i0[10]), .i1(i1[10]), .i2(i2[10]), .i3(i3[10]), .z(z[10]));
    j_mx41 u_11 (.s0(sel0_hi), .s1(sel1_hi), .i0(i0[11]), .i1(i1[11]), .i2(i2[11]), .i3(i3[11]), .z(z[11]));
    j_mx41 u_12 (.s0(sel0_hi), .s1(sel1_hi), .i0(i0[12]), .i1(i1[12]), .i2(i2[12]), .i3(i3[12]), .z(z[12]));
    j_mx41 u_13 (.s0(sel0_hi), .s1(sel1_hi), .i0(i0[13]), .i1(i1[13]), .i2(i2[13]), .i3(i3[13]), .z(z[13]));
    j_mx41 u_14 (.s0(sel0_hi), .s1(sel1_hi), .i0(i0[14]), .i1(i1[14]), .i2(i2[14]), .i3(i3[14]), .z(z[14]));
    j_mx41 u_15 (.s0(sel0_hi), .s1(sel1_hi), .i0(i0[15]), .i1(i1[15]), .i2(i2[15]), .i3(i3[15]), .z(z[15]));

endmodule

// lsdp_mux4x1_128: 128-bit wide 4-to-1 multiplexer built from eight
// lsdp_mux4x1_16 slices that share one copy of the select.
//   z        128-bit selected output
//   i0..i3   128-bit data inputs
//   s        2-bit select, passed through one j_ni01 cell per bit before
//            fanning out to the slices
// NOTE(review): j_ni01 is a library cell defined outside this file,
// presumably a non-inverting buffer -- confirm against the cell library.
module lsdp_mux4x1_128(z, i0, i1, i2, i3, s);
    output [127:0] z;
    input  [127:0] i0, i1, i2, i3;
    input  [1:0]   s;

    // Copy of the select that is distributed to all eight slices.
    wire [1:0] sel_buf;

    j_ni01 u_sx0 (.i(s[0]), .z(sel_buf[0]));
    j_ni01 u_sx1 (.i(s[1]), .z(sel_buf[1]));

    // One 16-bit slice per instance, lowest bits first.
    lsdp_mux4x1_16 u_1 (.s(sel_buf),
                        .i0(i0[15:0]),    .i1(i1[15:0]),
                        .i2(i2[15:0]),    .i3(i3[15:0]),
                        .z(z[15:0]));
    lsdp_mux4x1_16 u_2 (.s(sel_buf),
                        .i0(i0[31:16]),   .i1(i1[31:16]),
                        .i2(i2[31:16]),   .i3(i3[31:16]),
                        .z(z[31:16]));
    lsdp_mux4x1_16 u_3 (.s(sel_buf),
                        .i0(i0[47:32]),   .i1(i1[47:32]),
                        .i2(i2[47:32]),   .i3(i3[47:32]),
                        .z(z[47:32]));
    lsdp_mux4x1_16 u_4 (.s(sel_buf),
                        .i0(i0[63:48]),   .i1(i1[63:48]),
                        .i2(i2[63:48]),   .i3(i3[63:48]),
                        .z(z[63:48]));
    lsdp_mux4x1_16 u_5 (.s(sel_buf),
                        .i0(i0[79:64]),   .i1(i1[79:64]),
                        .i2(i2[79:64]),   .i3(i3[79:64]),
                        .z(z[79:64]));
    lsdp_mux4x1_16 u_6 (.s(sel_buf),
                        .i0(i0[95:80]),   .i1(i1[95:80]),
                        .i2(i2[95:80]),   .i3(i3[95:80]),
                        .z(z[95:80]));
    lsdp_mux4x1_16 u_7 (.s(sel_buf),
                        .i0(i0[111:96]),  .i1(i1[111:96]),
                        .i2(i2[111:96]),  .i3(i3[111:96]),
                        .z(z[111:96]));
    lsdp_mux4x1_16 u_8 (.s(sel_buf),
                        .i0(i0[127:112]), .i1(i1[127:112]),
                        .i2(i2[127:112]), .i3(i3[127:112]),
                        .z(z[127:112]));
endmodule