// lsdp.v (27.8 KB)

/**************************************************************************
 *                                                                        *
 *               Copyright (C) 1994, Silicon Graphics, Inc.               *
 *                                                                        *
 *  These coded instructions, statements, and computer programs  contain  *
 *  unpublished  proprietary  information of Silicon Graphics, Inc., and  *
 *  are protected by Federal copyright  law.  They  may not be disclosed  *
 *  to  third  parties  or copied or duplicated in any form, in whole or  *
 *  in part, without the prior written consent of Silicon Graphics, Inc.  *
 *                                                                        *
 *************************************************************************/
// $Id: lsdp.v,v 1.1.1.1 2002/05/17 06:07:47 blythe Exp $

// lsdp.v		muxes and rotators for RSP SU and VU loads and stores

`timescale 1ns / 10ps

// lsdp: load/store datapath muxes and rotators for RSP SU and VU loads and
// stores.
//
// Data flow visible in this module:
//
//   Store / move side (EX):
//     ls_data -> store compaction mux (dp_to_dmem_0th)
//             -> source mux (dmem_to_dp_raw, selected by rot_dp || ex_mfc0)
//             -> word rotate (rot_amt[3:2]) -> byte rotate (rot_amt[1:0])
//             -> optional sign extension of the top 16 / next 8 bits
//             -> final mux with DMA write data and cp0_data (dp_to_dmem_3rd)
//             -> df_datain register -> wb_datain register.
//
//   Load side (WB):
//     dmem_dataout (or wb_datain when wb_pass_thru) goes through the same
//     source mux, rotators and sign extension, then through the load
//     expansion mux (load_data), and is driven onto ls_data_out by
//     ls_data_driver when ls_drive_ls is asserted.
//
//   DMA:
//     DMem -> RDRAM: one 64-bit half of dmem_to_dp_raw is forwarded on
//       dmem_rd_data while wb_dma_dm_to_rd is asserted; the other half is
//       captured in sec_rd_data and forwarded otherwise.
//     RDRAM -> DMem: mem_write_data is captured in secondary_write_data and
//       concatenated with the current mem_write_data to form 128 bits.
//
// The rotator is shared between EX and WB use (see the "EX or WB" note on
// rot_amt); every encoded mux select below assumes its control inputs are
// mutually exclusive.
//
// NOTE(review): asdff, cp0_driver and the library cells are defined outside
// this file. asdff is presumably a D flip-flop taking (q, d, clk, enable)
// with parameters (width, reset value) -- confirm against its definition.
module lsdp (clk, reset_l, halt, pc, ex_dma_wen_swap, ex_dma_wen_noswap, 
	vu_ex_st_dec, ex_su_byte_ls, ex_su_half_ls, elem_num_3, 
	ex_mfc0, cp0_write, cp0_data, cp0_data_out, 
	rot_dp, mem_write_data, rot_amt, wb_pass_thru, 
	wb_mfc2, wb_cfc2, wb_swap_dma, wb_su_uns_ls, wb_su_load,
	dmem_dataout, vu_wb_ld_dec, ls_drive_ls, wb_dma_dm_to_rd, 
	ls_data, ls_data_out, df_datain, dmem_rd_data);

    input		clk;
    input		reset_l;
    input		halt;
						// IF Stage Inputs
    input	[11:2]	pc;
						// EX Stage Inputs
    input	[11:0]	vu_ex_st_dec;		// only bits [9:6] are used here
    input		ex_su_byte_ls;
    input		ex_su_half_ls;
    input		elem_num_3;
    input		ex_mfc0;
    input		cp0_write;
    input		rot_dp;
    input		ex_dma_wen_swap;
    input		ex_dma_wen_noswap;
    input	[63:0]	mem_write_data;
    input	[3:0]	rot_amt;		// EX or WB

						// WB Stage Inputs
    input		wb_pass_thru;
    input		wb_mfc2;
    input		wb_cfc2;
    input		wb_swap_dma;
    input		wb_su_uns_ls;
    input		wb_su_load;
    input	[127:0]	dmem_dataout;
    input	[11:0]	vu_wb_ld_dec;		// only bits [9:6] are used here
    input		ls_drive_ls;
    input		wb_dma_dm_to_rd;

    input	[127:0]	ls_data;
    output	[127:0]	ls_data_out;
    input	[31:0]	cp0_data;
    output	[31:0]	cp0_data_out;

    output	[127:0]	df_datain;		// final store data to dmem
    output 	[63:0] 	dmem_rd_data;

    wire		df_su_byte_ls;
    wire		df_su_half_ls;
    wire		wb_su_byte_ls;
    wire		wb_su_half_ls;
    // Previously implicit nets; declared explicitly so the module still
    // compiles under `default_nettype none and typos are caught.
    wire		df_elem_3;		// elem_num_3 delayed by one flop
    wire		wb_elem_3;		// elem_num_3 delayed by two flops (not read in this module)
    wire	[127:0]	dma_data_to_dmem;
    wire	[127:0]	dp_to_dmem_0th;		// compaction/expansion output
    wire	[127:0]	dp_to_dmem_3rd;		// mux in dma data
    wire	[127:0]	wb_datain;	// pass-through data (MT, MF, CT, CF)
    wire	[127:0]	dmem_to_dp_raw;		// source for word rotation
    wire	[127:0]	dmem_to_dp_1st;	    	// word-rot'd dmem data to dma
    wire	[127:0]	dmem_to_dp_2nd_presxt;	// byte rotation output
    wire	[103:0]	dmem_to_dp_2nd_low;
    wire	[111:104] dmem_to_dp_2nd_mid;
    wire	[127:112] dmem_to_dp_2nd_high;
    wire	[127:0]	dmem_to_dp_2nd;		// byte rotation sign extended
    wire	[127:0]	load_data;		// load expansion output
    wire	[63:0] 	sec_rd_data;
    wire	[63:0] 	secondary_write_data;
    wire	[31:0]	cp0_source;
    wire		cp0_data_enable;

// CP0 write data: while halted the 10-bit pc is zero-extended onto the bus,
// otherwise the top word of ls_data is used.  The driver is enabled for
// either a cp0 write or halt.
assign cp0_source = halt ? {22'b0, pc} : ls_data[127:96];
assign cp0_data_enable = cp0_write || halt;
cp0_driver cp0_driver_ls(cp0_source, cp0_data_enable, cp0_data_out);

// Datapaths to DMem

// *** Make sure ex_mfc0 is exclusive of other controls
// *** For store_4th (vu_ex_st_dec[9]), the 16'b0 in each 32-bit word are 
// *** really don't cares.  How can this be specified in Verilog without
// *** breaking this mux down into 8 16-bit muxes?
// *** Similarly, the low order 96 bits are don't cares for mfc0.

/* ???? changed since mux has encoded selects.
* reg  [127:0] dp_to_dmem_0th_reg;
* wire [3:0] dp_to_dmem_0th_sl;    
*/
wire [127:0] dp_to_dmem_0th_reg;
wire [1:0] dp_to_dmem_0th_sl;    

assign dp_to_dmem_0th = dp_to_dmem_0th_reg;

// mutual exclusion assumed
// Encoded select for the store compaction mux:
//   2'b00 : none of vu_ex_st_dec[9:6] set -> ls_data unchanged
//   2'b01 : vu_ex_st_dec[6] or [7]        -> pack / upack
//   2'b10 : vu_ex_st_dec[8]               -> half
//   2'b11 : vu_ex_st_dec[9]               -> fourth
// If the decode bits are not mutually exclusive the encoding aliases
// (e.g. [6] together with [8] also yields 2'b11).
// assign dp_to_dmem_0th_sl[0] = ex_mfc0;
assign dp_to_dmem_0th_sl[0] = vu_ex_st_dec[6] || vu_ex_st_dec[7] || vu_ex_st_dec[9] ; 
assign dp_to_dmem_0th_sl[1] = vu_ex_st_dec[8] || vu_ex_st_dec[9] ;

/* ???? changed since mux has encoded selects.
* assign dp_to_dmem_0th_sl[0] = vu_ex_st_dec[6] || vu_ex_st_dec[7]; 
* assign dp_to_dmem_0th_sl[1] = vu_ex_st_dec[8];
* assign dp_to_dmem_0th_sl[2] = vu_ex_st_dec[9];
* assign dp_to_dmem_0th_sl[3] = (vu_ex_st_dec[9:6]==4'h0);
*/

/*
* ????	Changed to instantiated mux to reduce routing net therefore improve
*	post-layout timing.
*
* always @(dp_to_dmem_0th_sl or cp0_data or ls_data)
* begin
*    dp_to_dmem_0th_reg = 128'h0;
* 
*    case (1'b1) //synopsys parallel_case full_case
* //      dp_to_dmem_0th_sl[0]: dp_to_dmem_0th_reg = {cp0_data, ls_data[95:0]};
*       dp_to_dmem_0th_sl[0]: dp_to_dmem_0th_reg =					//pack, upack
*                                     {ls_data[127:120], ls_data[111:104], ls_data[95:88],  
*        			             ls_data[79:72],   ls_data[63:56],   ls_data[47:40],
*        			             ls_data[31:24],   ls_data[15:8],
*        			             ls_data[126:119], ls_data[110:103], ls_data[94:87],
*        			             ls_data[78:71],   ls_data[62:55],   ls_data[46:39],
*        			             ls_data[30:23],   ls_data[14:7]
* 			            };
*       dp_to_dmem_0th_sl[1]: dp_to_dmem_0th_reg = {ls_data[126:0], 1'b0}; 		// half
*       dp_to_dmem_0th_sl[2]: dp_to_dmem_0th_reg =					//fourth
*                                     {ls_data[126:119], ls_data[30:23], 16'b0,
*        			       ls_data[110:103], ls_data[14:7],  16'b0,
*              			       ls_data[94:87],   ls_data[62:55], 16'b0,
*              			       ls_data[78:71],   ls_data[46:39], 16'b0};
*       dp_to_dmem_0th_sl[3]: dp_to_dmem_0th_reg = ls_data;
*       default		  : dp_to_dmem_0th_reg = 128'h0;
*    endcase
* end
*/

// Store compaction mux (instantiated replacement for the always block above).
lsdp_mux4x1_128 ls_dp_to_dmem_0th(.z(dp_to_dmem_0th_reg),
				  .s(dp_to_dmem_0th_sl),
				  .i0(ls_data),
				  .i1( {ls_data[127:120], ls_data[111:104], ls_data[95:88],  
					ls_data[79:72],   ls_data[63:56],   ls_data[47:40],
					ls_data[31:24],   ls_data[15:8],
					ls_data[126:119], ls_data[110:103], ls_data[94:87],
					ls_data[78:71],   ls_data[62:55],   ls_data[46:39],
					ls_data[30:23],   ls_data[14:7]
				       }
				     ),					// pack, upack
				  .i2( {ls_data[126:0], 1'b0}),		// half
				  .i3( {ls_data[126:119], ls_data[30:23], 16'b0,
					ls_data[110:103], ls_data[14:7],  16'b0,
					ls_data[94:87],   ls_data[62:55], 16'b0,
					ls_data[78:71],   ls_data[46:39], 16'b0
				       }
				     )					//fourth
				 );


// Sneak path for dma to dmem to avoid collision with potential load in WB:
//assign dp_to_dmem_3rd =
//    ex_dma_wen_noswap ? dma_data_to_dmem [127:0] :
//      ex_dma_wen_swap ? {dma_data_to_dmem [63:0],dma_data_to_dmem [127:64]} :
//              ex_mfc0 ? cp0_data :
//          /* not dma */ dmem_to_dp_2nd;

// Encoded select for the final store-data mux:
//   2'b00 : rotated / sign-extended datapath result
//   2'b01 : DMA write data, not swapped
//   2'b10 : DMA write data, 64-bit halves swapped
//   2'b11 : cp0_data in the top word (ex_mfc0)
// Unlike the priority chain above, asserting both DMA enables together
// also produces 2'b11; the controls are assumed mutually exclusive.
wire [1:0] dp_to_dmem_3rd_sl;
assign dp_to_dmem_3rd_sl[0] = ex_dma_wen_noswap || ex_mfc0;
assign dp_to_dmem_3rd_sl[1] = ex_dma_wen_swap   || ex_mfc0;

lsdp_mux4x1_128 ls_dp_to_dmem_3rd(.z(dp_to_dmem_3rd),
				 .s(dp_to_dmem_3rd_sl[1:0]),
				 .i0(dmem_to_dp_2nd),
				 .i1(dma_data_to_dmem),
				 .i2({dma_data_to_dmem [63:0],dma_data_to_dmem [127:64]}),
				 .i3({cp0_data,96'h0})
				);

// First register stage: EX-side controls and store data -> df_*.
asdff #(1, 0) vu_ed_elem3_ff (df_elem_3, elem_num_3, clk, 1'b1);
asdff #(1, 0) vu_ed_byte_ff (df_su_byte_ls, ex_su_byte_ls, clk, 1'b1);
asdff #(1, 0) vu_ed_half_ff (df_su_half_ls, ex_su_half_ls, clk, 1'b1);
asdff #(128, 0) vu_ed_datain_ff (df_datain, dp_to_dmem_3rd, clk, 1'b1);


// Second register stage: df_* -> wb_*.
asdff #(1, 0) vu_dw_elem3_ff (wb_elem_3, df_elem_3, clk, 1'b1);
asdff #(1, 0) vu_dw_byte_ff (wb_su_byte_ls, df_su_byte_ls, clk, 1'b1);
asdff #(1, 0) vu_dw_half_ff (wb_su_half_ls, df_su_half_ls, clk, 1'b1);
asdff #(128, 0) vu_dw_dp_to_dm_ff (wb_datain, df_datain, clk, 1'b1);

/* ******************************************************************** */ 
// DMem to Datapaths

/* ???? changed since mux has encoded selects.
* reg [127:0] dmem_to_dp_raw_reg;
* wire [2:0] dmem_to_dp_raw_sl;
*/
wire [127:0] dmem_to_dp_raw_reg;
wire [1:0] dmem_to_dp_raw_sl;

assign dmem_to_dp_raw = dmem_to_dp_raw_reg;

// mutual exclusion assumed
// Encoded select for the rotator source mux:
//   2'b00 : dmem_dataout
//   2'b01 : dp_to_dmem_0th (rot_dp or ex_mfc0)
//   2'b10 : wb_datain      (wb_pass_thru)
//   2'b11 : all zeros -- only reachable if the exclusion assumption is broken
assign dmem_to_dp_raw_sl[0] = rot_dp || ex_mfc0;
assign dmem_to_dp_raw_sl[1] = wb_pass_thru;

/* ???? changed since mux has encoded selects.
* assign dmem_to_dp_raw_sl[2] = (dmem_to_dp_raw_sl[1:0]==2'b00);
*/

/*
* ????	Changed to instantiated mux to reduce routing net therefore improve
*	post-layout timing.
*
* always @(dmem_to_dp_raw_sl or dp_to_dmem_0th or wb_datain or dmem_dataout)
* begin
*   dmem_to_dp_raw_reg = 128'h0;
*   case(1'b1) //synopsys parallel_case full_case
*      dmem_to_dp_raw_sl[0]: dmem_to_dp_raw_reg = dp_to_dmem_0th;
*      dmem_to_dp_raw_sl[1]: dmem_to_dp_raw_reg = wb_datain;
*      dmem_to_dp_raw_sl[2]: dmem_to_dp_raw_reg = dmem_dataout;
*      default	         : dmem_to_dp_raw_reg = 128'h0;
*   endcase
* end
*/

lsdp_mux4x1_128 ls_dmem_to_dp_raw(.z(dmem_to_dp_raw_reg),
				 .s(dmem_to_dp_raw_sl),
				 .i0(dmem_dataout),
				 .i1(dp_to_dmem_0th),
				 .i2(wb_datain),
				 .i3(128'h0)
				);



/***************************************************************/

// Two-stage right rotator: rot_amt[3:2] rotates by 0/32/64/96 bits,
// rot_amt[1:0] then rotates by 0/8/16/24 bits.
lsdp_mux4x1_128 ls_dmem_to_dp_1st(.z(dmem_to_dp_1st),
				 .s(rot_amt[3:2]),
				 .i0(dmem_to_dp_raw),
				 .i1({dmem_to_dp_raw[31:0], dmem_to_dp_raw[127:32]}),
				 .i2({dmem_to_dp_raw[63:0], dmem_to_dp_raw[127:64]}),
				 .i3({dmem_to_dp_raw[95:0], dmem_to_dp_raw[127:96]})
				);
lsdp_mux4x1_128 ls_dmem_to_dp_2nd_presxt(.z(dmem_to_dp_2nd_presxt),
				        .s(rot_amt[1:0]),
				        .i0(dmem_to_dp_1st),
				        .i1({dmem_to_dp_1st[7:0],dmem_to_dp_1st[127:8]}),
				        .i2({dmem_to_dp_1st[15:0],dmem_to_dp_1st[127:16]}),
				        .i3({dmem_to_dp_1st[23:0],dmem_to_dp_1st[127:24]})
				       );
/***************************************************************/

/* ???? changed since mux has encoded selects.
* reg [15:0] dmem_to_dp_2nd_high_reg;
* wire [2:0] dmem_to_dp_2nd_high_sl;
*/
wire [15:0] dmem_to_dp_2nd_high_reg;
wire [1:0] dmem_to_dp_2nd_high_sl;

assign dmem_to_dp_2nd_high = dmem_to_dp_2nd_high_reg;

// mutual exclusion assumed
// Encoded select for bits [127:112] of the rotated data:
//   2'b00 : pass the rotated bits through
//   2'b01 : replicate bit 111 (SU half load, cfc2, mfc2); zero if wb_su_uns_ls
//   2'b10 : replicate bit 103 (SU byte load);             zero if wb_su_uns_ls
//   2'b11 : all zeros -- only reachable if the exclusion assumption is broken
assign dmem_to_dp_2nd_high_sl[0] = (wb_su_load && wb_su_half_ls) || wb_cfc2 || wb_mfc2;
assign dmem_to_dp_2nd_high_sl[1] = wb_su_load && wb_su_byte_ls;
/* ???? changed since mux has encoded selects.
* assign dmem_to_dp_2nd_high_sl[0] = (wb_su_load && wb_su_half_ls) || wb_cfc2 || wb_mfc2;
* assign dmem_to_dp_2nd_high_sl[1] = wb_su_load && wb_su_byte_ls;
* assign dmem_to_dp_2nd_high_sl[2] = !dmem_to_dp_2nd_high_sl[0] && !dmem_to_dp_2nd_high_sl[1];
*/

/*
* ????	Changed to instantiated mux to reduce routing net therefore improve
*	post-layout timing.
*
* always @(dmem_to_dp_2nd_high_sl or dmem_to_dp_2nd_presxt or wb_su_uns_ls)
* begin
*   dmem_to_dp_2nd_high_reg = 16'h0;
*   case (1'b1) //synopsys parallel_case full_case
*    dmem_to_dp_2nd_high_sl[0]: dmem_to_dp_2nd_high_reg = {16{(dmem_to_dp_2nd_presxt[111] && !wb_su_uns_ls)}}; 
*    dmem_to_dp_2nd_high_sl[1]: dmem_to_dp_2nd_high_reg = {16{(dmem_to_dp_2nd_presxt[103] && !wb_su_uns_ls)}};
*    dmem_to_dp_2nd_high_sl[2]: dmem_to_dp_2nd_high_reg = dmem_to_dp_2nd_presxt[127:112];
*    default		    : dmem_to_dp_2nd_high_reg = 16'h0;
*   endcase
* end
* 
*/

lsdp_mux4x1_16 ls_dmem_to_dp_2nd_high (	.z(dmem_to_dp_2nd_high_reg),
					.s(dmem_to_dp_2nd_high_sl),
					.i0(dmem_to_dp_2nd_presxt[127:112]),
					.i1({16{(dmem_to_dp_2nd_presxt[111] && !wb_su_uns_ls)}}),
					.i2({16{(dmem_to_dp_2nd_presxt[103] && !wb_su_uns_ls)}}),
					.i3(16'h0)
				      );


/***************************************************************/

// Bits [111:104]: extension of bit 103 for an SU byte load, otherwise
// the rotated bits pass through.
assign dmem_to_dp_2nd_mid = 
   (wb_su_load && wb_su_byte_ls) ? 
		{8{(dmem_to_dp_2nd_presxt[103] && !wb_su_uns_ls)}} :  
		dmem_to_dp_2nd_presxt[111:104];

// Bits [103:0] are never modified by the extension logic.
assign dmem_to_dp_2nd_low = dmem_to_dp_2nd_presxt[103:0];

assign dmem_to_dp_2nd = 
	{dmem_to_dp_2nd_high, dmem_to_dp_2nd_mid, dmem_to_dp_2nd_low};

// Load expansion: one-hot select derived from vu_wb_ld_dec[9:6];
// load_data_sl[4] is the "no expansion" case.
reg [127:0] load_data_reg;
wire [4:0] load_data_sl;

assign load_data = load_data_reg;

// mutual exclusion assumed
assign load_data_sl[0] = vu_wb_ld_dec[6];
assign load_data_sl[1] = vu_wb_ld_dec[7];
assign load_data_sl[2] = vu_wb_ld_dec[8];
assign load_data_sl[3] = vu_wb_ld_dec[9];
assign load_data_sl[4] = (vu_wb_ld_dec[9:6]==4'h0);

always @(load_data_sl or dmem_to_dp_2nd)
begin

  // Default assignment keeps this block purely combinational.
  load_data_reg = 128'h0;
  // In simulation the first matching item wins; the synthesis directives
  // rely on the select really being one-hot.
  case (1'b1) //synopsys parallel_case full_case
   // Each source byte lands in the upper byte of a 16-bit element.
   load_data_sl[0]: load_data_reg =				// pack 
		{dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[119:112], 8'b0,
       		dmem_to_dp_2nd[111:104], 8'b0, dmem_to_dp_2nd[103:96],  8'b0,
       		dmem_to_dp_2nd[95:88],   8'b0, dmem_to_dp_2nd[87:80],   8'b0,
       		dmem_to_dp_2nd[79:72],   8'b0, dmem_to_dp_2nd[71:64],   8'b0};
   // The leading 1'b0 with a trailing 7'b0 shifts every byte right by one
   // bit inside its 16-bit element (also used by the next two cases).
   load_data_sl[1]: load_data_reg = {1'b0,                     // unsigned pack
       		dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[119:112], 8'b0,
       		dmem_to_dp_2nd[111:104], 8'b0, dmem_to_dp_2nd[103:96],  8'b0,
       		dmem_to_dp_2nd[95:88],   8'b0, dmem_to_dp_2nd[87:80],   8'b0,
       		dmem_to_dp_2nd[79:72],   8'b0, dmem_to_dp_2nd[71:64],   7'b0}; 
   // Every other source byte.
   load_data_sl[2]: load_data_reg = {1'b0,                     // half
       		dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[111:104], 8'b0,
       		dmem_to_dp_2nd[95:88],   8'b0, dmem_to_dp_2nd[79:72],   8'b0,
       		dmem_to_dp_2nd[63:56],   8'b0, dmem_to_dp_2nd[47:40],   8'b0,
           	dmem_to_dp_2nd[31:24],   8'b0, dmem_to_dp_2nd[15:8],    7'b0};
   // Every fourth source byte; the four bytes are used twice, with the
   // second group in the order [63:56], [31:24], [127:120], [95:88].
   load_data_sl[3]: load_data_reg = {1'b0,                     // fourth
       		dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[95:88], 8'b0,
       		dmem_to_dp_2nd[63:56],   8'b0, dmem_to_dp_2nd[31:24], 8'b0,
       		dmem_to_dp_2nd[63:56],   8'b0, dmem_to_dp_2nd[31:24], 8'b0,
       		dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[95:88], 7'b0};
   load_data_sl[4]: load_data_reg = dmem_to_dp_2nd;
   default	  : load_data_reg = 128'h0;
  endcase	    
end 


// Tri-state driver for the load result; enabled by ls_drive_ls.
ls_data_driver ls_data_driver (
	.in(load_data), 
	.out(ls_data_out), 
	.enable(ls_drive_ls));

// DMA DMem to RDRAM
// While wb_dma_dm_to_rd is asserted, one 64-bit half of dmem_to_dp_raw is
// forwarded directly and the other half is captured in sec_rd_data; when
// it is deasserted the captured half is forwarded.  wb_swap_dma selects
// which half goes first.
// NOTE(review): these two flops pass reset_l as the 4th asdff argument
// where the pipeline flops above pass 1'b1 -- confirm against asdff.
wire [63:0] sec_rd_data_in;
assign sec_rd_data_in = 
	!wb_swap_dma ? dmem_to_dp_raw[63:0] : dmem_to_dp_raw[127:64];
asdff #(64,0) dma_sec_rd_ff (sec_rd_data,sec_rd_data_in,clk,reset_l);
assign dmem_rd_data = 
	wb_dma_dm_to_rd && !wb_swap_dma ? dmem_to_dp_raw[127:64] :
	wb_dma_dm_to_rd && wb_swap_dma  ? dmem_to_dp_raw[63:0] :
					 sec_rd_data;

// DMA RDRAM to DMem
// The registered copy of mem_write_data forms the upper 64 bits and the
// current mem_write_data the lower 64 bits.
asdff #(64,0) dma_sec_wr_ff (secondary_write_data, mem_write_data,clk,reset_l);
assign dma_data_to_dmem = {secondary_write_data, mem_write_data};

endmodule

// ls_data_driver: 128-bit output driver with a buffered enable tree.
//
//   in     - data to drive
//   enable - drive enable; fed through two levels of in01d5 cells
//   out    - driven bus (declared inout)
//
// The enable is fanned out into six copies, each controlling a slice of the
// output buffers:
//   enable0 -> out[19:0]     enable3 -> out[79:60]
//   enable1 -> out[39:20]    enable4 -> out[99:80]
//   enable2 -> out[59:40]    enable5 -> out[127:100]
// Every output bit additionally has an rp01d1 cell attached.
//
// NOTE(review): in01d5, nt01d5 and rp01d1 are library cells defined outside
// this file; from their port names they are presumably an inverter, a
// tri-state buffer with output enable, and a bus repeater/keeper -- confirm
// against the cell library.
module ls_data_driver ( in, out, enable);

input  [127:0] in;
input          enable;
inout  [127:0] out;

// enable buffer tree
// Declared explicitly (they were implicit nets before) so the module still
// compiles under `default_nettype none and a misspelled enable name is an
// error rather than a silently undriven net.
wire enablen, enable0, enable1, enable2, enable3, enable4, enable5;

  in01d5 ib(.i(enable),  .zn(enablen));

  in01d5 i0(.i(enablen), .zn(enable0));
  in01d5 i1(.i(enablen), .zn(enable1));
  in01d5 i2(.i(enablen), .zn(enable2));
  in01d5 i3(.i(enablen), .zn(enable3));
  in01d5 i4(.i(enablen), .zn(enable4));
  in01d5 i5(.i(enablen), .zn(enable5));

// output buffers
  // bits 19:0 -- enable0
  nt01d5 b0 (.i(in[0]), .z(out[0]), .oe(enable0));
  nt01d5 b1 (.i(in[1]), .z(out[1]), .oe(enable0));
  nt01d5 b2 (.i(in[2]), .z(out[2]), .oe(enable0));
  nt01d5 b3 (.i(in[3]), .z(out[3]), .oe(enable0));
  nt01d5 b4 (.i(in[4]), .z(out[4]), .oe(enable0));
  nt01d5 b5 (.i(in[5]), .z(out[5]), .oe(enable0));
  nt01d5 b6 (.i(in[6]), .z(out[6]), .oe(enable0));
  nt01d5 b7 (.i(in[7]), .z(out[7]), .oe(enable0));
  nt01d5 b8 (.i(in[8]), .z(out[8]), .oe(enable0));
  nt01d5 b9 (.i(in[9]), .z(out[9]), .oe(enable0));
  nt01d5 b10 (.i(in[10]), .z(out[10]), .oe(enable0));
  nt01d5 b11 (.i(in[11]), .z(out[11]), .oe(enable0));
  nt01d5 b12 (.i(in[12]), .z(out[12]), .oe(enable0));
  nt01d5 b13 (.i(in[13]), .z(out[13]), .oe(enable0));
  nt01d5 b14 (.i(in[14]), .z(out[14]), .oe(enable0));
  nt01d5 b15 (.i(in[15]), .z(out[15]), .oe(enable0));
  nt01d5 b16 (.i(in[16]), .z(out[16]), .oe(enable0));
  nt01d5 b17 (.i(in[17]), .z(out[17]), .oe(enable0));
  nt01d5 b18 (.i(in[18]), .z(out[18]), .oe(enable0));
  nt01d5 b19 (.i(in[19]), .z(out[19]), .oe(enable0));
  // bits 39:20 -- enable1
  nt01d5 b20 (.i(in[20]), .z(out[20]), .oe(enable1));
  nt01d5 b21 (.i(in[21]), .z(out[21]), .oe(enable1));
  nt01d5 b22 (.i(in[22]), .z(out[22]), .oe(enable1));
  nt01d5 b23 (.i(in[23]), .z(out[23]), .oe(enable1));
  nt01d5 b24 (.i(in[24]), .z(out[24]), .oe(enable1));
  nt01d5 b25 (.i(in[25]), .z(out[25]), .oe(enable1));
  nt01d5 b26 (.i(in[26]), .z(out[26]), .oe(enable1));
  nt01d5 b27 (.i(in[27]), .z(out[27]), .oe(enable1));
  nt01d5 b28 (.i(in[28]), .z(out[28]), .oe(enable1));
  nt01d5 b29 (.i(in[29]), .z(out[29]), .oe(enable1));
  nt01d5 b30 (.i(in[30]), .z(out[30]), .oe(enable1));
  nt01d5 b31 (.i(in[31]), .z(out[31]), .oe(enable1));
  nt01d5 b32 (.i(in[32]), .z(out[32]), .oe(enable1));
  nt01d5 b33 (.i(in[33]), .z(out[33]), .oe(enable1));
  nt01d5 b34 (.i(in[34]), .z(out[34]), .oe(enable1));
  nt01d5 b35 (.i(in[35]), .z(out[35]), .oe(enable1));
  nt01d5 b36 (.i(in[36]), .z(out[36]), .oe(enable1));
  nt01d5 b37 (.i(in[37]), .z(out[37]), .oe(enable1));
  nt01d5 b38 (.i(in[38]), .z(out[38]), .oe(enable1));
  nt01d5 b39 (.i(in[39]), .z(out[39]), .oe(enable1));
  // bits 59:40 -- enable2
  nt01d5 b40 (.i(in[40]), .z(out[40]), .oe(enable2));
  nt01d5 b41 (.i(in[41]), .z(out[41]), .oe(enable2));
  nt01d5 b42 (.i(in[42]), .z(out[42]), .oe(enable2));
  nt01d5 b43 (.i(in[43]), .z(out[43]), .oe(enable2));
  nt01d5 b44 (.i(in[44]), .z(out[44]), .oe(enable2));
  nt01d5 b45 (.i(in[45]), .z(out[45]), .oe(enable2));
  nt01d5 b46 (.i(in[46]), .z(out[46]), .oe(enable2));
  nt01d5 b47 (.i(in[47]), .z(out[47]), .oe(enable2));
  nt01d5 b48 (.i(in[48]), .z(out[48]), .oe(enable2));
  nt01d5 b49 (.i(in[49]), .z(out[49]), .oe(enable2));
  nt01d5 b50 (.i(in[50]), .z(out[50]), .oe(enable2));
  nt01d5 b51 (.i(in[51]), .z(out[51]), .oe(enable2));
  nt01d5 b52 (.i(in[52]), .z(out[52]), .oe(enable2));
  nt01d5 b53 (.i(in[53]), .z(out[53]), .oe(enable2));
  nt01d5 b54 (.i(in[54]), .z(out[54]), .oe(enable2));
  nt01d5 b55 (.i(in[55]), .z(out[55]), .oe(enable2));
  nt01d5 b56 (.i(in[56]), .z(out[56]), .oe(enable2));
  nt01d5 b57 (.i(in[57]), .z(out[57]), .oe(enable2));
  nt01d5 b58 (.i(in[58]), .z(out[58]), .oe(enable2));
  nt01d5 b59 (.i(in[59]), .z(out[59]), .oe(enable2));
  // bits 79:60 -- enable3
  nt01d5 b60 (.i(in[60]), .z(out[60]), .oe(enable3));
  nt01d5 b61 (.i(in[61]), .z(out[61]), .oe(enable3));
  nt01d5 b62 (.i(in[62]), .z(out[62]), .oe(enable3));
  nt01d5 b63 (.i(in[63]), .z(out[63]), .oe(enable3));
  nt01d5 b64 (.i(in[64]), .z(out[64]), .oe(enable3));
  nt01d5 b65 (.i(in[65]), .z(out[65]), .oe(enable3));
  nt01d5 b66 (.i(in[66]), .z(out[66]), .oe(enable3));
  nt01d5 b67 (.i(in[67]), .z(out[67]), .oe(enable3));
  nt01d5 b68 (.i(in[68]), .z(out[68]), .oe(enable3));
  nt01d5 b69 (.i(in[69]), .z(out[69]), .oe(enable3));
  nt01d5 b70 (.i(in[70]), .z(out[70]), .oe(enable3));
  nt01d5 b71 (.i(in[71]), .z(out[71]), .oe(enable3));
  nt01d5 b72 (.i(in[72]), .z(out[72]), .oe(enable3));
  nt01d5 b73 (.i(in[73]), .z(out[73]), .oe(enable3));
  nt01d5 b74 (.i(in[74]), .z(out[74]), .oe(enable3));
  nt01d5 b75 (.i(in[75]), .z(out[75]), .oe(enable3));
  nt01d5 b76 (.i(in[76]), .z(out[76]), .oe(enable3));
  nt01d5 b77 (.i(in[77]), .z(out[77]), .oe(enable3));
  nt01d5 b78 (.i(in[78]), .z(out[78]), .oe(enable3));
  nt01d5 b79 (.i(in[79]), .z(out[79]), .oe(enable3));
  // bits 99:80 -- enable4
  nt01d5 b80 (.i(in[80]), .z(out[80]), .oe(enable4));
  nt01d5 b81 (.i(in[81]), .z(out[81]), .oe(enable4));
  nt01d5 b82 (.i(in[82]), .z(out[82]), .oe(enable4));
  nt01d5 b83 (.i(in[83]), .z(out[83]), .oe(enable4));
  nt01d5 b84 (.i(in[84]), .z(out[84]), .oe(enable4));
  nt01d5 b85 (.i(in[85]), .z(out[85]), .oe(enable4));
  nt01d5 b86 (.i(in[86]), .z(out[86]), .oe(enable4));
  nt01d5 b87 (.i(in[87]), .z(out[87]), .oe(enable4));
  nt01d5 b88 (.i(in[88]), .z(out[88]), .oe(enable4));
  nt01d5 b89 (.i(in[89]), .z(out[89]), .oe(enable4));
  nt01d5 b90 (.i(in[90]), .z(out[90]), .oe(enable4));
  nt01d5 b91 (.i(in[91]), .z(out[91]), .oe(enable4));
  nt01d5 b92 (.i(in[92]), .z(out[92]), .oe(enable4));
  nt01d5 b93 (.i(in[93]), .z(out[93]), .oe(enable4));
  nt01d5 b94 (.i(in[94]), .z(out[94]), .oe(enable4));
  nt01d5 b95 (.i(in[95]), .z(out[95]), .oe(enable4));
  nt01d5 b96 (.i(in[96]), .z(out[96]), .oe(enable4));
  nt01d5 b97 (.i(in[97]), .z(out[97]), .oe(enable4));
  nt01d5 b98 (.i(in[98]), .z(out[98]), .oe(enable4));
  nt01d5 b99 (.i(in[99]), .z(out[99]), .oe(enable4));
  // bits 127:100 -- enable5
  nt01d5 b100 (.i(in[100]), .z(out[100]), .oe(enable5));
  nt01d5 b101 (.i(in[101]), .z(out[101]), .oe(enable5));
  nt01d5 b102 (.i(in[102]), .z(out[102]), .oe(enable5));
  nt01d5 b103 (.i(in[103]), .z(out[103]), .oe(enable5));
  nt01d5 b104 (.i(in[104]), .z(out[104]), .oe(enable5));
  nt01d5 b105 (.i(in[105]), .z(out[105]), .oe(enable5));
  nt01d5 b106 (.i(in[106]), .z(out[106]), .oe(enable5));
  nt01d5 b107 (.i(in[107]), .z(out[107]), .oe(enable5));
  nt01d5 b108 (.i(in[108]), .z(out[108]), .oe(enable5));
  nt01d5 b109 (.i(in[109]), .z(out[109]), .oe(enable5));
  nt01d5 b110 (.i(in[110]), .z(out[110]), .oe(enable5));
  nt01d5 b111 (.i(in[111]), .z(out[111]), .oe(enable5));
  nt01d5 b112 (.i(in[112]), .z(out[112]), .oe(enable5));
  nt01d5 b113 (.i(in[113]), .z(out[113]), .oe(enable5));
  nt01d5 b114 (.i(in[114]), .z(out[114]), .oe(enable5));
  nt01d5 b115 (.i(in[115]), .z(out[115]), .oe(enable5));
  nt01d5 b116 (.i(in[116]), .z(out[116]), .oe(enable5));
  nt01d5 b117 (.i(in[117]), .z(out[117]), .oe(enable5));
  nt01d5 b118 (.i(in[118]), .z(out[118]), .oe(enable5));
  nt01d5 b119 (.i(in[119]), .z(out[119]), .oe(enable5));
  nt01d5 b120 (.i(in[120]), .z(out[120]), .oe(enable5));
  nt01d5 b121 (.i(in[121]), .z(out[121]), .oe(enable5));
  nt01d5 b122 (.i(in[122]), .z(out[122]), .oe(enable5));
  nt01d5 b123 (.i(in[123]), .z(out[123]), .oe(enable5));
  nt01d5 b124 (.i(in[124]), .z(out[124]), .oe(enable5));
  nt01d5 b125 (.i(in[125]), .z(out[125]), .oe(enable5));
  nt01d5 b126 (.i(in[126]), .z(out[126]), .oe(enable5));
  nt01d5 b127 (.i(in[127]), .z(out[127]), .oe(enable5));

// repeaters
// One rp01d1 per output bit; only its z pin is connected.

  rp01d1 r0 (.z(out[0]));
  rp01d1 r1 (.z(out[1]));
  rp01d1 r2 (.z(out[2]));
  rp01d1 r3 (.z(out[3]));
  rp01d1 r4 (.z(out[4]));
  rp01d1 r5 (.z(out[5]));
  rp01d1 r6 (.z(out[6]));
  rp01d1 r7 (.z(out[7]));
  rp01d1 r8 (.z(out[8]));
  rp01d1 r9 (.z(out[9]));
  rp01d1 r10 (.z(out[10]));
  rp01d1 r11 (.z(out[11]));
  rp01d1 r12 (.z(out[12]));
  rp01d1 r13 (.z(out[13]));
  rp01d1 r14 (.z(out[14]));
  rp01d1 r15 (.z(out[15]));
  rp01d1 r16 (.z(out[16]));
  rp01d1 r17 (.z(out[17]));
  rp01d1 r18 (.z(out[18]));
  rp01d1 r19 (.z(out[19]));
  rp01d1 r20 (.z(out[20]));
  rp01d1 r21 (.z(out[21]));
  rp01d1 r22 (.z(out[22]));
  rp01d1 r23 (.z(out[23]));
  rp01d1 r24 (.z(out[24]));
  rp01d1 r25 (.z(out[25]));
  rp01d1 r26 (.z(out[26]));
  rp01d1 r27 (.z(out[27]));
  rp01d1 r28 (.z(out[28]));
  rp01d1 r29 (.z(out[29]));
  rp01d1 r30 (.z(out[30]));
  rp01d1 r31 (.z(out[31]));
  rp01d1 r32 (.z(out[32]));
  rp01d1 r33 (.z(out[33]));
  rp01d1 r34 (.z(out[34]));
  rp01d1 r35 (.z(out[35]));
  rp01d1 r36 (.z(out[36]));
  rp01d1 r37 (.z(out[37]));
  rp01d1 r38 (.z(out[38]));
  rp01d1 r39 (.z(out[39]));
  rp01d1 r40 (.z(out[40]));
  rp01d1 r41 (.z(out[41]));
  rp01d1 r42 (.z(out[42]));
  rp01d1 r43 (.z(out[43]));
  rp01d1 r44 (.z(out[44]));
  rp01d1 r45 (.z(out[45]));
  rp01d1 r46 (.z(out[46]));
  rp01d1 r47 (.z(out[47]));
  rp01d1 r48 (.z(out[48]));
  rp01d1 r49 (.z(out[49]));
  rp01d1 r50 (.z(out[50]));
  rp01d1 r51 (.z(out[51]));
  rp01d1 r52 (.z(out[52]));
  rp01d1 r53 (.z(out[53]));
  rp01d1 r54 (.z(out[54]));
  rp01d1 r55 (.z(out[55]));
  rp01d1 r56 (.z(out[56]));
  rp01d1 r57 (.z(out[57]));
  rp01d1 r58 (.z(out[58]));
  rp01d1 r59 (.z(out[59]));
  rp01d1 r60 (.z(out[60]));
  rp01d1 r61 (.z(out[61]));
  rp01d1 r62 (.z(out[62]));
  rp01d1 r63 (.z(out[63]));
  rp01d1 r64 (.z(out[64]));
  rp01d1 r65 (.z(out[65]));
  rp01d1 r66 (.z(out[66]));
  rp01d1 r67 (.z(out[67]));
  rp01d1 r68 (.z(out[68]));
  rp01d1 r69 (.z(out[69]));
  rp01d1 r70 (.z(out[70]));
  rp01d1 r71 (.z(out[71]));
  rp01d1 r72 (.z(out[72]));
  rp01d1 r73 (.z(out[73]));
  rp01d1 r74 (.z(out[74]));
  rp01d1 r75 (.z(out[75]));
  rp01d1 r76 (.z(out[76]));
  rp01d1 r77 (.z(out[77]));
  rp01d1 r78 (.z(out[78]));
  rp01d1 r79 (.z(out[79]));
  rp01d1 r80 (.z(out[80]));
  rp01d1 r81 (.z(out[81]));
  rp01d1 r82 (.z(out[82]));
  rp01d1 r83 (.z(out[83]));
  rp01d1 r84 (.z(out[84]));
  rp01d1 r85 (.z(out[85]));
  rp01d1 r86 (.z(out[86]));
  rp01d1 r87 (.z(out[87]));
  rp01d1 r88 (.z(out[88]));
  rp01d1 r89 (.z(out[89]));
  rp01d1 r90 (.z(out[90]));
  rp01d1 r91 (.z(out[91]));
  rp01d1 r92 (.z(out[92]));
  rp01d1 r93 (.z(out[93]));
  rp01d1 r94 (.z(out[94]));
  rp01d1 r95 (.z(out[95]));
  rp01d1 r96 (.z(out[96]));
  rp01d1 r97 (.z(out[97]));
  rp01d1 r98 (.z(out[98]));
  rp01d1 r99 (.z(out[99]));
  rp01d1 r100 (.z(out[100]));
  rp01d1 r101 (.z(out[101]));
  rp01d1 r102 (.z(out[102]));
  rp01d1 r103 (.z(out[103]));
  rp01d1 r104 (.z(out[104]));
  rp01d1 r105 (.z(out[105]));
  rp01d1 r106 (.z(out[106]));
  rp01d1 r107 (.z(out[107]));
  rp01d1 r108 (.z(out[108]));
  rp01d1 r109 (.z(out[109]));
  rp01d1 r110 (.z(out[110]));
  rp01d1 r111 (.z(out[111]));
  rp01d1 r112 (.z(out[112]));
  rp01d1 r113 (.z(out[113]));
  rp01d1 r114 (.z(out[114]));
  rp01d1 r115 (.z(out[115]));
  rp01d1 r116 (.z(out[116]));
  rp01d1 r117 (.z(out[117]));
  rp01d1 r118 (.z(out[118]));
  rp01d1 r119 (.z(out[119]));
  rp01d1 r120 (.z(out[120]));
  rp01d1 r121 (.z(out[121]));
  rp01d1 r122 (.z(out[122]));
  rp01d1 r123 (.z(out[123]));
  rp01d1 r124 (.z(out[124]));
  rp01d1 r125 (.z(out[125]));
  rp01d1 r126 (.z(out[126]));
  rp01d1 r127 (.z(out[127]));

endmodule


// lsdp_mux4x1_16: 16-bit wide 4:1 mux built from one mx41d2 cell per bit.
//
//   z        - mux output
//   i0..i3   - data inputs
//   s        - 2-bit encoded select, shared by all 16 bits
//
// Each select bit goes through two separate ni01d5 cells: one copy feeds
// bits [7:0], the other feeds bits [15:8].
//
// NOTE(review): ni01d5 and mx41d2 are library cells defined outside this
// file; ni01d5 is presumably a non-inverting buffer and mx41d2 presumably
// selects i<n> for {s1,s0} == n -- confirm against the cell library.
module lsdp_mux4x1_16(z, i0, i1, i2, i3, s);
output [15:0] z;
input  [15:0] i0, i1, i2, i3;
input  [1:0]  s;

// Buffered select copies: *_lo feeds z[7:0], *_hi feeds z[15:8].
wire sel0_lo, sel1_lo;
wire sel0_hi, sel1_hi;

ni01d5 u_s0x(.i(s[0]), .z(sel0_lo));
ni01d5 u_s0y(.i(s[0]), .z(sel0_hi));

ni01d5 u_s1x(.i(s[1]), .z(sel1_lo));
ni01d5 u_s1y(.i(s[1]), .z(sel1_hi));

// Low byte
mx41d2 u_00(.s0(sel0_lo), .s1(sel1_lo), .i0(i0[0]),  .i1(i1[0]),  .i2(i2[0]),  .i3(i3[0]),  .z(z[0]));
mx41d2 u_01(.s0(sel0_lo), .s1(sel1_lo), .i0(i0[1]),  .i1(i1[1]),  .i2(i2[1]),  .i3(i3[1]),  .z(z[1]));
mx41d2 u_02(.s0(sel0_lo), .s1(sel1_lo), .i0(i0[2]),  .i1(i1[2]),  .i2(i2[2]),  .i3(i3[2]),  .z(z[2]));
mx41d2 u_03(.s0(sel0_lo), .s1(sel1_lo), .i0(i0[3]),  .i1(i1[3]),  .i2(i2[3]),  .i3(i3[3]),  .z(z[3]));
mx41d2 u_04(.s0(sel0_lo), .s1(sel1_lo), .i0(i0[4]),  .i1(i1[4]),  .i2(i2[4]),  .i3(i3[4]),  .z(z[4]));
mx41d2 u_05(.s0(sel0_lo), .s1(sel1_lo), .i0(i0[5]),  .i1(i1[5]),  .i2(i2[5]),  .i3(i3[5]),  .z(z[5]));
mx41d2 u_06(.s0(sel0_lo), .s1(sel1_lo), .i0(i0[6]),  .i1(i1[6]),  .i2(i2[6]),  .i3(i3[6]),  .z(z[6]));
mx41d2 u_07(.s0(sel0_lo), .s1(sel1_lo), .i0(i0[7]),  .i1(i1[7]),  .i2(i2[7]),  .i3(i3[7]),  .z(z[7]));

// High byte
mx41d2 u_08(.s0(sel0_hi), .s1(sel1_hi), .i0(i0[8]),  .i1(i1[8]),  .i2(i2[8]),  .i3(i3[8]),  .z(z[8]));
mx41d2 u_09(.s0(sel0_hi), .s1(sel1_hi), .i0(i0[9]),  .i1(i1[9]),  .i2(i2[9]),  .i3(i3[9]),  .z(z[9]));
mx41d2 u_10(.s0(sel0_hi), .s1(sel1_hi), .i0(i0[10]), .i1(i1[10]), .i2(i2[10]), .i3(i3[10]), .z(z[10]));
mx41d2 u_11(.s0(sel0_hi), .s1(sel1_hi), .i0(i0[11]), .i1(i1[11]), .i2(i2[11]), .i3(i3[11]), .z(z[11]));
mx41d2 u_12(.s0(sel0_hi), .s1(sel1_hi), .i0(i0[12]), .i1(i1[12]), .i2(i2[12]), .i3(i3[12]), .z(z[12]));
mx41d2 u_13(.s0(sel0_hi), .s1(sel1_hi), .i0(i0[13]), .i1(i1[13]), .i2(i2[13]), .i3(i3[13]), .z(z[13]));
mx41d2 u_14(.s0(sel0_hi), .s1(sel1_hi), .i0(i0[14]), .i1(i1[14]), .i2(i2[14]), .i3(i3[14]), .z(z[14]));
mx41d2 u_15(.s0(sel0_hi), .s1(sel1_hi), .i0(i0[15]), .i1(i1[15]), .i2(i2[15]), .i3(i3[15]), .z(z[15]));

endmodule

// lsdp_mux4x1_128: 128-bit wide 4:1 mux assembled from eight lsdp_mux4x1_16
// slices that share one buffered copy of the 2-bit encoded select.
//
//   z        - mux output
//   i0..i3   - data inputs
//   s        - 2-bit encoded select
//
// NOTE(review): ni01d5 is a library cell defined outside this file,
// presumably a non-inverting buffer -- confirm against the cell library.
module lsdp_mux4x1_128(z, i0, i1, i2, i3, s);
output [127:0] z;
input  [127:0] i0, i1, i2, i3;
input  [1:0]   s;

// Select after the first level of buffering; fans out to all eight slices.
wire [1:0] sel_buf;

ni01d5 u_sx0(.i(s[0]), .z(sel_buf[0]));
ni01d5 u_sx1(.i(s[1]), .z(sel_buf[1]));

lsdp_mux4x1_16 u_1(
	.s (sel_buf),
	.i0(i0[15:0]),    .i1(i1[15:0]),    .i2(i2[15:0]),    .i3(i3[15:0]),
	.z (z[15:0]));
lsdp_mux4x1_16 u_2(
	.s (sel_buf),
	.i0(i0[31:16]),   .i1(i1[31:16]),   .i2(i2[31:16]),   .i3(i3[31:16]),
	.z (z[31:16]));
lsdp_mux4x1_16 u_3(
	.s (sel_buf),
	.i0(i0[47:32]),   .i1(i1[47:32]),   .i2(i2[47:32]),   .i3(i3[47:32]),
	.z (z[47:32]));
lsdp_mux4x1_16 u_4(
	.s (sel_buf),
	.i0(i0[63:48]),   .i1(i1[63:48]),   .i2(i2[63:48]),   .i3(i3[63:48]),
	.z (z[63:48]));
lsdp_mux4x1_16 u_5(
	.s (sel_buf),
	.i0(i0[79:64]),   .i1(i1[79:64]),   .i2(i2[79:64]),   .i3(i3[79:64]),
	.z (z[79:64]));
lsdp_mux4x1_16 u_6(
	.s (sel_buf),
	.i0(i0[95:80]),   .i1(i1[95:80]),   .i2(i2[95:80]),   .i3(i3[95:80]),
	.z (z[95:80]));
lsdp_mux4x1_16 u_7(
	.s (sel_buf),
	.i0(i0[111:96]),  .i1(i1[111:96]),  .i2(i2[111:96]),  .i3(i3[111:96]),
	.z (z[111:96]));
lsdp_mux4x1_16 u_8(
	.s (sel_buf),
	.i0(i0[127:112]), .i1(i1[127:112]), .i2(i2[127:112]), .i3(i3[127:112]),
	.z (z[127:112]));
endmodule