// Source file: lsdp.v
/**************************************************************************
 *                                                                        *
 *               Copyright (C) 1994, Silicon Graphics, Inc.               *
 *                                                                        *
 *  These coded instructions, statements, and computer programs  contain  *
 *  unpublished  proprietary  information of Silicon Graphics, Inc., and  *
 *  are protected by Federal copyright  law.  They  may not be disclosed  *
 *  to  third  parties  or copied or duplicated in any form, in whole or  *
 *  in part, without the prior written consent of Silicon Graphics, Inc.  *
 *                                                                        *
 *************************************************************************/
// $Id: lsdp.v,v 1.1.1.1 2002/05/17 06:07:47 blythe Exp $

// lsdp.v		muxes and rotators for RSP SU and VU loads and stores

`timescale 1ns / 10ps

// ---------------------------------------------------------------------------
// lsdp -- load/store datapath: muxes and rotators for RSP SU and VU loads
//         and stores.
//
// Data flow visible in this module:
//
//   CP0 read-out : cp0_source (pc when halted, else ls_data[127:96]) is
//                  handed to cp0_driver, enabled by cp0_write || halt.
//
//   To DMem      : ls_data -> 0th mux (store compaction)
//                          -> raw mux -> word rotate -> byte rotate
//                          -> sign-extend stage (dmem_to_dp_2nd)
//                          -> 3rd mux (DMA data / cp0_data injection)
//                          -> df_datain flop -> wb_datain flop.
//
//   From DMem    : dmem_dataout -> raw mux -> word rotate -> byte rotate
//                          -> sign-extend stage -> load expansion
//                          -> ls_data_driver -> ls_data_out.
//
//   DMA          : mem_write_data is paired with its registered copy to
//                  form 128 bits toward DMem; dmem_to_dp_raw is split into
//                  64-bit halves (one direct, one registered) toward
//                  dmem_rd_data.
//
// The rotators are shared by both directions (rot_amt is "EX or WB").
// All mux selects assume the controlling decode signals are mutually
// exclusive; this module does not enforce that.
//
// The 4:1 muxes use an encoded 2-bit select: s == 2'b00 picks i0, 2'b01 picks
// i1, 2'b10 picks i2, 2'b11 picks i3 (this matches the behavioural reference
// code kept in the comments below).
// ---------------------------------------------------------------------------
module lsdp (clk, reset_l, halt, pc, ex_dma_wen_swap, ex_dma_wen_noswap, 
	vu_ex_st_dec, ex_su_byte_ls, ex_su_half_ls, elem_num_3, 
	ex_mfc0, cp0_write, cp0_data, cp0_data_out, 
	rot_dp, mem_write_data, rot_amt, wb_pass_thru, 
	wb_mfc2, wb_cfc2, wb_swap_dma, wb_su_uns_ls, wb_su_load,
	dmem_dataout, vu_wb_ld_dec, ls_drive_ls, wb_dma_dm_to_rd, 
	ls_data, ls_data_out, df_datain, dmem_rd_data);

    input		clk;			// clock for every asdff below
    input		reset_l;		// passed to the DMA staging flops only
    input		halt;			// selects pc as CP0 source and enables the CP0 driver
						// IF Stage Inputs
    input	[11:2]	pc;
						// EX Stage Inputs
    input	[11:0]	vu_ex_st_dec;		// only bits [9:6] are used here
    input		ex_su_byte_ls;		// piped to WB for sign extension
    input		ex_su_half_ls;		// piped to WB for sign extension
    input		elem_num_3;		// piped EX -> DF -> WB; WB copy unused here
    input		ex_mfc0;		// injects cp0_data into the store pipe
    input		cp0_write;		// enables the CP0 driver
    input		rot_dp;			// rotator source = dp_to_dmem_0th
    input		ex_dma_wen_swap;	// DMA write to DMem, 64-bit halves swapped
    input		ex_dma_wen_noswap;	// DMA write to DMem, halves in order
    input	[63:0]	mem_write_data;		// DMA data headed for DMem
    input	[3:0]	rot_amt;		// EX or WB

						// WB Stage Inputs
    input		wb_pass_thru;		// rotator source = wb_datain
    input		wb_mfc2;
    input		wb_cfc2;
    input		wb_swap_dma;
    input		wb_su_uns_ls;		// suppresses sign extension when set
    input		wb_su_load;
    input	[127:0]	dmem_dataout;
    input	[11:0]	vu_wb_ld_dec;		// only bits [9:6] are used here
    input		ls_drive_ls;		// output enable for ls_data_out
    input		wb_dma_dm_to_rd;

    input	[127:0]	ls_data;
    output	[127:0]	ls_data_out;
    input	[31:0]	cp0_data;
    output	[31:0]	cp0_data_out;

    output	[127:0]	df_datain;		// final store data to dmem
    output 	[63:0] 	dmem_rd_data;

    // Pipeline copies of elem_num_3.  These were previously implicit nets;
    // they are declared explicitly so the module also elaborates under
    // `default_nettype none and lints cleanly.
    wire		df_elem_3;
    wire		wb_elem_3;		// not consumed inside this module

    wire		df_su_byte_ls;
    wire		df_su_half_ls;
    wire		wb_su_byte_ls;
    wire		wb_su_half_ls;
    wire	[127:0]	dma_data_to_dmem;
    wire	[127:0]	dp_to_dmem_0th;		// compaction/expansion output
    wire	[127:0]	dp_to_dmem_3rd;		// mux in dma data
    wire	[127:0]	wb_datain;	// pass-through data (MT, MF, CT, CF)
    wire	[127:0]	dmem_to_dp_raw;		// source for word rotation
    wire	[127:0]	dmem_to_dp_1st;	    	// word-rot'd dmem data to dma
    wire	[127:0]	dmem_to_dp_2nd_presxt;	// byte rotation output
    wire	[103:0]	dmem_to_dp_2nd_low;
    wire	[111:104] dmem_to_dp_2nd_mid;
    wire	[127:112] dmem_to_dp_2nd_high;
    wire	[127:0]	dmem_to_dp_2nd;		// byte rotation sign extended
    wire	[127:0]	load_data;		// load expansion output
    wire	[63:0] 	sec_rd_data;
    wire	[63:0] 	secondary_write_data;
    wire	[31:0]	cp0_source;
    wire		cp0_data_enable;

// CP0 read-out: while halted the 10-bit pc (zero-extended to 32 bits) is
// driven, otherwise the top 32 bits of ls_data.
assign cp0_source = halt ? {22'b0, pc} : ls_data[127:96];
assign cp0_data_enable = cp0_write || halt;
cp0_driver cp0_driver_ls(cp0_source, cp0_data_enable, cp0_data_out);

// Datapaths to DMem

// *** Make sure ex_mfc0 is exclusive of other controls
// *** For store_4th (vu_ex_st_dec[9]), the 16'b0 in each 32-bit word are 
// *** really don't cares.  How can this be specified in Verilog without
// *** breaking this mux down into 8 16-bit muxes?
// *** Similarly, the low order 96 bits are don't cares for mfc0.

/* ???? changed since mux has encoded selects.
* reg  [127:0] dp_to_dmem_0th_reg;
* wire [3:0] dp_to_dmem_0th_sl;    
*/
wire [127:0] dp_to_dmem_0th_reg;
wire [1:0] dp_to_dmem_0th_sl;    

assign dp_to_dmem_0th = dp_to_dmem_0th_reg;

// mutual exclusion assumed
// Encoded select for the store compaction mux:
//   vu_ex_st_dec[6] or [7] -> 2'b01 (pack / upack)
//   vu_ex_st_dec[8]        -> 2'b10 (half)
//   vu_ex_st_dec[9]        -> 2'b11 (fourth)
//   none of the above      -> 2'b00 (ls_data unchanged)
// assign dp_to_dmem_0th_sl[0] = ex_mfc0;
assign dp_to_dmem_0th_sl[0] = vu_ex_st_dec[6] || vu_ex_st_dec[7] || vu_ex_st_dec[9] ; 
assign dp_to_dmem_0th_sl[1] = vu_ex_st_dec[8] || vu_ex_st_dec[9] ;

/* ???? changed since mux has encoded selects.
* assign dp_to_dmem_0th_sl[0] = vu_ex_st_dec[6] || vu_ex_st_dec[7]; 
* assign dp_to_dmem_0th_sl[1] = vu_ex_st_dec[8];
* assign dp_to_dmem_0th_sl[2] = vu_ex_st_dec[9];
* assign dp_to_dmem_0th_sl[3] = (vu_ex_st_dec[9:6]==4'h0);
*/

/*
* ????	Changed to instantiated mux to reduce routing net therefore improve
*	post-layout timing.
*
* Behavioural reference (one-hot select form) of the mux instantiated below:
*
* always @(dp_to_dmem_0th_sl or cp0_data or ls_data)
* begin
*    dp_to_dmem_0th_reg = 128'h0;
* 
*    case (1'b1) //synopsys parallel_case full_case
* //      dp_to_dmem_0th_sl[0]: dp_to_dmem_0th_reg = {cp0_data, ls_data[95:0]};
*       dp_to_dmem_0th_sl[0]: dp_to_dmem_0th_reg =					//pack, upack
*                                     {ls_data[127:120], ls_data[111:104], ls_data[95:88],  
*        			             ls_data[79:72],   ls_data[63:56],   ls_data[47:40],
*        			             ls_data[31:24],   ls_data[15:8],
*        			             ls_data[126:119], ls_data[110:103], ls_data[94:87],
*        			             ls_data[78:71],   ls_data[62:55],   ls_data[46:39],
*        			             ls_data[30:23],   ls_data[14:7]
* 			            };
*       dp_to_dmem_0th_sl[1]: dp_to_dmem_0th_reg = {ls_data[126:0], 1'b0}; 		// half
*       dp_to_dmem_0th_sl[2]: dp_to_dmem_0th_reg =					//fourth
*                                     {ls_data[126:119], ls_data[30:23], 16'b0,
*        			       ls_data[110:103], ls_data[14:7],  16'b0,
*              			       ls_data[94:87],   ls_data[62:55], 16'b0,
*              			       ls_data[78:71],   ls_data[46:39], 16'b0};
*       dp_to_dmem_0th_sl[3]: dp_to_dmem_0th_reg = ls_data;
*       default		  : dp_to_dmem_0th_reg = 128'h0;
*    endcase
* end
*/

// Store compaction mux.
//   i0: ls_data unchanged.
//   i1: pack/upack -- upper 64 bits take bits [15:8] of each 16-bit element,
//       lower 64 bits take bits [14:7] of each 16-bit element.
//   i2: half -- ls_data shifted left by one bit.
//   i3: fourth -- bits [14:7] of selected elements, two bytes per 32-bit
//       word; the low 16 bits of each word are zero (really don't-cares).
lsdp_mux4x1_128 ls_dp_to_dmem_0th(.z(dp_to_dmem_0th_reg),
				  .s(dp_to_dmem_0th_sl),
				  .i0(ls_data),
				  .i1( {ls_data[127:120], ls_data[111:104], ls_data[95:88],  
					ls_data[79:72],   ls_data[63:56],   ls_data[47:40],
					ls_data[31:24],   ls_data[15:8],
					ls_data[126:119], ls_data[110:103], ls_data[94:87],
					ls_data[78:71],   ls_data[62:55],   ls_data[46:39],
					ls_data[30:23],   ls_data[14:7]
				       }
				     ),
				  .i2( {ls_data[126:0], 1'b0}),		// half
				  .i3( {ls_data[126:119], ls_data[30:23], 16'b0,
					ls_data[110:103], ls_data[14:7],  16'b0,
					ls_data[94:87],   ls_data[62:55], 16'b0,
					ls_data[78:71],   ls_data[46:39], 16'b0
				       }
				     )					//fourth
				 );


// Sneak path for dma to dmem to avoid collision with potential load in WB:
//assign dp_to_dmem_3rd =
//    ex_dma_wen_noswap ? dma_data_to_dmem [127:0] :
//      ex_dma_wen_swap ? {dma_data_to_dmem [63:0],dma_data_to_dmem [127:64]} :
//              ex_mfc0 ? cp0_data :
//          /* not dma */ dmem_to_dp_2nd;

// Encoded select for the final store-data mux:
//   ex_dma_wen_noswap -> 2'b01 (DMA data as is)
//   ex_dma_wen_swap   -> 2'b10 (DMA data, 64-bit halves exchanged)
//   ex_mfc0           -> 2'b11 (cp0_data in the top word, rest zero)
//   none              -> 2'b00 (rotated / sign-extended datapath value)
wire [1:0] dp_to_dmem_3rd_sl;
assign dp_to_dmem_3rd_sl[0] = ex_dma_wen_noswap || ex_mfc0;
assign dp_to_dmem_3rd_sl[1] = ex_dma_wen_swap   || ex_mfc0;

lsdp_mux4x1_128 ls_dp_to_dmem_3rd(.z(dp_to_dmem_3rd),
				 .s(dp_to_dmem_3rd_sl[1:0]),
				 .i0(dmem_to_dp_2nd),
				 .i1(dma_data_to_dmem),
				 .i2({dma_data_to_dmem [63:0],dma_data_to_dmem [127:64]}),
				 .i3({cp0_data,96'h0})
				);

// EX -> DF pipeline registers.  The last asdff port is tied to 1'b1 here,
// whereas the DMA staging flops further down connect reset_l to it.
asdff #(1, 0) vu_ed_elem3_ff (df_elem_3, elem_num_3, clk, 1'b1);
asdff #(1, 0) vu_ed_byte_ff (df_su_byte_ls, ex_su_byte_ls, clk, 1'b1);
asdff #(1, 0) vu_ed_half_ff (df_su_half_ls, ex_su_half_ls, clk, 1'b1);
asdff #(128, 0) vu_ed_datain_ff (df_datain, dp_to_dmem_3rd, clk, 1'b1);


// DF -> WB pipeline registers.
asdff #(1, 0) vu_dw_elem3_ff (wb_elem_3, df_elem_3, clk, 1'b1);
asdff #(1, 0) vu_dw_byte_ff (wb_su_byte_ls, df_su_byte_ls, clk, 1'b1);
asdff #(1, 0) vu_dw_half_ff (wb_su_half_ls, df_su_half_ls, clk, 1'b1);
asdff #(128, 0) vu_dw_dp_to_dm_ff (wb_datain, df_datain, clk, 1'b1);

/* ******************************************************************** */ 
// DMem to Datapaths

/* ???? changed since mux has encoded selects.
* reg [127:0] dmem_to_dp_raw_reg;
* wire [2:0] dmem_to_dp_raw_sl;
*/
wire [127:0] dmem_to_dp_raw_reg;
wire [1:0] dmem_to_dp_raw_sl;

assign dmem_to_dp_raw = dmem_to_dp_raw_reg;

// mutual exclusion assumed
// Encoded select for the rotator source mux:
//   rot_dp or ex_mfc0 -> 2'b01 (dp_to_dmem_0th, i.e. store-side data)
//   wb_pass_thru      -> 2'b10 (wb_datain, pass-through data)
//   neither           -> 2'b00 (dmem_dataout, i.e. load data)
//   both              -> 2'b11 (all zeros)
assign dmem_to_dp_raw_sl[0] = rot_dp || ex_mfc0;
assign dmem_to_dp_raw_sl[1] = wb_pass_thru;

/* ???? changed since mux has encoded selects.
* assign dmem_to_dp_raw_sl[2] = (dmem_to_dp_raw_sl[1:0]==2'b00);
*/

/*
* ????	Changed to instantiated mux to reduce routing net therefore improve
*	post-layout timing.
*
* Behavioural reference (one-hot select form) of the mux instantiated below:
*
* always @(dmem_to_dp_raw_sl or dp_to_dmem_0th or wb_datain or dmem_dataout)
* begin
*   dmem_to_dp_raw_reg = 128'h0;
*   case(1'b1) //synopsys parallel_case full_case
*      dmem_to_dp_raw_sl[0]: dmem_to_dp_raw_reg = dp_to_dmem_0th;
*      dmem_to_dp_raw_sl[1]: dmem_to_dp_raw_reg = wb_datain;
*      dmem_to_dp_raw_sl[2]: dmem_to_dp_raw_reg = dmem_dataout;
*      default	         : dmem_to_dp_raw_reg = 128'h0;
*   endcase
* end
*/

lsdp_mux4x1_128 ls_dmem_to_dp_raw(.z(dmem_to_dp_raw_reg),
				 .s(dmem_to_dp_raw_sl),
				 .i0(dmem_dataout),
				 .i1(dp_to_dmem_0th),
				 .i2(wb_datain),
				 .i3(128'h0)
				);



/***************************************************************/

// Two-stage rotator.  Stage 1 rotates right by rot_amt[3:2] 32-bit words
// (the low word wraps to the top); stage 2 rotates right by rot_amt[1:0]
// bytes.  Together: rotate right by rot_amt bytes.
lsdp_mux4x1_128 ls_dmem_to_dp_1st(.z(dmem_to_dp_1st),
				 .s(rot_amt[3:2]),
				 .i0(dmem_to_dp_raw),
				 .i1({dmem_to_dp_raw[31:0], dmem_to_dp_raw[127:32]}),
				 .i2({dmem_to_dp_raw[63:0], dmem_to_dp_raw[127:64]}),
				 .i3({dmem_to_dp_raw[95:0], dmem_to_dp_raw[127:96]})
				);
lsdp_mux4x1_128 ls_dmem_to_dp_2nd_presxt(.z(dmem_to_dp_2nd_presxt),
				        .s(rot_amt[1:0]),
				        .i0(dmem_to_dp_1st),
				        .i1({dmem_to_dp_1st[7:0],dmem_to_dp_1st[127:8]}),
				        .i2({dmem_to_dp_1st[15:0],dmem_to_dp_1st[127:16]}),
				        .i3({dmem_to_dp_1st[23:0],dmem_to_dp_1st[127:24]})
				       );
/***************************************************************/

/* ???? changed since mux has encoded selects.
* reg [15:0] dmem_to_dp_2nd_high_reg;
* wire [2:0] dmem_to_dp_2nd_high_sl;
*/
wire [15:0] dmem_to_dp_2nd_high_reg;
wire [1:0] dmem_to_dp_2nd_high_sl;

assign dmem_to_dp_2nd_high = dmem_to_dp_2nd_high_reg;

// mutual exclusion assumed
// Encoded select for bits [127:112] of the sign-extended result:
//   SU half load, cfc2 or mfc2 -> 2'b01 (replicate bit 111)
//   SU byte load               -> 2'b10 (replicate bit 103)
//   none                       -> 2'b00 (rotator output unchanged)
//   both                       -> 2'b11 (zeros)
assign dmem_to_dp_2nd_high_sl[0] = (wb_su_load && wb_su_half_ls) || wb_cfc2 || wb_mfc2;
assign dmem_to_dp_2nd_high_sl[1] = wb_su_load && wb_su_byte_ls;
/* ???? changed since mux has encoded selects.
* assign dmem_to_dp_2nd_high_sl[0] = (wb_su_load && wb_su_half_ls) || wb_cfc2 || wb_mfc2;
* assign dmem_to_dp_2nd_high_sl[1] = wb_su_load && wb_su_byte_ls;
* assign dmem_to_dp_2nd_high_sl[2] = !dmem_to_dp_2nd_high_sl[0] && !dmem_to_dp_2nd_high_sl[1];
*/

/*
* ????	Changed to instantiated mux to reduce routing net therefore improve
*	post-layout timing.
*
* Behavioural reference (one-hot select form) of the mux instantiated below:
*
* always @(dmem_to_dp_2nd_high_sl or dmem_to_dp_2nd_presxt or wb_su_uns_ls)
* begin
*   dmem_to_dp_2nd_high_reg = 16'h0;
*   case (1'b1) //synopsys parallel_case full_case
*    dmem_to_dp_2nd_high_sl[0]: dmem_to_dp_2nd_high_reg = {16{(dmem_to_dp_2nd_presxt[111] && !wb_su_uns_ls)}}; 
*    dmem_to_dp_2nd_high_sl[1]: dmem_to_dp_2nd_high_reg = {16{(dmem_to_dp_2nd_presxt[103] && !wb_su_uns_ls)}};
*    dmem_to_dp_2nd_high_sl[2]: dmem_to_dp_2nd_high_reg = dmem_to_dp_2nd_presxt[127:112];
*    default		    : dmem_to_dp_2nd_high_reg = 16'h0;
*   endcase
* end
* 
*/

// The replicated sign bit is forced to 0 when wb_su_uns_ls is set, which
// turns the extension into a zero extension.
lsdp_mux4x1_16 ls_dmem_to_dp_2nd_high (	.z(dmem_to_dp_2nd_high_reg),
					.s(dmem_to_dp_2nd_high_sl),
					.i0(dmem_to_dp_2nd_presxt[127:112]),
					.i1({16{(dmem_to_dp_2nd_presxt[111] && !wb_su_uns_ls)}}),
					.i2({16{(dmem_to_dp_2nd_presxt[103] && !wb_su_uns_ls)}}),
					.i3(16'h0)
				      );


/***************************************************************/

// Bits [111:104] are overwritten with the (gated) sign of bit 103 only for
// SU byte loads; bits [103:0] always pass through unchanged.
assign dmem_to_dp_2nd_mid = 
   (wb_su_load && wb_su_byte_ls) ? 
		{8{(dmem_to_dp_2nd_presxt[103] && !wb_su_uns_ls)}} :  
		dmem_to_dp_2nd_presxt[111:104];

assign dmem_to_dp_2nd_low = dmem_to_dp_2nd_presxt[103:0];

assign dmem_to_dp_2nd = 
	{dmem_to_dp_2nd_high, dmem_to_dp_2nd_mid, dmem_to_dp_2nd_low};

reg [127:0] load_data_reg;
wire [4:0] load_data_sl;

assign load_data = load_data_reg;

// mutual exclusion assumed
// One-hot select for the load expansion; bit 4 is the "no expansion" case.
assign load_data_sl[0] = vu_wb_ld_dec[6];
assign load_data_sl[1] = vu_wb_ld_dec[7];
assign load_data_sl[2] = vu_wb_ld_dec[8];
assign load_data_sl[3] = vu_wb_ld_dec[9];
assign load_data_sl[4] = (vu_wb_ld_dec[9:6]==4'h0);

// Load expansion: spreads bytes of dmem_to_dp_2nd into eight 16-bit lanes.
// The "unsigned pack", "half" and "fourth" forms prepend a single zero bit,
// so each byte lands one bit lower in its lane than in the "pack" form.
// If more than one of vu_wb_ld_dec[9:6] is set, the synthesis pragmas make
// the result tool-dependent; in simulation the first matching item wins.
always @(load_data_sl or dmem_to_dp_2nd)
begin

  load_data_reg = 128'h0;
  case (1'b1) //synopsys parallel_case full_case
   load_data_sl[0]: load_data_reg =				// pack 
		{dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[119:112], 8'b0,
       		dmem_to_dp_2nd[111:104], 8'b0, dmem_to_dp_2nd[103:96],  8'b0,
       		dmem_to_dp_2nd[95:88],   8'b0, dmem_to_dp_2nd[87:80],   8'b0,
       		dmem_to_dp_2nd[79:72],   8'b0, dmem_to_dp_2nd[71:64],   8'b0};
   load_data_sl[1]: load_data_reg = {1'b0,                     // unsigned pack
       		dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[119:112], 8'b0,
       		dmem_to_dp_2nd[111:104], 8'b0, dmem_to_dp_2nd[103:96],  8'b0,
       		dmem_to_dp_2nd[95:88],   8'b0, dmem_to_dp_2nd[87:80],   8'b0,
       		dmem_to_dp_2nd[79:72],   8'b0, dmem_to_dp_2nd[71:64],   7'b0}; 
   load_data_sl[2]: load_data_reg = {1'b0,                     // half
       		dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[111:104], 8'b0,
       		dmem_to_dp_2nd[95:88],   8'b0, dmem_to_dp_2nd[79:72],   8'b0,
       		dmem_to_dp_2nd[63:56],   8'b0, dmem_to_dp_2nd[47:40],   8'b0,
           	dmem_to_dp_2nd[31:24],   8'b0, dmem_to_dp_2nd[15:8],    7'b0};
   load_data_sl[3]: load_data_reg = {1'b0,                     // fourth
       		dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[95:88], 8'b0,
       		dmem_to_dp_2nd[63:56],   8'b0, dmem_to_dp_2nd[31:24], 8'b0,
       		dmem_to_dp_2nd[63:56],   8'b0, dmem_to_dp_2nd[31:24], 8'b0,
       		dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[95:88], 7'b0};
   load_data_sl[4]: load_data_reg = dmem_to_dp_2nd;
   default	  : load_data_reg = 128'h0;
  endcase	    
end 


// Drives load_data onto ls_data_out while ls_drive_ls is asserted.
ls_data_driver ls_data_driver (
	.in(load_data), 
	.out(ls_data_out), 
	.enable(ls_drive_ls));

// DMA DMem to RDRAM
// While wb_dma_dm_to_rd is high one 64-bit half of dmem_to_dp_raw goes out
// directly (the upper half unless wb_swap_dma is set).  The other half is
// captured in dma_sec_rd_ff on every clock and is what dmem_rd_data shows
// whenever wb_dma_dm_to_rd is low.
wire [63:0] sec_rd_data_in;
assign sec_rd_data_in = 
	!wb_swap_dma ? dmem_to_dp_raw[63:0] : dmem_to_dp_raw[127:64];
asdff #(64,0) dma_sec_rd_ff (sec_rd_data,sec_rd_data_in,clk,reset_l);
assign dmem_rd_data = 
	wb_dma_dm_to_rd && !wb_swap_dma ? dmem_to_dp_raw[127:64] :
	wb_dma_dm_to_rd && wb_swap_dma  ? dmem_to_dp_raw[63:0] :
					 sec_rd_data;

// DMA RDRAM to DMem
// The registered copy of mem_write_data (previous clock) forms the upper
// 64 bits and the current mem_write_data the lower 64 bits.
asdff #(64,0) dma_sec_wr_ff (secondary_write_data, mem_write_data,clk,reset_l);
assign dma_data_to_dmem = {secondary_write_data, mem_write_data};

endmodule

// ---------------------------------------------------------------------------
// ls_data_driver -- output driver for the 128-bit load/store data bus.
//
//   in      : data to be driven.
//   enable  : output enable; fanned out through a two-level buffer tree.
//   out     : bus being driven (declared inout; this module only drives it
//             through the nt01d5 cells and attaches one rp01d1 per bit).
//
// Structure:
//   * enable passes through in01d5 "ib" and then one of six in01d5 cells
//     (i0..i5), giving six copies enable0..enable5.  NOTE(review): in01d5
//     looks like an inverter (output pin "zn"), which would make
//     enable0..5 the same polarity as enable -- confirm in the cell library.
//   * One nt01d5 per bit connects in[n] to out[n] under control of its
//     enable copy.  Bits 0-19 use enable0, 20-39 enable1, 40-59 enable2,
//     60-79 enable3, 80-99 enable4 and 100-127 enable5.
//   * One rp01d1 "repeater" per bit is attached to out[n].
//     NOTE(review): presumably a bus holder/repeater cell -- confirm.
//
// The enable-tree nets used to be implicit (their declaration was commented
// out).  They are declared explicitly so the module elaborates under
// `default_nettype none and lints cleanly; connectivity is unchanged.
// ---------------------------------------------------------------------------
module ls_data_driver ( in, out, enable);

input  [127:0] in;
input          enable;
inout  [127:0] out;

// enable buffer tree
wire enablen, enable0, enable1, enable2, enable3, enable4, enable5;

  in01d5 ib(.i(enable),  .zn(enablen));

  in01d5 i0(.i(enablen), .zn(enable0));
  in01d5 i1(.i(enablen), .zn(enable1));
  in01d5 i2(.i(enablen), .zn(enable2));
  in01d5 i3(.i(enablen), .zn(enable3));
  in01d5 i4(.i(enablen), .zn(enable4));
  in01d5 i5(.i(enablen), .zn(enable5));

// output buffers
  // bits 0-19: enable0
  nt01d5 b0 (.i(in[0]), .z(out[0]), .oe(enable0));
  nt01d5 b1 (.i(in[1]), .z(out[1]), .oe(enable0));
  nt01d5 b2 (.i(in[2]), .z(out[2]), .oe(enable0));
  nt01d5 b3 (.i(in[3]), .z(out[3]), .oe(enable0));
  nt01d5 b4 (.i(in[4]), .z(out[4]), .oe(enable0));
  nt01d5 b5 (.i(in[5]), .z(out[5]), .oe(enable0));
  nt01d5 b6 (.i(in[6]), .z(out[6]), .oe(enable0));
  nt01d5 b7 (.i(in[7]), .z(out[7]), .oe(enable0));
  nt01d5 b8 (.i(in[8]), .z(out[8]), .oe(enable0));
  nt01d5 b9 (.i(in[9]), .z(out[9]), .oe(enable0));
  nt01d5 b10 (.i(in[10]), .z(out[10]), .oe(enable0));
  nt01d5 b11 (.i(in[11]), .z(out[11]), .oe(enable0));
  nt01d5 b12 (.i(in[12]), .z(out[12]), .oe(enable0));
  nt01d5 b13 (.i(in[13]), .z(out[13]), .oe(enable0));
  nt01d5 b14 (.i(in[14]), .z(out[14]), .oe(enable0));
  nt01d5 b15 (.i(in[15]), .z(out[15]), .oe(enable0));
  nt01d5 b16 (.i(in[16]), .z(out[16]), .oe(enable0));
  nt01d5 b17 (.i(in[17]), .z(out[17]), .oe(enable0));
  nt01d5 b18 (.i(in[18]), .z(out[18]), .oe(enable0));
  nt01d5 b19 (.i(in[19]), .z(out[19]), .oe(enable0));
  // bits 20-39: enable1
  nt01d5 b20 (.i(in[20]), .z(out[20]), .oe(enable1));
  nt01d5 b21 (.i(in[21]), .z(out[21]), .oe(enable1));
  nt01d5 b22 (.i(in[22]), .z(out[22]), .oe(enable1));
  nt01d5 b23 (.i(in[23]), .z(out[23]), .oe(enable1));
  nt01d5 b24 (.i(in[24]), .z(out[24]), .oe(enable1));
  nt01d5 b25 (.i(in[25]), .z(out[25]), .oe(enable1));
  nt01d5 b26 (.i(in[26]), .z(out[26]), .oe(enable1));
  nt01d5 b27 (.i(in[27]), .z(out[27]), .oe(enable1));
  nt01d5 b28 (.i(in[28]), .z(out[28]), .oe(enable1));
  nt01d5 b29 (.i(in[29]), .z(out[29]), .oe(enable1));
  nt01d5 b30 (.i(in[30]), .z(out[30]), .oe(enable1));
  nt01d5 b31 (.i(in[31]), .z(out[31]), .oe(enable1));
  nt01d5 b32 (.i(in[32]), .z(out[32]), .oe(enable1));
  nt01d5 b33 (.i(in[33]), .z(out[33]), .oe(enable1));
  nt01d5 b34 (.i(in[34]), .z(out[34]), .oe(enable1));
  nt01d5 b35 (.i(in[35]), .z(out[35]), .oe(enable1));
  nt01d5 b36 (.i(in[36]), .z(out[36]), .oe(enable1));
  nt01d5 b37 (.i(in[37]), .z(out[37]), .oe(enable1));
  nt01d5 b38 (.i(in[38]), .z(out[38]), .oe(enable1));
  nt01d5 b39 (.i(in[39]), .z(out[39]), .oe(enable1));
  // bits 40-59: enable2
  nt01d5 b40 (.i(in[40]), .z(out[40]), .oe(enable2));
  nt01d5 b41 (.i(in[41]), .z(out[41]), .oe(enable2));
  nt01d5 b42 (.i(in[42]), .z(out[42]), .oe(enable2));
  nt01d5 b43 (.i(in[43]), .z(out[43]), .oe(enable2));
  nt01d5 b44 (.i(in[44]), .z(out[44]), .oe(enable2));
  nt01d5 b45 (.i(in[45]), .z(out[45]), .oe(enable2));
  nt01d5 b46 (.i(in[46]), .z(out[46]), .oe(enable2));
  nt01d5 b47 (.i(in[47]), .z(out[47]), .oe(enable2));
  nt01d5 b48 (.i(in[48]), .z(out[48]), .oe(enable2));
  nt01d5 b49 (.i(in[49]), .z(out[49]), .oe(enable2));
  nt01d5 b50 (.i(in[50]), .z(out[50]), .oe(enable2));
  nt01d5 b51 (.i(in[51]), .z(out[51]), .oe(enable2));
  nt01d5 b52 (.i(in[52]), .z(out[52]), .oe(enable2));
  nt01d5 b53 (.i(in[53]), .z(out[53]), .oe(enable2));
  nt01d5 b54 (.i(in[54]), .z(out[54]), .oe(enable2));
  nt01d5 b55 (.i(in[55]), .z(out[55]), .oe(enable2));
  nt01d5 b56 (.i(in[56]), .z(out[56]), .oe(enable2));
  nt01d5 b57 (.i(in[57]), .z(out[57]), .oe(enable2));
  nt01d5 b58 (.i(in[58]), .z(out[58]), .oe(enable2));
  nt01d5 b59 (.i(in[59]), .z(out[59]), .oe(enable2));
  // bits 60-79: enable3
  nt01d5 b60 (.i(in[60]), .z(out[60]), .oe(enable3));
  nt01d5 b61 (.i(in[61]), .z(out[61]), .oe(enable3));
  nt01d5 b62 (.i(in[62]), .z(out[62]), .oe(enable3));
  nt01d5 b63 (.i(in[63]), .z(out[63]), .oe(enable3));
  nt01d5 b64 (.i(in[64]), .z(out[64]), .oe(enable3));
  nt01d5 b65 (.i(in[65]), .z(out[65]), .oe(enable3));
  nt01d5 b66 (.i(in[66]), .z(out[66]), .oe(enable3));
  nt01d5 b67 (.i(in[67]), .z(out[67]), .oe(enable3));
  nt01d5 b68 (.i(in[68]), .z(out[68]), .oe(enable3));
  nt01d5 b69 (.i(in[69]), .z(out[69]), .oe(enable3));
  nt01d5 b70 (.i(in[70]), .z(out[70]), .oe(enable3));
  nt01d5 b71 (.i(in[71]), .z(out[71]), .oe(enable3));
  nt01d5 b72 (.i(in[72]), .z(out[72]), .oe(enable3));
  nt01d5 b73 (.i(in[73]), .z(out[73]), .oe(enable3));
  nt01d5 b74 (.i(in[74]), .z(out[74]), .oe(enable3));
  nt01d5 b75 (.i(in[75]), .z(out[75]), .oe(enable3));
  nt01d5 b76 (.i(in[76]), .z(out[76]), .oe(enable3));
  nt01d5 b77 (.i(in[77]), .z(out[77]), .oe(enable3));
  nt01d5 b78 (.i(in[78]), .z(out[78]), .oe(enable3));
  nt01d5 b79 (.i(in[79]), .z(out[79]), .oe(enable3));
  // bits 80-99: enable4
  nt01d5 b80 (.i(in[80]), .z(out[80]), .oe(enable4));
  nt01d5 b81 (.i(in[81]), .z(out[81]), .oe(enable4));
  nt01d5 b82 (.i(in[82]), .z(out[82]), .oe(enable4));
  nt01d5 b83 (.i(in[83]), .z(out[83]), .oe(enable4));
  nt01d5 b84 (.i(in[84]), .z(out[84]), .oe(enable4));
  nt01d5 b85 (.i(in[85]), .z(out[85]), .oe(enable4));
  nt01d5 b86 (.i(in[86]), .z(out[86]), .oe(enable4));
  nt01d5 b87 (.i(in[87]), .z(out[87]), .oe(enable4));
  nt01d5 b88 (.i(in[88]), .z(out[88]), .oe(enable4));
  nt01d5 b89 (.i(in[89]), .z(out[89]), .oe(enable4));
  nt01d5 b90 (.i(in[90]), .z(out[90]), .oe(enable4));
  nt01d5 b91 (.i(in[91]), .z(out[91]), .oe(enable4));
  nt01d5 b92 (.i(in[92]), .z(out[92]), .oe(enable4));
  nt01d5 b93 (.i(in[93]), .z(out[93]), .oe(enable4));
  nt01d5 b94 (.i(in[94]), .z(out[94]), .oe(enable4));
  nt01d5 b95 (.i(in[95]), .z(out[95]), .oe(enable4));
  nt01d5 b96 (.i(in[96]), .z(out[96]), .oe(enable4));
  nt01d5 b97 (.i(in[97]), .z(out[97]), .oe(enable4));
  nt01d5 b98 (.i(in[98]), .z(out[98]), .oe(enable4));
  nt01d5 b99 (.i(in[99]), .z(out[99]), .oe(enable4));
  // bits 100-127: enable5
  nt01d5 b100 (.i(in[100]), .z(out[100]), .oe(enable5));
  nt01d5 b101 (.i(in[101]), .z(out[101]), .oe(enable5));
  nt01d5 b102 (.i(in[102]), .z(out[102]), .oe(enable5));
  nt01d5 b103 (.i(in[103]), .z(out[103]), .oe(enable5));
  nt01d5 b104 (.i(in[104]), .z(out[104]), .oe(enable5));
  nt01d5 b105 (.i(in[105]), .z(out[105]), .oe(enable5));
  nt01d5 b106 (.i(in[106]), .z(out[106]), .oe(enable5));
  nt01d5 b107 (.i(in[107]), .z(out[107]), .oe(enable5));
  nt01d5 b108 (.i(in[108]), .z(out[108]), .oe(enable5));
  nt01d5 b109 (.i(in[109]), .z(out[109]), .oe(enable5));
  nt01d5 b110 (.i(in[110]), .z(out[110]), .oe(enable5));
  nt01d5 b111 (.i(in[111]), .z(out[111]), .oe(enable5));
  nt01d5 b112 (.i(in[112]), .z(out[112]), .oe(enable5));
  nt01d5 b113 (.i(in[113]), .z(out[113]), .oe(enable5));
  nt01d5 b114 (.i(in[114]), .z(out[114]), .oe(enable5));
  nt01d5 b115 (.i(in[115]), .z(out[115]), .oe(enable5));
  nt01d5 b116 (.i(in[116]), .z(out[116]), .oe(enable5));
  nt01d5 b117 (.i(in[117]), .z(out[117]), .oe(enable5));
  nt01d5 b118 (.i(in[118]), .z(out[118]), .oe(enable5));
  nt01d5 b119 (.i(in[119]), .z(out[119]), .oe(enable5));
  nt01d5 b120 (.i(in[120]), .z(out[120]), .oe(enable5));
  nt01d5 b121 (.i(in[121]), .z(out[121]), .oe(enable5));
  nt01d5 b122 (.i(in[122]), .z(out[122]), .oe(enable5));
  nt01d5 b123 (.i(in[123]), .z(out[123]), .oe(enable5));
  nt01d5 b124 (.i(in[124]), .z(out[124]), .oe(enable5));
  nt01d5 b125 (.i(in[125]), .z(out[125]), .oe(enable5));
  nt01d5 b126 (.i(in[126]), .z(out[126]), .oe(enable5));
  nt01d5 b127 (.i(in[127]), .z(out[127]), .oe(enable5));

// repeaters

  rp01d1 r0 (.z(out[0]));
  rp01d1 r1 (.z(out[1]));
  rp01d1 r2 (.z(out[2]));
  rp01d1 r3 (.z(out[3]));
  rp01d1 r4 (.z(out[4]));
  rp01d1 r5 (.z(out[5]));
  rp01d1 r6 (.z(out[6]));
  rp01d1 r7 (.z(out[7]));
  rp01d1 r8 (.z(out[8]));
  rp01d1 r9 (.z(out[9]));
  rp01d1 r10 (.z(out[10]));
  rp01d1 r11 (.z(out[11]));
  rp01d1 r12 (.z(out[12]));
  rp01d1 r13 (.z(out[13]));
  rp01d1 r14 (.z(out[14]));
  rp01d1 r15 (.z(out[15]));
  rp01d1 r16 (.z(out[16]));
  rp01d1 r17 (.z(out[17]));
  rp01d1 r18 (.z(out[18]));
  rp01d1 r19 (.z(out[19]));
  rp01d1 r20 (.z(out[20]));
  rp01d1 r21 (.z(out[21]));
  rp01d1 r22 (.z(out[22]));
  rp01d1 r23 (.z(out[23]));
  rp01d1 r24 (.z(out[24]));
  rp01d1 r25 (.z(out[25]));
  rp01d1 r26 (.z(out[26]));
  rp01d1 r27 (.z(out[27]));
  rp01d1 r28 (.z(out[28]));
  rp01d1 r29 (.z(out[29]));
  rp01d1 r30 (.z(out[30]));
  rp01d1 r31 (.z(out[31]));
  rp01d1 r32 (.z(out[32]));
  rp01d1 r33 (.z(out[33]));
  rp01d1 r34 (.z(out[34]));
  rp01d1 r35 (.z(out[35]));
  rp01d1 r36 (.z(out[36]));
  rp01d1 r37 (.z(out[37]));
  rp01d1 r38 (.z(out[38]));
  rp01d1 r39 (.z(out[39]));
  rp01d1 r40 (.z(out[40]));
  rp01d1 r41 (.z(out[41]));
  rp01d1 r42 (.z(out[42]));
  rp01d1 r43 (.z(out[43]));
  rp01d1 r44 (.z(out[44]));
  rp01d1 r45 (.z(out[45]));
  rp01d1 r46 (.z(out[46]));
  rp01d1 r47 (.z(out[47]));
  rp01d1 r48 (.z(out[48]));
  rp01d1 r49 (.z(out[49]));
  rp01d1 r50 (.z(out[50]));
  rp01d1 r51 (.z(out[51]));
  rp01d1 r52 (.z(out[52]));
  rp01d1 r53 (.z(out[53]));
  rp01d1 r54 (.z(out[54]));
  rp01d1 r55 (.z(out[55]));
  rp01d1 r56 (.z(out[56]));
  rp01d1 r57 (.z(out[57]));
  rp01d1 r58 (.z(out[58]));
  rp01d1 r59 (.z(out[59]));
  rp01d1 r60 (.z(out[60]));
  rp01d1 r61 (.z(out[61]));
  rp01d1 r62 (.z(out[62]));
  rp01d1 r63 (.z(out[63]));
  rp01d1 r64 (.z(out[64]));
  rp01d1 r65 (.z(out[65]));
  rp01d1 r66 (.z(out[66]));
  rp01d1 r67 (.z(out[67]));
  rp01d1 r68 (.z(out[68]));
  rp01d1 r69 (.z(out[69]));
  rp01d1 r70 (.z(out[70]));
  rp01d1 r71 (.z(out[71]));
  rp01d1 r72 (.z(out[72]));
  rp01d1 r73 (.z(out[73]));
  rp01d1 r74 (.z(out[74]));
  rp01d1 r75 (.z(out[75]));
  rp01d1 r76 (.z(out[76]));
  rp01d1 r77 (.z(out[77]));
  rp01d1 r78 (.z(out[78]));
  rp01d1 r79 (.z(out[79]));
  rp01d1 r80 (.z(out[80]));
  rp01d1 r81 (.z(out[81]));
  rp01d1 r82 (.z(out[82]));
  rp01d1 r83 (.z(out[83]));
  rp01d1 r84 (.z(out[84]));
  rp01d1 r85 (.z(out[85]));
  rp01d1 r86 (.z(out[86]));
  rp01d1 r87 (.z(out[87]));
  rp01d1 r88 (.z(out[88]));
  rp01d1 r89 (.z(out[89]));
  rp01d1 r90 (.z(out[90]));
  rp01d1 r91 (.z(out[91]));
  rp01d1 r92 (.z(out[92]));
  rp01d1 r93 (.z(out[93]));
  rp01d1 r94 (.z(out[94]));
  rp01d1 r95 (.z(out[95]));
  rp01d1 r96 (.z(out[96]));
  rp01d1 r97 (.z(out[97]));
  rp01d1 r98 (.z(out[98]));
  rp01d1 r99 (.z(out[99]));
  rp01d1 r100 (.z(out[100]));
  rp01d1 r101 (.z(out[101]));
  rp01d1 r102 (.z(out[102]));
  rp01d1 r103 (.z(out[103]));
  rp01d1 r104 (.z(out[104]));
  rp01d1 r105 (.z(out[105]));
  rp01d1 r106 (.z(out[106]));
  rp01d1 r107 (.z(out[107]));
  rp01d1 r108 (.z(out[108]));
  rp01d1 r109 (.z(out[109]));
  rp01d1 r110 (.z(out[110]));
  rp01d1 r111 (.z(out[111]));
  rp01d1 r112 (.z(out[112]));
  rp01d1 r113 (.z(out[113]));
  rp01d1 r114 (.z(out[114]));
  rp01d1 r115 (.z(out[115]));
  rp01d1 r116 (.z(out[116]));
  rp01d1 r117 (.z(out[117]));
  rp01d1 r118 (.z(out[118]));
  rp01d1 r119 (.z(out[119]));
  rp01d1 r120 (.z(out[120]));
  rp01d1 r121 (.z(out[121]));
  rp01d1 r122 (.z(out[122]));
  rp01d1 r123 (.z(out[123]));
  rp01d1 r124 (.z(out[124]));
  rp01d1 r125 (.z(out[125]));
  rp01d1 r126 (.z(out[126]));
  rp01d1 r127 (.z(out[127]));

endmodule


// ---------------------------------------------------------------------------
// lsdp_mux4x1_16 -- 16-bit wide 4-to-1 mux, one mx41d2 cell per bit.
//
//   z          : selected data.
//   i0..i3     : data inputs.
//   s          : encoded select, wired to the s1/s0 pins of every mx41d2.
//
// Each select bit is buffered twice through ni01d5 cells: one copy feeds the
// cells for bits 7:0, the other the cells for bits 15:8.
// ---------------------------------------------------------------------------
module lsdp_mux4x1_16(z, i0, i1, i2, i3, s);
output [15:0] z;
input  [15:0] i0, i1, i2, i3;
input  [1:0]  s;

wire sel0_lo, sel1_lo;		// select copies for bits 7:0
wire sel0_hi, sel1_hi;		// select copies for bits 15:8

// select buffers
ni01d5 u_s0x(.i(s[0]), .z(sel0_lo));
ni01d5 u_s1x(.i(s[1]), .z(sel1_lo));
ni01d5 u_s0y(.i(s[0]), .z(sel0_hi));
ni01d5 u_s1y(.i(s[1]), .z(sel1_hi));

// low byte
mx41d2 u_00(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[0]), .i1(i1[0]), .i2(i2[0]), .i3(i3[0]), .z(z[0]));
mx41d2 u_01(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[1]), .i1(i1[1]), .i2(i2[1]), .i3(i3[1]), .z(z[1]));
mx41d2 u_02(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[2]), .i1(i1[2]), .i2(i2[2]), .i3(i3[2]), .z(z[2]));
mx41d2 u_03(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[3]), .i1(i1[3]), .i2(i2[3]), .i3(i3[3]), .z(z[3]));
mx41d2 u_04(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[4]), .i1(i1[4]), .i2(i2[4]), .i3(i3[4]), .z(z[4]));
mx41d2 u_05(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[5]), .i1(i1[5]), .i2(i2[5]), .i3(i3[5]), .z(z[5]));
mx41d2 u_06(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[6]), .i1(i1[6]), .i2(i2[6]), .i3(i3[6]), .z(z[6]));
mx41d2 u_07(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[7]), .i1(i1[7]), .i2(i2[7]), .i3(i3[7]), .z(z[7]));

// high byte
mx41d2 u_08(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[8]),  .i1(i1[8]),  .i2(i2[8]),  .i3(i3[8]),  .z(z[8]));
mx41d2 u_09(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[9]),  .i1(i1[9]),  .i2(i2[9]),  .i3(i3[9]),  .z(z[9]));
mx41d2 u_10(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[10]), .i1(i1[10]), .i2(i2[10]), .i3(i3[10]), .z(z[10]));
mx41d2 u_11(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[11]), .i1(i1[11]), .i2(i2[11]), .i3(i3[11]), .z(z[11]));
mx41d2 u_12(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[12]), .i1(i1[12]), .i2(i2[12]), .i3(i3[12]), .z(z[12]));
mx41d2 u_13(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[13]), .i1(i1[13]), .i2(i2[13]), .i3(i3[13]), .z(z[13]));
mx41d2 u_14(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[14]), .i1(i1[14]), .i2(i2[14]), .i3(i3[14]), .z(z[14]));
mx41d2 u_15(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[15]), .i1(i1[15]), .i2(i2[15]), .i3(i3[15]), .z(z[15]));

endmodule

// ---------------------------------------------------------------------------
// lsdp_mux4x1_128 -- 128-bit wide 4-to-1 mux assembled from eight
// lsdp_mux4x1_16 slices.
//
//   z          : selected data.
//   i0..i3     : data inputs.
//   s          : encoded select; buffered once here (ni01d5) and the buffered
//                copy is shared by all eight slices.
// ---------------------------------------------------------------------------
module lsdp_mux4x1_128(z, i0, i1, i2, i3, s);
output [127:0] z;
input  [127:0] i0, i1, i2, i3;
input  [1:0]   s;

wire [1:0] sel_buf;		// buffered select shared by every slice

ni01d5 u_sx0(.i(s[0]), .z(sel_buf[0]));
ni01d5 u_sx1(.i(s[1]), .z(sel_buf[1]));

// slice u_N covers bits [16*N-1 : 16*(N-1)]
lsdp_mux4x1_16 u_1(.s (sel_buf),
                   .i0(i0[15:0]),    .i1(i1[15:0]),
                   .i2(i2[15:0]),    .i3(i3[15:0]),
                   .z (z[15:0]));
lsdp_mux4x1_16 u_2(.s (sel_buf),
                   .i0(i0[31:16]),   .i1(i1[31:16]),
                   .i2(i2[31:16]),   .i3(i3[31:16]),
                   .z (z[31:16]));
lsdp_mux4x1_16 u_3(.s (sel_buf),
                   .i0(i0[47:32]),   .i1(i1[47:32]),
                   .i2(i2[47:32]),   .i3(i3[47:32]),
                   .z (z[47:32]));
lsdp_mux4x1_16 u_4(.s (sel_buf),
                   .i0(i0[63:48]),   .i1(i1[63:48]),
                   .i2(i2[63:48]),   .i3(i3[63:48]),
                   .z (z[63:48]));
lsdp_mux4x1_16 u_5(.s (sel_buf),
                   .i0(i0[79:64]),   .i1(i1[79:64]),
                   .i2(i2[79:64]),   .i3(i3[79:64]),
                   .z (z[79:64]));
lsdp_mux4x1_16 u_6(.s (sel_buf),
                   .i0(i0[95:80]),   .i1(i1[95:80]),
                   .i2(i2[95:80]),   .i3(i3[95:80]),
                   .z (z[95:80]));
lsdp_mux4x1_16 u_7(.s (sel_buf),
                   .i0(i0[111:96]),  .i1(i1[111:96]),
                   .i2(i2[111:96]),  .i3(i3[111:96]),
                   .z (z[111:96]));
lsdp_mux4x1_16 u_8(.s (sel_buf),
                   .i0(i0[127:112]), .i1(i1[127:112]),
                   .i2(i2[127:112]), .i3(i3[127:112]),
                   .z (z[127:112]));
endmodule