/**************************************************************************
 *                                                                        *
 *               Copyright (C) 1994, Silicon Graphics, Inc.               *
 *                                                                        *
 *  These coded instructions, statements, and computer programs  contain  *
 *  unpublished  proprietary  information of Silicon Graphics, Inc., and  *
 *  are protected by Federal copyright  law.  They  may not be disclosed  *
 *  to  third  parties  or copied or duplicated in any form, in whole or  *
 *  in part, without the prior written consent of Silicon Graphics, Inc.  *
 *                                                                        *
 *************************************************************************/
// $Id: lsctl.v,v 1.3 2002/11/17 19:55:47 berndt Exp $

// lsctl.v		control section for the RSP load/store unit,
//			generating addresses and write enables.

`timescale 1ns / 10ps

module lsctl (clk, reset_l, rd_base, ls_drive_rd_base, ls_base, 
	rd_offset, rd_elem_num,
	address, df_ls_drive_ls_in_wb, df_pass_thru, 
	su_ex_store, su_ex_load, vu_ex_store, vu_ex_load, 
	ex_mtc2, ex_mfc2, ex_cfc2, 
	vu_rd_ld_dec_k, vu_rd_st_dec_k, vu_ex_st_dec, vu_wb_ld_dec, 
	vu_bwe, chip_sel, 
	ex_dma_wen_noswap, ex_dma_wen_swap, df_wen_l, df_chip_sel_l,
	df_addr_low, df_addr_high, debug_df_dma_rd_to_dm, 
	ex_su_byte_ls, ex_su_half_ls, ex_su_uns_ls,
 	dma_address, dma_wen, ex_dma_rd_to_dm, ex_dma_dm_to_rd,
	ex_rot, wb_rot, wb_dma_dm_to_rd, 
	wb_su_uns_ls, wb_su_load, wb_pass_thru, wb_mfc2, wb_cfc2, 
	ls_drive_ls);

    input		clk;
    input		reset_l;
						// RD Stage Inputs
    input 	[3:0] 	rd_base;
    input 	 	ls_drive_rd_base;
    input	[3:0]	ls_base;
    input 	[3:0] 	rd_offset;
    input	[3:0]	rd_elem_num;

    input	[11:0]	vu_rd_ld_dec_k;
    input	[11:0]	vu_rd_st_dec_k;
						// EX Stage Inputs
    input	[11:0]	address;		// byte address
    input		su_ex_store;
    input		su_ex_load;
    input		ex_su_byte_ls;
    input		ex_su_half_ls;
    input		ex_su_uns_ls;
    input		vu_ex_store;
    input		vu_ex_load;
    input		ex_mtc2;
    input		ex_mfc2;
    input		ex_cfc2;
    input		chip_sel;
    input	[11:3]	dma_address;
    input	[3:0]	dma_wen;		// active high
    input 		ex_dma_rd_to_dm;
    input 		ex_dma_dm_to_rd;
						// DF Stage Inputs
    input		df_ls_drive_ls_in_wb;
    input		df_pass_thru;

						// EX Stage Outputs
    output	[3:0]	ex_rot;
    output	[3:0]	wb_rot;
    output	[11:0]	vu_ex_st_dec;
    output		ex_dma_wen_noswap;	// dma write
    output		ex_dma_wen_swap;	// dma write, swap 8-byte words

						// DF Stage Outputs
    output		df_chip_sel_l;		// dmem chip select, active low
    output	[15:0]	df_wen_l;		// dmem wr enable, active low
    output	[11:0]	df_addr_low;
    output	[11:0]	df_addr_high;
    output		debug_df_dma_rd_to_dm;	// for debug only

						// WB Stage Outputs
    output		wb_dma_dm_to_rd;
    output		wb_su_uns_ls;
    output		wb_su_load;
    output		wb_pass_thru;
    output		wb_mfc2;
    output		wb_cfc2;
    output	[9:6]	vu_wb_ld_dec;
    output	[15:0]	vu_bwe;
    output		ls_drive_ls;


    wire	[3:0]	ls_rd_base;
    wire	[3:0]	ex_base;
    wire	[3:0]	ex_offset;
    wire	[3:0]	ex_elem_num;
    wire	[3:0]	ex_addr_low;
    wire		su_df_load;
    wire		df_mtc2;
    wire		df_mfc2;
    wire		df_cfc2;
    wire		vu_df_load;
    wire	[10:0]	vu_ex_ld_dec;
    wire		q_st;
    wire	[10:0]	vu_df_ld_dec;
    wire	[15:0]	wen_l_raw;		// dp to dmem write enables
    reg		[15:0]	wen_l_1st;
    reg		[15:0]	wen_l_2nd;
    wire	[15:0]	wen_l_3rd;
    reg		[3:0]	ex_rot;
    wire	[11:0]	addr_low;
    wire		inc_addr_high;
    wire		chip_sel_l;
    wire		df_chip_sel_l_tmp;
    wire		df_su_byte_ls;
    wire		df_su_half_ls;
    wire		df_inc_addr_high;

    wire	[3:0]	tmp_df_rot;
    wire	[3:0]	df_rot;
    wire		df_su_uns_ls;

    wire	[15:0]	vu_bwe_raw;
    wire	[15:0]	vu_bwe_1st;
    wire	[15:0]	vu_bwe_2nd;
    wire	[3:0]	wb_addr;
    wire	[3:0]	df_elem;
    wire	[15:0]	vu_mask_raw;
    wire	[15:0]	vu_mask_1st;
    wire	[15:0]	vu_mask_2nd;

    // DMA interface signals

    wire		dma_cycle;			// EX stage 
    wire 		df_dma_dm_to_rd;


assign ls_rd_base = ls_drive_rd_base ? ls_base : rd_base;
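
// RD -> EX pipeline registers: latch the base, offset, element number and
// vector load/store decodes used by the EX-stage address and rotate logic.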

asdff #(4, 0) ls_re_elem_ff (ex_elem_num, rd_elem_num, clk, 1'b1);
asdff #(11, 0) ls_re_ld_dec_ff (vu_ex_ld_dec, vu_rd_ld_dec_k[10:0], clk, 1'b1);
asdff #(12, 0) ls_re_st_dec_ff (vu_ex_st_dec, vu_rd_st_dec_k, clk, 1'b1);
asdff #(4, 0) ls_re_base_ff (ex_base, ls_rd_base, clk, 1'b1);
asdff #(4, 0) ls_re_offset_ff (ex_offset, rd_offset, clk, 1'b1);

assign dma_cycle = ex_dma_dm_to_rd || ex_dma_rd_to_dm;
assign ex_dma_wen_noswap = (dma_wen != 4'b0) && !dma_address[3];
assign ex_dma_wen_swap = (dma_wen != 4'b0) && dma_address[3];
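// A DMA write is steered by dma_address[3], which selects the 8-byte half of
// the 16-byte dmem row being written; hence the separate no-swap / swap
// (swap 8-byte words) strobes for the data path.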

assign addr_low = dma_cycle ? {dma_address, 3'b0} : address;
assign inc_addr_high = !(vu_ex_ld_dec[5] || vu_ex_st_dec[5] || !addr_low[3]);
assign chip_sel_l = !(chip_sel || dma_cycle);
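// addr_low is the EX-stage byte address presented to dmem (DMA cycles supply
// a doubleword-aligned address).  An access that starts in the upper half of
// a row may spill into the next row, so df_addr_high is bumped by 8 in DF;
// the increment is suppressed for the 'rest' forms (ld/st decode bit 5),
// which stay within the addressed row.  chip_sel_l asserts (low) whenever
// chip_sel or a DMA cycle is active.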

// Rotate amount is computed here for the cases that use it in EX.  More 
// terms are added in DF for WB use.

wire [9:0] ex_rot_sel;

wire [3:0] ex_rot_addr_low;
wire [3:0] ex_rot_data0;
wire [3:0] ex_rot_data1;
wire [3:0] ex_rot_data2;
wire [3:0] ex_rot_data3;
wire [3:0] ex_rot_data4;
wire [3:0] ex_rot_data5;
wire [3:0] ex_rot_data6;
wire [3:0] ex_rot_data9;

assign ex_rot_sel[0] = vu_ex_st_dec[7];
assign ex_rot_sel[1] = vu_ex_st_dec[9] && ex_elem_num[3];
assign ex_rot_sel[2] = vu_ex_store && !ex_rot_sel[0] && !ex_rot_sel[1] && !ex_rot_sel[7];
assign ex_rot_sel[3] = su_ex_store && ex_su_byte_ls;
assign ex_rot_sel[4] = su_ex_store && ex_su_half_ls;
assign ex_rot_sel[5] = ex_mtc2;
assign ex_rot_sel[6] = vu_ex_ld_dec[10];
assign ex_rot_sel[7] = vu_ex_st_dec[10];
assign ex_rot_sel[8] = su_ex_store && !ex_su_byte_ls && !ex_su_half_ls;
assign ex_rot_sel[9] = ex_mfc2;
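
// ex_rot_sel is intended to be one-hot (at most one load/store class active
// per cycle); the case (1'b1) below muxes the corresponding rotate value
// precomputed in ls_ex_rot_values, defaulting to zero.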

ls_ex_rot_values  u_ex_rot_values(.rd_addr_30(ls_rd_base),
				  .rd_inst_data_30(rd_offset),
				  .rd_elem_num(rd_elem_num),
				  .clk(clk),
				  //
				  .ex_rot_addr_low(ex_rot_addr_low),
			          .ex_rot_data0(ex_rot_data0),
			          .ex_rot_data1(ex_rot_data1),
			          .ex_rot_data2(ex_rot_data2),
			          .ex_rot_data3(ex_rot_data3),
			          .ex_rot_data4(ex_rot_data4),
			          .ex_rot_data5(ex_rot_data5),
			          .ex_rot_data6(ex_rot_data6),
			          .ex_rot_data9(ex_rot_data9)
			         );

always @(ex_rot_sel or ex_rot_addr_low or ex_rot_data0 or 
	 ex_rot_data2 or ex_rot_data1 or ex_rot_data3 or 
	 ex_rot_data4 or ex_rot_data5 or ex_rot_data6 or ex_rot_data9)
 begin
   ex_rot = 4'b0;
   case (1'b1) //
    ex_rot_sel[0]: ex_rot = ex_rot_data0;
    ex_rot_sel[1]: ex_rot = ex_rot_data1;
    ex_rot_sel[2]: ex_rot = ex_rot_data2;
    ex_rot_sel[3]: ex_rot = ex_rot_data3;
    ex_rot_sel[4]: ex_rot = ex_rot_data4;
    ex_rot_sel[5]: ex_rot = ex_rot_data5;
    ex_rot_sel[6]: ex_rot = ex_rot_data6;
    ex_rot_sel[7]: ex_rot = ex_rot_addr_low;
    ex_rot_sel[8]: ex_rot = ex_rot_addr_low;
    ex_rot_sel[9]: ex_rot = ex_rot_data9;
    default:       ex_rot = 4'b0;
   endcase
 end
         

// one extra byte of left rotation for store_4th && low order VU half
// the inversion of ex_elem_num[3] below effectively adds 8:

/********************************************************************/

// *** All or part of this wen stuff could move to DF.
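
// wen_l_raw is the active-low byte-lane write enable pattern for an access
// that starts at byte 0 of the 16-byte dmem row (the word-store pattern
// suggests lane 15 corresponds to byte 0).  The two case blocks below rotate
// it right by the byte address within the row, and q_st forces any lanes
// that wrap around back off so quad/rest stores stay inside the row.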

assign wen_l_raw = 
    ex_dma_rd_to_dm ? 					    // dma
       ~{{4{dma_wen[3]}}, {4{dma_wen[2]}}, {4{dma_wen[1]}}, {4{dma_wen[0]}}} : 
    (ex_su_byte_ls && su_ex_store)		      ? 16'hefff :  // su byte
    (ex_su_half_ls && su_ex_store)		      ? 16'hcfff :  // su short
    (vu_ex_st_dec[0])				      ? 16'h7fff :  // vu byte
    (vu_ex_st_dec[1])				      ? 16'h3fff :  // vu short
    ((!ex_su_byte_ls && !ex_su_half_ls && su_ex_store) || vu_ex_st_dec[2]) 
						      ? 16'h0fff :  // long
    (vu_ex_st_dec[3]||vu_ex_st_dec[6]||vu_ex_st_dec[7]) ? 16'h00ff :  // d,[u]p
    (vu_ex_st_dec[4] || vu_ex_st_dec[5] || vu_ex_st_dec[10] ||
				  vu_ex_st_dec[11])   ? 16'h0000 :  // q,r,t,w
    (vu_ex_st_dec[8])                                 ? 16'h5555 :  // half
    (vu_ex_st_dec[9]) 				      ? 16'h7777 :  // fourth
							16'hffff;   // nothing
assign q_st = vu_ex_st_dec[4] || vu_ex_st_dec[5];
assign ex_addr_low = dma_cycle ? {dma_address[3], 3'b0} : ex_base + ex_offset;
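
// Two-stage rotate of the raw enables: first by ex_addr_low[3:2] in 4-lane
// steps, then by ex_addr_low[1:0].  For example, a quad store (st_dec[4]) at
// byte offset 6 has wen_l_raw = 16'h0000 and q_st = 1; the first stage gives
// 16'hf000, the second 16'hfc00, leaving the ten lanes from the addressed
// byte to the end of the row enabled and the six wrapped lanes forced off.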
always @(ex_addr_low or wen_l_raw or q_st)
begin
   case (ex_addr_low[3:2]) //
    2'b00   : wen_l_1st = wen_l_raw;
    2'b01   : wen_l_1st = {({4{q_st}} | wen_l_raw[3:0]),wen_l_raw[15:4]};
    2'b10   : wen_l_1st = {({8{q_st}} | wen_l_raw[7:0]),wen_l_raw[15:8]};
    2'b11   : wen_l_1st = {({12{q_st}}| wen_l_raw[11:0]),wen_l_raw[15:12]};
    default : wen_l_1st = wen_l_raw;
   endcase
end

always @(ex_addr_low or wen_l_1st or q_st)
begin
   case (ex_addr_low[1:0]) //
    2'b00   : wen_l_2nd = wen_l_1st;
    2'b01   : wen_l_2nd = {({1{q_st}} | wen_l_1st[0]),wen_l_1st[15:1]};
    2'b10   : wen_l_2nd = {({2{q_st}} | wen_l_1st[1:0]),wen_l_1st[15:2]};
    2'b11   : wen_l_2nd = {({3{q_st}} | wen_l_1st[2:0]),wen_l_1st[15:3]};
    default : wen_l_2nd = wen_l_1st;
   endcase
end
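
// 'rest' stores write the complementary set of bytes, so their enables are
// simply inverted.  The raw SU byte/half patterns sit 3/2 lanes below the
// word-aligned position, so they are rotated back up after the address
// rotation.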

assign wen_l_3rd = 
	vu_ex_st_dec[5] ? ~wen_l_2nd : 			// rest
	ex_su_byte_ls ? {wen_l_2nd[12:0], wen_l_2nd[15:13]} : 
	ex_su_half_ls ? {wen_l_2nd[13:0], wen_l_2nd[15:14]} : 
	wen_l_2nd;
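
// EX -> DF pipeline registers (load/store qualifiers, decodes, dmem controls,
// address and rotate amount).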

asdff #(1, 0) ls_ed_suld_ff (su_df_load, su_ex_load, clk, 1'b1);
asdff #(1, 0) ls_ed_subyte_ff (df_su_byte_ls, ex_su_byte_ls, clk, 1'b1);
asdff #(1, 0) ls_ed_suhalf_ff (df_su_half_ls, ex_su_half_ls, clk, 1'b1);
asdff #(1, 0) ls_ed_mtc2_ff (df_mtc2, ex_mtc2, clk, 1'b1);
asdff #(1, 0) ls_ed_mfc2_ff (df_mfc2, ex_mfc2, clk, 1'b1);
asdff #(1, 0) ls_ed_cfc2_ff (df_cfc2, ex_cfc2, clk, 1'b1);
asdff #(1, 0) ls_vu_ed_ld_ff (vu_df_load, vu_ex_load, clk, 1'b1);
asdff #(11, 0) vu_ed_ld_dec_ff (vu_df_ld_dec, vu_ex_ld_dec[10:0], clk, 1'b1);
asdff #(1, 0) vu_ed_dmCS_ff (df_chip_sel_l_tmp, chip_sel_l, clk, reset_l);


wire [15:0] df_wen_l_tmp;
wire df_wen_valid_ex;
wire df_wen_valid;
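
// The registered write enables only reach dmem when EX actually had a store
// (SU or VU) or a DMA write to dmem; otherwise df_wen_l is forced to all
// ones (inactive).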

assign df_wen_valid_ex = vu_ex_store || su_ex_store || ex_dma_rd_to_dm;
asdff #(16, 'hffff) vu_ed_dmWen_ff (df_wen_l_tmp, wen_l_3rd, clk, reset_l);
asdff #(1, 0) df_wen_valid_ff (df_wen_valid,df_wen_valid_ex, clk, reset_l);
assign df_wen_l = df_wen_l_tmp | {16{!df_wen_valid}};

asdff #(12, 0) vu_ed_dmAddr_low_ff (df_addr_low, addr_low, clk,1'b1);
asdff #(1, 0) vu_ed_dmAddr_high_ff (df_inc_addr_high,inc_addr_high, clk, 1'b1);

asdff #(4, 0) vu_ed_rot_ff (tmp_df_rot, ex_rot, clk, 1'b1);
asdff #(4, 0) vu_ed_elem_ff (df_elem, ex_elem_num, clk, 1'b1);
asdff #(1, 0) vu_ed_uns_ff (df_su_uns_ls, ex_su_uns_ls, clk, 1'b1);
asdff #(1, 0) dma_ed_dm_rd_ff (df_dma_dm_to_rd, ex_dma_dm_to_rd, clk,1'b1);

asdff #(1, 0) dma_ed_rd_dm_ff (debug_df_dma_rd_to_dm,ex_dma_rd_to_dm,clk,1'b1);

assign df_addr_high = df_inc_addr_high ? df_addr_low+8: df_addr_low; 
assign df_chip_sel_l = df_chip_sel_l_tmp;
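
// DF-stage (load side) rotate amount: DMA reads just pass the half-row
// select through; vector loads (other than transpose) rotate by element
// minus address so the addressed byte lands on the destination element; SU
// loads use 3/2/0 minus the address for byte/half/word, presumably lining
// the data up for the WB extract and sign extension; mfc2/cfc2 rotate by 2
// (see inline comment); pass_thru was already rotated in EX; everything else
// keeps the EX-computed amount.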
assign df_rot = (df_dma_dm_to_rd) ? {df_addr_low[3], 3'b0} : 
   (vu_df_load && !vu_df_ld_dec[10]) ? (df_elem - df_addr_low[3:0]) :
	  (su_df_load && df_su_byte_ls) ? 3 - df_addr_low[3:0] :
	  (su_df_load && df_su_half_ls) ? 2 - df_addr_low[3:0] :
	   		   (su_df_load) ? - df_addr_low[3:0] :
		   (df_mfc2 || df_cfc2) ? 2 :    // rotate in sign extension
		           df_pass_thru ? 0 : // rot done in ex for pass_th
				          tmp_df_rot; 

// DMem to Datapaths

asdff #(1, 0) ls_dw_suld_ff (wb_su_load, su_df_load, clk, 1'b1); 
asdff #(1, 0) ls_dw_mfc2_ff (wb_mfc2, df_mfc2, clk, 1'b1);
asdff #(1, 0) ls_dw_cfc2_ff (wb_cfc2, df_cfc2, clk, 1'b1);
asdff #(1, 0) pass_thru_ff (wb_pass_thru, df_pass_thru, clk, 1'b1);
asdff #(4, 0) vu_dw_ld_dec_ff (vu_wb_ld_dec, vu_df_ld_dec[9:6], clk, reset_l);
asdff #(4, 0) vu_dw_addr_ff (wb_addr, df_addr_low[3:0], clk, 1'b1);
asdff #(4, 0) vu_dw_rot_ff (wb_rot, df_rot, clk, 1'b1);
asdff #(1, 0) vu_dw_uns_ff (wb_su_uns_ls, df_su_uns_ls, clk, 1'b1);
asdff #(1, 0) vu_ls_drive_ff (ls_drive_ls, df_ls_drive_ls_in_wb, clk, 1'b1);
asdff #(1, 0) dma_dw_dm_rd_ff (wb_dma_dm_to_rd, df_dma_dm_to_rd, clk, 1'b1);

// Byte write enables for VU RFile

// *** Optimize: Only need to mask bwe for quad and rest, but elem always = 0
// VU mask raw is masking out parts of the data that aren't obtained in 
// the original memory access.  It's used only for quad and rest.

assign vu_mask_raw = 16'hffff;
assign vu_mask_1st = (df_addr_low[3:2] == 2'b00) ? vu_mask_raw : 
		     (df_addr_low[3:2] == 2'b01) ? {vu_mask_raw[11:0], 4'h0} : 
		     (df_addr_low[3:2] == 2'b10) ? {vu_mask_raw[7:0], 8'h0} : 
		  /* (df_addr_low[3:2] == 2'b11)*/ {vu_mask_raw[3:0], 12'h0};
assign vu_mask_2nd = (df_addr_low[1:0] == 2'b00) ? {vu_mask_1st} : 
		     (df_addr_low[1:0] == 2'b01) ? {vu_mask_1st[14:0], 1'h0} : 
		     (df_addr_low[1:0] == 2'b10) ? {vu_mask_1st[13:0], 2'h0} : 
		  /* (df_addr_low[1:0] == 2'b11)*/ {vu_mask_1st[12:0], 3'h0};
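// Net effect: vu_mask_2nd == 16'hffff << df_addr_low[3:0], one set bit per
// byte actually fetched (a quad load reads from the addressed byte to the
// end of the row; a 'rest' load reads the complementary bytes).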

assign vu_bwe_raw = (vu_df_ld_dec[0]) ? 16'h8000 :	// byte
	 (df_mtc2 || vu_df_ld_dec[1]) ? 16'hc000 : 	// short
		    (vu_df_ld_dec[2]) ? 16'hf000 :	// long
 (vu_df_ld_dec[3] || vu_df_ld_dec[9]) ? 16'hff00 :	// doub, fourth
		    (vu_df_ld_dec[4]) ? vu_mask_2nd : 	// quad
		    (vu_df_ld_dec[5]) ? ~vu_mask_2nd :	// rest
	     	    /* (no vu load) */ 16'h0000 ;
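
// The raw enables above assume a destination element of 0; shifting right by
// df_elem moves the write window to start at the selected element of the VU
// register.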

assign vu_bwe_1st =
    (df_elem[3:2] == 2'b00) ? vu_bwe_raw :       
    (df_elem[3:2] == 2'b01) ? {4'b0, vu_bwe_raw[15:4]} : 
    (df_elem[3:2] == 2'b10) ? {8'b0, vu_bwe_raw[15:8]} :  
 /* (df_elem[3:2] == 2'b11)*/ {12'b0, vu_bwe_raw[15:12]};  

assign vu_bwe_2nd =
    (df_elem[1:0] == 2'b00) ? vu_bwe_1st[15:0] : 
    (df_elem[1:0] == 2'b01) ? {1'b0, vu_bwe_1st[15:1]} :
    (df_elem[1:0] == 2'b10) ? {2'b0, vu_bwe_1st[15:2]} :
 /* (df_elem[1:0] == 2'b11)*/ {3'b0, vu_bwe_1st[15:3]};


wire [15:0] vu_bwe_tmp;
wire vu_bwe_valid;
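
// The remaining vector load forms (ld_dec bits 6, 7, 8 and 10; apparently
// pack, unpack, half and transpose by analogy with the store decode) write
// the whole register, so all 16 byte lanes are enabled for them.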
assign vu_bwe_tmp = (vu_df_ld_dec[6] || vu_df_ld_dec[7] || vu_df_ld_dec[8] || 
    vu_df_ld_dec[10]) ? 16'hffff :  vu_bwe_2nd;
assign vu_bwe_valid = df_mtc2 || vu_df_load; 
assign vu_bwe = vu_bwe_tmp & {16{vu_bwe_valid}};

endmodule