// File: lsdp.v
// Module instances modified by /home/rws/workarea/rf/sw/bbplayer/tools/necprimfix
//
// 16 instances of mx41d2 changed to j_mx41.
// 6 instances of ni01d5 changed to j_ni01.
//
/**************************************************************************
* *
* Copyright (C) 1994, Silicon Graphics, Inc. *
* *
* These coded instructions, statements, and computer programs contain *
* unpublished proprietary information of Silicon Graphics, Inc., and *
* are protected by Federal copyright law. They may not be disclosed *
* to third parties or copied or duplicated in any form, in whole or *
* in part, without the prior written consent of Silicon Graphics, Inc. *
* *
*************************************************************************/
// $Id: lsdp.v,v 1.4 2002/11/13 02:11:41 rws Exp $
// lsdp.v muxes and rotators for RSP SU and VU loads and stores
`timescale 1ns / 10ps
// lsdp: load/store datapath muxes and rotators.
//
// Store direction (ls_data -> df_datain):
//   0th mux : optional compaction of ls_data, chosen by vu_ex_st_dec[9:6]
//   1st mux : rotate right by 0/32/64/96 bits, chosen by ex_rot[3:2]
//   2nd mux : rotate right by 0/8/16/24 bits,  chosen by ex_rot[1:0]
//   3rd mux : substitute DMA write data or cp0_data for the store data
//   The result is captured in df_datain and, one asdff rank later, wb_datain.
//
// Load direction (dmem_dataout -> ls_ls_data):
//   1st mux : rotate right by 0/32/64/96 bits, chosen by wb_rot[3:2]
//   2nd mux : rotate right by 0/8/16/24 bits,  chosen by wb_rot[1:0]
//   load_data : expansion / sign-extension / pass-through selection
//   ls_ls_data is load_data gated by ls_drive_ls.
//
// NOTE(review): the "s == k selects ik" reading of lsdp_mux4x1_128 used in the
// comments below is inferred from its port names; the underlying j_mx41 cell
// is not defined in this file -- confirm against the cell library.
// NOTE(review): asdff is defined outside this file. The connections here look
// like (q, d, clk, enable) with parameters (width, <second value>) -- confirm.
// reset_l is declared as an input but is not referenced inside this module.
module lsdp (clk, reset_l, halt, pc, ex_dma_wen_swap, ex_dma_wen_noswap,
vu_ex_st_dec, ex_su_byte_ls, ex_su_half_ls,
ex_mfc0, cp0_write, cp0_data, cp0_data_out,
mem_write_data, ex_rot, wb_rot, wb_pass_thru,
wb_mfc2, wb_cfc2, wb_su_uns_ls, wb_su_load,
dmem_dataout, vu_wb_ld_dec, ls_drive_ls, wb_dma_dm_to_rd,
ls_data, ls_ls_data, df_datain, dmem_rd_data);
input clk;
input reset_l; // not used in this module
input halt;
// IF Stage Inputs
input [11:2] pc;
// EX Stage Inputs
input [11:0] vu_ex_st_dec; // only bits [9:6] are read here
input ex_su_byte_ls;
input ex_su_half_ls;
input ex_mfc0;
input cp0_write;
input ex_dma_wen_swap;
input ex_dma_wen_noswap;
input [63:0] mem_write_data;
input [3:0] ex_rot; // [3:2] word rotate, [1:0] byte rotate (store path)
input [3:0] wb_rot; // [3:2] word rotate, [1:0] byte rotate (load path)
// WB Stage Inputs
input wb_pass_thru;
input wb_mfc2;
input wb_cfc2;
input wb_su_uns_ls;
input wb_su_load;
input [127:0] dmem_dataout;
input [9:6] vu_wb_ld_dec;
input ls_drive_ls;
input wb_dma_dm_to_rd;
input [127:0] ls_data;
output [127:0] ls_ls_data;
input [31:0] cp0_data;
output [31:0] cp0_data_out;
output [127:0] df_datain; // final store data to dmem
output [63:0] dmem_rd_data;
wire df_su_byte_ls;
wire df_su_half_ls;
wire wb_su_byte_ls;
wire wb_su_half_ls;
wire [127:0] dma_data_to_dmem;
wire [127:0] dp_to_dmem_0th; // compaction/expansion output
wire [127:0] dp_to_dmem_1st; // word rotation
wire [127:0] dp_to_dmem_2nd; // byte rotation
wire [127:0] dp_to_dmem_3rd; // mux in dma data
wire [127:0] wb_datain; // pass-through data (MT, MF, CT, CF)
wire [127:0] wb_datain_sxt;
wire [127:0] dmem_to_dp_1st; // word-rot'd dmem data
wire [127:0] dmem_to_dp_2nd; // byte rotation output
wire [127:0] load_data; // load expansion output
wire [63:0] sec_rd_data;
wire [63:0] secondary_write_data;
wire [31:0] cp0_source;
wire cp0_data_enable;
// cp0_data_out source: while halt is high, the 10-bit pc zero-extended to
// 32 bits (pc lands in bits [9:0]); otherwise the top 32 bits of ls_data.
assign cp0_source = halt ? {22'b0, pc} : ls_data[127:96];
assign cp0_data_enable = cp0_write || halt;
//cp0_driver cp0_driver_ls(cp0_source, cp0_data_enable, cp0_data_out);
// The output is forced to zero when not enabled (replaces the commented-out
// driver instance above).
wire [31:0] cp0_data_out = cp0_data_enable ? cp0_source : 32'b0;
// Datapaths to DMem
// Despite the _reg suffix this is a plain wire driven by the mux output.
wire [127:0] dp_to_dmem_0th_reg;
wire [1:0] dp_to_dmem_0th_sl;
assign dp_to_dmem_0th = dp_to_dmem_0th_reg;
// mutual exclusion assumed
// Select encoding {sl[1], sl[0]} from vu_ex_st_dec:
//   none of [9:6] set -> 2'b00 (i0)
//   [6] or [7]        -> 2'b01 (i1)
//   [8]               -> 2'b10 (i2)
//   [9]               -> 2'b11 (i3)
// assign dp_to_dmem_0th_sl[0] = ex_mfc0;
assign dp_to_dmem_0th_sl[0] = vu_ex_st_dec[6] || vu_ex_st_dec[7] || vu_ex_st_dec[9] ;
assign dp_to_dmem_0th_sl[1] = vu_ex_st_dec[8] || vu_ex_st_dec[9] ;
// Treating ls_data as eight 16-bit lanes (lane 7 = [127:112] ... lane 0 = [15:0]):
//   i0 : ls_data unchanged
//   i1 : upper 64 bits = bits [15:8] of lanes 7..0,
//        lower 64 bits = bits [14:7] of lanes 7..0
//   i2 : ls_data shifted left by one bit, zero filled
//   i3 : four 32-bit groups, each = bits [14:7] of two lanes followed by
//        16 zero bits; lane pairs are (7,1), (6,0), (5,3), (4,2)
lsdp_mux4x1_128 ls_dp_to_dmem_0th(.z(dp_to_dmem_0th_reg),
.s(dp_to_dmem_0th_sl),
.i0(ls_data),
.i1( {ls_data[127:120], ls_data[111:104], ls_data[95:88],
ls_data[79:72], ls_data[63:56], ls_data[47:40],
ls_data[31:24], ls_data[15:8],
ls_data[126:119], ls_data[110:103], ls_data[94:87],
ls_data[78:71], ls_data[62:55], ls_data[46:39],
ls_data[30:23], ls_data[14:7]
}
),
.i2( {ls_data[126:0], 1'b0}), // half
.i3( {ls_data[126:119], ls_data[30:23], 16'b0,
ls_data[110:103], ls_data[14:7], 16'b0,
ls_data[94:87], ls_data[62:55], 16'b0,
ls_data[78:71], ls_data[46:39], 16'b0
}
) //fourth
);
// i1/i2/i3 are dp_to_dmem_0th rotated right by 32/64/96 bits.
lsdp_mux4x1_128 ls_dp_to_dmem_1st(.z(dp_to_dmem_1st), // word rotation
.s(ex_rot[3:2]),
.i0(dp_to_dmem_0th),
.i1({dp_to_dmem_0th[31:0], dp_to_dmem_0th[127:32]}),
.i2({dp_to_dmem_0th[63:0], dp_to_dmem_0th[127:64]}),
.i3({dp_to_dmem_0th[95:0], dp_to_dmem_0th[127:96]})
);
// i1/i2/i3 are dp_to_dmem_1st rotated right by 8/16/24 bits.
lsdp_mux4x1_128 ls_dp_to_dmem_2nd(.z(dp_to_dmem_2nd), // byte rotation
.s(ex_rot[1:0]),
.i0(dp_to_dmem_1st),
.i1({dp_to_dmem_1st[7:0],dp_to_dmem_1st[127:8]}),
.i2({dp_to_dmem_1st[15:0],dp_to_dmem_1st[127:16]}),
.i3({dp_to_dmem_1st[23:0],dp_to_dmem_1st[127:24]})
);
// Sneak path for dma to dmem to avoid collision with potential load in WB:
// *** Probably no longer an issue with duplicated rotators.
// Select encoding {sl[1], sl[0]}:
//   2'b00 -> rotated store data (i0)
//   ex_dma_wen_noswap -> 2'b01 : DMA data as assembled (i1)
//   ex_dma_wen_swap   -> 2'b10 : DMA data with its 64-bit halves exchanged (i2)
//   ex_mfc0           -> 2'b11 : cp0_data in bits [127:96], zeros below (i3)
wire [1:0] dp_to_dmem_3rd_sl;
assign dp_to_dmem_3rd_sl[0] = ex_dma_wen_noswap || ex_mfc0;
assign dp_to_dmem_3rd_sl[1] = ex_dma_wen_swap || ex_mfc0;
lsdp_mux4x1_128 ls_dp_to_dmem_3rd(.z(dp_to_dmem_3rd),
.s(dp_to_dmem_3rd_sl[1:0]),
.i0(dp_to_dmem_2nd),
.i1(dma_data_to_dmem),
.i2({dma_data_to_dmem [63:0],dma_data_to_dmem [127:64]}),
.i3({cp0_data,96'h0})
);
// First asdff rank: ex_* byte/half flags and the final store data -> df_*.
asdff #(1, 0) vu_ed_byte_ff (df_su_byte_ls, ex_su_byte_ls, clk, 1'b1);
asdff #(1, 0) vu_ed_half_ff (df_su_half_ls, ex_su_half_ls, clk, 1'b1);
asdff #(128, 0) vu_ed_datain_ff (df_datain, dp_to_dmem_3rd, clk, 1'b1);
// Second asdff rank: df_* -> wb_*.
asdff #(1, 0) vu_dw_byte_ff (wb_su_byte_ls, df_su_byte_ls, clk, 1'b1);
asdff #(1, 0) vu_dw_half_ff (wb_su_half_ls, df_su_half_ls, clk, 1'b1);
asdff #(128, 0) vu_dw_dp_to_dm_ff (wb_datain, df_datain, clk, 1'b1);
/* ******************************************************************** */
// DMem to Datapaths
// Word rotation of dmem_dataout: right by 0/32/64/96 bits per wb_rot[3:2].
lsdp_mux4x1_128 ls_dmem_to_dp_1st(.z(dmem_to_dp_1st),
.s(wb_rot[3:2]),
.i0(dmem_dataout),
.i1({dmem_dataout[31:0], dmem_dataout[127:32]}),
.i2({dmem_dataout[63:0], dmem_dataout[127:64]}),
.i3({dmem_dataout[95:0], dmem_dataout[127:96]})
);
// Byte rotation: right by 0/8/16/24 bits per wb_rot[1:0].
lsdp_mux4x1_128 ls_dmem_to_dp_2nd(.z(dmem_to_dp_2nd),
.s(wb_rot[1:0]),
.i0(dmem_to_dp_1st),
.i1({dmem_to_dp_1st[7:0],dmem_to_dp_1st[127:8]}),
.i2({dmem_to_dp_1st[15:0],dmem_to_dp_1st[127:16]}),
.i3({dmem_to_dp_1st[23:0],dmem_to_dp_1st[127:24]})
);
// mutual exclusion assumed
wire wb_sxt_cp2; // *** move all this to df then just latch for wb.
wire wb_sxt_half; // *** move all this to df then just latch for wb.
wire wb_sxt_byte; // *** move all this to df then just latch for wb.
wire [127:112] wb_datain_high;
assign wb_sxt_cp2 = wb_cfc2 || wb_mfc2;
assign wb_sxt_half = wb_su_load && wb_su_half_ls;
assign wb_sxt_byte = wb_su_load && wb_su_byte_ls;
// For wb_cfc2 / wb_mfc2 the top 16 bits of the pass-through data are replaced
// by 16 copies of bit 111; otherwise wb_datain passes through unchanged.
assign wb_datain_high = wb_sxt_cp2 ? {16{wb_datain[111]}} : wb_datain[127:112];
assign wb_datain_sxt = {wb_datain_high, wb_datain[111:0]};
reg [127:0] load_data_reg; // combinational: assigned only in the always block below
wire [7:0] load_data_sl;
assign load_data = load_data_reg;
// mutual exclusion assumed
// load_data_sl is built to be one-hot as long as the vu_wb_ld_dec bits,
// wb_pass_thru, wb_sxt_half and wb_sxt_byte are mutually exclusive;
// bit 4 is the "none of the above" case.
assign load_data_sl[0] = vu_wb_ld_dec[6];
assign load_data_sl[1] = vu_wb_ld_dec[7];
assign load_data_sl[2] = vu_wb_ld_dec[8];
assign load_data_sl[3] = vu_wb_ld_dec[9];
assign load_data_sl[4] = (vu_wb_ld_dec[9:6]==4'h0) && !wb_pass_thru &&
!wb_sxt_half && !wb_sxt_byte;
assign load_data_sl[5] = wb_pass_thru;
assign load_data_sl[6] = wb_sxt_half;
assign load_data_sl[7] = wb_sxt_byte;
// Load expansion. The result is viewed as eight 16-bit lanes (lane 7 = [127:112]).
always @(load_data_sl or dmem_to_dp_2nd or wb_datain_sxt or wb_su_uns_ls)
begin
// Default keeps the block purely combinational when no select bit is set.
load_data_reg = 128'h0;
case (1'b1) // the first asserted select bit, in the order listed, wins
// Bytes [127:64] of the rotated data, one per lane, in lane bits [15:8];
// lane bits [7:0] are zero.
load_data_sl[0]: load_data_reg = // pack
{dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[119:112], 8'b0,
dmem_to_dp_2nd[111:104], 8'b0, dmem_to_dp_2nd[103:96], 8'b0,
dmem_to_dp_2nd[95:88], 8'b0, dmem_to_dp_2nd[87:80], 8'b0,
dmem_to_dp_2nd[79:72], 8'b0, dmem_to_dp_2nd[71:64], 8'b0};
// Same eight bytes, but the leading 1'b0 / trailing 7'b0 place each byte in
// lane bits [14:7].
load_data_sl[1]: load_data_reg = {1'b0, // unsigned pack
dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[119:112], 8'b0,
dmem_to_dp_2nd[111:104], 8'b0, dmem_to_dp_2nd[103:96], 8'b0,
dmem_to_dp_2nd[95:88], 8'b0, dmem_to_dp_2nd[87:80], 8'b0,
dmem_to_dp_2nd[79:72], 8'b0, dmem_to_dp_2nd[71:64], 7'b0};
// Upper byte of every 16-bit group of the rotated data, placed in lane
// bits [14:7].
load_data_sl[2]: load_data_reg = {1'b0, // half
dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[111:104], 8'b0,
dmem_to_dp_2nd[95:88], 8'b0, dmem_to_dp_2nd[79:72], 8'b0,
dmem_to_dp_2nd[63:56], 8'b0, dmem_to_dp_2nd[47:40], 8'b0,
dmem_to_dp_2nd[31:24], 8'b0, dmem_to_dp_2nd[15:8], 7'b0};
// Top byte of each 32-bit word, placed in lane bits [14:7]. Lanes 7..0
// receive the bytes starting at bits 127, 95, 63, 31, 63, 31, 127, 95.
load_data_sl[3]: load_data_reg = {1'b0, // fourth
dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[95:88], 8'b0,
dmem_to_dp_2nd[63:56], 8'b0, dmem_to_dp_2nd[31:24], 8'b0,
dmem_to_dp_2nd[63:56], 8'b0, dmem_to_dp_2nd[31:24], 8'b0,
dmem_to_dp_2nd[127:120], 8'b0, dmem_to_dp_2nd[95:88], 7'b0};
// Rotated memory data, unmodified.
load_data_sl[4]: load_data_reg = dmem_to_dp_2nd;
// Pass-through data from the store-side pipeline registers.
load_data_sl[5]: load_data_reg = wb_datain_sxt;
// Half load: bits [127:112] are filled with bit 111, or with zero when
// wb_su_uns_ls is set.
load_data_sl[6]: load_data_reg = {{16{(dmem_to_dp_2nd[111] && !wb_su_uns_ls)}}, dmem_to_dp_2nd[111:0]};
// Byte load: bits [127:104] are filled with bit 103, or with zero when
// wb_su_uns_ls is set.
load_data_sl[7]: load_data_reg = {{24{(dmem_to_dp_2nd[103] && !wb_su_uns_ls)}}, dmem_to_dp_2nd[103:0]};
endcase
end
//ls_data_driver ls_data_driver (
// .in(load_data),
// .out(ls_data_out),
// .enable(ls_drive_ls));
// The output is forced to zero when ls_drive_ls is low (replaces the
// commented-out driver instance above).
wire [127:0] ls_ls_data = ls_drive_ls ? load_data : 128'b0;
// DMA DMem to RDRAM
// The lower 64 bits of the word-rotated data are held in sec_rd_data; the
// output is the upper 64 bits directly when wb_dma_dm_to_rd is high and the
// held lower 64 bits otherwise.
asdff #(64,0) dma_sec_rd_ff (sec_rd_data, dmem_to_dp_1st[63:0], clk, 1'b1);
assign dmem_rd_data =
wb_dma_dm_to_rd ? dmem_to_dp_1st[127:64] : sec_rd_data;
// DMA RDRAM to DMem
// 128-bit DMA write word = {asdff-held mem_write_data, current mem_write_data}.
asdff #(64,0) dma_sec_wr_ff (secondary_write_data, mem_write_data, clk, 1'b1);
assign dma_data_to_dmem = {secondary_write_data, mem_write_data};
endmodule
// lsdp_mux4x1_16: 16-bit wide 4:1 multiplexer assembled from one j_mx41 cell
// per bit.
//
// Each select bit passes through two separate j_ni01 cells, giving one select
// copy for bits [7:0] and one for bits [15:8], so every copy drives eight
// mux cells.
//
// Ports:
//   z          - 16-bit mux output
//   i0..i3     - 16-bit data inputs
//   s          - 2-bit select, s[0] -> s0 pins, s[1] -> s1 pins (via j_ni01)
//
// The sixteen mux cells are written as two instance arrays whose index is the
// bit number. Connectivity matches a bit-by-bit instantiation; only the
// elaborated instance names change (u_lo[0..7], u_hi[8..15]).
module lsdp_mux4x1_16(z, i0, i1, i2, i3, s);
  output [15:0] z;
  input  [15:0] i0, i1, i2, i3;
  input  [1:0]  s;

  wire [1:0] sel_lo;  // select copy feeding bits [7:0]
  wire [1:0] sel_hi;  // select copy feeding bits [15:8]

  j_ni01 u_s0x(.i(s[0]), .z(sel_lo[0]));
  j_ni01 u_s1x(.i(s[1]), .z(sel_lo[1]));
  j_ni01 u_s0y(.i(s[0]), .z(sel_hi[0]));
  j_ni01 u_s1y(.i(s[1]), .z(sel_hi[1]));

  // Low byte: the 8-bit buses are split one bit per instance, and the 1-bit
  // selects are shared by all eight instances.
  j_mx41 u_lo[7:0] (
    .s0(sel_lo[0]), .s1(sel_lo[1]),
    .i0(i0[7:0]), .i1(i1[7:0]), .i2(i2[7:0]), .i3(i3[7:0]),
    .z(z[7:0])
  );

  // High byte, using the second select copy.
  j_mx41 u_hi[15:8] (
    .s0(sel_hi[0]), .s1(sel_hi[1]),
    .i0(i0[15:8]), .i1(i1[15:8]), .i2(i2[15:8]), .i3(i3[15:8]),
    .z(z[15:8])
  );
endmodule
// lsdp_mux4x1_128: 128-bit wide 4:1 multiplexer built from eight
// lsdp_mux4x1_16 slices that share one buffered copy of the select.
//
// Ports:
//   z          - 128-bit mux output
//   i0..i3     - 128-bit data inputs
//   s          - 2-bit select, passed through j_ni01 cells before fan-out
//
// The slices are written as one instance array: u_slice[k] handles bits
// [16*k+15 : 16*k]. The 128-bit buses are divided into 16-bit pieces across
// the array, while the 2-bit select (same width as the slice port) goes to
// every slice.
module lsdp_mux4x1_128(z, i0, i1, i2, i3, s);
  output [127:0] z;
  input  [127:0] i0, i1, i2, i3;
  input  [1:0]   s;

  wire [1:0] sel_buf;  // select after the j_ni01 cells

  j_ni01 u_sx0(.i(s[0]), .z(sel_buf[0]));
  j_ni01 u_sx1(.i(s[1]), .z(sel_buf[1]));

  lsdp_mux4x1_16 u_slice[7:0] (
    .s(sel_buf),
    .i0(i0), .i1(i1), .i2(i2), .i3(i3),
    .z(z)
  );
endmodule