// lsctl.v (13.7 KB) — note: a line-number gutter from the source-viewer
// extraction (stray "1".."366" lines) was removed here; it was not part of
// the original Verilog source.
/**************************************************************************
* *
* Copyright (C) 1994, Silicon Graphics, Inc. *
* *
* These coded instructions, statements, and computer programs contain *
* unpublished proprietary information of Silicon Graphics, Inc., and *
* are protected by Federal copyright law. They may not be disclosed *
* to third parties or copied or duplicated in any form, in whole or *
* in part, without the prior written consent of Silicon Graphics, Inc. *
* *
*************************************************************************/
// $Id: lsctl.v,v 1.1 2002/03/28 00:26:13 berndt Exp $
// lsctl.v control section for the RSP load/store unit,
// generating addresses and write enables.
`timescale 1ns / 10ps
// lsctl: control section for the RSP load/store unit.  Generates DMEM
// addresses, the chip select, and byte write enables, and pipes load/store
// qualifiers down the RD -> EX -> DF -> WB pipeline.  DMA accesses share
// the same address/write-enable paths and take priority via dma_cycle.
module lsctl (clk, reset_l, iddq_test, rd_base, ls_drive_rd_base, ls_base,
              rd_offset, rd_elem_num,
              address, df_ls_drive_ls_in_wb, df_pass_thru,
              su_ex_store, su_ex_load, vu_ex_store, vu_ex_load,
              ex_mtc2, ex_mfc2, ex_cfc2,
              vu_rd_ld_dec_k, vu_rd_st_dec_k, vu_ex_st_dec, vu_wb_ld_dec,
              vu_bwe, chip_sel,
              ex_dma_wen_noswap, ex_dma_wen_swap, df_wen_l, df_chip_sel_l,
              df_addr_low, df_addr_high, debug_df_dma_rd_to_dm,
              ex_su_byte_ls, ex_su_half_ls, ex_su_uns_ls,
              dma_address, dma_wen, ex_dma_rd_to_dm, ex_dma_dm_to_rd,
              ex_rot, wb_rot, wb_dma_dm_to_rd,
              wb_su_uns_ls, wb_su_load, wb_pass_thru, wb_mfc2, wb_cfc2,
              ls_drive_ls);
  input clk;
  input reset_l;
  input iddq_test;
  // RD Stage Inputs
  input [3:0] rd_base;
  input ls_drive_rd_base;
  input [3:0] ls_base;
  input [3:0] rd_offset;
  input [3:0] rd_elem_num;
  input [11:0] vu_rd_ld_dec_k;
  input [11:0] vu_rd_st_dec_k;
  // EX Stage Inputs
  input [11:0] address;          // byte address
  input su_ex_store;
  input su_ex_load;
  input ex_su_byte_ls;
  input ex_su_half_ls;
  input ex_su_uns_ls;
  input vu_ex_store;
  input vu_ex_load;
  input ex_mtc2;
  input ex_mfc2;
  input ex_cfc2;
  input chip_sel;
  input [11:3] dma_address;
  input [3:0] dma_wen;           // active high
  input ex_dma_rd_to_dm;
  input ex_dma_dm_to_rd;
  // DF Stage Inputs
  input df_ls_drive_ls_in_wb;
  input df_pass_thru;
  // EX Stage Outputs
  output [3:0] ex_rot;
  output [3:0] wb_rot;
  output [11:0] vu_ex_st_dec;
  output ex_dma_wen_noswap;      // dma write
  output ex_dma_wen_swap;        // dma write, swap 8-byte words
  // DF Stage Outputs
  output df_chip_sel_l;          // dmem chip select, active low
  output [15:0] df_wen_l;        // dmem wr enable, active low
  output [11:0] df_addr_low;
  output [11:0] df_addr_high;
  output debug_df_dma_rd_to_dm;  // for debug only
  // WB Stage Outputs
  output wb_dma_dm_to_rd;
  output wb_su_uns_ls;
  output wb_su_load;
  output wb_pass_thru;
  output wb_mfc2;
  output wb_cfc2;
  output [9:6] vu_wb_ld_dec;
  output [15:0] vu_bwe;
  output ls_drive_ls;
  wire [3:0] ls_rd_base;
  wire [3:0] ex_base;
  wire [3:0] ex_offset;
  wire [3:0] ex_elem_num;
  wire [3:0] ex_addr_low;
  wire su_df_load;
  wire df_mtc2;
  wire df_mfc2;
  wire df_cfc2;
  wire vu_df_load;
  wire [10:0] vu_ex_ld_dec;
  wire q_st;
  wire [10:0] vu_df_ld_dec;
  wire [15:0] wen_l_raw;         // dp to dmem write enables
  reg [15:0] wen_l_1st;
  reg [15:0] wen_l_2nd;
  wire [15:0] wen_l_3rd;
  reg [3:0] ex_rot;
  wire [11:0] addr_low;
  wire inc_addr_high;
  wire chip_sel_l;
  // FIX: was declared as the unused name "df_chip_sel_tmp" while the signal
  // actually driven and read below (df_chip_sel_l_tmp) existed only as an
  // implicit net.  Declared here under its real name.
  wire df_chip_sel_l_tmp;
  // FIX: these three were previously undeclared implicit 1-bit nets
  // (driven by the EX->DF flops, read in the DF-stage logic below).
  wire df_su_byte_ls;
  wire df_su_half_ls;
  wire df_inc_addr_high;
  wire [3:0] tmp_df_rot;
  wire [3:0] df_rot;
  wire df_su_uns_ls;
  wire [15:0] vu_bwe_raw;
  wire [15:0] vu_bwe_1st;
  wire [15:0] vu_bwe_2nd;
  // NOTE(review): wb_addr is registered below but never read in this module.
  wire [3:0] wb_addr;
  wire [3:0] df_elem;
  wire [15:0] vu_mask_raw;
  wire [15:0] vu_mask_1st;
  wire [15:0] vu_mask_2nd;
  // DMA interface signals
  wire dma_cycle;                // EX stage
  wire df_dma_dm_to_rd;

  // RD stage: select the base register source, then pipe the RD-stage
  // operands and decode vectors into EX.
  assign ls_rd_base = ls_drive_rd_base ? ls_base : rd_base;
  asdff #(4, 0)  ls_re_elem_ff   (ex_elem_num, rd_elem_num, clk, 1'b1);
  asdff #(11, 0) ls_re_ld_dec_ff (vu_ex_ld_dec, vu_rd_ld_dec_k[10:0], clk, 1'b1);
  asdff #(12, 0) ls_re_st_dec_ff (vu_ex_st_dec, vu_rd_st_dec_k, clk, 1'b1);
  asdff #(4, 0)  ls_re_base_ff   (ex_base, ls_rd_base, clk, 1'b1);
  asdff #(4, 0)  ls_re_offset_ff (ex_offset, rd_offset, clk, 1'b1);

  // DMA cycles override the datapath address; dma_address is a 64-bit-word
  // address, bit [3] selects which 8-byte half is accessed first.
  assign dma_cycle = ex_dma_dm_to_rd || ex_dma_rd_to_dm;
  assign ex_dma_wen_noswap = (dma_wen != 4'b0) && !dma_address[3];
  assign ex_dma_wen_swap   = (dma_wen != 4'b0) && dma_address[3];
  assign addr_low = dma_cycle ? {dma_address, 3'b0} : address;
  // The high (second) dmem address is bumped by 8 unless the access is a
  // quad/rest decode or already 16-byte aligned.
  assign inc_addr_high = !(vu_ex_ld_dec[5] || vu_ex_st_dec[5] || !addr_low[3]);
  assign chip_sel_l = !(chip_sel || dma_cycle);

  // Rotate amount is computed here for the cases that use it in EX.  More
  // terms are added in DF for WB use.
  wire [9:0] ex_rot_sel;
  wire [3:0] ex_rot_addr_low;
  wire [3:0] ex_rot_data0;
  wire [3:0] ex_rot_data1;
  wire [3:0] ex_rot_data2;
  wire [3:0] ex_rot_data3;
  wire [3:0] ex_rot_data4;
  wire [3:0] ex_rot_data5;
  wire [3:0] ex_rot_data6;
  wire [3:0] ex_rot_data9;
  // One-hot (by construction of the store/load decodes) select for the
  // EX-stage rotate mux below.
  assign ex_rot_sel[0] = vu_ex_st_dec[7];
  assign ex_rot_sel[1] = vu_ex_st_dec[9] && ex_elem_num[3];
  assign ex_rot_sel[2] = vu_ex_store && !ex_rot_sel[0] && !ex_rot_sel[1] && !ex_rot_sel[7];
  assign ex_rot_sel[3] = su_ex_store && ex_su_byte_ls;
  assign ex_rot_sel[4] = su_ex_store && ex_su_half_ls;
  assign ex_rot_sel[5] = ex_mtc2;
  assign ex_rot_sel[6] = vu_ex_ld_dec[10];
  assign ex_rot_sel[7] = vu_ex_st_dec[10];
  assign ex_rot_sel[8] = su_ex_store && !ex_su_byte_ls && !ex_su_half_ls;
  assign ex_rot_sel[9] = ex_mfc2;
  ls_ex_rot_values u_ex_rot_values(.rd_addr_30(ls_rd_base),
                                   .rd_inst_data_30(rd_offset),
                                   .rd_elem_num(rd_elem_num),
                                   .clk(clk),
                                   //
                                   .ex_rot_addr_low(ex_rot_addr_low),
                                   .ex_rot_data0(ex_rot_data0),
                                   .ex_rot_data1(ex_rot_data1),
                                   .ex_rot_data2(ex_rot_data2),
                                   .ex_rot_data3(ex_rot_data3),
                                   .ex_rot_data4(ex_rot_data4),
                                   .ex_rot_data5(ex_rot_data5),
                                   .ex_rot_data6(ex_rot_data6),
                                   .ex_rot_data9(ex_rot_data9)
                                   );
  always @(ex_rot_sel or ex_rot_addr_low or ex_rot_data0 or
           ex_rot_data2 or ex_rot_data1 or ex_rot_data3 or
           ex_rot_data4 or ex_rot_data5 or ex_rot_data6 or ex_rot_data9)
    begin
      ex_rot = 4'b0;
      case (1'b1) //synopsys parallel_case full_case
        ex_rot_sel[0]: ex_rot = ex_rot_data0;
        ex_rot_sel[1]: ex_rot = ex_rot_data1;
        ex_rot_sel[2]: ex_rot = ex_rot_data2;
        ex_rot_sel[3]: ex_rot = ex_rot_data3;
        ex_rot_sel[4]: ex_rot = ex_rot_data4;
        ex_rot_sel[5]: ex_rot = ex_rot_data5;
        ex_rot_sel[6]: ex_rot = ex_rot_data6;
        ex_rot_sel[7]: ex_rot = ex_rot_addr_low;
        ex_rot_sel[8]: ex_rot = ex_rot_addr_low;
        ex_rot_sel[9]: ex_rot = ex_rot_data9;
        default:       ex_rot = 4'b0;
      endcase
    end
  // one extra byte of left rotation for store_4th && low order VU half
  // the inversion of ex_elem_num[3] below effectively adds 8:
  /********************************************************************/
  // *** All or part of this wen stuff could move to DF.
  // Raw (unrotated) active-low write-enable pattern, one bit per dmem byte,
  // selected by access width.  DMA writes expand dma_wen to 4 bits each.
  assign wen_l_raw =
    ex_dma_rd_to_dm ? // dma
      ~{{4{dma_wen[3]}}, {4{dma_wen[2]}}, {4{dma_wen[1]}}, {4{dma_wen[0]}}} :
    (ex_su_byte_ls && su_ex_store) ? 16'hefff : // su byte
    (ex_su_half_ls && su_ex_store) ? 16'hcfff : // su short
    (vu_ex_st_dec[0]) ? 16'h7fff : // vu byte
    (vu_ex_st_dec[1]) ? 16'h3fff : // vu short
    ((!ex_su_byte_ls && !ex_su_half_ls && su_ex_store) || vu_ex_st_dec[2])
      ? 16'h0fff : // long
    (vu_ex_st_dec[3]||vu_ex_st_dec[6]||vu_ex_st_dec[7]) ? 16'h00ff : // d,[u]p
    (vu_ex_st_dec[4] || vu_ex_st_dec[5] || vu_ex_st_dec[10] ||
     vu_ex_st_dec[11]) ? 16'h0000 : // q,r,t,w
    (vu_ex_st_dec[8]) ? 16'h5555 : // half
    (vu_ex_st_dec[9]) ? 16'h7777 : // fourth
    16'hffff; // nothing
  assign q_st = vu_ex_st_dec[4] || vu_ex_st_dec[5];
  assign ex_addr_low = dma_cycle ? {dma_address[3], 3'b0} : ex_base + ex_offset;
  // Rotate the write-enable pattern right by the low address bits, in two
  // stages (by 4s then by 1s).  For quad stores (q_st) bytes that wrap
  // around the 16-byte line are disabled instead of wrapped.
  always @(ex_addr_low or wen_l_raw or q_st)
    begin
      case (ex_addr_low[3:2]) //synopsys parallel_case full_case
        2'b00 : wen_l_1st = wen_l_raw;
        2'b01 : wen_l_1st = {({4{q_st}} | wen_l_raw[3:0]),wen_l_raw[15:4]};
        2'b10 : wen_l_1st = {({8{q_st}} | wen_l_raw[7:0]),wen_l_raw[15:8]};
        2'b11 : wen_l_1st = {({12{q_st}}| wen_l_raw[11:0]),wen_l_raw[15:12]};
        default : wen_l_1st = wen_l_raw;
      endcase
    end
  always @(ex_addr_low or wen_l_1st or q_st)
    begin
      case (ex_addr_low[1:0]) //synopsys parallel_case full_case
        2'b00 : wen_l_2nd = wen_l_1st;
        2'b01 : wen_l_2nd = {({1{q_st}} | wen_l_1st[0]),wen_l_1st[15:1]};
        2'b10 : wen_l_2nd = {({2{q_st}} | wen_l_1st[1:0]),wen_l_1st[15:2]};
        2'b11 : wen_l_2nd = {({3{q_st}} | wen_l_1st[2:0]),wen_l_1st[15:3]};
        default : wen_l_2nd = wen_l_1st;
      endcase
    end
  // Final fixups: rest stores invert the quad mask; SU byte/half stores
  // rotate the enables back by the bytes already consumed.
  assign wen_l_3rd =
    vu_ex_st_dec[5] ? ~wen_l_2nd : // rest
    ex_su_byte_ls ? {wen_l_2nd[12:0], wen_l_2nd[15:13]} :
    ex_su_half_ls ? {wen_l_2nd[13:0], wen_l_2nd[15:14]} :
    wen_l_2nd;

  // EX -> DF pipeline registers.
  asdff #(1, 0)  ls_ed_suld_ff   (su_df_load, su_ex_load, clk, 1'b1);
  asdff #(1, 0)  ls_ed_subyte_ff (df_su_byte_ls, ex_su_byte_ls, clk, 1'b1);
  asdff #(1, 0)  ls_ed_suhalf_ff (df_su_half_ls, ex_su_half_ls, clk, 1'b1);
  asdff #(1, 0)  ls_ed_mtc2_ff   (df_mtc2, ex_mtc2, clk, 1'b1);
  asdff #(1, 0)  ls_ed_mfc2_ff   (df_mfc2, ex_mfc2, clk, 1'b1);
  asdff #(1, 0)  ls_ed_cfc2_ff   (df_cfc2, ex_cfc2, clk, 1'b1);
  asdff #(1, 0)  ls_vu_ed_ld_ff  (vu_df_load, vu_ex_load, clk, 1'b1);
  asdff #(11, 0) vu_ed_ld_dec_ff (vu_df_ld_dec, vu_ex_ld_dec[10:0], clk, 1'b1);
  asdff #(1, 0)  vu_ed_dmCS_ff   (df_chip_sel_l_tmp, chip_sel_l, clk, reset_l);
  wire [15:0] df_wen_l_tmp;
  wire df_wen_valid_ex;
  wire df_wen_valid;
  // Qualify the registered write enables: force all-ones (inactive) unless
  // a store or DMA write was actually in EX last cycle.
  assign df_wen_valid_ex = vu_ex_store || su_ex_store || ex_dma_rd_to_dm;
  asdff #(16, 'hffff) vu_ed_dmWen_ff (df_wen_l_tmp, wen_l_3rd, clk, reset_l);
  asdff #(1, 0)  df_wen_valid_ff (df_wen_valid,df_wen_valid_ex, clk, reset_l);
  assign df_wen_l = df_wen_l_tmp | {16{!df_wen_valid}};
  asdff #(12, 0) vu_ed_dmAddr_low_ff  (df_addr_low, addr_low, clk,1'b1);
  asdff #(1, 0)  vu_ed_dmAddr_high_ff (df_inc_addr_high,inc_addr_high, clk, 1'b1);
  asdff #(4, 0)  vu_ed_rot_ff   (tmp_df_rot, ex_rot, clk, 1'b1);
  asdff #(4, 0)  vu_ed_elem_ff  (df_elem, ex_elem_num, clk, 1'b1);
  asdff #(1, 0)  vu_ed_uns_ff   (df_su_uns_ls, ex_su_uns_ls, clk, 1'b1);
  asdff #(1, 0)  dma_ed_dm_rd_ff (df_dma_dm_to_rd, ex_dma_dm_to_rd, clk,1'b1);
  asdff #(1, 0)  dma_ed_rd_dm_ff (debug_df_dma_rd_to_dm,ex_dma_rd_to_dm,clk,1'b1);
  assign df_addr_high = df_inc_addr_high ? df_addr_low+8: df_addr_low;
  // iddq_test forces the dmem deselected for quiescent-current testing.
  assign df_chip_sel_l = df_chip_sel_l_tmp || iddq_test;
  // DF-stage rotate amount for WB alignment of load data.
  assign df_rot = (df_dma_dm_to_rd) ? {df_addr_low[3], 3'b0} :
    (vu_df_load && !vu_df_ld_dec[10]) ? (df_elem - df_addr_low[3:0]) :
    (su_df_load && df_su_byte_ls) ? 3 - df_addr_low[3:0] :
    (su_df_load && df_su_half_ls) ? 2 - df_addr_low[3:0] :
    (su_df_load) ? - df_addr_low[3:0] :
    (df_mfc2 || df_cfc2) ? 2 : // rotate in sign extension
    df_pass_thru ? 0 :         // rot done in ex for pass_th
    tmp_df_rot;

  // DMem to Datapaths: DF -> WB pipeline registers.
  asdff #(1, 0)  ls_dw_suld_ff   (wb_su_load, su_df_load, clk, 1'b1);
  asdff #(1, 0)  ls_dw_mfc2_ff   (wb_mfc2, df_mfc2, clk, 1'b1);
  asdff #(1, 0)  ls_dw_cfc2_ff   (wb_cfc2, df_cfc2, clk, 1'b1);
  asdff #(1, 0)  pass_thru_ff    (wb_pass_thru, df_pass_thru, clk, 1'b1);
  asdff #(4, 0)  vu_dw_ld_dec_ff (vu_wb_ld_dec, vu_df_ld_dec[9:6], clk, reset_l);
  asdff #(4, 0)  vu_dw_addr_ff   (wb_addr, df_addr_low[3:0], clk, 1'b1);
  asdff #(4, 0)  vu_dw_rot_ff    (wb_rot, df_rot, clk, 1'b1);
  asdff #(1, 0)  vu_dw_uns_ff    (wb_su_uns_ls, df_su_uns_ls, clk, 1'b1);
  asdff #(1, 0)  vu_ls_drive_ff  (ls_drive_ls, df_ls_drive_ls_in_wb, clk, 1'b1);
  asdff #(1, 0)  dma_dw_dm_rd_ff (wb_dma_dm_to_rd, df_dma_dm_to_rd, clk, 1'b1);

  // Byte write enables for VU RFile
  // *** Optimize: Only need to mask bwe for quad and rest, but elem always = 0
  // VU mask raw is masking out parts of the data that aren't obtained in
  // the original memory access. It's used only for quad and rest.
  assign vu_mask_raw = 16'hffff;
  assign vu_mask_1st = (df_addr_low[3:2] == 2'b00) ? vu_mask_raw :
    (df_addr_low[3:2] == 2'b01) ? {vu_mask_raw[11:0], 4'h0} :
    (df_addr_low[3:2] == 2'b10) ? {vu_mask_raw[7:0], 8'h0} :
    /* (df_addr_low[3:2] == 2'b11)*/ {vu_mask_raw[3:0], 12'h0};
  assign vu_mask_2nd = (df_addr_low[1:0] == 2'b00) ? {vu_mask_1st} :
    (df_addr_low[1:0] == 2'b01) ? {vu_mask_1st[14:0], 1'h0} :
    (df_addr_low[1:0] == 2'b10) ? {vu_mask_1st[13:0], 2'h0} :
    /* (df_addr_low[1:0] == 2'b11)*/ {vu_mask_1st[12:0], 3'h0};
  // Width-selected raw byte-write-enable pattern for the VU register file.
  assign vu_bwe_raw = (vu_df_ld_dec[0]) ? 16'h8000 : // byte
    (df_mtc2 || vu_df_ld_dec[1]) ? 16'hc000 :        // short
    (vu_df_ld_dec[2]) ? 16'hf000 :                   // long
    (vu_df_ld_dec[3] || vu_df_ld_dec[9]) ? 16'hff00 : // doub, fourth
    (vu_df_ld_dec[4]) ? vu_mask_2nd :                // quad
    (vu_df_ld_dec[5]) ? ~vu_mask_2nd :               // rest
    /* (no vu load) */ 16'h0000 ;
  // Shift the pattern right by the destination element number, in two
  // stages (by 4s then by 1s).
  assign vu_bwe_1st =
    (df_elem[3:2] == 2'b00) ? vu_bwe_raw :
    (df_elem[3:2] == 2'b01) ? {4'b0, vu_bwe_raw[15:4]} :
    (df_elem[3:2] == 2'b10) ? {8'b0, vu_bwe_raw[15:8]} :
    /* (df_elem[3:2] == 2'b11)*/ {12'b0, vu_bwe_raw[15:12]};
  assign vu_bwe_2nd =
    (df_elem[1:0] == 2'b00) ? vu_bwe_1st[15:0] :
    (df_elem[1:0] == 2'b01) ? {1'b0, vu_bwe_1st[15:1]} :
    (df_elem[1:0] == 2'b10) ? {2'b0, vu_bwe_1st[15:2]} :
    /* (df_elem[1:0] == 2'b11)*/ {3'b0, vu_bwe_1st[15:3]};
  wire [15:0] vu_bwe_tmp;
  wire vu_bwe_valid;
  // Pack/transpose-style loads (decodes 6,7,8,10) write the whole register.
  assign vu_bwe_tmp = (vu_df_ld_dec[6] || vu_df_ld_dec[7] || vu_df_ld_dec[8] ||
                       vu_df_ld_dec[10]) ? 16'hffff : vu_bwe_2nd;
  assign vu_bwe_valid = df_mtc2 || vu_df_load;
  assign vu_bwe = vu_bwe_tmp & {16{vu_bwe_valid}};
endmodule