issue.v
58.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
/**************************************************************************
* *
* Copyright (C) 1994, Silicon Graphics, Inc. *
* *
* These coded instructions, statements, and computer programs contain *
* unpublished proprietary information of Silicon Graphics, Inc., and *
* are protected by Federal copyright law. They may not be disclosed *
* to third parties or copied or duplicated in any form, in whole or *
* in part, without the prior written consent of Silicon Graphics, Inc. *
* *
*************************************************************************/
// $Id: issue.v,v 1.1.1.1 2002/05/17 06:07:48 blythe Exp $
// issue.v: RSP issue logic
`timescale 1ns / 10ps
module issue (clk, reset_l, halt, single_step,
pc_in_wr_en, pc_data_in, pc_sel, halting,
br_addr, rd_inst, set_broke, wr_br_addr,
imem_dma_pif,
taken, old_taken, adv_ir, kill_re, should_have_stalled,
old_delay_pending, clear_target_pending, old_target_pending,
su_inst_a, vu_inst_a, su_inst_b, vu_inst_b,
choose_su_inst_b, choose_vu_inst_b,
su_nop_debug, vu_nop_debug,
link_pc_delay_pc, pc, pc_wr_en,
rd_bubble, br_target, delay_slot,
imem_stall, start_ext_halt, kill_su_issue, kill_vu_issue,
vu_comp, vs, vs_eq_one, rd_pre_vt, vt_sel,
su_rd_pc_debug, vu_rd_pc_debug);
input clk;
input reset_l;
input halt;
input single_step;
input pc_in_wr_en;
input [11:2] pc_data_in;
input [4:0] pc_sel;
input halting;
input [11:2] br_addr;
input [63:0] rd_inst; // imem out. [63:32] is low order inst
input set_broke;
input wr_br_addr;
input imem_dma_pif;
input taken;
input old_taken;
input adv_ir;
input kill_re;
input should_have_stalled;
input old_delay_pending;
input clear_target_pending;
input old_target_pending;
output [31:0] su_inst_a;
output [31:0] vu_inst_a; // RD stage
output [31:0] su_inst_b;
output [31:0] vu_inst_b; // RD stage
output choose_su_inst_b;
output choose_vu_inst_b;
output su_nop_debug; // RD stage to nowhere
output vu_nop_debug; // RD stage to nowhere
output [23:0] link_pc_delay_pc; // EX stage to LS
output [11:2] pc;
output pc_wr_en;
output rd_bubble;
output br_target;
output delay_slot;
output imem_stall;
output start_ext_halt;
output kill_su_issue;
output kill_vu_issue;
output [11:0] su_rd_pc_debug; // for debug only
output [11:0] vu_rd_pc_debug; // for debug only
// Generation of these outputs is moved from suvuctl for timing reasons.
output vu_comp;
output [4:0] vs;
output vs_eq_one;
output [4:0] rd_pre_vt;
output [1:0] vt_sel;
wire start_ext_halt;
wire adv_ir;
wire halting;
wire imem_dma_if;
/* ********************************************************************** */
// Issue logic
wire adv_pcs;
wire [11:3] if_next_pc; // IF stage
wire [11:2] pc_mux_out;
wire [11:2] sav_pc; // pc of saved instruction
wire [11:2] int_halt_pc;
wire [63:0] stalled_rd_inst;
wire [63:0] muxed_rd_inst;
wire [31:0] sav_inst;
wire [11:2] old_br_addr;
wire [11:3] next_pc; // RD stage
wire [11:2] cur_rd_pc;
wire [11:2] delay_pc;
wire [11:2] link_pc;
wire [23:0] rd_link_pc_delay_pc; // RD stage
wire if_bubble;
wire if_other_bubble;
wire rd_other_bubble;
wire rd_other_bubble_tmp;
wire if_next_is_target;
wire rd_next_is_target;
wire target;
wire next_br_target;
wire even_target; // target is 64-bit aligned
wire odd_target; // target is not 64-bit aligned
wire delay_slot;
wire sav_br;
wire sav_su;
wire sav_vu;
wire fst_br;
wire snd_br;
wire fst_vu; // first instruction of imem pair is VU type
wire snd_vu; // second instruction of imem pair is VU type
wire s_f_dep;
wire f_s_dep;
wire next_target;
wire next_delay_slot;
wire next_sav_br;
wire next_sav_su;
wire next_sav_vu;
wire [4:0] su_inst_sel;
wire [4:0] vu_inst_sel;
// Dual Issue Signals:
wire [31:0] fst_inst;
wire [31:0] sec_inst;
wire [4:0] fst_vs;
wire [4:0] fst_vt;
wire [4:0] sec_vs;
wire [4:0] sec_vt;
wire [4:0] sav_vd;
wire [4:0] fst_vd;
wire [4:0] sec_vd;
wire [4:0] sav_rt;
wire [4:0] fst_rt;
wire [4:0] sec_rt;
wire [4:0] sav_rd;
wire [4:0] fst_rd;
wire [4:0] sec_rd;
wire [5:0] sav_vu_func;
wire [5:0] fst_vu_func;
wire [5:0] sec_vu_func;
wire sav_vu_comp;
wire fst_vu_comp;
wire sec_vu_comp;
wire sav_break;
wire fst_break;
wire sav_lwc2;
wire fst_lwc2;
wire sec_lwc2;
wire fst_swc2;
wire sec_swc2;
wire sav_lst;
wire fst_lst;
wire sec_lst;
wire sav_mtc2;
wire fst_mtc2;
wire sec_mtc2;
wire sav_ctc2;
wire fst_ctc2;
wire sec_ctc2;
wire fst_mfc2;
wire sec_mfc2;
wire fst_cfc2;
wire sec_cfc2;
wire fst_use_vs;
wire sec_use_vs;
wire fst_use_vt;
wire sec_use_vt;
wire sav_ctc2_vc0;
wire fst_ctc2_vc0;
wire sec_ctc2_vc0;
wire sav_ctc2_vc1;
wire fst_ctc2_vc1;
wire sec_ctc2_vc1;
wire sav_ctc2_vc2;
wire fst_ctc2_vc2;
wire sec_ctc2_vc2;
wire fst_use_vc0;
wire sec_use_vc0;
wire fst_use_vc1;
wire sec_use_vc1;
wire fst_use_vc2;
wire sec_use_vc2;
wire sav_vu_nop;
wire fst_vu_nop;
wire sec_vu_nop;
wire sav_vu_rd_wr_en;
wire fst_vu_rd_wr_en;
wire sec_vu_rd_wr_en;
wire sav_set_vc0;
wire fst_set_vc0;
wire sec_set_vc0;
wire sav_set_vc1;
wire fst_set_vc1;
wire sec_set_vc1;
wire sav_set_vc2;
wire fst_set_vc2;
wire sec_set_vc2;
wire fst_cfc2_vc0;
wire sec_cfc2_vc0;
wire fst_cfc2_vc1;
wire sec_cfc2_vc1;
wire fst_cfc2_vc2;
wire sec_cfc2_vc2;
wire sav_fst_hazard;
wire fst_sec_hazard;
wire ex_break;
wire issued_sav;
wire issued_fst;
wire issued_sec;
wire prev_stalled;
wire save_rd_inst;
wire [11:2] pc_n;
wire [11:2] cur_rd_pc_n;
wire [11:3] next_pc_n;
wire [23:0] link_pc_delay_pc_n;
wire prev_stalled_n;
wire [63:0] stalled_rd_inst_n;
wire [31:0] sav_inst_n;
wire rd_next_is_target_n;
wire imem_dma_if_n;
wire rd_bubble_n;
wire rd_other_bubble_tmp_n;
wire br_target_n;
wire target_n;
wire clr_state_n;
wire sav_br_n;
wire sav_su_n;
wire sav_vu_n;
wire tmp_delay_slot_n;
wire [11:2] sav_pc_n;
wire [11:2] old_br_addr_n;
wire ex_break_n;
// pc_mux: five-way next-PC source selector.
//   sel        - select vector, one bit per source. In simulation the
//                lowest-numbered set bit wins; the synopsys parallel_case
//                directive tells synthesis to treat the bits as mutually
//                exclusive.
//   src0..src4 - candidate PC values for sel[0]..sel[4], in argument order.
// Returns the selected source, or zero when no select bit is set.
function [11:2] pc_mux;
    input [4:0]  sel;
    input [11:2] src0;
    input [11:2] src1;
    input [11:2] src2;
    input [11:2] src3;
    input [11:2] src4;
    begin
        pc_mux = 10'h000; // result when no select bit matches
        case (1'b1) // synopsys parallel_case full_case
            sel[0] : pc_mux = src0;
            sel[1] : pc_mux = src1;
            sel[2] : pc_mux = src2;
            sel[3] : pc_mux = src3;
            sel[4] : pc_mux = src4;
        endcase
    end
endfunction
// Next-PC select. The pc_mux legs, in pc_sel bit order, are:
//   [0] pc_data_in, [1] int_halt_pc, [2] br_addr, [3] old_br_addr,
//   [4] {if_next_pc, 1'b0} (sequential fetch; pc bit 2 forced to 0).
assign pc_mux_out = pc_mux(pc_sel, pc_data_in[11:2], int_halt_pc, br_addr,
old_br_addr, {if_next_pc, 1'b0});
// PC register, fed by pc_mux_out and gated by pc_wr_en.
// NOTE(review): the spasdffen_* cells are defined outside this view; the port
// order (q, q_n, d, enable, clk, reset_l) is assumed from usage -- confirm.
spasdffen_10_0_h pc_ff (pc, pc_n, pc_mux_out, pc_wr_en, clk, reset_l);
// Sequential next fetch address: pc[11:3] (the 64-bit imem pair index) plus 1.
// The result is 9 bits wide, so the increment wraps.
assign if_next_pc = pc[11:3] + 1;
// Delay_pc is used for branch target calculations. It is the pc of the
// instruction following the branch. If the branch instruction comes
// from sav_inst (sav_br), then delay_pc is the pc of rd_inst. If there
// is no sav_br, but the first instruction of rd_inst is a branch (fst_br),
// the delay_pc is the pc of the second instruction in rd_inst. Finally,
// if there is no sav_br and no fst_br, but the second instruction in rd_inst
// is a branch, then delay_pc is the pc of the next sequential imem access
// after rd_inst. If there is no branch in sight, delay_pc is a don't care.
// Link_pc is the link address used for bxx_al and jalx. It is the pc of
// the second sequential instruction following the branch. If the branch
// instruction comes from sav_inst (sav_br), then link_pc is the pc of the
// second instruction in rd_inst. If there is no sav_br, but the first
// instruction of rd_inst is a branch (fst_br), the link_pc is the pc of
// the next sequential imem access after rd_inst. Finally, if there is no
// sav_br and no fst_br, but the second instruction in rd_inst is a branch,
// then link_pc is the pc of the second instruction in the next sequential
// imem access after rd_inst. If there is no branch in sight, link_pc is a
// don't care.
// *** This use of adv_pcs should be adv_ir ???:
// cur_rd_pc: copy of pc captured under adv_pcs. The last port is tied to 1'b1
// (presumably the active-low reset held inactive, i.e. never reset -- confirm).
spasdffen_10_0 cur_pc_ff (cur_rd_pc, cur_rd_pc_n, pc, adv_pcs, clk, 1'b1);
// *** Perhaps this one could be, too:
// next_pc: copy of if_next_pc captured under the same enable.
spasdffen_9_0 su_ir_pc_ff (next_pc, next_pc_n, if_next_pc, adv_pcs, clk, 1'b1);
// delay_pc: priority is sav_br, then fst_br; the last leg is used whenever
// neither is set (intended for snd_br, don't care otherwise).
assign delay_pc =
sav_br ? {cur_rd_pc[11:3], 1'b0} :
fst_br ? {cur_rd_pc[11:3], 1'b1} :
/* snd_br */ {next_pc, 1'b0};
// link_pc: same priority; always one word past delay_pc.
assign link_pc =
sav_br ? {cur_rd_pc[11:3], 1'b1} :
fst_br ? {next_pc, 1'b0} :
/* snd_br */ {next_pc, 1'b1};
// Pack both word addresses as 12-bit byte addresses (two zero LSBs each):
// [23:12] = link_pc, [11:0] = delay_pc.
assign rd_link_pc_delay_pc = {link_pc, 2'b0, delay_pc, 2'b0};
// Register the packed value every clock (no enable) to form link_pc_delay_pc.
spasdff_24_0 su_re_l_d_pc_ff (link_pc_delay_pc,link_pc_delay_pc_n,rd_link_pc_delay_pc,clk,1'b1);
/*
Bubble generation: if pc_sel == 0010, there is a branch in EX, the branch
delay slot is in RD, and the instruction sequentially following the delay
slot is in IF. The instructions in IF have to be killed. (Also, only one
RD instruction is issued.) This is difficult
in IF, because the IR flip-flop is included in the imem module. So we wait
one cycle, then do it in RD. We pipe pc_sel to the next cycle ( *** should
this be conditional on adv_ir? Is adv_ir guaranteed to be 1 at this point?)
and, conditional on it, kill the instruction now in RD.
*/
// fst_vu / snd_vu: the first ([63:32]) / second ([31:0]) instruction of the
// active pair has its top seven bits equal to 7'h25 (7'b0100101), the same
// pattern the *_vu_comp decodes test. The pair comes from stalled_rd_inst
// when prev_stalled is set, otherwise directly from rd_inst.
assign fst_vu = prev_stalled ? (stalled_rd_inst[63:57] == 7'h25) :
(rd_inst[63:57] == 7'h25);
assign snd_vu = prev_stalled ? (stalled_rd_inst[31:25] == 7'h25) :
(rd_inst[31:25] == 7'h25);
// fst_br / snd_br: branch/jump decode on the 6-bit opcode field
// ([63:58] / [31:26]): 0001xx, 00001x, 000001, or opcode 000000 with
// function bits [3:1] == 3'b100 (function bits [5:4] and [0] are not examined).
// NOTE(review): these look like the MIPS conditional-branch, J/JAL, REGIMM
// and JR/JALR encodings -- confirm against the RSP instruction set.
// fst_br is forced low when odd_target is set.
assign fst_br = !odd_target && (
(muxed_rd_inst[63:60] == 4'b0001) ||
(muxed_rd_inst[63:59] == 5'b00001) ||
(muxed_rd_inst[63:58] == 6'b000001) ||
((muxed_rd_inst[63:58]==6'b000000) && (muxed_rd_inst[35:33]==3'b100)));
assign snd_br =
(muxed_rd_inst[31:28] == 4'b0001) ||
(muxed_rd_inst[31:27] == 5'b00001) ||
(muxed_rd_inst[31:26] == 6'b000001) ||
((muxed_rd_inst[31:26]==6'b000000) && (muxed_rd_inst[3:1]==3'b100));
// s_f_dep: set by sav_fst_hazard, or by single_step while a saved SU or VU
// instruction exists.
assign s_f_dep = sav_fst_hazard || ((sav_su || sav_vu) && single_step);
// f_s_dep: set by fst_sec_hazard, or by single_step while nothing is saved.
assign f_s_dep = fst_sec_hazard || ((!sav_su && !sav_vu) && single_step);
// Halt sets if_bubble, therefore clearing all of this state during the
// following cycle:
// *** No, it doesn't. To force this, should "halting" also go into
// *** next_delay_slot and next_target?
// odd_target / even_target: `target` qualified by cur_rd_pc[2], i.e. whether
// the target word is the second (odd) or first (even) word of the 64-bit pair.
assign odd_target = target && cur_rd_pc[2];
assign even_target = target && !cur_rd_pc[2];
// *** Is this true in the presence of stalls, etc.? :
// next_br_target: D input of the br_target flop. Set by an RD bubble, by
// rd_next_is_target, or by a still-pending old target.
assign next_br_target = rd_bubble || rd_next_is_target ||
(old_target_pending && !clear_target_pending);
// next_target: D input of the target flop. Set by any RD bubble
// (rd_other_bubble includes rd_bubble), or by an old pending target unless
// the current target is present and not killed.
assign next_target = rd_other_bubble ||
(old_target_pending && !(target && !kill_re));
// Next_delay_slot: there's a branch available to issue and it will issue.
// Product terms, in order: saved branch; first-slot branch with nothing
// saved; first-slot branch alongside a saved VU op; second-slot branch at an
// odd target; second-slot branch paired with a first-slot VU op.
assign next_delay_slot =
!kill_re && !rd_other_bubble && (sav_br ||
(fst_br && !sav_su && !sav_vu) ||
(fst_br && sav_vu && !delay_slot && !s_f_dep) ||
(snd_br && odd_target) ||
(snd_br && fst_vu && !odd_target &&
!sav_su && !sav_vu && !delay_slot && !f_s_dep));
// *** check use of !delay_slot in next_sav_br, next_sav_su, next_sav_vu
// *** next_br_target and rd_bubble are somewhat redundant in next_sav_xx
// *** remove redundant !odd target in next_sav_xx clauses.
// next_sav_br / next_sav_su / next_sav_vu: D inputs of the sav_br, sav_su and
// sav_vu flops. All three share the same gating: no next branch target, not
// an odd target, not halting, no RD bubble, and not in a delay slot whose
// branch was taken (taken || old_taken).
//
// next_sav_br: every product term requires snd_br, so the saved branch is
// always the second instruction of the pair.
assign next_sav_br = !next_br_target && !odd_target && !halting && !rd_other_bubble &&
!(delay_slot && (taken || old_taken)) && // = 2nd_br && next_sav_su;
((!fst_vu && snd_br && !odd_target && !sav_su && !sav_vu) ||
(!fst_vu && snd_br && !odd_target && !sav_su &&
!delay_slot && sav_vu && !s_f_dep) ||
(fst_vu && snd_br && delay_slot && !sav_su && !sav_vu) ||
(fst_vu&& snd_br&& !delay_slot && !sav_br && sav_su && !s_f_dep) ||
(fst_vu && snd_br && !odd_target && !delay_slot &&
!sav_su && !sav_vu && f_s_dep));
// next_sav_su: every product term requires !snd_vu (second instruction is
// not a VU op). The five terms mirror those of next_sav_br with snd_br
// replaced by !snd_vu.
assign next_sav_su = !next_br_target && !odd_target && !halting && !rd_other_bubble &&
!(delay_slot && (taken || old_taken)) &&
((!fst_vu && !snd_vu && !odd_target && !sav_su && !sav_vu) ||
(!fst_vu && !snd_vu && !odd_target && !sav_su &&
!delay_slot && sav_vu && !s_f_dep) ||
(fst_vu && !snd_vu && delay_slot && !sav_su && !sav_vu) ||
(fst_vu && !snd_vu && !delay_slot && !sav_br && sav_su &&
!s_f_dep) ||
(fst_vu && !snd_vu && !odd_target && !delay_slot &&
!sav_su && !sav_vu && f_s_dep));
// next_sav_vu: every product term requires snd_vu (second instruction is a
// VU op). It has one extra term, for a first-slot branch (fst_br) with
// nothing saved and no delay slot.
assign next_sav_vu = !next_br_target && !odd_target && !halting && !rd_other_bubble &&
!(delay_slot && (taken || old_taken)) &&
((fst_vu && snd_vu && !odd_target && !sav_vu && !sav_su) ||
(fst_vu && snd_vu && !odd_target && !sav_vu &&
!delay_slot && !sav_br && sav_su && !s_f_dep) ||
(!fst_vu && snd_vu && delay_slot && !sav_su && !sav_vu) ||
(!fst_vu && snd_vu && !delay_slot && sav_vu && !s_f_dep) ||
(/* !fst_vu && */ snd_vu && !delay_slot && !sav_su && !sav_vu && fst_br) ||
(!fst_vu && snd_vu && !odd_target && !delay_slot &&
!sav_su && !sav_vu && f_s_dep));
/*
The enable on the IMem output FF is the same signal as chip select, which
is required to be valid about 1 ns before the *falling* edge of the clock.
We would like to use this FF to hold stalled instructions, but it's
impossible to produce the stall signal (adv_ir) in time. Therefore we
create this side FF to hold instructions which should have been stalled,
and add a leg to the su_inst_sel and vu_inst_sel muxes to choose the stalled
instruction.
*/
// prev_stalled: should_have_stalled registered every clock (no enable).
// NOTE(review): spasdff_* port order (q, q_n, d, clk, reset_l) is assumed
// from usage; the cells are defined outside this view -- confirm.
spasdff_1_0_h prev_imem_stall_ff (prev_stalled, prev_stalled_n, should_have_stalled, clk, reset_l);
// Capture enable: asserted only when should_have_stalled is set and the
// previous cycle was not already stalled, so a held copy is not overwritten
// on later consecutive stall cycles.
assign save_rd_inst = should_have_stalled && !prev_stalled;
// stalled_rd_inst: side copy of the 64-bit imem output, loaded under
// save_rd_inst. This cell is instantiated without a reset port.
spasdffen_64_h rd_inst_ff (stalled_rd_inst, stalled_rd_inst_n, rd_inst, save_rd_inst, clk);
// Active instruction pair: the held copy after a stall, else the live imem output.
assign muxed_rd_inst = prev_stalled ? stalled_rd_inst : rd_inst;
// *** Need assertion here to check for multiple (or no) selects valid
// Mutual exclusivity depends on: xxx_target implies !sav_xx
// su_inst_sel: [0]: sav_inst, [1]: fst_inst of rd_inst, [2]: snd_inst of rd_inst,
// [3]: fst_inst of stalled_rd_inst, [4]: snd_inst of stalled_rd_inst
// vu_inst_sel: ditto
// Per-source decodes. The "_rd_" versions look at the live imem output
// (rd_inst); the "_st_" versions look at the held copy (stalled_rd_inst).
// They use the same bit patterns as fst_vu / snd_vu / fst_br, but without
// the prev_stalled mux in front.
wire fst_rd_vu;
wire snd_rd_vu;
wire fst_st_vu;
wire snd_st_vu;
wire fst_rd_br;
wire fst_st_br;
assign fst_rd_vu = (rd_inst[63:57] == 7'h25);
assign snd_rd_vu = (rd_inst[31:25] == 7'h25);
assign fst_st_vu = (stalled_rd_inst[63:57] == 7'h25);
assign snd_st_vu = (stalled_rd_inst[31:25] == 7'h25);
assign fst_rd_br = !odd_target && (
(rd_inst[63:60] == 4'b0001) ||
(rd_inst[63:59] == 5'b00001) ||
(rd_inst[63:58] == 6'b000001) ||
((rd_inst[63:58]==6'b000000) && (rd_inst[35:33]==3'b100)));
assign fst_st_br = !odd_target && (
(stalled_rd_inst[63:60] == 4'b0001) ||
(stalled_rd_inst[63:59] == 5'b00001) ||
(stalled_rd_inst[63:58] == 6'b000001) ||
((stalled_rd_inst[63:58]==6'b000000)&&(stalled_rd_inst[35:33]==3'b100)));
// SU select legs: [0] saved instruction; [1]/[2] first/second word of
// rd_inst (only when !prev_stalled); [3]/[4] first/second word of
// stalled_rd_inst (only when prev_stalled). Legs [3]/[4] repeat the
// equations of [1]/[2] with the "_st_" decodes.
assign su_inst_sel[0] = sav_su;
assign su_inst_sel[1] = !prev_stalled && !sav_su && !fst_rd_vu&&!odd_target;
assign su_inst_sel[2] = !prev_stalled &&
((!snd_rd_vu && odd_target) ||
(fst_rd_vu && !snd_rd_vu && even_target) ||
(fst_rd_vu && !snd_rd_vu && !sav_su && !sav_vu && !delay_slot));
assign su_inst_sel[3] = prev_stalled && !sav_su && !fst_st_vu &&!odd_target;
assign su_inst_sel[4] = prev_stalled &&
((!snd_st_vu && odd_target) ||
(fst_st_vu && !snd_st_vu && even_target) ||
(fst_st_vu && !snd_st_vu && !sav_su && !sav_vu && !delay_slot));
// no_su_inst_rd / no_su_inst_st: none of the SU select conditions holds for
// the live / held pair. The !sav_su factor of the individual select terms is
// covered by the leading !(sav_su).
wire no_su_inst_rd;
assign no_su_inst_rd =
!(sav_su) &&
!(!fst_rd_vu && !odd_target) &&
!(!snd_rd_vu && odd_target) &&
!(fst_rd_vu && !snd_rd_vu && even_target) &&
!(fst_rd_vu && !snd_rd_vu && !sav_vu && !delay_slot);
wire no_su_inst_st;
assign no_su_inst_st =
!(sav_su) &&
!(!fst_st_vu && !odd_target) &&
!(!snd_st_vu && odd_target) &&
!(fst_st_vu && !snd_st_vu && even_target) &&
!(fst_st_vu && !snd_st_vu && !sav_vu && !delay_slot);
// Pick the version matching the active pair.
wire no_su_inst = (prev_stalled && no_su_inst_st) || (!prev_stalled && no_su_inst_rd);
// VU select legs: same leg assignment as su_inst_sel, with the VU-type tests
// inverted. A second-slot VU op is additionally not selected when the first
// slot is a branch (fst_*_br), except at an odd target.
assign vu_inst_sel[0] = sav_vu;
assign vu_inst_sel[1] = !prev_stalled && !sav_vu && fst_rd_vu &&!odd_target;
assign vu_inst_sel[2] = !prev_stalled &&
((snd_rd_vu && odd_target) ||
(!fst_rd_vu && snd_rd_vu && even_target && !fst_rd_br) ||
(!fst_rd_vu && snd_rd_vu &&!delay_slot&&!sav_su&&!sav_vu&&!fst_rd_br));
assign vu_inst_sel[3] = prev_stalled && !sav_vu && fst_st_vu && !odd_target;
assign vu_inst_sel[4] = prev_stalled &&
((snd_st_vu && odd_target) ||
(!fst_st_vu && snd_st_vu && even_target && !fst_st_br) ||
(!fst_st_vu && snd_st_vu &&!delay_slot&&!sav_su&&!sav_vu&&!fst_st_br));
// no_vu_inst_rd / no_vu_inst_st: none of the VU select conditions holds for
// the live / held pair. The !sav_vu factor is covered by the leading !(sav_vu).
wire no_vu_inst_rd;
assign no_vu_inst_rd =
!(sav_vu) &&
!(fst_rd_vu &&!odd_target) &&
!(snd_rd_vu && odd_target) &&
!(!fst_rd_vu && snd_rd_vu && even_target && !fst_rd_br) &&
!(!fst_rd_vu && snd_rd_vu && !delay_slot && !sav_su && !fst_rd_br);
wire no_vu_inst_st;
assign no_vu_inst_st =
!(sav_vu) &&
!(fst_st_vu &&!odd_target) &&
!(snd_st_vu && odd_target) &&
!(!fst_st_vu && snd_st_vu && even_target && !fst_st_br) &&
!(!fst_st_vu && snd_st_vu && !delay_slot && !sav_su && !fst_st_br);
// Pick the version matching the active pair.
wire no_vu_inst = (prev_stalled && no_vu_inst_st) || (!prev_stalled && no_vu_inst_rd);
// "b" instruction outputs: the second (low) word of the active pair.
// su_inst_b_reg / vu_inst_b_reg are written by the combinational always
// blocks that follow; the two select vectors are {prev_stalled, !prev_stalled}.
reg [31:0] su_inst_b_reg;
reg [31:0] vu_inst_b_reg;
wire [1:0] su_inst_b_sel;
wire [1:0] vu_inst_b_sel;
assign su_inst_b_sel[0] = !prev_stalled;
assign su_inst_b_sel[1] = prev_stalled;
assign vu_inst_b_sel[0] = !prev_stalled;
assign vu_inst_b_sel[1] = prev_stalled;
wire choose_su_inst_b;
wire choose_vu_inst_b;
// Buffered copies of the three candidate "a" instructions: high word of
// rd_inst, high word of stalled_rd_inst, and sav_inst.
// NOTE(review): the cell name and the *_n_buf net names suggest the outputs
// are inverted; big_inv_bufs_32 is defined outside this view -- confirm.
wire [31:0] rd_inst_high_n_buf;
wire [31:0] st_rd_inst_high_n_buf;
wire [31:0] sav_inst_n_buf;
big_inv_bufs_32 rd_inst_high_bufs (rd_inst_high_n_buf, rd_inst[63:32]);
big_inv_bufs_32 st_rd_inst_high_bufs (st_rd_inst_high_n_buf, stalled_rd_inst[63:32]);
big_inv_bufs_32 sav_inst_bufs (sav_inst_n_buf, sav_inst);
// 32-bit replicated ("ded_") copies of the sav_su, sav_vu and prev_stalled
// state bits: each flop bank is fed the same next-state value on all 32 bits
// and uses the same update condition (!kill_re||halting) as the 1-bit
// sav_su / sav_vu flops. They drive the inst_a_mux selects.
wire [31:0] ded_sav_su;
wire [31:0] ded_sav_su_n;
wire [31:0] ded_sav_vu;
wire [31:0] ded_sav_vu_n;
wire [31:0] ded_prev_stalled;
wire [31:0] ded_prev_stalled_n;
spasdffen_32_0 ded_su_sv_su_ff (
ded_sav_su, ded_sav_su_n, {32{next_sav_su}},
(!kill_re||halting), clk, reset_l);
spasdffen_32_0 ded_su_sv_vu_ff (
ded_sav_vu, ded_sav_vu_n, {32{next_sav_vu}},
(!kill_re||halting), clk, reset_l);
spasdff_32_0 ded_prev_imem_stall_ff (ded_prev_stalled, ded_prev_stalled_n,
{32{should_have_stalled}}, clk, reset_l);
// "a" instruction muxes. Both take the same three data candidates; the SU mux
// is steered by ded_sav_su and the VU mux by ded_sav_vu, each together with
// ded_prev_stalled. inst_a_mux is defined outside this view.
inst_a_mux su_inst_a_mux (ded_prev_stalled, ded_prev_stalled_n, ded_sav_su,
ded_sav_su_n, sav_inst_n_buf, rd_inst_high_n_buf,
st_rd_inst_high_n_buf, su_inst_a);
inst_a_mux vu_inst_a_mux (ded_prev_stalled, ded_prev_stalled_n, ded_sav_vu,
ded_sav_vu_n, sav_inst_n_buf, rd_inst_high_n_buf,
st_rd_inst_high_n_buf, vu_inst_a);
// su_inst_b mux: low word of rd_inst when su_inst_b_sel[0] (not stalled),
// low word of stalled_rd_inst when su_inst_b_sel[1] (stalled).
// The two select bits are complements of each other, so exactly one leg is
// active for known values; the full_case/parallel_case directives rely on that.
always @(su_inst_b_sel or rd_inst or stalled_rd_inst) begin
case (1'b1) //synopsys parallel_case full_case
su_inst_b_sel[0] : su_inst_b_reg = rd_inst[31:0];
su_inst_b_sel[1] : su_inst_b_reg = stalled_rd_inst[31:0];
endcase
end
// vu_inst_b mux: identical structure and data sources, separate select copy.
always @(vu_inst_b_sel or rd_inst or stalled_rd_inst) begin
case (1'b1) //synopsys parallel_case full_case
vu_inst_b_sel[0] : vu_inst_b_reg = rd_inst[31:0];
vu_inst_b_sel[1] : vu_inst_b_reg = stalled_rd_inst[31:0];
endcase
end
// Drive the module outputs from the procedural mux results.
assign su_inst_b = su_inst_b_reg;
assign vu_inst_b = vu_inst_b_reg;
// choose_vu_inst_b: no saved VU instruction, and either the first word of the
// active pair is not a VU op or the target is odd. The third product term
// (!fst_rd_vu && !fst_st_vu) is the consensus of the first two and does not
// change the logic function.
assign choose_vu_inst_b = !sav_vu &&
((prev_stalled && !fst_st_vu) ||
(!fst_rd_vu && !prev_stalled) ||
(!fst_rd_vu && !fst_st_vu) ||
odd_target);
// choose_su_inst_b: no saved SU instruction, and either the first word of the
// active pair is a VU op or the target is odd. Same structure with the
// VU-type tests inverted.
assign choose_su_inst_b = !sav_su &&
((prev_stalled && fst_st_vu) ||
(fst_rd_vu && !prev_stalled) ||
(fst_rd_vu && fst_st_vu) ||
odd_target);
// Saved-instruction register: captures the second (low) word of the active
// pair whenever adv_ir is asserted.
wire [31:0] muxed_sav_inst;
assign muxed_sav_inst =prev_stalled ? stalled_rd_inst[31:0] : rd_inst[31:0];
spasdffen_32_0 su_sav_inst_ff (sav_inst, sav_inst_n, muxed_sav_inst,adv_ir, clk,reset_l);
// If delay slot is stalled one cycle, don't want to kill it. Hence,
// (rd_bubble && !delay_slot).
// kill_su_issue: suppress SU issue this cycle. Asserted for a halt/DMA bubble
// (rd_other_bubble_tmp), a branch bubble outside a delay slot, an odd target
// whose second word is a VU op, the listed pairing restrictions, or when no
// SU instruction is selectable at all (no_su_inst).
assign kill_su_issue = rd_other_bubble_tmp ||
(rd_bubble && !delay_slot) || (odd_target && snd_vu) ||
// not odd target; 1st is su, but don't issue 1st
((su_inst_sel[1]||su_inst_sel[3])&&((delay_slot&&sav_vu)||s_f_dep)) ||
// not odd target; 2nd is su (???), but don't issue 2nd
(!odd_target && fst_vu && !sav_su && delay_slot) ||
(!odd_target && fst_vu && sav_vu) ||
((vu_inst_sel[1] || vu_inst_sel[3]) && f_s_dep) ||
// no su instructions available
// (!sav_su && fst_vu && snd_vu);
no_su_inst;
// kill_vu_issue: VU counterpart of kill_su_issue. It has extra terms that
// block a first-slot VU op behind a saved branch (sav_br) or a saved SU
// break (sav_break && sav_su), and a second-slot VU op behind a first-slot
// break (fst_break).
assign kill_vu_issue = rd_other_bubble_tmp ||
(rd_bubble && !delay_slot) || (odd_target && !snd_vu) ||
// not high target; 1st is vu, but don't issue 1st
((vu_inst_sel[1] || vu_inst_sel[3]) && ((delay_slot && sav_su) || sav_br || (sav_break && sav_su) || s_f_dep)) ||
// not high target; 2nd is vu, but don't issue 2nd
(!odd_target && !fst_vu && !sav_vu && delay_slot) ||
(!odd_target && !fst_vu && sav_su) ||
((su_inst_sel[1] || su_inst_sel[3]) && fst_break && !sav_vu) ||
((su_inst_sel[1] || su_inst_sel[3]) && f_s_dep) ||
// no vu instructions available
// (!sav_vu && !fst_vu && !snd_vu);
no_vu_inst;
// *** What about !sav_su && !sav_vu && fst_br and snd_vu? And do we get sav_vu in this case?
// Debug outputs: a unit sees a nop when its issue is killed or kill_re is set.
assign su_nop_debug = (kill_su_issue || kill_re);
assign vu_nop_debug = (kill_vu_issue || kill_re);
// imem_stall: asserted (outside a branch bubble) when a saved instruction
// exists and the listed pairing conditions hold: a saved SU op with a
// non-VU first word, a delay slot, a saved branch or an s_f_dep; or a saved
// VU op with a VU first word, a delay slot or an s_f_dep.
assign imem_stall = !rd_bubble &&
((sav_su && (!fst_vu || delay_slot || sav_br || s_f_dep)) ||
(sav_vu && (fst_vu || delay_slot || s_f_dep)));
// = (sav_su && !(vu_inst_sel[1] || vu_inst_sel[3])) ||
// (sav_vu && (!su_inst_sel[1] || su_inst_sel[3]));
// Only need bubble when there is no interruption between the taken
// branch and the target instruction. pc_sel[2] indicates there is
// a taken branch in EX and the target pc will enter IF in the next
// cycle.
// if_bubble: the PC is being written from br_addr (pc_sel[2]) or old_br_addr
// (pc_sel[3]).
assign if_bubble = (pc_sel[2] || pc_sel[3]) && pc_wr_en;
// These are used to figure out when we're at the target of a branch even
// though there was no rd_bubble.
assign if_next_is_target = pc_sel[3] && pc_wr_en;
// rd_next_is_target: if_next_is_target registered under adv_ir.
spasdffen_1_0 is_ir_next_tar_ff (rd_next_is_target, rd_next_is_target_n, if_next_is_target, adv_ir, clk, reset_l);
// if pc is target pc = prev pc_wr_en && prev_pc_mux=
// imem_dma_if: imem_dma_pif delayed by one clock (no enable).
spasdff_1_0 su_pifif_dma_ff(imem_dma_if, imem_dma_if_n, imem_dma_pif, clk, reset_l);
// if_other_bubble: halt, or the cycle in which imem_dma_pif has dropped while
// its one-clock-delayed copy is still set.
assign if_other_bubble = halt || (imem_dma_if & !imem_dma_pif);
// rd_bubble follows if_bubble under adv_ir; rd_other_bubble_tmp follows
// if_other_bubble every clock.
spasdffen_1_0 su_ir_bubble_ff (rd_bubble, rd_bubble_n, if_bubble, adv_ir,clk, reset_l);
spasdff_1_0 su_ir_obubble_ff (rd_other_bubble_tmp, rd_other_bubble_tmp_n, if_other_bubble,clk,reset_l);
// br_target updates when !kill_re; target updates when adv_ir.
spasdffen_1_0 su_btarget_ff (br_target, br_target_n, next_br_target,!kill_re,clk, reset_l);
spasdffen_1_0 su_target_ff (target, target_n, next_target, adv_ir, clk, reset_l);
wire tmp_delay_slot;
// sav_br / sav_su / sav_vu state flops. The enable is built into the D input:
// the flop takes its next_sav_* value when (!kill_re||halting) and otherwise
// recirculates its current value.
spasdff_1_0_h su_sv_br_ff ( // rigged high-perf ff w/ en
sav_br, sav_br_n,
((!kill_re||halting) ? next_sav_br : sav_br),
clk, reset_l);
spasdff_1_0_h su_sv_su_ff ( // rigged high-perf ff w/ en
sav_su, sav_su_n,
((!kill_re||halting) ? next_sav_su : sav_su),
clk, reset_l);
spasdff_1_0_h su_sv_vu_ff ( // rigged high-perf ff w/ en
sav_vu, sav_vu_n,
((!kill_re||halting) ? next_sav_vu : sav_vu),
clk, reset_l);
// delay_slot: registered next_delay_slot (under adv_ir), ORed with the
// externally supplied old_delay_pending.
spasdffen_1_0 su_del_slot_ff(tmp_delay_slot,tmp_delay_slot_n,next_delay_slot,adv_ir,clk,reset_l);
assign delay_slot = tmp_delay_slot || old_delay_pending;
// rd_other_bubble: either kind of RD bubble.
assign rd_other_bubble = rd_other_bubble_tmp || rd_bubble;
// sav_pc: word address of the second instruction of the current RD pair
// (bit 2 forced to 1), captured under adv_ir; reset port tied to 1'b1.
spasdffen_10_0 su_sav_pc_ff (sav_pc,sav_pc_n, {cur_rd_pc[11:3],1'b1},adv_ir,clk,1'b1);
// Debug-only RD-stage byte PCs for the instruction each unit has selected.
// The three possible values are named once and shared by both outputs:
//   dbg_sav_pc - saved instruction (sav_pc as a byte address)
//   dbg_fst_pc - first word of the current pair  (byte offset 0)
//   dbg_snd_pc - second word of the current pair (byte offset 4)
wire [11:0] dbg_sav_pc = {sav_pc, 2'b00};
wire [11:0] dbg_fst_pc = {cur_rd_pc[11:3], 3'b000};
wire [11:0] dbg_snd_pc = {cur_rd_pc[11:3], 3'b100};
// Select legs are examined in ascending order; legs [1]/[3] both map to the
// first word and legs [2]/[4] to the second word. Zero when nothing is selected.
assign su_rd_pc_debug = // for debug only
    su_inst_sel[0] ? dbg_sav_pc :
    su_inst_sel[1] ? dbg_fst_pc :
    su_inst_sel[2] ? dbg_snd_pc :
    su_inst_sel[3] ? dbg_fst_pc :
    su_inst_sel[4] ? dbg_snd_pc :
                     12'b0;
assign vu_rd_pc_debug = // for debug only
    vu_inst_sel[0] ? dbg_sav_pc :
    vu_inst_sel[1] ? dbg_fst_pc :
    vu_inst_sel[2] ? dbg_snd_pc :
    vu_inst_sel[3] ? dbg_fst_pc :
    vu_inst_sel[4] ? dbg_snd_pc :
                     12'b0;
/* ********************************************************************** */
// Dual-issue hazards
// Detect hazards (VU register and VU control register) between the two
// instructions that are otherwise expected to dual-issue.
// *** This could be very bad, timing-wise.
// Split the active pair: fst_inst is the high word, sec_inst the low word.
assign fst_inst = muxed_rd_inst[63:32];
assign sec_inst = muxed_rd_inst[31:0];
// Register-number fields. vs/rd share bits [15:11], vt/rt share bits [20:16],
// and vd is bits [10:6].
assign fst_vs = fst_inst[15:11];
assign sec_vs = sec_inst[15:11];
assign fst_vt = fst_inst[20:16];
assign sec_vt = sec_inst[20:16];
assign sav_vd = sav_inst[10:6];
assign fst_vd = fst_inst[10:6];
assign sec_vd = sec_inst[10:6];
assign sav_rt = sav_inst[20:16];
assign fst_rt = fst_inst[20:16];
assign sec_rt = sec_inst[20:16];
assign sav_rd = sav_inst[15:11];
assign fst_rd = fst_inst[15:11];
assign sec_rd = sec_inst[15:11];
// *_vu_comp: top seven bits equal 7'b0100101 (opcode 010010 with bit 25 set).
assign sav_vu_comp = sav_inst[31:25] == 7'b0100101;
assign fst_vu_comp = fst_inst[31:25] == 7'b0100101;
assign sec_vu_comp = sec_inst[31:25] == 7'b0100101;
// Function field, bits [5:0].
assign sav_vu_func = sav_inst[5:0];
assign fst_vu_func = fst_inst[5:0];
assign sec_vu_func = sec_inst[5:0];
// *_break: opcode 000000 with function 001101.
assign sav_break = (sav_inst[31:26]==6'b000000)&&(sav_inst[5:0]==6'b001101);
assign fst_break = (fst_inst[31:26]==6'b000000)&&(fst_inst[5:0]==6'b001101);
// *_lwc2 / *_swc2: opcode 110010 / 111010.
assign sav_lwc2 = (sav_inst[31:26]==6'b110010);
assign fst_lwc2 = (fst_inst[31:26]==6'b110010);
assign sec_lwc2 = (sec_inst[31:26]==6'b110010);
assign fst_swc2 = (fst_inst[31:26]==6'b111010);
assign sec_swc2 = (sec_inst[31:26]==6'b111010);
// *_lst: bits [14:11] equal 1011. This decode is not qualified by opcode
// here; bit 15 is not examined.
assign sav_lst = (sav_inst[14:11]==4'b1011);
assign fst_lst = (fst_inst[14:11]==4'b1011);
assign sec_lst = (sec_inst[14:11]==4'b1011);
// Coprocessor-2 moves: opcode 010010 with bits [25:22] = 0010 (mtc2),
// 0011 (ctc2), 0000 (mfc2), 0001 (cfc2). Bit 21 is not examined.
assign sav_mtc2 = (sav_inst[31:22]==10'b0100100010);
assign fst_mtc2 = (fst_inst[31:22]==10'b0100100010);
assign sec_mtc2 = (sec_inst[31:22]==10'b0100100010);
assign sav_ctc2 = (sav_inst[31:22]==10'b0100100011);
assign fst_ctc2 = (fst_inst[31:22]==10'b0100100011);
assign sec_ctc2 = (sec_inst[31:22]==10'b0100100011);
assign fst_mfc2 = (fst_inst[31:22]==10'b0100100000);
assign sec_mfc2 = (sec_inst[31:22]==10'b0100100000);
assign fst_cfc2 = (fst_inst[31:22]==10'b0100100001);
assign sec_cfc2 = (sec_inst[31:22]==10'b0100100001);
// *_use_vs: VU computational op whose function code is NOT one of the listed
// exclusions. The 6'b011100 entry also falls under the 4'b0111 prefix test
// on the last line.
assign fst_use_vs =
fst_vu_comp && !((fst_vu_func[5:0] == 6'b110111) || // !nop
(fst_vu_func[5:0] == 6'b111111) || // !nop
(fst_vu_func[5:0] == 6'b001011) || // !macq
(fst_vu_func[5:0] == 6'b011100) || // !vsum
(fst_vu_func[5:0] == 6'b000010) || // !rnd
(fst_vu_func[5:0] == 6'b001010) || // !rnd
(fst_vu_func[5:2] == 4'b0111)); // !sar
assign sec_use_vs =
sec_vu_comp && !((sec_vu_func[5:0] == 6'b110111) || // !nop
(sec_vu_func[5:0] == 6'b111111) || // !nop
(sec_vu_func[5:0] == 6'b001011) || // !macq
(sec_vu_func[5:0] == 6'b011100) || // !vsum
(sec_vu_func[5:0] == 6'b000010) || // !rnd
(sec_vu_func[5:0] == 6'b001010) || // !rnd
(sec_vu_func[5:2] == 4'b0111)); // !sar
// *_use_vt: same idea with a different exclusion list. The rnd codes are not
// excluded here, while the whole 1111xx group is (which also covers the
// explicitly listed 6'b111111).
assign fst_use_vt =
fst_vu_comp && !((fst_vu_func[5:0] == 6'b001011) || // !macq
(fst_vu_func[5:0] == 6'b011100) || // !vsum
(fst_vu_func[5:0] == 6'b110111) || // !nop
(fst_vu_func[5:0] == 6'b111111) || // !nop
(fst_vu_func[5:2] == 4'b1111) || // !extract
(fst_vu_func[5:2] == 4'b0111)); // !sar
assign sec_use_vt =
sec_vu_comp && !((sec_vu_func[5:0] == 6'b001011) || // !macq
(sec_vu_func[5:0] == 6'b011100) || // !vsum
(sec_vu_func[5:0] == 6'b110111) || // !nop
(sec_vu_func[5:0] == 6'b111111) || // !nop
(sec_vu_func[5:2] == 4'b1111) || // !extract
(sec_vu_func[5:2] == 4'b0111)); // !sar
// *_ctc2_vcN: ctc2 whose rd field selects control register N; only rd[1:0]
// is decoded (00 -> vc0, 01 -> vc1, 10 -> vc2).
assign sav_ctc2_vc0 = sav_ctc2 && (sav_rd[1:0]==2'b00);
assign fst_ctc2_vc0 = fst_ctc2 && (fst_rd[1:0]==2'b00);
assign sec_ctc2_vc0 = sec_ctc2 && (sec_rd[1:0]==2'b00);
assign sav_ctc2_vc1 = sav_ctc2 && (sav_rd[1:0]==2'b01);
assign fst_ctc2_vc1 = fst_ctc2 && (fst_rd[1:0]==2'b01);
assign sec_ctc2_vc1 = sec_ctc2 && (sec_rd[1:0]==2'b01);
assign sav_ctc2_vc2 = sav_ctc2 && (sav_rd[1:0]==2'b10);
assign fst_ctc2_vc2 = fst_ctc2 && (fst_rd[1:0]==2'b10);
assign sec_ctc2_vc2 = sec_ctc2 && (sec_rd[1:0]==2'b10);
// In some cases instructions that don't actually have the described
// behavior are included in the following use/set assignments for
// hardware convenience.
// vco: use_vc0: vabs and vmrg; set_vc0: vabs, vaddb, vsubb, and vmrg
// vcc: use_vc1: vmrg, cl, cr; set_vc1: all selects
// (vmrg doesn't really set vc1, but we say it does to prevent
// it from dual issuing with ctc2)
// vce: use_vc2: cl; set_vc2: cl, ch, cr
//
// NOTE(review): the vco line above lists vmrg as a vc0 user, but the
// use_vc0 decode below explicitly excludes func 6'b100111 ("not mrg") and
// includes the rest of the 3'b100 compare group -- confirm which is intended.
//
// use_vc0: func[5:3]==100 except func[2:0]==111, or func[5:2]==0100.
assign fst_use_vc0 = fst_vu_comp && // cmp, not mrg
(((fst_vu_func[5:3] == 3'b100) && (fst_vu_func[2:0] != 3'b111)) ||
(fst_vu_func[5:2] == 4'b0100)); // add, sub, sut, abs
assign sec_use_vc0 = sec_vu_comp && // cmp, not mrg
(((sec_vu_func[5:3] == 3'b100) && (sec_vu_func[2:0] != 3'b111)) ||
(sec_vu_func[5:2] == 4'b0100)); // add, sub, sut, abs
// use_vc1: func 100101, or func 10011x (100110 and 100111).
assign fst_use_vc1 = fst_vu_comp &&
((fst_vu_func[5:0]==6'b100101) || // cl
(fst_vu_func[5:1]==5'b10011)); // mrg, cr
assign sec_use_vc1 = sec_vu_comp &&
((sec_vu_func[5:0]==6'b100101) || // cl
(sec_vu_func[5:1]==5'b10011)); // mrg, cr
// use_vc2: func 100101 only.
assign fst_use_vc2 = fst_vu_comp && (fst_vu_func[5:0]==6'b100101); // cl
assign sec_use_vc2 = sec_vu_comp && (sec_vu_func[5:0]==6'b100101); // cl
// set_vc0: any func in the 010xxx or 100xxx groups.
assign sav_set_vc0 = sav_vu_comp &&
((sav_vu_func[5:3] == 3'b010) || // add, sub class
(sav_vu_func[5:3] == 3'b100)); // compare
assign fst_set_vc0 = fst_vu_comp &&
((fst_vu_func[5:3] == 3'b010) || // add, sub class
(fst_vu_func[5:3] == 3'b100)); // compare
assign sec_set_vc0 = sec_vu_comp &&
((sec_vu_func[5:3] == 3'b010) || // add, sub class
(sec_vu_func[5:3] == 3'b100)); // compare
// set_vc1: the whole 100xxx group (includes 100111, see header comment).
assign sav_set_vc1 = sav_vu_comp && // comp
(sav_vu_func[5:3] == 3'b100);
assign fst_set_vc1 = fst_vu_comp && // comp
(fst_vu_func[5:3] == 3'b100);
assign sec_set_vc1 = sec_vu_comp && // comp
(sec_vu_func[5:3] == 3'b100);
// set_vc2: func 10010x (100100, 100101) or 100110.
assign sav_set_vc2 = sav_vu_comp && // cl,ch,cr
((sav_vu_func[5:1]==5'b10010) || (sav_vu_func==6'b100110));
assign fst_set_vc2 = fst_vu_comp && // cl,ch,cr
((fst_vu_func[5:1]==5'b10010) || (fst_vu_func==6'b100110));
assign sec_set_vc2 = sec_vu_comp && // cl,ch,cr
((sec_vu_func[5:1]==5'b10010) || (sec_vu_func==6'b100110));
// CFC2 source decode: rd[1:0] selects vc0 / vc1 / vc2 (same encoding as the
// CTC2 decode). Only fst and sec versions are generated here.
assign fst_cfc2_vc0 = fst_cfc2 && (fst_rd[1:0]==2'b00);
assign sec_cfc2_vc0 = sec_cfc2 && (sec_rd[1:0]==2'b00);
assign fst_cfc2_vc1 = fst_cfc2 && (fst_rd[1:0]==2'b01);
assign sec_cfc2_vc1 = sec_cfc2 && (sec_rd[1:0]==2'b01);
assign fst_cfc2_vc2 = fst_cfc2 && (fst_rd[1:0]==2'b10);
assign sec_cfc2_vc2 = sec_cfc2 && (sec_rd[1:0]==2'b10);
// *_vu_nop: VU computational op whose function code is one of the two nop
// encodings (6'b110111 or 6'b111111).
assign sav_vu_nop = sav_vu_comp &&
((sav_vu_func == 6'b110111) || (sav_vu_func == 6'b111111));
assign fst_vu_nop = fst_vu_comp &&
((fst_vu_func == 6'b110111) || (fst_vu_func == 6'b111111));
assign sec_vu_nop = sec_vu_comp &&
((sec_vu_func == 6'b110111) || (sec_vu_func == 6'b111111));
// *_vu_rd_wr_en: VU computational op that is not a nop. Used by the hazard
// equations as the "this op has a destination (vd)" qualifier.
assign sav_vu_rd_wr_en = sav_vu_comp && !(sav_vu_nop);
assign fst_vu_rd_wr_en = fst_vu_comp && !(fst_vu_nop);
assign sec_vu_rd_wr_en = sec_vu_comp && !(sec_vu_nop);
// sav_fst_hazard: dependency between the saved instruction (sav) and the
// first-slot instruction (fst). Two halves, selected by sav_su / sav_vu:
//   sav_su: sav is a scalar-unit op (lwc2 / mtc2 / ctc2) and fst is checked
//           as a VU op that reads or writes the same register / control reg.
//   sav_vu: sav is a VU op and fst is checked as a scalar-unit op
//           (swc2 / mfc2 / mtc2 / lwc2 / cfc2 / ctc2) touching sav's vd or
//           a control register sav sets.
// NOTE(review): the mtc2 and "lst" dest-to-dest terms below qualify with
// fst_vu_comp while the lwc2 dest-to-dest term uses fst_vu_rd_wr_en; the
// corresponding mtc2 term in fst_sec_hazard uses sec_vu_rd_wr_en. The
// fst_vu_comp form also matches a VU nop -- confirm whether that is intended.
assign sav_fst_hazard =
(sav_su && (
// lwc2 target (rt) vs. fst vs / vt / vd, full 5-bit register compare
(sav_lwc2 && (sav_rt == fst_vs) && fst_use_vs) ||
(sav_lwc2 && (sav_rt == fst_vt) && fst_use_vt) ||
(sav_lwc2 && (sav_rt == fst_vd) && fst_vu_rd_wr_en) || // dest to dest
// mtc2 target (rd) vs. fst vs / vt / vd
(sav_mtc2 && (sav_rd==fst_vs) && fst_use_vs) ||
(sav_mtc2 && (sav_rd==fst_vt) && fst_use_vt) ||
(sav_mtc2 && (sav_rd==fst_vd) && fst_vu_comp) || // dest to dest
// sav_lst: only register bits [4:3] are compared, i.e. any register in the
// same aligned group of eight conflicts (presumably a multi-register
// load type -- confirm against the sav_lst decode)
(sav_lwc2 &&sav_lst&&(sav_rt[4:3]==fst_vs[4:3])&&fst_use_vs) ||
(sav_lwc2 &&sav_lst&&(sav_rt[4:3]==fst_vt[4:3])&&fst_use_vt) ||
(sav_lwc2 &&sav_lst&&(sav_rt[4:3]==fst_vd[4:3])&&fst_vu_comp) || // dest to dest
// ctc2 to a control register that fst uses or sets
(sav_ctc2_vc0 && (fst_use_vc0 || fst_set_vc0)) ||
(sav_ctc2_vc1 && (fst_use_vc1 || fst_set_vc1)) ||
(sav_ctc2_vc2 && (fst_use_vc2 || fst_set_vc2)))) ||
(sav_vu && (
// sav's vd vs. the register named by fst's swc2/lwc2 (rt) or mfc2/mtc2 (rd)
(sav_vu_rd_wr_en && (sav_vd == fst_rt) && fst_swc2) ||
(sav_vu_rd_wr_en && (sav_vd == fst_rd) && fst_mfc2) ||
(sav_vu_rd_wr_en && (sav_vd == fst_rd) && fst_mtc2) || // dest to dest
(sav_vu_rd_wr_en && (sav_vd == fst_rt) && fst_lwc2) || // dest to dest
// fst_lst: group-of-eight compare on bits [4:3]
(sav_vu_rd_wr_en && (sav_vd[4:3]==fst_rt[4:3]) && fst_swc2&&fst_lst) ||
(sav_vu_rd_wr_en && (sav_vd[4:3]==fst_rt[4:3]) && fst_lwc2 && fst_lst) || // dest to dest
// control register set by sav and read (cfc2) or written (ctc2) by fst
(sav_set_vc0 && (fst_cfc2_vc0 || fst_ctc2_vc0)) ||
(sav_set_vc1 && (fst_cfc2_vc1 || fst_ctc2_vc1)) ||
(sav_set_vc2 && (fst_cfc2_vc2 || fst_ctc2_vc2))));
// fst_sec_hazard: dependency between the first-slot (fst) and second-slot
// (sec) instruction of the current pair. Only evaluated when there is no
// odd target and no saved instruction (!odd_target && !sav_su && !sav_vu).
// Same structure as sav_fst_hazard with sav->fst and fst->sec, split on
// fst_vu instead of sav_su / sav_vu.
// NOTE(review): the "lst" dest-to-dest term qualifies with sec_vu_comp while
// the lwc2 / mtc2 dest-to-dest terms use sec_vu_rd_wr_en; sec_vu_comp also
// matches a VU nop -- confirm whether that is intended.
assign fst_sec_hazard = !odd_target && !sav_su && !sav_vu &&
((!fst_vu && (
// fst lwc2 target (rt) vs. sec vs / vt / vd
(fst_lwc2 && (fst_rt == sec_vs) && sec_use_vs) ||
(fst_lwc2 && (fst_rt == sec_vt) && sec_use_vt) ||
(fst_lwc2 && (fst_rt == sec_vd) && sec_vu_rd_wr_en) || // dest to dest
// fst mtc2 target (rd) vs. sec vs / vt / vd
(fst_mtc2 && (fst_rd==sec_vs) && sec_use_vs) ||
(fst_mtc2 && (fst_rd==sec_vt) && sec_use_vt) ||
(fst_mtc2 && (fst_rd==sec_vd) && sec_vu_rd_wr_en) || // dest to dest
// fst_lst: group-of-eight compare on register bits [4:3]
(fst_lwc2 &&fst_lst&&(fst_rt[4:3]==sec_vs[4:3])&&sec_use_vs) ||
(fst_lwc2 &&fst_lst&&(fst_rt[4:3]==sec_vt[4:3])&&sec_use_vt) ||
(fst_lwc2 &&fst_lst&&(fst_rt[4:3]==sec_vd[4:3])&&sec_vu_comp) || // dest to dest
// fst ctc2 to a control register that sec uses or sets
(fst_ctc2_vc0 && (sec_use_vc0 || sec_set_vc0)) ||
(fst_ctc2_vc1 && (sec_use_vc1 || sec_set_vc1)) ||
(fst_ctc2_vc2 && (sec_use_vc2 || sec_set_vc2)))) ||
(fst_vu && (
// fst's vd vs. the register named by sec's swc2/lwc2 (rt) or mfc2/mtc2 (rd)
(fst_vu_rd_wr_en && (fst_vd == sec_rt) && sec_swc2) ||
(fst_vu_rd_wr_en && (fst_vd == sec_rd) && sec_mfc2) ||
(fst_vu_rd_wr_en && (fst_vd == sec_rd) && sec_mtc2) || // dest to dest
(fst_vu_rd_wr_en && (fst_vd == sec_rt) && sec_lwc2) || // dest to dest
// sec_lst: group-of-eight compare on bits [4:3]
(fst_vu_rd_wr_en && (fst_vd[4:3]==sec_rt[4:3]) && sec_swc2&&sec_lst) ||
(fst_vu_rd_wr_en && (fst_vd[4:3]==sec_rt[4:3]) && sec_lwc2 && sec_lst) || // dest to dest
// control register set by fst and read (cfc2) or written (ctc2) by sec
(fst_set_vc0 && (sec_cfc2_vc0 || sec_ctc2_vc0)) ||
(fst_set_vc1 && (sec_cfc2_vc1 || sec_ctc2_vc1)) ||
(fst_set_vc2 && (sec_cfc2_vc2 || sec_ctc2_vc2)))));
/*
If there is a taken branch (pc_sel[2] or pc_sel[3]) but adv_ir is deasserted,
we have to latch both taken and the branch address until adv_ir is next
asserted and the pc ff can be loaded. old_br_addr implements the address
latch; taken is latched in suctl.
*/
// Positional ports, presumably (q, q_n, d, enable, clk, reset_l) -- confirm
// against the spasdffen_10_0 cell definition. br_addr is captured into
// old_br_addr under control of wr_br_addr.
spasdffen_10_0 su_br_addr_ff (old_br_addr, old_br_addr_n, br_addr,wr_br_addr,clk,reset_l);
// When halt is deasserted, pc is pointing to the first instruction to execute
// and the rd_inst is garbage. Therefore, when halt is deasserted, *don't*
// issue an instruction in that cycle. Wait until the next one.
// The PC is only written when the RSP is halted. The pipe is empty,
// and cur_rd_pc, link_pc_delay_pc, and next_pc are don't cares (so it's
// ok to write them, too).
// *** What happens here if an instruction is issued but stalls in rd?
// *** It should complete and be unaffected by stall.
// *** But this won't work for IMem DMA, where we have to get out of the way
// *** in a fixed time (2 cycles) for DMA. But DMA shouldn't be happening
// *** where we're executing ... that's a software restriction. (Whether we're
// *** halted or not?)
// *** IMem write: just stall everything, including rd_inst, and continue
// *** when it's done.
// *** IMem read: assume we've already come to a graceful halt
//
// issued_sav: the saved instruction goes to its unit this cycle, i.e. it is
// an SU or VU op and neither that unit's kill nor kill_re is asserted.
assign issued_sav = (sav_su && !(kill_su_issue || kill_re)) ||
(sav_vu && !(kill_vu_issue || kill_re));
// issued_fst / issued_sec: an SU or VU instruction select with index 1 or 3
// (fst) / 2 or 4 (sec) is active and not killed. The meaning of the
// individual su_inst_sel / vu_inst_sel bits is defined where they are
// generated (outside this section).
assign issued_fst = // issued first implies !odd_target
(((su_inst_sel[1] || su_inst_sel[3]) && !(kill_su_issue || kill_re)) ||
((vu_inst_sel[1] || vu_inst_sel[3]) && !(kill_vu_issue || kill_re)));
assign issued_sec =
(((su_inst_sel[2] || su_inst_sel[4]) && !(kill_su_issue || kill_re)) ||
((vu_inst_sel[2] || vu_inst_sel[4]) && !(kill_vu_issue || kill_re)));
// start_ext_halt: halt asserted while ex_break is not set.
assign start_ext_halt = halt && !ex_break;
// int_halt_pc: word address (bits [11:2]) selected by halt_sel; per the
// pc_wr_en comment further down it is one of the values the pc can be
// written with. halt_sel[2:0] take priority; halt_sel[7:3] are all gated by
// halt_sel[2:0]==0.
wire [7:0] halt_sel;
reg [11:2] int_halt_pc_reg; // combinational (assigned in the always block below)
// [0]: rd bubble, or an old target pending without br_target -> current pc
assign halt_sel[0] = (halt_sel[2:1]==2'b0) && !(delay_slot && kill_re) && (rd_bubble || (old_target_pending && !br_target));
// [1]: delay slot completing with a taken branch -> br_addr
assign halt_sel[1] = delay_slot && !kill_re && taken;
// [2]: delay slot completing, branch taken earlier (old_taken) -> old_br_addr
assign halt_sel[2] = delay_slot && !kill_re && !taken && old_taken;
// [3]: a saved instruction exists but nothing issued -> sav_pc
assign halt_sel[3] = (halt_sel[2:0]==3'b0) && (sav_su || sav_vu) && !issued_sav && !issued_fst && !issued_sec && !odd_target;
// [4]: only the saved instruction issued -> first word of the cur_rd_pc pair
assign halt_sel[4] = (halt_sel[2:0]==3'b0) && issued_sav && !issued_fst && !issued_sec && !odd_target;
// [5]: first issued but not second -> second word of the cur_rd_pc pair
assign halt_sel[5] = (halt_sel[2:0]==3'b0) && issued_fst && !issued_sec; // !odd_target implied by issued_fst
// [6]: odd target and second not issued -> second word of the cur_rd_pc pair
assign halt_sel[6] = (halt_sel[2:0]==3'b0) && !issued_sec && odd_target;
// [7]: second issued -> first word of the pair addressed by pc
assign halt_sel[7] = (halt_sel[2:0]==3'b0) && issued_sec;
// One-hot style mux: case (1'b1) picks the first asserted halt_sel bit; the
// parallel_case pragma tells synthesis the selects are mutually exclusive.
// Bit 2 of the result is forced to 0 / 1 to pick the first / second word of
// a doubleword-aligned pair.
always @(halt_sel or pc or sav_pc or cur_rd_pc or br_addr or old_br_addr)
begin
case (1'b1) //synopsys parallel_case
halt_sel[0] : int_halt_pc_reg = pc[11:2];
halt_sel[1] : int_halt_pc_reg = br_addr[11:2];
halt_sel[2] : int_halt_pc_reg = old_br_addr[11:2];
halt_sel[3] : int_halt_pc_reg = sav_pc[11:2];
halt_sel[4] : int_halt_pc_reg = {cur_rd_pc[11:3], 1'b0};
halt_sel[5] : int_halt_pc_reg = {cur_rd_pc[11:3], 1'b1};
halt_sel[6] : int_halt_pc_reg = {cur_rd_pc[11:3], 1'b1};
halt_sel[7] : int_halt_pc_reg = {pc[11:3], 1'b0};
default : int_halt_pc_reg = {cur_rd_pc[11:3], 1'b0}; // first instruction, used for delay_slot && kill_re && !sav_xu
endcase
end
assign int_halt_pc = int_halt_pc_reg;
// *** There is an identical ex_break ff in suctl:
// Positional ports, presumably (q, q_n, d, clk, reset_l) -- confirm against
// the spasdff_1_0 cell. ex_break is driven from set_broke.
spasdff_1_0 is_re_break_ff (ex_break, ex_break_n, set_broke, clk, reset_l);
// Enable pc write with pc input or int_halt_pc or "normal" next pc.
// Three sources: an external pc write, the halting sequence, or a normal
// advance (adv_pcs) while not halted and no IMem DMA is in progress.
assign pc_wr_en =
pc_in_wr_en || // external write
halting ||
(adv_pcs && !halt && !imem_dma_if); // normal condition
// Advance unless there are instructions that have to be executed in the
// IR latch (imem_stall and not taking a branch).
// pc_sel[2] / pc_sel[3] are the taken-branch selects and override imem_stall.
assign adv_pcs = adv_ir && (!imem_stall || pc_sel[2] || pc_sel[3]);
// Moved from suvuctl for improved synthesis:
// Decode of the two candidate VU instruction words (vu_inst_a / vu_inst_b);
// choose_vu_inst_b selects which decode is driven out on vu_comp and vs.
wire [5:0] vu_func_a;
wire [5:0] vu_func_b;
wire vu_comp_a;
wire vu_comp_b;
wire [4:0] vs_a;
wire [4:0] vs_b;
// Function code is the low six bits of the instruction word.
assign vu_func_a = vu_inst_a[5:0];
assign vu_func_b = vu_inst_b[5:0];
// vu_comp_x: instruction bits [31:25] equal 7'b0100101 and the function code
// is not one of the two nop encodings (6'b110111, 6'b111111).
assign vu_comp_a = (vu_inst_a[31:25] == 7'b0100101) && // and not a nop:
!((vu_func_a == 6'b110111) || (vu_func_a == 6'b111111));
assign vu_comp_b = (vu_inst_b[31:25] == 7'b0100101) && // and not a nop:
!((vu_func_b == 6'b110111) || (vu_func_b == 6'b111111));
assign vu_comp = choose_vu_inst_b ? vu_comp_b : vu_comp_a;
// vs field is instruction bits [15:11].
assign vs_a = vu_inst_a[15:11];
assign vs_b = vu_inst_b[15:11];
assign vs = choose_vu_inst_b ? vs_b : vs_a;
// NOTE(review): despite the name, only bit 0 of vs is tested here (true for
// any odd vs value) -- confirm this is the intended shortcut.
assign vs_eq_one = (vs[0]==1);
// VT to the VU is a very critical path. There are five possible sources
// for vt: sav_inst, stalled_rd_inst[63:32], stalled_rd_inst[31:0],
// rd_inst[63:32], and rd_inst[31:0]. The first 3, while RD stage
// instructions, are available in the previous stage as
// muxed_sav_inst (advanced with adv_ir) and rd_inst (advanced
// with save_rd_inst). The logic below muxes these 3 early
// then latches them. The result, rd_pre_vt, is output to the vt
// logic, where it is decoded and muxed with decoded versions of
// vt obtained from rd_inst[63:32] and rd_inst[31:0]. The control
// for this final mux, vt_sel, is also generated below.
// vt_sel == 00 vt = pre_vt (use stalled_rd_inst or sav_inst)
// vt_sel == 01 vt = rd_inst[63:32][vt]
// vt_sel == 10 vt = rd_inst[31:0][vt]
// vt_sel == 11 illegal condition
wire adv_sav_inst;
wire adv_ctl;
wire adv_st_inst;
wire [7:0] pre_vt_sel;
wire [4:0] if_pre_vt;
wire use_pre_vt;
wire [4:0] rd_pre_vt_n;
assign adv_sav_inst = adv_ir; // advances muxed_sav_inst -> sav_inst
assign adv_ctl = !kill_re || halting; // advances next_sav_vu > sav_vu
assign adv_st_inst = save_rd_inst; // advances rd_inst -> stalled_rd_inst
// odd_target = target && cur_rd_pc[2]; To generate pre_odd_target (a version for IF
// pipe stage) we use previous versions of target (next_target) and cur_rd_pc (pc).
// We need to consider three situations: both advance, target advances but cur_rd_pc
// does not, and neither advance. (cur_rd_pc advancing but target not is impossible.)
// The (!imem_stall || pc_sel[2] || pc_sel[3]) factor is the same condition
// that qualifies adv_ir in adv_pcs.
wire pre_odd_target;
assign pre_odd_target =
(next_target && adv_ir && pc[2] && (!imem_stall || pc_sel[2] || pc_sel[3])) || // advance both
(next_target && adv_ir && cur_rd_pc[2] && !(!imem_stall || pc_sel[2] || pc_sel[3])) || // advance target but not pc
(target && !adv_ir && cur_rd_pc[2]); // advance neither
// pre_vt_sel[3:0]: a VU instruction will be in sav next cycle. Pick the
// pre- or post-advance copy of both the control (next_sav_vu vs sav_vu, by
// adv_ctl) and the instruction (muxed_sav_inst vs sav_inst, by adv_sav_inst).
assign pre_vt_sel[0] = next_sav_vu && adv_ctl && adv_sav_inst;
assign pre_vt_sel[1] = next_sav_vu && adv_ctl && !adv_sav_inst;
assign pre_vt_sel[2] = sav_vu && !adv_ctl && adv_sav_inst;
assign pre_vt_sel[3] = sav_vu && !adv_ctl && !adv_sav_inst;
// pre_vt_sel[7:4]: no sav case applies. Choose the upper or lower word of
// rd_inst (when adv_st_inst) or of stalled_rd_inst (when !adv_st_inst); the
// upper word is used when the first slot is a VU op and there is no odd target.
// Together the eight selects cover every input combination, which is what
// the full_case pragma below relies on.
assign pre_vt_sel[4] = (pre_vt_sel[3:0] == 4'b0) && (fst_rd_vu && !pre_odd_target) && adv_st_inst;
assign pre_vt_sel[5] = (pre_vt_sel[3:0] == 4'b0) && !(fst_rd_vu && !pre_odd_target) && adv_st_inst;
assign pre_vt_sel[6] = (pre_vt_sel[3:0] == 4'b0) && (fst_st_vu && !pre_odd_target) && !adv_st_inst;
assign pre_vt_sel[7] = (pre_vt_sel[3:0] == 4'b0) && !(fst_st_vu && !pre_odd_target) && !adv_st_inst;
// Combinational mux of the vt field: bits [20:16] of a 32-bit word, i.e.
// [52:48] for the upper word of a 64-bit pair.
reg [4:0] if_pre_vt_reg;
always @(pre_vt_sel or muxed_sav_inst or sav_inst or rd_inst or stalled_rd_inst)
begin
case (1'b1) //synopsys parallel_case full_case
pre_vt_sel[0] : if_pre_vt_reg = muxed_sav_inst[20:16];
pre_vt_sel[1] : if_pre_vt_reg = sav_inst[20:16];
pre_vt_sel[2] : if_pre_vt_reg = muxed_sav_inst[20:16];
pre_vt_sel[3] : if_pre_vt_reg = sav_inst[20:16];
pre_vt_sel[4] : if_pre_vt_reg = rd_inst[52:48];
pre_vt_sel[5] : if_pre_vt_reg = rd_inst[20:16];
pre_vt_sel[6] : if_pre_vt_reg = stalled_rd_inst[52:48];
pre_vt_sel[7] : if_pre_vt_reg = stalled_rd_inst[20:16];
endcase
end
assign if_pre_vt = if_pre_vt_reg;
// Always advance pre_vt. Various stall conditions are taken into account in
// if_pre_vt mux.
// The last port is tied to 1'b1 (presumably the active-low reset, so this
// register is never reset -- confirm against the spasdff_5_0 cell).
spasdff_5_0 is_vt_ff (rd_pre_vt, rd_pre_vt_n, if_pre_vt, clk, 1'b1);
// Final vt mux control: use the latched pre_vt whenever a saved VU op or a
// previous stall is in effect; otherwise pick the upper (vt_sel[0]) or lower
// (vt_sel[1]) word of rd_inst. Both bits are never set together.
assign use_pre_vt = sav_vu || prev_stalled;
assign vt_sel[0] = !use_pre_vt && fst_rd_vu && !odd_target;
assign vt_sel[1] = !use_pre_vt && (!fst_rd_vu || odd_target);
endmodule
// inst_a_mux: hand-instantiated 32-bit, 3-way instruction mux built from
// library NOR cells followed by an inverting buffer stage.
//
// Every data input is an inverted ("_n_buf") copy of an instruction word and
// every select is supplied as a 32-bit bus with its complement, one select
// bit per data bit. Taking nr02d0 / nr03d0 to be 2- / 3-input NOR cells (cell
// definitions are not in this file -- confirm), each bit computes:
//   leg_1 = sav_inst          &  ded_sav
//   leg_2 = st_rd_inst_high   &  ded_prev_stalled & ~ded_sav
//   leg_3 = rd_inst_high      & ~ded_prev_stalled & ~ded_sav
//   inst_a_unbuf = ~(leg_1 | leg_2 | leg_3)
// and big_inv_bufs_32 inverts inst_a_unbuf to drive inst_a.
//
// The gates are written as arrays of instances (one array per mux leg);
// element [i] of each array handles bit i of every connected bus, so the
// individual gates are addressed as e.g. nr02d0_leg1[0] .. nr02d0_leg1[31].
module inst_a_mux (ded_prev_stalled, ded_prev_stalled_n, ded_sav, ded_sav_n,
                   sav_inst_n_buf, rd_inst_high_n_buf, st_rd_inst_high_n_buf,
                   inst_a);

    // Per-bit select copies and their complements.
    input  [31:0] ded_prev_stalled;
    input  [31:0] ded_prev_stalled_n;
    input  [31:0] ded_sav;
    input  [31:0] ded_sav_n;

    // Inverted instruction words being selected between.
    input  [31:0] sav_inst_n_buf;
    input  [31:0] rd_inst_high_n_buf;
    input  [31:0] st_rd_inst_high_n_buf;

    // Selected instruction word (true polarity).
    output [31:0] inst_a;

    wire   [31:0] leg_1;        // saved-instruction leg
    wire   [31:0] leg_2;        // stalled rd_inst (upper word) leg
    wire   [31:0] leg_3;        // rd_inst (upper word) leg
    wire   [31:0] inst_a_unbuf; // inverted mux result, before buffering

    // Leg 1: selected by ded_sav.
    nr02d0 nr02d0_leg1 [31:0] (.zn(leg_1),
                               .a1(sav_inst_n_buf),
                               .a2(ded_sav_n));

    // Leg 2: selected by ded_prev_stalled when ded_sav is low.
    nr03d0 nr03d0_leg2 [31:0] (.zn(leg_2),
                               .a1(st_rd_inst_high_n_buf),
                               .a2(ded_prev_stalled_n),
                               .a3(ded_sav));

    // Leg 3: selected when both ded_prev_stalled and ded_sav are low.
    nr03d0 nr03d0_leg3 [31:0] (.zn(leg_3),
                               .a1(rd_inst_high_n_buf),
                               .a2(ded_prev_stalled),
                               .a3(ded_sav));

    // Combine the three legs.
    nr03d0 nr03d0_final [31:0] (.zn(inst_a_unbuf),
                                .a1(leg_1),
                                .a2(leg_2),
                                .a3(leg_3));

    // Inverting output buffers restore true polarity on inst_a.
    big_inv_bufs_32 inst_a_bufs (inst_a, inst_a_unbuf);

endmodule
// The `timescale directive is kept ahead of the module header: a compiler
// directive placed inside a module body is rejected by some tools (Icarus
// Verilog reports an error) and its scope there is ambiguous. This module
// contains no delays, so its own behaviour does not depend on the timescale.
`timescale 1ns / 10ps
// big_inv_bufs_32: 32-bit bank of in01d5 cells, one per bit.
//   in        : 32-bit input bus
//   out_n_buf : bit i is driven by the zn output of the cell whose input is
//               in[i] (in01d5 is presumably a high-drive inverter from the
//               cell library, making out_n_buf the buffered complement of
//               in -- confirm against the library).
// Instances are written out individually so each gate keeps a fixed name.
module big_inv_bufs_32 (out_n_buf, in);
output [31:0] out_n_buf; input [31:0] in;
in01d5 in01d5_32_0_0 (.zn(out_n_buf[0]), .i(in[0]));
in01d5 in01d5_32_0_1 (.zn(out_n_buf[1]), .i(in[1]));
in01d5 in01d5_32_0_2 (.zn(out_n_buf[2]), .i(in[2]));
in01d5 in01d5_32_0_3 (.zn(out_n_buf[3]), .i(in[3]));
in01d5 in01d5_32_0_4 (.zn(out_n_buf[4]), .i(in[4]));
in01d5 in01d5_32_0_5 (.zn(out_n_buf[5]), .i(in[5]));
in01d5 in01d5_32_0_6 (.zn(out_n_buf[6]), .i(in[6]));
in01d5 in01d5_32_0_7 (.zn(out_n_buf[7]), .i(in[7]));
in01d5 in01d5_32_0_8 (.zn(out_n_buf[8]), .i(in[8]));
in01d5 in01d5_32_0_9 (.zn(out_n_buf[9]), .i(in[9]));
in01d5 in01d5_32_0_10 (.zn(out_n_buf[10]), .i(in[10]));
in01d5 in01d5_32_0_11 (.zn(out_n_buf[11]), .i(in[11]));
in01d5 in01d5_32_0_12 (.zn(out_n_buf[12]), .i(in[12]));
in01d5 in01d5_32_0_13 (.zn(out_n_buf[13]), .i(in[13]));
in01d5 in01d5_32_0_14 (.zn(out_n_buf[14]), .i(in[14]));
in01d5 in01d5_32_0_15 (.zn(out_n_buf[15]), .i(in[15]));
in01d5 in01d5_32_0_16 (.zn(out_n_buf[16]), .i(in[16]));
in01d5 in01d5_32_0_17 (.zn(out_n_buf[17]), .i(in[17]));
in01d5 in01d5_32_0_18 (.zn(out_n_buf[18]), .i(in[18]));
in01d5 in01d5_32_0_19 (.zn(out_n_buf[19]), .i(in[19]));
in01d5 in01d5_32_0_20 (.zn(out_n_buf[20]), .i(in[20]));
in01d5 in01d5_32_0_21 (.zn(out_n_buf[21]), .i(in[21]));
in01d5 in01d5_32_0_22 (.zn(out_n_buf[22]), .i(in[22]));
in01d5 in01d5_32_0_23 (.zn(out_n_buf[23]), .i(in[23]));
in01d5 in01d5_32_0_24 (.zn(out_n_buf[24]), .i(in[24]));
in01d5 in01d5_32_0_25 (.zn(out_n_buf[25]), .i(in[25]));
in01d5 in01d5_32_0_26 (.zn(out_n_buf[26]), .i(in[26]));
in01d5 in01d5_32_0_27 (.zn(out_n_buf[27]), .i(in[27]));
in01d5 in01d5_32_0_28 (.zn(out_n_buf[28]), .i(in[28]));
in01d5 in01d5_32_0_29 (.zn(out_n_buf[29]), .i(in[29]));
in01d5 in01d5_32_0_30 (.zn(out_n_buf[30]), .i(in[30]));
in01d5 in01d5_32_0_31 (.zn(out_n_buf[31]), .i(in[31]));
endmodule