// at_ew.v
/**************************************************************************
* *
* Copyright (C) 1994, Silicon Graphics, Inc. *
* *
* These coded instructions, statements, and computer programs contain *
* unpublished proprietary information of Silicon Graphics, Inc., and *
* are protected by Federal copyright law. They may not be disclosed *
* to third parties or copied or duplicated in any form, in whole or *
* in part, without the prior written consent of Silicon Graphics, Inc. *
* *
*************************************************************************/
// $Id: at_ew.v,v 1.1 2002/03/28 00:26:12 berndt Exp $
////////////////////////////////////////////////////////////////////////
//
// Project Reality
//
// module: at_ew
// description: Attribute buffers for edge walker. Primitives data updates
// the cycle before it is needed (via mux), but is only used
// the following cycle. Hardware synchronized attribute data
// is updated the cycle of the attribute (via mux), but is
// only used the following cycle, which lines up with the
// first cycle of a following primitive. QTV will barf on
// this, since it will see the update cycle as not making
// timing, while only the following cycle really matters.
// Unsynchronized attributes update immediately, producing
// trash for one cycle, followed by good data the next cycle.
// The csim should generate garbage (0xDEADBEEF, for example)
// during this trashed update cycle. Some logically synced
// data require no special buffering because the timing
// just works out (scissor, the EW dx's and dy's). Unsynced
// attributes must be maintained by software, using the
// sync_tile and sync_pipe commands after the last primitive
// before any unsynced attribute update commands if necessary.
//
// designer: Phil Gossett
// date: 6/9/95
//
////////////////////////////////////////////////////////////////////////
// at_ew: attribute buffers and control-delay pipelines for the edge walker.
//
// Structure, in file order:
//   1. code_* pipeline  - {cs_st_prim, two decoded attribute commands}
//                         shifted one stage per rising gclk edge; bit [2] of
//                         selected stages is used as a latch enable.
//   2. latch instances  - capture fields of the latched command word d_lat
//                         into the ew_dx*/ew_dy*/scissor/load/sign/left/tlut
//                         holding latches.
//   3. load/sign/left/tlut pipelines - plain shift chains; outputs are taps
//                         at fixed stage numbers.
//   4. counters         - at_ctrn/at_ctrb instances producing the sync
//                         release strobes and at_cs_busy.
//   5. output assigns   - bit slicing of the latches and pipeline taps.
//
// NOTE(review): at_latch1/23/32/56/64, at_ctrn and at_ctrb are defined
// outside this file; comments about their behavior below are assumptions
// drawn from port names and should be confirmed against those cells.
module at_ew (gclk, reset_l, ncyc, atomic, ew_ep_startspan,
cs_st_prim, cs_st_attr, cs_cmd, cs_ew_d, ew_cs_busy,
ew_dxr, ew_dxg, ew_dxb, ew_dxa, ew_dxz,
ew_dxs, ew_dxt, ew_dxw,
ew_dyr, ew_dyg, ew_dyb, ew_dya, ew_dyz,
ew_dys, ew_dyt, ew_dyw,
scissor, strobe_sync_full,
rel_sync_tile, rel_sync_pipe, rel_sync_load,
ew_image_load, ew_scissor_load, ew_stall_load, ew_offset_load,
tc_load, st_ncyc, ew_major_sign, ew_offset_sign,
ew_major_left, ew_minor_left, ew_offset_left, cv_left,
st_r_left, st_g_left, st_b_left, st_a_left, st_z_left,
st_s_left, st_t_left, st_w_left,
ew_scissor_tlut, ew_stall_tlut, at_cs_busy);
// ---- inputs ----
input gclk;
input reset_l;
input ncyc;
input atomic;
input ew_ep_startspan;
input cs_st_prim;
input cs_st_attr;
input [5:0] cs_cmd;
input [63:0] cs_ew_d;
input ew_cs_busy;
// ---- outputs ----
// Each ew_dx*/ew_dy* output is 23 bits taken from bits [31:9] of a 32-bit
// holding latch (see the assigns near the end of the module).
output [22:0] ew_dxr; // s15.7 (single buffer)
output [22:0] ew_dxg; // s15.7 (single buffer)
output [22:0] ew_dxb; // s15.7 (double buffer off st span)
output [22:0] ew_dxa; // s15.7 (double buffer off st span)
output [22:0] ew_dxz; // s15.7 (single buffer)
output [22:0] ew_dxs; // s15.7 (double buffer off st span)
output [22:0] ew_dxt; // s15.7 (double buffer off st span)
output [22:0] ew_dxw; // s15.7 (single buffer)
output [22:0] ew_dyr; // s15.7 (single buffer)
output [22:0] ew_dyg; // s15.7 (single buffer)
output [22:0] ew_dyb; // s15.7 (single buffer)
output [22:0] ew_dya; // s15.7 (single buffer)
output [22:0] ew_dyz; // s15.7 (single buffer)
output [22:0] ew_dys; // s15.7 (single buffer)
output [22:0] ew_dyt; // s15.7 (single buffer)
output [22:0] ew_dyw; // s15.7 (single buffer)
output [55:0] scissor; // 2d 55:32, 25:0 (single buffer)
output strobe_sync_full; // 29 (counter)
output rel_sync_tile; // 28 (counter)
output rel_sync_pipe; // 27 (counter)
// NOTE(review): the "31" below does not match the sync_load decode further
// down, which compares {cs_st_attr,cs_cmd} against 7'h66 (cs_cmd == 6'h26).
// Confirm which is intended.
output rel_sync_load; // 31 (counter)
output ew_image_load; // tile(34),block(33),tlut(30) (pipe)
output ew_scissor_load; // tile(34),block(33),tlut(30) (pipe)
output ew_stall_load; // tile(34),block(33),tlut(30) (pipe)
output ew_offset_load; // tile(34),block(33),tlut(30) (pipe)
output tc_load; // tile(34),block(33),tlut(30) (pipe)
output st_ncyc; // ncyc for s,t steppers (load overrides)
output ew_major_sign; // primitives (36,24,25,0f-08) (pipe)
output ew_offset_sign; // primitives (36,24,25,0f-08) (pipe)
output ew_major_left; // primitives (36,24,25,0f-08) (pipe)
output ew_minor_left; // primitives (36,24,25,0f-08) (pipe)
output ew_offset_left; // primitives (36,24,25,0f-08) (pipe)
output cv_left; // primitives (36,24,25,0f-08) (pipe)
output st_r_left; // primitives (36,24,25,0f-08) (pipe)
output st_g_left; // primitives (36,24,25,0f-08) (pipe)
output st_b_left; // primitives (36,24,25,0f-08) (pipe)
output st_a_left; // primitives (36,24,25,0f-08) (pipe)
output st_z_left; // primitives (36,24,25,0f-08) (pipe)
output st_s_left; // primitives (36,24,25,0f-08) (pipe)
output st_t_left; // primitives (36,24,25,0f-08) (pipe)
output st_w_left; // primitives (36,24,25,0f-08) (pipe)
output ew_scissor_tlut; // tlut(30) (pipe)
output ew_stall_tlut; // tlut(30) (pipe)
output at_cs_busy; // extended version of ew_cs_busy (counter)
// ---- internal nets ----
wire [63:0] d_lat; // delayed latched input
// code_0d = {cs_st_prim, cs_st_attr, cs_cmd[5:0]} (8 bits, unregistered)
wire [7:0] code_0d; // control pipeline input
// code_Nd is code_1d delayed by N-1 further gclk edges.
//   [2] = cs_st_prim, [1] = "prim color" decode, [0] = "prim depth" decode.
// Only bit [2] of these stages is read in this module; bits [1:0] are
// carried along but not referenced.
reg [2:0] code_1d; // pipeline for control
reg [2:0] code_2d;
reg [2:0] code_3d;
reg [2:0] code_4d;
reg [2:0] code_5d;
reg [2:0] code_6d;
reg [2:0] code_7d;
reg [2:0] code_8d;
reg [2:0] code_9d;
reg [2:0] code_10d;
reg [2:0] code_11d;
reg [2:0] code_12d;
reg [2:0] code_13d;
reg [2:0] code_14d;
reg [2:0] code_15d;
reg [2:0] code_16d;
reg [2:0] code_17d;
reg [2:0] code_18d;
reg [2:0] code_19d;
reg [2:0] code_20d;
reg [2:0] code_21d;
// Two-bit enables, one pair per 32-bit holding latch.
// NOTE(review): presumably g[1] gates the upper 16 bits and g[0] the lower
// 16 bits of at_latch32 - confirm against the cell definition.
wire [1:0] dxr_g; // latch enables
wire [1:0] dxg_g;
wire [1:0] dxb_g;
wire [1:0] dxa_g;
wire [1:0] dxz_g;
wire [1:0] dxs_g;
wire [1:0] dxt_g;
wire [1:0] dxw_g;
wire [1:0] dyr_g;
wire [1:0] dyg_g;
wire [1:0] dyb_g;
wire [1:0] dya_g;
wire [1:0] dyz_g;
wire [1:0] dys_g;
wire [1:0] dyt_g;
wire [1:0] dyw_g;
// Single-bit enables / command decodes taken from code_0d or code_2d.
wire sc_g;
wire load_g;
wire sign_g;
wire left_g;
wire tlut_g;
wire sync_full;
wire sync_tile;
wire sync_pipe;
wire sync_load;
wire d_ld; // synthesized for primitives
wire d_tl;
// 32-bit "_a" nets are the first-level holding latches; the 23-bit "_b"
// nets are second-level latches gated by ew_ep_startspan (b, a, s, t only).
wire [31:0] ew_dxr_a; // latch outputs
wire [31:0] ew_dxg_a;
wire [31:0] ew_dxb_a;
wire [22:0] ew_dxb_b;
wire [31:0] ew_dxa_a;
wire [22:0] ew_dxa_b;
wire [31:0] ew_dxz_a;
wire [31:0] ew_dxs_a;
wire [22:0] ew_dxs_b;
wire [31:0] ew_dxt_a;
wire [22:0] ew_dxt_b;
wire [31:0] ew_dxw_a;
wire [31:0] ew_dyr_a;
wire [31:0] ew_dyg_a;
wire [31:0] ew_dyb_a;
wire [31:0] ew_dya_a;
wire [31:0] ew_dyz_a;
wire [31:0] ew_dys_a;
wire [31:0] ew_dyt_a;
wire [31:0] ew_dyw_a;
wire [55:0] scissor_a;
// Heads of the four delay chains (driven by at_latch1 instances below).
// The numeric suffix is the stage number used for the output taps.
wire load_1d;
wire sign_3d;
wire left_1d;
wire tlut_1d;
// load chain: stages 2..21 (taps used: 1, 4, 13, 17, 21)
reg load_2d; // pipeline
reg load_3d;
reg load_4d;
reg load_5d;
reg load_6d;
reg load_7d;
reg load_8d;
reg load_9d;
reg load_10d;
reg load_11d;
reg load_12d;
reg load_13d;
reg load_14d;
reg load_15d;
reg load_16d;
reg load_17d;
reg load_18d;
reg load_19d;
reg load_20d;
reg load_21d;
// sign chain: stages 4..17 (taps used: 4, 17)
reg sign_4d; // pipeline
reg sign_5d;
reg sign_6d;
reg sign_7d;
reg sign_8d;
reg sign_9d;
reg sign_10d;
reg sign_11d;
reg sign_12d;
reg sign_13d;
reg sign_14d;
reg sign_15d;
reg sign_16d;
reg sign_17d;
// left chain: stages 2..41 (taps used: 4, 13, 17, 21, 23, 39, 41)
reg left_2d; // pipeline
reg left_3d;
reg left_4d;
reg left_5d;
reg left_6d;
reg left_7d;
reg left_8d;
reg left_9d;
reg left_10d;
reg left_11d;
reg left_12d;
reg left_13d;
reg left_14d;
reg left_15d;
reg left_16d;
reg left_17d;
reg left_18d;
reg left_19d;
reg left_20d;
reg left_21d;
reg left_22d;
reg left_23d;
reg left_24d;
reg left_25d;
reg left_26d;
reg left_27d;
reg left_28d;
reg left_29d;
reg left_30d;
reg left_31d;
reg left_32d;
reg left_33d;
reg left_34d;
reg left_35d;
reg left_36d;
reg left_37d;
reg left_38d;
reg left_39d;
reg left_40d;
reg left_41d;
// tlut chain: stages 2..13 (taps used: 4, 13)
reg tlut_2d; // pipeline
reg tlut_3d;
reg tlut_4d;
reg tlut_5d;
reg tlut_6d;
reg tlut_7d;
reg tlut_8d;
reg tlut_9d;
reg tlut_10d;
reg tlut_11d;
reg tlut_12d;
reg tlut_13d;
reg st_ncyc; // outputs
wire reset;
// invert reset (this week)
// reset is active-high and only feeds the counter instances below.
assign reset = ~reset_l;
// control pipeline input
assign code_0d = {cs_st_prim,cs_st_attr,cs_cmd};
// pipeline for control
// Stage 1 reduces the 8-bit code to 3 bits; stages 2..21 are a pure shift.
// Nonblocking assignments make every stage advance on the same gclk edge.
always @(posedge gclk)
begin
// 7'h7a / 7'h6e = cs_st_attr set with cs_cmd == 6'h3a / 6'h2e.
code_1d <= {code_0d[7], (code_0d[6:0] == 7'h7a), // prim color
(code_0d[6:0] == 7'h6e)}; // prim depth
code_2d <= code_1d;
code_3d <= code_2d;
code_4d <= code_3d;
code_5d <= code_4d;
code_6d <= code_5d;
code_7d <= code_6d;
code_8d <= code_7d;
code_9d <= code_8d;
code_10d <= code_9d;
code_11d <= code_10d;
code_12d <= code_11d;
code_13d <= code_12d;
code_14d <= code_13d;
code_15d <= code_14d;
code_16d <= code_15d;
code_17d <= code_16d;
code_18d <= code_17d;
code_19d <= code_18d;
code_20d <= code_19d;
code_21d <= code_20d;
end
// generate latch enables for single buffers
// Each enable is the delayed cs_st_prim bit (code_Nd[2]) at a fixed stage.
// For most latches g[0] fires one stage before g[1]; dxz and dyz use the
// same stage for both bits and are the only latches fed a full 32-bit
// d_lat[31:0] rather than a replicated 16-bit half.
// dx enables: s,t @4/5  w @6/7  b,a @8/9  r,g @10/11  z @12
assign dxr_g[1] = code_11d[2];
assign dxr_g[0] = code_10d[2];
assign dxg_g[1] = code_11d[2];
assign dxg_g[0] = code_10d[2];
assign dxb_g[1] = code_9d[2];
assign dxb_g[0] = code_8d[2];
assign dxa_g[1] = code_9d[2];
assign dxa_g[0] = code_8d[2];
assign dxz_g[1] = code_12d[2];
assign dxz_g[0] = code_12d[2];
assign dxs_g[1] = code_5d[2];
assign dxs_g[0] = code_4d[2];
assign dxt_g[1] = code_5d[2];
assign dxt_g[0] = code_4d[2];
assign dxw_g[1] = code_7d[2];
assign dxw_g[0] = code_6d[2];
// dy enables: s,t @13/14  w @15/16  b,a @17/18  r,g @19/20  z @21
assign dyr_g[1] = code_20d[2];
assign dyr_g[0] = code_19d[2];
assign dyg_g[1] = code_20d[2];
assign dyg_g[0] = code_19d[2];
assign dyb_g[1] = code_18d[2];
assign dyb_g[0] = code_17d[2];
assign dya_g[1] = code_18d[2];
assign dya_g[0] = code_17d[2];
assign dyz_g[1] = code_21d[2];
assign dyz_g[0] = code_21d[2];
assign dys_g[1] = code_14d[2];
assign dys_g[0] = code_13d[2];
assign dyt_g[1] = code_14d[2];
assign dyt_g[0] = code_13d[2];
assign dyw_g[1] = code_16d[2];
assign dyw_g[0] = code_15d[2];
// Attribute command decodes: code_0d[6] is cs_st_attr, so 7'h6x matches
// cs_st_attr set with cs_cmd == 6'h2x (scissor 2d, sync 29/28/27/26).
assign sc_g = (code_0d[6:0] == 7'h6d);
assign sync_full= (code_0d[6:0] == 7'h69);
assign sync_tile= (code_0d[6:0] == 7'h68);
assign sync_pipe= (code_0d[6:0] == 7'h67);
assign sync_load= (code_0d[6:0] == 7'h66);
// load/left/tlut are captured when cs_st_prim is asserted (code_0d[7]);
// sign is captured two stages later (code_2d[2]).
assign load_g = code_0d[7];
assign left_g = code_0d[7];
assign sign_g = code_2d[2];
assign tlut_g = code_0d[7];
// d_ld / d_tl look at cs_cmd only; they are qualified by load_g / tlut_g
// at the latches below.
assign d_ld = (code_0d[5:0] == 6'h34) || // load tile
(code_0d[5:0] == 6'h33) || // load block
(code_0d[5:0] == 6'h30); // load tlut
assign d_tl = code_0d[5:0] == 6'h30; // load tlut
// instantiated latches
// dlat holds the 64-bit command word; note it is clocked through a port
// named clkn. NOTE(review): presumably an opposite-phase latch - confirm.
at_latch64 dlat (.clkn( gclk), .i(cs_ew_d), .z(d_lat));
// First-level 32-bit holding latches. Inputs replicate one 16-bit half of
// d_lat[31:0] into both halves ({2{...}}), except the z latches.
at_latch32 ewdxr (.clk(gclk),.g( dxr_g),.i({2{d_lat[31:16]}}),.z(ew_dxr_a));
at_latch32 ewdxg (.clk(gclk),.g( dxg_g),.i({2{d_lat[15: 0]}}),.z(ew_dxg_a));
at_latch32 ewdxb (.clk(gclk),.g( dxb_g),.i({2{d_lat[31:16]}}),.z(ew_dxb_a));
at_latch32 ewdxa (.clk(gclk),.g( dxa_g),.i({2{d_lat[15: 0]}}),.z(ew_dxa_a));
at_latch32 ewdxz (.clk(gclk),.g( dxz_g),.i( d_lat[31: 0] ),.z(ew_dxz_a));
at_latch32 ewdxs (.clk(gclk),.g( dxs_g),.i({2{d_lat[31:16]}}),.z(ew_dxs_a));
at_latch32 ewdxt (.clk(gclk),.g( dxt_g),.i({2{d_lat[15: 0]}}),.z(ew_dxt_a));
at_latch32 ewdxw (.clk(gclk),.g( dxw_g),.i({2{d_lat[31:16]}}),.z(ew_dxw_a));
at_latch32 ewdyr (.clk(gclk),.g( dyr_g),.i({2{d_lat[31:16]}}),.z(ew_dyr_a));
at_latch32 ewdyg (.clk(gclk),.g( dyg_g),.i({2{d_lat[15: 0]}}),.z(ew_dyg_a));
at_latch32 ewdyb (.clk(gclk),.g( dyb_g),.i({2{d_lat[31:16]}}),.z(ew_dyb_a));
at_latch32 ewdya (.clk(gclk),.g( dya_g),.i({2{d_lat[15: 0]}}),.z(ew_dya_a));
at_latch32 ewdyz (.clk(gclk),.g( dyz_g),.i( d_lat[31: 0] ),.z(ew_dyz_a));
at_latch32 ewdys (.clk(gclk),.g( dys_g),.i({2{d_lat[31:16]}}),.z(ew_dys_a));
at_latch32 ewdyt (.clk(gclk),.g( dyt_g),.i({2{d_lat[15: 0]}}),.z(ew_dyt_a));
at_latch32 ewdyw (.clk(gclk),.g( dyw_g),.i({2{d_lat[31:16]}}),.z(ew_dyw_a));
// Second-level latches for dxb/dxa/dxs/dxt, gated by ew_ep_startspan only
// (no clock port is connected on at_latch23).
at_latch23 ewdzb (.g(ew_ep_startspan), .i(ew_dxb_a[31:9]), .z(ew_dxb_b));
at_latch23 ewdza (.g(ew_ep_startspan), .i(ew_dxa_a[31:9]), .z(ew_dxa_b));
at_latch23 ewdzs (.g(ew_ep_startspan), .i(ew_dxs_a[31:9]), .z(ew_dxs_b));
at_latch23 ewdzt (.g(ew_ep_startspan), .i(ew_dxt_a[31:9]), .z(ew_dxt_b));
// Scissor and the heads of the load/sign/left/tlut delay chains.
at_latch56 sca (.clk(gclk),.g( sc_g),.i(d_lat[55:0]), .z(scissor_a));
at_latch1 loada (.clk(gclk),.g(load_g),.i(d_ld), .z(load_1d));
at_latch1 sgn3d (.clk(gclk),.g(sign_g),.i(d_lat[31]), .z(sign_3d));
at_latch1 lft1d (.clk(gclk),.g(left_g),.i(d_lat[55]), .z(left_1d));
at_latch1 tluta (.clk(gclk),.g(tlut_g),.i(d_tl), .z(tlut_1d));
// pipeline for load, sign and left
// Pure shift registers: each stage copies the previous one per gclk edge.
// There is no reset on these chains.
always @(posedge gclk)
begin
load_2d <= load_1d;
load_3d <= load_2d;
load_4d <= load_3d;
load_5d <= load_4d;
load_6d <= load_5d;
load_7d <= load_6d;
load_8d <= load_7d;
load_9d <= load_8d;
load_10d <= load_9d;
load_11d <= load_10d;
load_12d <= load_11d;
load_13d <= load_12d;
load_14d <= load_13d;
load_15d <= load_14d;
load_16d <= load_15d;
load_17d <= load_16d;
load_18d <= load_17d;
load_19d <= load_18d;
load_20d <= load_19d;
load_21d <= load_20d;
sign_4d <= sign_3d;
sign_5d <= sign_4d;
sign_6d <= sign_5d;
sign_7d <= sign_6d;
sign_8d <= sign_7d;
sign_9d <= sign_8d;
sign_10d <= sign_9d;
sign_11d <= sign_10d;
sign_12d <= sign_11d;
sign_13d <= sign_12d;
sign_14d <= sign_13d;
sign_15d <= sign_14d;
sign_16d <= sign_15d;
sign_17d <= sign_16d;
left_2d <= left_1d;
left_3d <= left_2d;
left_4d <= left_3d;
left_5d <= left_4d;
left_6d <= left_5d;
left_7d <= left_6d;
left_8d <= left_7d;
left_9d <= left_8d;
left_10d <= left_9d;
left_11d <= left_10d;
left_12d <= left_11d;
left_13d <= left_12d;
left_14d <= left_13d;
left_15d <= left_14d;
left_16d <= left_15d;
left_17d <= left_16d;
left_18d <= left_17d;
left_19d <= left_18d;
left_20d <= left_19d;
left_21d <= left_20d;
left_22d <= left_21d;
left_23d <= left_22d;
left_24d <= left_23d;
left_25d <= left_24d;
left_26d <= left_25d;
left_27d <= left_26d;
left_28d <= left_27d;
left_29d <= left_28d;
left_30d <= left_29d;
left_31d <= left_30d;
left_32d <= left_31d;
left_33d <= left_32d;
left_34d <= left_33d;
left_35d <= left_34d;
left_36d <= left_35d;
left_37d <= left_36d;
left_38d <= left_37d;
left_39d <= left_38d;
left_40d <= left_39d;
left_41d <= left_40d;
tlut_2d <= tlut_1d;
tlut_3d <= tlut_2d;
tlut_4d <= tlut_3d;
tlut_5d <= tlut_4d;
tlut_6d <= tlut_5d;
tlut_7d <= tlut_6d;
tlut_8d <= tlut_7d;
tlut_9d <= tlut_8d;
tlut_10d <= tlut_9d;
tlut_11d <= tlut_10d;
tlut_12d <= tlut_11d;
tlut_13d <= tlut_12d;
end
// counters for sync release
// Each counter is enabled by a command decode and loaded with a fixed
// 6-bit count. NOTE(review): presumably z asserts cnt cycles after enb -
// confirm against at_ctrn / at_ctrb.
at_ctrn ctsyfu (.clk(gclk), .rst(reset), .enb(sync_full),
.cnt(6'd47), .z(strobe_sync_full));
at_ctrn ctsypi (.clk(gclk), .rst(reset), .enb(sync_pipe),
.cnt(6'd47), .z(rel_sync_pipe));
at_ctrn ctsyti (.clk(gclk), .rst(reset), .enb(sync_tile),
.cnt(6'd30), .z(rel_sync_tile));
at_ctrn ctsyld (.clk(gclk), .rst(reset), .enb(sync_load),
.cnt(6'd22), .z(rel_sync_load));
// Busy extension is only armed while atomic is asserted.
at_ctrb ctbusy (.clk(gclk), .rst(reset), .enb(ew_cs_busy & atomic),
.cnt(6'd42), .z(at_cs_busy));
// read latches with bit assignments and padding (unused latches eaten)
// Bits [8:0] of every 32-bit latch are not read in this module.
assign ew_dxr = ew_dxr_a[31:9]; // s15.7
assign ew_dxg = ew_dxg_a[31:9];
assign ew_dxb = ew_dxb_b;
assign ew_dxa = ew_dxa_b;
assign ew_dxz = ew_dxz_a[31:9];
assign ew_dxs = ew_dxs_b;
assign ew_dxt = ew_dxt_b;
assign ew_dxw = ew_dxw_a[31:9];
assign ew_dyr = ew_dyr_a[31:9];
assign ew_dyg = ew_dyg_a[31:9];
assign ew_dyb = ew_dyb_a[31:9];
assign ew_dya = ew_dya_a[31:9];
assign ew_dyz = ew_dyz_a[31:9];
assign ew_dys = ew_dys_a[31:9];
assign ew_dyt = ew_dyt_a[31:9];
assign ew_dyw = ew_dyw_a[31:9];
// scissor bits [31:26] are forced to zero; only [55:32] and [25:0] pass.
assign scissor = {scissor_a[55:32], 6'b0, scissor_a[25:0]};
// Pipeline taps; the trailing number is the stage each output is taken from.
assign ew_image_load = load_1d; // 1
assign ew_scissor_load = load_4d; // 4
assign ew_stall_load = load_13d; // 13
assign ew_offset_load = load_17d; // 17
assign ew_major_sign = sign_4d; // 4
assign ew_offset_sign = sign_17d; // 17
assign ew_major_left = left_4d; // 4
assign ew_minor_left = left_13d; // 13
assign ew_offset_left = left_17d; // 17
assign cv_left = left_13d; // 13
assign tc_load = load_21d; // 21
assign st_s_left = left_23d; // 23
assign st_t_left = left_23d; // 23
assign st_w_left = left_21d; // 21
assign st_r_left = left_39d; // 39
assign st_g_left = left_39d; // 39
assign st_b_left = left_39d; // 39
assign st_a_left = left_39d; // 39
assign st_z_left = left_41d; // 41
assign ew_scissor_tlut = tlut_4d; // 4
assign ew_stall_tlut = tlut_13d; // 13
// reclock load and left mux outputs for speed
// st_ncyc is ncyc forced low while load_21d is set, registered once more
// (hence the effective stage number 22).
always @(posedge gclk)
begin
st_ncyc <= ncyc & !load_21d; // 22
end
endmodule // at_ew