// divctl.v (15 KB)
/**********************************************************************
* divctl : control and datapath of the divide block (reciprocal and
* reciprocal-square-root operations, see the opcode table below).
*
* Flow visible in this module:
*   1. Decode OpCode and register the selected scalar (VTH or VTL).
*   2. Conditionally complement the operand, locate its leading one,
*      and shift it to form the ROM lookup address RADDR.
*   3. Register ROMData and shift it by the complemented shift count
*      to form DivOut, with fixed results for the MinPos/MinNeg cases.
*
* Naming: *_R signals are decoded directly from the inputs; the
* un-suffixed registers hold them one clock later; *_A and *_W
* registers follow in the next two clocks (some of them only update
* when a DPL or SP operation is present, see the individual flops).
**********************************************************************/
module divctl(CLK, Reset_l, OpCode, OpCodeValid, VTL, VTH, El2, ROMData, RADDR, ROMCLK,DivOut);
input CLK; // Clock input
input Reset_l; // Reset (Active Low)
input [5:0] OpCode; // OpCode. Low 6 bits of instruction
input OpCodeValid; // OpCode is Valid.
input [15:0] VTH; // Scalar Value from Elements(4:7)
input [15:0] VTL; // Scalar Value from Elements(0:3)
input [15:0] ROMData; // ROM Lookup Data
input El2; // Element bit 2
output [9:0] RADDR; // ROM Lookup Address
output [15:0] DivOut; // Output of the Divide block.
output ROMCLK; // Clock to ROM
/****************************************************
* Signal Definitions
****************************************************/
wire Divide ;
wire SP_R ;
wire DPL_R ;
wire DPH_R ;
wire Sqrt_R ;
wire NOP_R ;
wire Sign_R ;
wire AllZero ;
wire OldCntSel ;
wire [1:0] MSO_G0;
wire [1:0] MSO_G1;
wire [1:0] MSO_G2;
wire [1:0] MSO_G3;
wire [9:0] Merge ;
wire [3:0] ShCnt ;
wire [3:0] ShCntX ;
wire [9:0] RA_B ;
//wire [15:0] Data ;
wire [3:0] NZG ;
wire [1:0] MSOH ;
wire [9:0] Mask ;
wire [9:0] NewMask ;
wire [9:0] OldMask ;
reg [1:0] MSOL;
reg AllZeroD;
reg SP;
reg DPL;
reg DPH;
reg SP_A;
reg DPL_A;
wire Sign;
reg Sqrt;
reg NOP;
reg NOP_A;
reg NOP_W;
reg [15:0] VT_M;
reg [3:0] MSOD;
reg [15:0] BusA;
reg [9:0] BusB;
reg [9:0] RA ;
reg [4:0] ShCnt_A;
reg DPH_A;
reg DPH_W;
reg Sign_A;
reg Sign_W;
reg [4:0] RtShCnt_W;
reg Sqrt_A;
reg [15:0] ROMData_W;
reg [22:0] OBusA;
reg [15:0] OBusB;
reg Sl2C;
wire [14:0] Low15_2C;
wire [1:0] DataSl;
wire [15:0] Data;
wire MinPos;
wire MinNeg;
wire Num8000;
reg MinPos_A;
reg MinPos_W;
reg MinNeg_A;
reg MinNeg_W;
/**********************************************************************
* Reg Fetch stage. OpCode Decode
*
* OpCode[2:0] Operation
* =========== ==========
* 000 RCP
* 001 RCPL
* 010 RCPH
* 100 RSQ
* 101 RSQL
* 110 RSQH
* 111 NOP
* 011 MOVE
*
* SP  = single precision (RCP/RSQ)
* DPH = high half of a double-precision operand (RCPH/RSQH)
* DPL = low half of a double-precision operand (RCPL/RSQL)
**********************************************************************/
// DPH_SEEN remembers whether the most recent divide operation was a
// high-half (DPH) operation.
reg DPH_SEEN;
wire DPL_R_tmp;
// A divide-block operation is OpCode[5:3] == 3'b110 with a valid opcode
// and reset not asserted.
assign Divide = OpCode[5] & OpCode[4] & !OpCode[3] & OpCodeValid & Reset_l;
assign DPL_R_tmp = Divide & !OpCode[1] & OpCode[0];
// A low-half opcode counts as DPL only when it directly follows a DPH
// operation; otherwise it is folded into SP_R below.
assign DPL_R = DPL_R_tmp & DPH_SEEN; //RCPL | RSQL
assign DPH_R = Divide & OpCode[1] & !OpCode[0]; //RCPH | RSQH
assign SP_R = Divide & !OpCode[1] & !OpCode[0] | DPL_R_tmp&!DPH_SEEN; //RCP | RSQ
assign Sqrt_R = Divide & OpCode[2]; //RSQ|RSQL|RSQH
// Note: Divide already contains Reset_l, so the !Reset_l term is
// covered by !Divide as well.
assign NOP_R = OpCode[1] & OpCode[0] | !Divide | !Reset_l; //!Divide|NOP|MOVE
// Sign bit of the scalar half selected by El2.
wire VTSign = (El2) ? VTH[15] : VTL[15];
// The sign is only sampled on DPH and SP operations; a DPL operation
// leaves the previously latched Sign in place (see Sign_CE).
assign Sign_R = (DPH_R|SP_R) & VTSign;
always @(posedge CLK) SP <= SP_R ;
always @(posedge CLK) DPL <= DPL_R;
always @(posedge CLK) DPH <= DPH_R;
// Updated on every divide operation (and in reset, where DPH_R is 0):
// set by a DPH operation, cleared by an SP or DPL operation.
always @(posedge CLK) DPH_SEEN <= (DPH_R | SP_R | DPL_R | !Reset_l) ? DPH_R : DPH_SEEN;
always @(posedge CLK) Sqrt <= Sqrt_R;
always @(posedge CLK) NOP <= NOP_R;
// Operand register: VTH when El2 is set, VTL otherwise.
always @(posedge CLK) VT_M <= (El2) ? VTH : VTL;
//always @(posedge CLK) Sign <= (DPH_R | SP_R | !Reset_l) ? Sign_R : Sign;
wire Sign_CE = (DPH_R | SP_R | !Reset_l);
// Library cell holding Sign. Judging by its port names and the
// commented-out line above, it loads .da when .sa is set and
// recirculates .db otherwise -- TODO confirm against the cell library.
mfntnh u_sgnff(.q(Sign), .da(Sign_R), .db(Sign), .sa(Sign_CE), .cp(CLK));
// Sl2C selects two's complement (instead of one's complement) for a
// negative operand: on SP operations, and on DPL operations whose
// high-half Data was all zero (AllZero if the DPH operation is in the
// following stage right now, otherwise the saved AllZeroD).
always @(posedge CLK) Sl2C <= (DPL_R & (DPH&AllZero | !DPH&AllZeroD) | SP_R) & VTSign;
/*******************************************************************
* Complement of sign
*
* Low15_2C = ~VT_M[14:0] + 1, built from three chained 5-bit
* incrementers (div_inc5). The carry out of the top slice is unused.
*******************************************************************/
//assign Low15_2C = ~VT_M[14:0] + 1'b1;
wire [15:0] VT_M_B = ~VT_M[15:0];
wire co_0,co_1;
div_inc5 u_inc0(.in(VT_M_B[4:0]), .out(Low15_2C[4:0]), .cin(1'b1), .cout(co_0));
div_inc5 u_inc1(.in(VT_M_B[9:5]), .out(Low15_2C[9:5]), .cin(co_0), .cout(co_1));
div_inc5 u_inc2(.in(VT_M_B[14:10]),.out(Low15_2C[14:10]),.cin(co_1));
// DataSl encoding (select of the Data mux below):
//00 NOP
//01 +ve
//10 -ve, 1'scomplement
//11 -ve, 2'scomplement
assign DataSl[1] = Sign & !NOP;
assign DataSl[0] = (!Sign | Sl2C) & !NOP;
/*
* always @(VT_M or DataSl or Low15_2C)
* case (DataSl) //synopsys parallel_case full_case
* 2'b00: Data = 16'h0;
* 2'b01: Data = VT_M;
* 2'b10: Data = ~VT_M;
* 2'b11: Data = {1'b0,Low15_2C};
* endcase
*/
// Structural version of the behavioral mux shown in the comment above.
div_mux4x1_16 u_data_mux(.z(Data),
.i0(16'h0),
.i1(VT_M),
.i2(VT_M_B),
.i3({1'b0,Low15_2C}),
.s(DataSl)
);
/******************************************************************
* Pri. Encoding or shift count calculation.
* -Data is 16 bits so shift count is 4 bit value
* -lower 2 bits of this count is independently determined for four groups
* of four bits each. G3=[15:12], G2=[11:8], G1=[7:4], G0=[3:0]
* -upper two bits are determined on this four groups(G3,G2,G1,G0).
* Pri. Encoding or shift count on four bits group
* Shift[1:0] In[3:0]
* 00 1xxx
* 01 01xx
* 10 001x
* 11 0001
*****************************************************************/
//
// MSO_Gn = position (counted from the top bit) of the leading one
// inside nibble n, per the table above.
assign MSO_G3[1] = !Data[15] & !Data[14] & (Data[13] | Data[12]);
assign MSO_G3[0] = !Data[15] & (Data[14] | !Data[13]&Data[12]);
assign MSO_G2[1] = !Data[11] & !Data[10] & (Data[9] | Data[8]);
assign MSO_G2[0] = !Data[11] & (Data[10] | !Data[9]&Data[8]);
assign MSO_G1[1] = !Data[7] & !Data[6] & (Data[5] | Data[4]);
assign MSO_G1[0] = !Data[7] & (Data[6] | !Data[5]&Data[4]);
assign MSO_G0[1] = !Data[3] & !Data[2] & (Data[1] | Data[0]);
assign MSO_G0[0] = !Data[3] & (Data[2] | !Data[1]&Data[0]);
// NZG[X] is true if any one of Data[A:B] bits is one.
assign NZG[3] = Data[15] | Data[14] | Data[13] | Data[12];
assign NZG[2] = Data[11] | Data[10] | Data[9] | Data[8];
assign NZG[1] = Data[7] | Data[6] | Data[5] | Data[4];
assign NZG[0] = Data[3] | Data[2] | Data[1] | Data[0];
// MSOH = index (counted from the top nibble) of the first non-zero
// nibble, using the same encoding as the per-nibble table.
assign MSOH[1] = !NZG[3] & !NZG[2] & (NZG[1] | NZG[0]);
assign MSOH[0] = !NZG[3] & (NZG[2] | !NZG[1]&NZG[0]);
// Get MSOL[1:0] using MSOH[1:0]
always @(MSO_G0 or MSO_G1 or MSO_G2 or MSO_G3 or MSOH)
begin
case (MSOH) //synopsys parallel_case full_case
2'b00 : MSOL = MSO_G3;
2'b01 : MSOL = MSO_G2;
2'b10 : MSOL = MSO_G1;
2'b11 : MSOL = MSO_G0;
endcase
end
assign AllZero = !(NZG[3] | NZG[2] | NZG[1] | NZG[0]);
// Save the all-zero flag and shift count of the high half while a DPH
// operation is in this stage, for use by the following DPL operation.
always @(posedge CLK) AllZeroD <= (DPH | !Reset_l) ? AllZero : AllZeroD;
always @(posedge CLK) MSOD <= (DPH | !Reset_l) ? {MSOH,MSOL} : MSOD;
// A DPL operation reuses the saved high-half count unless the high
// half was all zero, in which case the count of the low half is used.
assign OldCntSel = DPL & !AllZeroD;
// ShCnt and ShCntX are the same expression; ShCnt feeds the shifter,
// ShCntX feeds the mask, RADDR[0] and ShCnt_A. Presumably duplicated
// to split the load -- TODO confirm.
assign ShCnt = (OldCntSel) ? MSOD : {MSOH,MSOL};
assign ShCntX = (OldCntSel) ? MSOD : {MSOH,MSOL};
assign Num8000 = (VT_M==16'h8000);
// Special operands that get a fixed result in the output stage
// (see DivOut).
assign MinPos = !Sign & ((SP & AllZero) | (DPL & AllZero & AllZeroD)); // 32'h0000_0000
assign MinNeg = Sign & ((SP & Num8000) | (DPL & Num8000 & AllZeroD)); //32'hffff_8000
/******************************************************************
* Muxing and shifting
* ShCnt[3:2] shifts left by 4*Group_with_nonZero_bit.
* ShCnt[1:0] shifts left by the offset of 1st nonZero within the group.
*
* The nibble stage is a rotate: nibbles shifted out of the top
* re-enter at the bottom of BusA. The bits that wrapped around are
* removed afterwards by Mask.
*****************************************************************/
always @(ShCnt or Data)
begin
case (ShCnt[3:2]) //synopsys parallel_case full_case
2'b00 : BusA[15:12] = Data[15:12];
2'b01 : BusA[15:12] = Data[11:8];
2'b10 : BusA[15:12] = Data[7:4];
2'b11 : BusA[15:12] = Data[3:0];
endcase
case (ShCnt[3:2]) //synopsys parallel_case full_case
2'b00 : BusA[11:8] = Data[11:8];
2'b01 : BusA[11:8] = Data[7:4];
2'b10 : BusA[11:8] = Data[3:0];
2'b11 : BusA[11:8] = Data[15:12];
endcase
case (ShCnt[3:2]) //synopsys parallel_case full_case
2'b00 : BusA[7:4] = Data[7:4];
2'b01 : BusA[7:4] = Data[3:0];
2'b10 : BusA[7:4] = Data[15:12];
2'b11 : BusA[7:4] = Data[11:8];
endcase
case (ShCnt[3:2]) //synopsys parallel_case full_case
2'b00 : BusA[3:0] = Data[3:0];
2'b01 : BusA[3:0] = Data[15:12];
2'b10 : BusA[3:0] = Data[11:8];
2'b11 : BusA[3:0] = Data[7:4];
endcase
end
// Fine shift: pick the 10-bit window of BusA whose top bit is
// BusA[15 - ShCnt[1:0]].
always @(ShCnt or BusA)
begin
case (ShCnt[1:0]) //synopsys parallel_case full_case
2'b00 : BusB = BusA[15:6];
2'b01 : BusB = BusA[14:5];
2'b10 : BusB = BusA[13:4];
2'b11 : BusB = BusA[12:3];
endcase
end
/***************************************************************************
* Mask Generation
*
* Mask[k] is 0 when ShCntX >= 7+k (k = 0..8); Mask[9] is always 1.
* These are the BusB positions that the rotate above filled with
* wrapped-around bits instead of real operand bits.
***************************************************************************/
assign Mask[9] = 1;
assign Mask[8] = !ShCntX[3] | !ShCntX[2] | !ShCntX[1] | !ShCntX[0];
assign Mask[7] = !ShCntX[3] | !ShCntX[2] | !ShCntX[1];
assign Mask[6] = !ShCntX[3] | !ShCntX[2] | (!ShCntX[1]&!ShCntX[0]);
assign Mask[5] = !ShCntX[3] | !ShCntX[2];
assign Mask[4] = !ShCntX[3] | (!ShCntX[2]&!ShCntX[1]) | (!ShCntX[2]&!ShCntX[0]);
assign Mask[3] = !ShCntX[3] | (!ShCntX[2]&!ShCntX[1]);
assign Mask[2] = !ShCntX[3] | (!ShCntX[2]&!ShCntX[1]&!ShCntX[0]);
assign Mask[1] = !ShCntX[3];
assign Mask[0] = !ShCntX[3] & (!ShCntX[2]|!ShCntX[1]|!ShCntX[0]);
/************************************************************************
* Output of ACC stage.
*
* Merge is all ones for a DPL operation whose high half was non-zero
* (same condition as OldCntSel). In that case:
*   NewMask = ~Mask, OldMask = Mask
*   RA_B    = saved high-half address bits (RA) where Mask is 1,
*             current BusB bits in the positions Mask removed.
* Otherwise NewMask = Mask, OldMask = 0 and RA_B = BusB & Mask.
************************************************************************/
assign Merge = {10{DPL & !AllZeroD}};
assign NewMask = Mask^Merge;
assign OldMask = Mask&Merge;
assign RA_B = BusB&NewMask | RA&OldMask;
// ROM address: bit 9 selects the square-root half of the table.
assign RADDR[9] = Sqrt;
assign RADDR[8:1] = RA_B[8:1];
// For square-root DPL/SP operations address bit 0 carries the low bit
// of the shift count instead of RA_B[0].
assign RADDR[0] = (Sqrt&(DPL|SP)) ? ShCntX[0] : RA_B[0];
// Keep the high-half address bits for the merge in the following DPL.
always @(posedge CLK) RA <= (DPH | !Reset_l) ? RA_B : RA;
// The flops below only update when a DPL or SP operation (or reset) is
// in this stage, so a DPH operation leaves them unchanged.
always @(posedge CLK) MinPos_A <= (DPL|SP|!Reset_l) ? MinPos : MinPos_A;
always @(posedge CLK) MinNeg_A <= (DPL|SP|!Reset_l) ? MinNeg : MinNeg_A ;
always @(posedge CLK) Sign_A <= (DPL|SP|!Reset_l) ? Sign : Sign_A;
always @(posedge CLK) Sqrt_A <= (DPL|SP|!Reset_l) ? Sqrt : Sqrt_A;
// Plain one-clock pipeline delays.
always @(posedge CLK) NOP_A <= NOP;
always @(posedge CLK) SP_A <= SP;
always @(posedge CLK) DPL_A <= DPL;
always @(posedge CLK) DPH_A <= DPH;
always @(posedge CLK) MinPos_W <= MinPos_A;
always @(posedge CLK) MinNeg_W <= MinNeg_A;
always @(posedge CLK) Sign_W <= Sign_A;
always @(posedge CLK) NOP_W <= NOP_A;
always @(posedge CLK) DPH_W <= DPH_A;
// 5-bit input shift count: the extra top bit is set for SP operations
// and for DPL operations whose high half was all zero.
always @(posedge CLK) ShCnt_A <= (DPL|SP|!Reset_l) ? {DPL&AllZeroD|SP,ShCntX} : ShCnt_A;
// Output shift count is the bitwise inverse of the input shift count;
// for square-root operations the inverted count is halved
// (bits [4:1], zero extended).
always @(posedge CLK) RtShCnt_W <= (Sqrt_A) ? {1'b0,~ShCnt_A[4:1]}: ~ShCnt_A;
/************************************************************************
* WB or output stage
*
* Muxing and shifting
*
* ROMData is aligned to bit 30. Since there is implicit right shift
* of one. Output_shift = ~input_shift + 1;
*
************************************************************************/
// ROM data is captured when a DPL or SP operation is in the _A stage
// and cleared to zero while reset is asserted.
always @(posedge CLK) ROMData_W <= (DPL_A|SP_A|!Reset_l) ? ROMData&{16{Reset_l}} : ROMData_W;
// Two-level output shifter: OBusA is the coarse placement selected by
// {DPH_W, RtShCnt_W[4:3]}; OBusB then takes a 16-bit window of OBusA
// offset by RtShCnt_W[2:0].
always @(DPH_W or RtShCnt_W or ROMData_W)
begin
case ({DPH_W,RtShCnt_W[4:3]}) //synopsys parallel_case full_case
3'b000 : OBusA = {ROMData_W[8:0],14'h0};
3'b001 : OBusA = {1'b1,ROMData_W[15:0],6'h0};
3'b010 : OBusA = {9'h1,ROMData_W[15:2]};
3'b011 : OBusA = {17'h1,ROMData_W[15:10]};
//
3'b100 : OBusA = {9'h1,ROMData_W[15:2]};
3'b101 : OBusA = {17'h1,ROMData_W[15:10]};
3'b110 : OBusA = 23'h0;
3'b111 : OBusA = 23'h0;
endcase
case (RtShCnt_W[2:0]) //synopsys parallel_case full_case
3'b000 : OBusB = OBusA[15:0];
3'b001 : OBusB = OBusA[16:1];
3'b010 : OBusB = OBusA[17:2];
3'b011 : OBusB = OBusA[18:3];
3'b100 : OBusB = OBusA[19:4];
3'b101 : OBusB = OBusA[20:5];
3'b110 : OBusB = OBusA[21:6];
3'b111 : OBusB = OBusA[22:7];
endcase
end
// Result select, in priority order:
//   neither MinPos_W nor MinNeg_W : shifted ROM data, inverted when Sign_W
//   DPH_W and MinPos_W            : 16'h7fff
//   !DPH_W and MinNeg_W           : 16'h0000
//   anything else                 : 16'hffff
//assign DivOut = OBusB^{16{Sign_W}} ;
assign DivOut = (!MinPos_W && !MinNeg_W) ? OBusB^{16{Sign_W}} :
( DPH_W && MinPos_W) ? 16'h7fff :
(!DPH_W && MinNeg_W) ? 16'h0000 :
16'hffff ;
/**********************************
* repeaters on ROMData
* (all instances are commented out)
**********************************/
//rp01d1 u_rp00(.z(ROMData[ 0]));
//rp01d1 u_rp01(.z(ROMData[ 1]));
//rp01d1 u_rp02(.z(ROMData[ 2]));
//rp01d1 u_rp03(.z(ROMData[ 3]));
//rp01d1 u_rp04(.z(ROMData[ 4]));
//rp01d1 u_rp05(.z(ROMData[ 5]));
//rp01d1 u_rp06(.z(ROMData[ 6]));
//rp01d1 u_rp07(.z(ROMData[ 7]));
//rp01d1 u_rp08(.z(ROMData[ 8]));
//rp01d1 u_rp09(.z(ROMData[ 9]));
//rp01d1 u_rp10(.z(ROMData[10]));
//rp01d1 u_rp11(.z(ROMData[11]));
//rp01d1 u_rp12(.z(ROMData[12]));
//rp01d1 u_rp13(.z(ROMData[13]));
//rp01d1 u_rp14(.z(ROMData[14]));
//rp01d1 u_rp15(.z(ROMData[15]));
/**********************************************************
* ROM CLOCK Generator
*
* ROMCLK is CLK gated by an enable derived from RomCE. RomCE is
* low only when NOP, NOP_A and NOP_W are all set, i.e. no
* operation is in any of the three stages.
* The enable passes through two ni01d1 cells, the lanfnb latch
* (enabled by CLK) and two more ni01d1 cells before the final
* AND with CLK. Presumably the latch keeps the enable stable
* while CLK is high; cell polarities come from the library --
* TODO confirm.
*********************************************************/
wire RomCE = !(NOP & NOP_A & NOP_W);
wire RunClkB1,RunClkB2,RunClkLat,RunClkLatB1,RunClkLatB2;
ni01d1 u_1(.z(RunClkB1), .i(RomCE));
ni01d1 u_2(.z(RunClkB2), .i(RunClkB1));
lanfnb u_3(.q(RunClkLat),.d(RunClkB2), .en(CLK));
ni01d1 u_4(.z(RunClkLatB1), .i(RunClkLat));
ni01d1 u_5(.z(RunClkLatB2), .i(RunClkLatB1));
an02d2 u_6(.z(ROMCLK), .a1(CLK), .a2(RunClkLatB2));
endmodule//divctl
//////////////////////////////////////
// div_inc5 : 5-bit incrementer slice built from library gates.
//   out  = low five bits of (in + cin)
//   cout = carry out of bit 4 (cin ANDed with every bit of in)
// The an0Nd* cells are taken to be N-input ANDs and xo02d1 a 2-input
// XOR -- TODO confirm against the cell library.
module div_inc5(in,out,cin,cout);
output [4:0] out;
output cout;
input [4:0] in;
input cin;
// carry[i] is the carry out of bit i, i.e. the carry into bit i+1.
// Every carry is formed directly from cin and the lower input bits
// rather than rippling through the previous stage.
wire [3:0] carry;
// bit 0
xo02d1 u_5(.a1(in[0]), .a2(cin), .z(out[0]));
an02d1 u_0(.a1(cin), .a2(in[0]), .z(carry[0]));
// bit 1
xo02d1 u_6(.a1(in[1]), .a2(carry[0]), .z(out[1]));
an03d1 u_1(.a1(cin), .a2(in[0]), .a3(in[1]), .z(carry[1]));
// bit 2
xo02d1 u_7(.a1(in[2]), .a2(carry[1]), .z(out[2]));
an04d1 u_2(.a1(cin), .a2(in[0]), .a3(in[1]), .a4(in[2]), .z(carry[2]));
// bit 3
xo02d1 u_8(.a1(in[3]), .a2(carry[2]), .z(out[3]));
an05d1 u_3(.a1(cin), .a2(in[0]), .a3(in[1]), .a4(in[2]), .a5(in[3]), .z(carry[3]));
// bit 4 and carry out of the slice
xo02d1 u_9(.a1(in[4]), .a2(carry[3]), .z(out[4]));
an06d2 u_4(.a1(cin), .a2(in[0]), .a3(in[1]), .a4(in[2]), .a5(in[3]), .a6(in[4]), .z(cout));
endmodule
// div_mux4x1_16 : 16-bit wide 4-to-1 multiplexer made of sixteen
// mx41d2 library cells, one per bit.
//   z = i0, i1, i2 or i3 for s = 2'b00, 2'b01, 2'b10, 2'b11
//   (assuming mx41d2 treats s1 as the upper select bit -- TODO confirm
//   against the cell library).
// Each select bit goes through two separate ni01d5 cells: one copy
// drives bits [7:0], the other drives bits [15:8].
module div_mux4x1_16(z, i0, i1, i2, i3, s);
output [15:0] z;
input [1:0] s;
input [15:0] i0, i1, i2, i3;
wire sel0_lo, sel1_lo; // select copies for z[7:0]
wire sel0_hi, sel1_hi; // select copies for z[15:8]
ni01d5 u_s0x(.i(s[0]), .z(sel0_lo));
ni01d5 u_s1x(.i(s[1]), .z(sel1_lo));
ni01d5 u_s0y(.i(s[0]), .z(sel0_hi));
ni01d5 u_s1y(.i(s[1]), .z(sel1_hi));
// low byte
mx41d2 u_00(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[0]), .i1(i1[0]), .i2(i2[0]), .i3(i3[0]), .z(z[0]));
mx41d2 u_01(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[1]), .i1(i1[1]), .i2(i2[1]), .i3(i3[1]), .z(z[1]));
mx41d2 u_02(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[2]), .i1(i1[2]), .i2(i2[2]), .i3(i3[2]), .z(z[2]));
mx41d2 u_03(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[3]), .i1(i1[3]), .i2(i2[3]), .i3(i3[3]), .z(z[3]));
mx41d2 u_04(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[4]), .i1(i1[4]), .i2(i2[4]), .i3(i3[4]), .z(z[4]));
mx41d2 u_05(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[5]), .i1(i1[5]), .i2(i2[5]), .i3(i3[5]), .z(z[5]));
mx41d2 u_06(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[6]), .i1(i1[6]), .i2(i2[6]), .i3(i3[6]), .z(z[6]));
mx41d2 u_07(.s1(sel1_lo), .s0(sel0_lo), .i0(i0[7]), .i1(i1[7]), .i2(i2[7]), .i3(i3[7]), .z(z[7]));
// high byte
mx41d2 u_08(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[8]), .i1(i1[8]), .i2(i2[8]), .i3(i3[8]), .z(z[8]));
mx41d2 u_09(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[9]), .i1(i1[9]), .i2(i2[9]), .i3(i3[9]), .z(z[9]));
mx41d2 u_10(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[10]), .i1(i1[10]), .i2(i2[10]), .i3(i3[10]), .z(z[10]));
mx41d2 u_11(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[11]), .i1(i1[11]), .i2(i2[11]), .i3(i3[11]), .z(z[11]));
mx41d2 u_12(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[12]), .i1(i1[12]), .i2(i2[12]), .i3(i3[12]), .z(z[12]));
mx41d2 u_13(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[13]), .i1(i1[13]), .i2(i2[13]), .i3(i3[13]), .z(z[13]));
mx41d2 u_14(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[14]), .i1(i1[14]), .i2(i2[14]), .i3(i3[14]), .z(z[14]));
mx41d2 u_15(.s1(sel1_hi), .s0(sel0_hi), .i0(i0[15]), .i1(i1[15]), .i2(i2[15]), .i3(i3[15]), .z(z[15]));
endmodule