pi_aes.v
9.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
// pi_aes.v v1 Frank Berndt
// pi aes decryption core;
// :set tabstop=4
// decrypts data in pi buffer in place;
// cbc init vector and expanded key must have been set up;
// key length is fixed at 128bits;
module pi_aes (
	sysclk, reset_l,
	aes_start, aes_stop, aes_busy, aes_ia, aes_hc, aes_da, aes_size, aes_done,
	aes_addr, aes_req, aes_write, aes_ack, aes_in, aes_out, aes_dw
);

	// module io ports;

	input sysclk;			// system clock;
	input reset_l;			// system reset, active low;
	input aes_start;		// start aes decryption;
	input aes_stop;			// kill aes operation in progress;
	output aes_busy;		// aes core is busy;
	input [7:1] aes_ia;		// buffer address of cbc init vector;
	input aes_hc;			// hardware chaining of cbc init vector;
	input [7:1] aes_da;		// buffer address of data;
	input [5:0] aes_size;	// number of 128-bit words to decrypt;
	output aes_done;		// aes decryption done;
	output [8:0] aes_addr;	// aes pi buffer address;
	output aes_req;			// request aes buffer access;
	output aes_write;		// buffer write request;
	input aes_ack;			// aes acknowledge;
	input [63:0] aes_in;	// data read from buffer;
	output [31:0] aes_out;	// buffer write data;
	output aes_dw;			// done with another doubleword;

	// data/key delay buffers;
	// must align data and expanded key;
	// read 64 bits from buffer but supply 32 bits to core;
	// aes_ddel keeps the lower 32 bits of a read so that they can be
	// handed to the core after the upper 32 bits have been consumed;

	reg [31:0] aes_ddel;	// lower data delay buffer;
	reg [31:0] aes_edat;	// encrypted aes data;
	wire [31:0] aes_ekey;	// expanded aes key;
	reg [1:0] aes_dena;		// enable data/key latches;
	reg [1:0] aes_dsel;		// data word selects;
	reg [1:0] aes_ksel;		// key word selects;

	always @(posedge sysclk)
	begin
		if(aes_dena[1]) begin
			aes_ddel <= aes_in[31:0];
			// non-blocking: aes_ddel on the right is the previously latched value;
			aes_edat <= aes_dsel[1]? aes_in[63:32] : aes_ddel;
		end
	end

	// key word is not registered here, it is muxed straight from the read data;
	assign aes_ekey = aes_ksel[1]? aes_in[63:32] : aes_ddel;

	// instantiate aes decryption core;
	// use aes_d module instead of aes_cbc_d;
	// main reason is the reuse of the cbc reference buffer
	// for buffering of the decrypted data for write-back;

	wire aes_en;			// aes core enable;
	wire aes_go;			// start decryption operation;
	wire aes_init;			// loading of cbc init vector;
	wire aes_din_req;		// key and encrypted data in request;
	wire [31:0] aes_dout;	// decrypted data output;
	wire aes_dout_val;		// decrypted data valid;

	aes_d aes (
		.clk(sysclk),
		.rstn(reset_l),
		.en(aes_en),
		.go(aes_go),
		.ksize(2'b00),		// presumably selects 128-bit keys; confirm against aes_d;
		.din(aes_edat),
		.key(aes_ekey),
		.din_req(aes_din_req),
		.dout(aes_dout),
		.dout_vld(aes_dout_val)
	);

	// capture decrypted data for write-back;
	// must buffer all output words because the last key read
	// requests overlap with output data and have committed aes_en;
	// no need to clear these buffers on reset;
	//
	// aes_buf0..3 form a shift register of the encrypted input words;
	// aes_buf4..7 form a second shift register fed from aes_buf3;
	// when the core signals valid output, aes_buf7 is xor-ed with the
	// core output and the result re-enters at aes_buf4,
	// so aes_buf7 supplies both the cbc reference word and,
	// during write-back, the buffer write data;

	reg [31:0] aes_buf0;	// reference vector buffer;
	reg [31:0] aes_buf1;	// reference vector buffer;
	reg [31:0] aes_buf2;	// reference vector buffer;
	reg [31:0] aes_buf3;	// reference vector buffer;
	reg [31:0] aes_buf4;	// reference vector buffer;
	reg [31:0] aes_buf5;	// reference vector buffer;
	reg [31:0] aes_buf6;	// reference vector buffer;
	reg [31:0] aes_buf7;	// reference vector buffer;
	wire [31:0] aes_ddat;	// decrypted data;
	wire aes_wack;			// write-back ack;

	assign aes_wack = aes_write & aes_ack;
	// asserted on a write-back ack while address bit 1 is set;
	assign aes_dw = aes_wack & aes_addr[1];

	always @(posedge sysclk)
	begin
		// shift in encrypted words while loading the init vector or input data;
		if(aes_en & (aes_init | aes_din_req)) begin
			aes_buf0 <= aes_edat;
			aes_buf1 <= aes_buf0;
			aes_buf2 <= aes_buf1;
			aes_buf3 <= aes_buf2;
		end
		// second stage also shifts on every acked write-back,
		// which moves the next output word into aes_buf7;
		if((aes_en & (aes_din_req | aes_dout_val)) | aes_wack) begin
			aes_buf4 <= aes_dout_val? aes_ddat : aes_buf3;
			aes_buf5 <= aes_buf4;
			aes_buf6 <= aes_buf5;
			aes_buf7 <= aes_buf6;
		end
	end

	assign aes_ddat = aes_buf7 ^ aes_dout;
	assign aes_out = aes_buf7;

	// aes control logic;
	// state values have been chosen so that the state becomes
	// the lower address bits for the expanded key location;
	// read requests use 64-bit read data path;
	// writebacks use 32-bit device data path;
	//
	//	000100	4		read iv0/1
	//	000101	5		idle
	//	000110	6		read iv2/3
	//	000111	7		idle
	//	001000	8		read d0/1
	//	001001	9		read k0/1	addr 132
	//	001010	10		read d2/3
	//	001011	11		read k2/3
	//	001100	12..50	idle
	//	001101	13..51	read k4/5 ... k42/43
	//	110100	52		idle
	//	110101	53		idle
	//	110110	54		idle
	//	110111	55		write do0
	//	111000	56		write do1
	//	111001	57		write do2
	//	111010	58		write do3
	//
	// aes_breq is a look-ahead decode of aes_wsm;
	// it is registered into aes_rreq in the same clock that increments
	// aes_wsm, so each decoded value is one less than the requesting
	// state in the table above;
	// the request for state 4 comes from aes_start and the request for
	// state 8 of a following word comes from aes_nw;

	reg [5:0] aes_wsm;		// aes word state machine;
	reg aes_breq;			// request buffer;

	// purely combinational decode, hence blocking assignments;

	always @(aes_wsm)
	begin
		case(aes_wsm)
			// NOTE(review): per the table, 6'd5 precedes the iv2/3 read; confirm label;
			6'd5,							// read iv0,1;
			6'd7, 6'd8, 6'd9, 6'd10,		// read di0..3, k0..3;
			6'd12, 6'd14, 6'd16, 6'd18,		// read k4 ...;
			6'd20, 6'd22, 6'd24, 6'd26,
			6'd28, 6'd30, 6'd32, 6'd34,
			6'd36, 6'd38, 6'd40, 6'd42,
			6'd44, 6'd46, 6'd48, 6'd50,		// read ...k43;
			6'd54, 6'd55, 6'd56, 6'd57:		// write do0..3;
				aes_breq = 1'b1;
			default:
				aes_breq = 1'b0;
		endcase
	end

	// aes state machine control;
	// aes core takes 44 clocks to decrypt 128-bit word;
	// 4 more clocks are needed for decrypted data write-back;
	// one more clock to setup pipeline for next word;
	// state machine lags three clocks to align with read data;

	wire aes_reset;			// stop aes engine;
	reg aes_fack;			// kick first ack;
	reg aes_busy;			// aes block is busy;
	reg [5:0] aes_nsm;		// delayed aes_wsm;
	wire aes_inc;			// next aes word state;
	wire aes_nw;			// advance aes_wsm to next word state;
	reg aes_dph;			// data phase in next clock;
	reg aes_rreq;			// buffer read request;
	wire aes_idle;			// no buffer request;
	reg aes_del_req;		// delayed buffer request for re-issue;
	wire aes_wr;			// decode of write-back state;
	reg aes_write;			// decrypted data write back phase;
	wire aes_czero;			// word count is 0;
	reg aes_last;			// last data word;

	assign aes_done = aes_ack & aes_last;
	assign aes_reset = aes_stop | aes_done;
	// advance on an ack, or freely when no request is outstanding;
	assign aes_inc = aes_ack | aes_idle;
	assign aes_wr = (aes_nsm == 6'd54)
		| (aes_nsm == 6'd55)
		| (aes_nsm == 6'd56)
		| (aes_nsm == 6'd57);
	assign aes_nw = (aes_wsm == 6'd58);

	always @(posedge sysclk)
	begin
		// aes_start delayed by one clock, advances the state machine
		// once before any ack has been seen;
		aes_fack <= aes_start;

		// busy is set by aes_start and held until stop or done;
		if(reset_l == 1'b0)
			aes_busy <= 1'b0;
		else
			aes_busy <= ~aes_reset & (aes_busy | aes_start);

		if( ~aes_busy) begin
			// park at the first init vector state;
			// aes_start issues the first read request directly;
			aes_wsm <= 6'd4;
			aes_nsm <= 6'd4;
			aes_rreq <= aes_start;
			aes_dph <= 1'b0;
			aes_write <= 1'b0;
			aes_last <= 1'b0;
		end else if(aes_fack | aes_inc) begin
			// after the last write state wrap to state 8,
			// skipping the init vector states for following words;
			aes_wsm <= aes_nw? 6'd8 : (aes_wsm + 1);
			aes_nsm <= aes_wsm;
			aes_rreq <= aes_breq | aes_nw;
			// sticky until busy drops;
			aes_dph <= aes_dph | (aes_nsm[2:0] == 3'd6);
			aes_write <= aes_wr;
			aes_last <= aes_nw & aes_czero;
		end

		if(reset_l == 1'b0)
			aes_del_req <= 1'b0;
		else
			aes_del_req <= aes_start | aes_req;
	end

	// issue buffer request;
	// reissue when not acked;
	// idle advances state machine when no buffer access is needed;

	assign aes_req = aes_rreq | (aes_del_req & ~aes_ack);
	assign aes_idle = ~aes_del_req;

	// hold on to data buffer address at start time, then increment;
	// hold on to word count, then decrement;
	// done with decrypt operation when word count is 0;

	reg [7:1] aes_daddr;	// data buffer address;
	reg [5:0] aes_count;	// word count, in 128-bit words;
	wire aes_dainc;			// increment data address;

	// step once per 128-bit word, when the delayed state is 57;
	assign aes_dainc = aes_inc & (aes_nsm == 6'd57);
	assign aes_czero = (aes_count == 6'd0);

	always @(posedge sysclk)
	begin
		if(aes_start) begin
			aes_daddr <= aes_da;
			aes_count <= aes_size;
		end else if(aes_dainc) begin
			aes_daddr <= aes_daddr + 1;
			aes_count <= aes_count - 1;
		end
	end

	// buffer address logic;
	// initially load with init vector address;
	// use delayed address to deal with preemption;
	// until aes_dph is set the upper address bits follow aes_ia;
	// afterwards they are either the data address or a key address
	// built from a fixed 3'b100 prefix and the delayed state bits [5:2];

	reg [8:0] aes_addr;		// aes buffer request address;
	wire aes_dasel;			// select data word address, else key word address;

	assign aes_dasel = aes_nsm[0] | aes_wr | aes_write;

	always @(posedge sysclk)
	begin
		if( ~aes_dph)
			aes_addr[8:2] <= aes_ia;
		else if(aes_inc)
			aes_addr[8:2] <= aes_dasel? aes_daddr : { 3'b100, aes_nsm[5:2] };

		if( ~aes_busy)
			aes_addr[1:0] <= 2'b00;
		else if(aes_inc) begin
			aes_addr[1] <= aes_nsm[1] ^ aes_dasel;
			// bit 0 can only be set in the write-back states;
			aes_addr[0] <= aes_nsm[0] & aes_wr;
		end
	end

	// data/key mux control;
	// split one 64-bit word into two 32-bit words for aes core;
	// each control is a two-stage shift register, the consumers above
	// use bit 1, i.e. the value from two clocks earlier;

	always @(posedge sysclk)
	begin
		aes_dena <= { aes_dena[0], aes_inc };
		aes_dsel <= { aes_dsel[0], ~aes_nsm[0] };
		aes_ksel <= { aes_ksel[0], aes_nsm[0] };
	end

	// aes core control;
	// aes_ack can stall the aes core;
	// enable, init and go are decoded from the delayed state aes_nsm
	// and piped through three stages before they reach the core;

	reg aes_nsm_ien;		// enable for init phase;
	wire nsm_en;			// enable for init, din, decrypt and dout;
	wire nsm_init;			// init phase;
	wire nsm_go;			// start decrypt operation;
	reg [2:0] aes_nsm_en;	// piped enable;
	reg [2:0] aes_nsm_init;	// piped init phase;
	reg [2:0] aes_nsm_go;	// piped go;

	assign nsm_en = (aes_ack & aes_nsm[0] & ~aes_write);
	assign nsm_init = (aes_nsm[5:2] == 4'b0001);	// delayed state 4..7;
	assign nsm_go = (aes_nsm[2:0] == 3'd7);

	always @(posedge sysclk)
	begin
		// init phase enable is suppressed when hardware chaining is selected;
		aes_nsm_ien <= ~aes_hc & aes_busy & aes_ack & ~aes_dph;
		aes_nsm_en <= { aes_nsm_en[1:0], nsm_en | aes_nsm_ien };
		aes_nsm_init <= { aes_nsm_init[1:0], nsm_init };
		aes_nsm_go[1:0] <= { aes_nsm_go[0], nsm_go };
		// once set, go holds itself until busy drops;
		aes_nsm_go[2] <= aes_busy & (|aes_nsm_go[2:1]);
	end

	assign aes_en = |aes_nsm_en[2:1];
	assign aes_init = aes_nsm_init[2];
	assign aes_go = aes_nsm_go[2];

endmodule