// pi_aes.v 9.17 KB
// pi_aes.v v1 Frank Berndt
// pi aes decryption core;
// :set tabstop=4

// decrypts data in pi buffer in place;
// cbc init vector and expanded key must have been set up;
// key length is fixed at 128bits;

module pi_aes (
	sysclk, reset_l,
	aes_start, aes_stop, aes_busy, aes_ia, aes_hc, aes_da, aes_size, aes_done,
	aes_addr, aes_req, aes_write, aes_ack, aes_in, aes_out, aes_dw
);
	// control wrapper around the aes_d decryption core;
	// issues buffer read requests for init vector, data and expanded key,
	// feeds the core 32 bits at a time from the 64-bit read data path,
	// and writes the result back 32 bits at a time through aes_out;

	// module io ports;

	input sysclk;				// system clock;
	input reset_l;				// system reset, active low;

	input aes_start;			// start aes decryption;
	input aes_stop;				// kill aes operation in progress;
	output aes_busy;			// aes core is busy;
	input [7:1] aes_ia;			// buffer address of cbc init vector;
	input aes_hc;				// hardware chaining of cbc init vector;
	input [7:1] aes_da;			// buffer address of data;
	input [5:0] aes_size;		// number of 128-bit words to decrypt;
								// NOTE(review): aes_last is set when aes_count is
								// already 0 at the end of a word, so this looks
								// like (words - 1) -- confirm against register spec;
	output aes_done;			// aes decryption done;

	output [8:0] aes_addr;		// aes pi buffer address;
	output aes_req;				// request aes buffer access;
	output aes_write;			// buffer write request;
	input aes_ack;				// aes acknowledge;
	input [63:0] aes_in;		// data read from buffer;
	output [31:0] aes_out;		// buffer write data;
	output aes_dw;				// done with another doubleword;

	// data/key delay buffers;
	// must align data and expanded key;
	// read 64 bits from buffer but supply 32 bits to core;

	reg [31:0] aes_ddel;		// lower data delay buffer;
	reg [31:0] aes_edat;		// encrypted aes data;
	wire [31:0] aes_ekey;		// expanded aes key;
	reg [1:0] aes_dena;			// enable data/key latches;
	reg [1:0] aes_dsel;			// data word selects;
	reg [1:0] aes_ksel;			// key word selects;

	// when the delayed enable is set, hold the lower 32 bits of the read
	// data in aes_ddel and present either the upper 32 bits of aes_in or
	// the previously held lower word to the core as data;

	always @(posedge sysclk)
	begin
		if(aes_dena[1]) begin
			aes_ddel <= aes_in[31:0];
			aes_edat <= aes_dsel[1]? aes_in[63:32] : aes_ddel;
		end
	end

	// key word is muxed combinationally with the same upper/held-lower scheme;

	assign aes_ekey = aes_ksel[1]? aes_in[63:32] : aes_ddel;

	// instantiate aes decryption core;
	// use aes_d module instead of aes_cbc_d;
	// main reason is the reuse of the cbc reference buffer
	// for buffering of the decrypted data for write-back;

	wire aes_en;				// aes core enable;
	wire aes_go;				// start decryption operation;
	wire aes_init;				// loading of cbc init vector;
	wire aes_din_req;			// key and encrypted data in request;
	wire [31:0] aes_dout;		// decrypted data output;
	wire aes_dout_val;			// decrypted data valid;

	// ksize is tied to 2'b00; key length is fixed at 128 bits,
	// presumably 2'b00 is the 128-bit encoding of aes_d (defined elsewhere);

	aes_d aes (
		.clk(sysclk),
		.rstn(reset_l),
		.en(aes_en),
		.go(aes_go),
		.ksize(2'b00),
		.din(aes_edat),
		.key(aes_ekey),
		.din_req(aes_din_req),
		.dout(aes_dout),
		.dout_vld(aes_dout_val)
	);

	// capture decrypted data for write-back;
	// must buffer all output words because the last key read
	// requests overlap with output data and have committed aes_en;
	// no need to clear these buffers on reset;
	//
	// aes_buf0..3 shift in the encrypted input words (aes_edat);
	// aes_buf4..7 shift in either aes_buf3 or, while the core output
	// is valid, the core output xored with aes_buf7;
	// aes_buf7 is also the write-back data, so every acked write
	// shifts the second chain by one word;

	reg [31:0] aes_buf0;		// reference vector buffer;
	reg [31:0] aes_buf1;		// reference vector buffer;
	reg [31:0] aes_buf2;		// reference vector buffer;
	reg [31:0] aes_buf3;		// reference vector buffer;
	reg [31:0] aes_buf4;		// reference vector buffer;
	reg [31:0] aes_buf5;		// reference vector buffer;
	reg [31:0] aes_buf6;		// reference vector buffer;
	reg [31:0] aes_buf7;		// reference vector buffer;
	wire [31:0] aes_ddat;		// decrypted data;
	wire aes_wack;				// write-back ack;

	assign aes_wack = aes_write & aes_ack;
	assign aes_dw = aes_wack & aes_addr[1];

	always @(posedge sysclk)
	begin
		if(aes_en & (aes_init | aes_din_req)) begin
			aes_buf0 <= aes_edat;
			aes_buf1 <= aes_buf0;
			aes_buf2 <= aes_buf1;
			aes_buf3 <= aes_buf2;
		end
		if((aes_en & (aes_din_req | aes_dout_val)) | aes_wack) begin
			aes_buf4 <= aes_dout_val? aes_ddat : aes_buf3;
			aes_buf5 <= aes_buf4;
			aes_buf6 <= aes_buf5;
			aes_buf7 <= aes_buf6;
		end
	end

	assign aes_ddat = aes_buf7 ^ aes_dout;
	assign aes_out = aes_buf7;

	// aes control logic;
	// state values have been chosen so that the state becomes
	// the lower address bits for the expanded key location;
	// read requests use 64-bit read data path;
	// writebacks use 32-bit device data path;
	//
	//	000100	4		read iv0/1
	//	000101	5		idle
	//	000110	6		read iv2/3
	//	000111	7		idle
	//	001000	8		read d0/1
	//	001001	9		read k0/1	addr 132
	//	001010	10		read d2/3
	//	001011	11		read k2/3
	//	001100	12..50	idle
	//	001101	13..51	read k4/5 ... k42/43
	//	110100	52		idle
	//	110101	53		idle
	//	110110	54		idle
	//	110111	55		write do0
	//	111000	56		write do1
	//	111001	57		write do2
	//	111010	58		write do3

	reg [5:0] aes_wsm;		// aes word state machine;
	reg aes_breq;			// request buffer;

	// combinational decode of the states that need a buffer access;
	// aes_breq is registered into aes_rreq on the next state advance,
	// so the request appears one state after the value decoded here;
	// blocking assignments are used because this block is purely
	// combinational (aes_breq is a reg only for procedural assignment);

	always @(aes_wsm)
	begin
		case(aes_wsm)
			6'd5,							// read iv0,1;
			6'd7, 6'd8, 6'd9, 6'd10,		// read di0..3, k0..3;
			6'd12, 6'd14, 6'd16, 6'd18,		// read k4 ...;
			6'd20, 6'd22, 6'd24, 6'd26,
			6'd28, 6'd30, 6'd32, 6'd34,
			6'd36, 6'd38, 6'd40, 6'd42,
			6'd44, 6'd46, 6'd48, 6'd50,		// read ...k43;
			6'd54, 6'd55, 6'd56, 6'd57:		// write do0..3;
				aes_breq = 1'b1;
			default:
				aes_breq = 1'b0;
		endcase
	end

	// aes state machine control;
	// aes core takes 44 clocks to decrypt 128-bit word;
	// 4 more clocks are needed for decrypted data write-back;
	// one more clock to setup pipeline for next word;
	// state machine lags three clocks to align with read data;

	wire aes_reset;			// stop aes engine;
	reg aes_fack;			// kick first ack;
	reg aes_busy;			// aes block is busy;
	reg [5:0] aes_nsm;		// delayed aes_wsm;
	wire aes_inc;			// next aes word state;
	wire aes_nw;			// advance aes_wsm to next word state;
	reg aes_dph;			// data phase in next clock;
	reg aes_rreq;			// buffer read request;
	wire aes_idle;			// no buffer request;
	reg aes_del_req;		// delayed buffer request for re-issue;
	wire aes_wr;			// decode of write-back state;
	reg aes_write;			// decrypted data write back phase;
	wire aes_czero;			// word count is 0;
	reg aes_last;			// last data word;

	// done is signalled on the first ack after aes_last has been set;
	// stop and done both drop aes_busy;
	// the state machine advances on an ack or when no request was pending;

	assign aes_done = aes_ack & aes_last;
	assign aes_reset = aes_stop | aes_done;
	assign aes_inc = aes_ack | aes_idle;

	// write-back decode uses the delayed state;
	// next-word decode uses the undelayed state;

	assign aes_wr = (aes_nsm == 6'd54)
		| (aes_nsm == 6'd55)
		| (aes_nsm == 6'd56)
		| (aes_nsm == 6'd57);
	assign aes_nw = (aes_wsm == 6'd58);

	always @(posedge sysclk)
	begin
		// one-clock delayed start, forces the first state advance;
		aes_fack <= aes_start;
		// busy is set by start and cleared by stop/done or reset;
		// clearing has priority over aes_start;
		if(reset_l == 1'b0)
			aes_busy <= 1'b0;
		else
			aes_busy <= ~aes_reset & (aes_busy | aes_start);
		if( ~aes_busy) begin
			// not busy: park state machine at state 4;
			// aes_start directly raises the first read request;
			aes_wsm <= 6'd4;
			aes_nsm <= 6'd4;
			aes_rreq <= aes_start;
			aes_dph <= 1'b0;
			aes_write <= 1'b0;
			aes_last <= 1'b0;
		end else if(aes_fack | aes_inc) begin
			// after state 58 wrap to state 8, skipping the init vector states;
			aes_wsm <= aes_nw? 6'd8 : (aes_wsm + 1);
			aes_nsm <= aes_wsm;
			// aes_nw forces a request for the wrap to state 8;
			aes_rreq <= aes_breq | aes_nw;
			// sticky until not busy, set once aes_nsm[2:0] reaches 6;
			aes_dph <= aes_dph | (aes_nsm[2:0] == 3'd6);
			aes_write <= aes_wr;
			// word count is sampled before the decrement of this advance;
			aes_last <= aes_nw & aes_czero;
		end
		// remember that a request was outstanding in the previous clock;
		if(reset_l == 1'b0)
			aes_del_req <= 1'b0;
		else
			aes_del_req <= aes_start | aes_req;
	end

	// issue buffer request;
	// reissue when not acked;
	// idle advances state machine when no buffer access is needed;

	assign aes_req = aes_rreq | (aes_del_req & ~aes_ack);
	assign aes_idle = ~aes_del_req;

	// hold on to data buffer address at start time, then increment;
	// hold on to word count, then decrement;
	// done with decrypt operation when word count is 0;

	reg [7:1] aes_daddr;	// data buffer address;
	reg [5:0] aes_count;	// word count, in 128-bit words;
	wire aes_dainc;			// increment data address;

	// step address and count once per 128-bit word,
	// on the advance out of delayed state 57;

	assign aes_dainc = aes_inc & (aes_nsm == 6'd57);
	assign aes_czero = (aes_count == 6'd0);

	always @(posedge sysclk)
	begin
		if(aes_start) begin
			aes_daddr <= aes_da;
			aes_count <= aes_size;
		end else if(aes_dainc) begin
			aes_daddr <= aes_daddr + 1;
			aes_count <= aes_count - 1;
		end
	end

	// buffer address logic;
	// initially load with init vector address;
	// use delayed address to deal with preemption;

	reg [8:0] aes_addr;		// aes buffer request address;
	wire aes_dasel;			// select data word address, else key word address;

	// data address is used for odd delayed states and for write-back;

	assign aes_dasel = aes_nsm[0] | aes_wr | aes_write;

	always @(posedge sysclk)
	begin
		// upper bits: init vector address until the data phase starts,
		// then data address or key address built from 3'b100 and the state;
		if( ~aes_dph)
			aes_addr[8:2] <= aes_ia;
		else if(aes_inc)
			aes_addr[8:2] <= aes_dasel? aes_daddr : { 3'b100, aes_nsm[5:2] };
		// lower bits: cleared when not busy;
		// bit 0 can only be set in write-back states (32-bit writes);
		if( ~aes_busy)
			aes_addr[1:0] <= 2'b00;
		else if(aes_inc) begin
			aes_addr[1] <= aes_nsm[1] ^ aes_dasel;
			aes_addr[0] <= aes_nsm[0] & aes_wr;
		end
	end

	// data/key mux control;
	// split one 64-bit word into two 32-bit words for aes core;
	// two-stage shift registers, stage [1] drives the latches/muxes;

	always @(posedge sysclk)
	begin
		aes_dena <= { aes_dena[0], aes_inc };
		aes_dsel <= { aes_dsel[0], ~aes_nsm[0] };
		aes_ksel <= { aes_ksel[0], aes_nsm[0] };
	end

	// aes core control;
	// aes_ack can stall the aes core;

	reg aes_nsm_ien;			// enable for init phase;
	wire nsm_en;				// enable for init, din, decrypt and dout;
	wire nsm_init;				// init phase;
	wire nsm_go;				// start decrypt operation;
	reg [2:0] aes_nsm_en;		// piped enable;
	reg [2:0] aes_nsm_init;		// piped init phase;
	reg [2:0] aes_nsm_go;		// piped go;

	// enable on acked odd delayed states outside of write-back;
	// init phase covers delayed states 4..7;
	// go is decoded when the low three bits of the delayed state are 7;

	assign nsm_en = (aes_ack & aes_nsm[0] & ~aes_write);
	assign nsm_init = (aes_nsm[5:2] == 4'b0001);
	assign nsm_go = (aes_nsm[2:0] == 3'd7);

	always @(posedge sysclk)
	begin
		// init phase enable is suppressed when aes_hc is set,
		// so acks before the data phase do not enable the core;
		aes_nsm_ien <= ~aes_hc & aes_busy & aes_ack & ~aes_dph;
		aes_nsm_en <= { aes_nsm_en[1:0], nsm_en | aes_nsm_ien };
		aes_nsm_init <= { aes_nsm_init[1:0], nsm_init };
		aes_nsm_go[1:0] <= { aes_nsm_go[0], nsm_go };
		// once set, go stays set while busy;
		aes_nsm_go[2] <= aes_busy & (|aes_nsm_go[2:1]);
	end

	// each enable is stretched over two clocks by or-ing two pipe stages;

	assign aes_en = |aes_nsm_en[2:1];
	assign aes_init = aes_nsm_init[2];
	assign aes_go = aes_nsm_go[2];

endmodule