// pi_flash.v (28.9 KB listing; viewer line-number gutter removed)
// pi_flash.v v1 Frank Berndt
// pi flash interface controller;
// :set tabstop=4

// Nand flash controller;
// implements hardware ecc generation, detection and correction;
// used by cpu through PI_FLASH_CTRL;
// used by traditional dma engine for cartridge emulation;

// pi_flash: nand flash interface controller;
// a one-hot sequencer (flc_pipe) walks through command, up to four
// address cycles, optional ready/busy waits, a data phase and an
// ecc correction phase; every flash byte cycle is an io bus cycle
// requested with flc_breq and timed by flx_pipe under flc_conf;
// data moves to/from the pi buffer as 32-bit words;
// ecc is generated per 256-byte half and checked/corrected on reads;

module pi_flash (
	sysclk, reset,
	flc_conf, flc_start, flc_stop, flc_busy, flc_done,
	flc_addr, flc_dev, flc_buf, flc_cmd, flc_adph, flc_rdph, flc_wdph,
	flc_wrdy, flc_ecc, flc_mcmd,
	flc_size_dec, flc_size_last, flc_sbe, flc_dbe,
	flc_breq, flc_bgnt, flc_bre, flc_in, flc_brd, flc_out,
	flb_addr, flb_req, flb_write, flb_ack, flb_in, flb_out,
	fl_ce, fl_ale, fl_cle, fl_re, fl_we, fl_wp, fl_ryby
);
	// module io ports;

	input sysclk;				// system clock;
	input reset;				// controller and system reset;

	// request interface;
	// flc_conf fields as decoded in this module:
	//	[7:0]	cle/ale active times, one bit per flx_pipe stage;
	//	[15:8]	we active times;
	//	[23:16]	re active times;
	//	[26:24]	read sample stage select;
	//	[30:28]	end-of-cycle stage select;
	//	[31]	write protect;

	input [31:0] flc_conf;		// flash config;
	input flc_start;			// start flash operation;
	input flc_stop;				// stop flash operation in progress;
	output flc_busy;			// flash controller is busy;
	output flc_done;			// flash controller is done;
	input [29:0] flc_addr;		// flash address;
	input [1:0] flc_dev;		// device number;
	input flc_buf;				// data buffer to use;
	input [7:0] flc_cmd;		// flash command;
	input [3:0] flc_adph;		// address phase selects;
	input flc_rdph;				// read data phase;
	input flc_wdph;				// write data phase;
	input flc_wrdy;				// wait for flash ready;
	input flc_ecc;				// enable ecc detection/correction;
	input flc_mcmd;				// multi-cycle command;
	output flc_size_dec;		// decrement flc_size;
	input flc_size_last;		// size has reached last byte;
	output flc_sbe;				// single-bit error;
	output flc_dbe;				// double-bit error;

	// io bus interface;

	output flc_breq;			// flash bus request;
	input flc_bgnt;				// flash bus grant;
	output flc_bre;				// io bus read enable;
	input [15:0] flc_in;		// io bus read data;
	output flc_brd;				// flash bus read;
	output [15:0] flc_out;		// flash output data;

	// pi buffer interface;

	output [8:0] flb_addr;		// flash pi buffer address;
	output flb_req;				// request pi buffer for flash data;
	output flb_write;			// buffer write request;
	input flb_ack;				// flash buffer ack;
	input [31:0] flb_in;		// buffer read data;
	output [31:0] flb_out;		// buffer write data;

	// nand flash controls;

	output [3:0] fl_ce;			// chip enables;
	output fl_ale;				// address latch enable;
	output fl_cle;				// command latch enable;
	output fl_re;				// read enable;
	output fl_we;				// write enable;
	output fl_wp;				// write protect;
	input fl_ryby;				// ready/busy;

	// sample ready/busy line;
	// require 0 or 1 for three consecutive clocks;
	// dev_rb[0] is set when the last three samples were all 0,
	// dev_rb[1] is set when the last three samples were all 1;

	reg [2:0] dev_ryby;		// ryby delay regs;
	reg [1:0] dev_rb;		// ryby all 0 or 1;

	always @(posedge sysclk)
	begin
		dev_ryby <= { dev_ryby[1:0], fl_ryby };
		dev_rb[0] <= (dev_ryby == 3'b000);
		dev_rb[1] <= (dev_ryby == 3'b111);
	end

	// flash controller state machine;
	// request io bus for flash byte reads and writes;
	// output command, address and data phases;
	// pi buffer accesses are done in multiples of four bytes;
	// flash state machine, one-hot in flc_pipe;
	//
	//	0000000001	cmd cycle, CLE;
	//	0000000010	a0 cycle, ALE;
	//	0000000100	a1 cycle, ALE;
	//	0000001000	a2 cycle, ALE;
	//	0000010000	a3 cycle, ALE;
	//	0000100000	wait for assertion of RYBY;
	//	0001000000	wait for deassertion of RYBY;
	//	0010000000	data phases;
	//	0100000000	ecc correction;
	//	1000000000	done; raises flc_done and flc_reset;

	reg flc_busy;			// flash controller is busy;
	reg flc_new;			// delayed flc_start;
	wire flc_reset;			// reset flash controller;
	reg [9:0] flc_pipe;		// flash controller state;
	wire flc_next;			// advance flc_pipe state;
	reg flx_ack;			// io bus cycle ack;
	wire flc_xrph;			// in read data phase;
	wire flc_xwph;			// in write data phase;
	wire flc_xeph;			// in ecc correction phase;
	wire flc_ecack;			// ecc correction ack;

	assign flc_done = flc_pipe[9] & flc_busy;
	assign flc_reset = reset | flc_stop | flc_pipe[9];

	// flc_pipe is parked at the cmd state whenever the controller is idle;

	always @(posedge sysclk)
	begin
		flc_new <= flc_start;
		flc_busy <= ~flc_reset & (flc_busy | flc_start);
		if( ~flc_busy)
			flc_pipe <= 10'd1;
		else if(flc_next)
			flc_pipe <= { flc_pipe[8:0], 1'b0 };
	end

	assign flc_xrph = flc_pipe[7] & flc_rdph;
	assign flc_xwph = flc_pipe[7] & flc_wdph;
	assign flc_xeph = flc_pipe[8];

	// page column address counter;
	// loaded from device address flc_addr;
	// data phase ends at end of page or when byte count reaches 0;

	wire flc_dack;			// data phase ack;
	reg flc_half;			// which half to start in;
	reg [9:0] flc_col;		// column counter;
	wire flc_col15;			// low nibble of column is 15;
	wire flc_col527;		// last in spare block;
	reg flc_last;			// last data byte;
	reg flb_last;			// pi buffer write ack of last data byte;

	assign flc_dack = flc_pipe[7] & flx_ack;
	assign flc_size_dec = flc_dack;

	always @(posedge sysclk)
	begin
		if(flc_start)
			flc_half <= flc_addr[8];
		if(flc_start)
			flc_col <= { 1'b0, flc_addr[8:0] };
		else if(flc_dack)
			flc_col <= flc_col + 1;
		flc_last <= flc_size_last | flc_col527;
		if( ~flc_busy)
			flb_last <= 1'b0;
		else if(flx_ack)
			flb_last <= flc_last;
	end

	// flc_col527 only tests bit 9 and the low nibble;
	// NOTE(review): relies on the column never exceeding 527 — confirm;

	assign flc_col15 = (flc_col[3:0] == 4'd15);
	assign flc_col527 = flc_col[9] & flc_col15;

	// request io bus;
	// guarantee a hole of one clock so that another
	// pending io bus requestor can win a cycle;
	// the request is dropped while flx_ack is set;
	// the cmd cycle is only requested once ryby was sampled high;

	wire flc_breq;			// io bus request;
	wire flc_bwreq;			// request io bus for data write cycle;
	wire flc_brreq;			// request io bus for data read cycle;

	assign flc_breq = flc_busy & ~flx_ack &
		( (flc_pipe[0] & dev_rb[1])
		| (flc_pipe[1] & flc_adph[0])
		| (flc_pipe[2] & flc_adph[1])
		| (flc_pipe[3] & flc_adph[2])
		| (flc_pipe[4] & flc_adph[3])
		| (flc_xrph & flc_brreq)
		| (flc_xwph & flc_bwreq)
		);

	// advance flash state machine;
	// address and ryby wait states that are not selected are
	// passed through in a single clock;
	// write data phase ends on the io bus ack of the last byte;
	// read data phase ends on the pi buffer ack after the last byte;

	assign flc_next = (flc_pipe[0] & flx_ack)
		| (flc_pipe[1] & (flc_adph[0]? flx_ack : 1'b1))
		| (flc_pipe[2] & (flc_adph[1]? flx_ack : 1'b1))
		| (flc_pipe[3] & (flc_adph[2]? flx_ack : 1'b1))
		| (flc_pipe[4] & (flc_adph[3]? flx_ack : 1'b1))
		| (flc_pipe[5] & (flc_wrdy? dev_rb[0] : 1'b1))
		| (flc_pipe[6] & (flc_wrdy? dev_rb[1] : 1'b1))
		| (flc_pipe[7] & ~(flc_rdph | flc_wdph))
		| (flc_pipe[7] & flc_wdph & flc_last & flx_ack)
		| (flc_pipe[7] & flc_rdph & flb_last & flb_ack)
		| (flc_pipe[8] & flc_ecack);

	// pi buffer state machine;
	// four data states, one pi buffer access state;
	// writes to flash start with pi buffer access phase;
	// outside of the data phase the state is preloaded from
	// flc_wdph and the low two column bits;
	//
	//	000	read/write d0;		read start;
	//	001	read/write d1;
	//	010	read/write d2;
	//	011	read/write d3;
	//	1xx	pi buffer access;	write start;

	reg [2:0] flc_wsm;		// word state machine;
	reg [1:0] flc_wdel;		// delayed flc_wsm[2] buffer access state;
	wire flx_eack;			// early io bus ack;
	wire flc_wnext;			// advance word state machine;
	wire flc_wacc;			// force buffer access;

	always @(posedge sysclk)
	begin
		if( ~flc_pipe[7])
			flc_wsm <= { flc_wdph, flc_col[1:0] };
		else if(flc_wnext) begin
			if(flc_wsm[2])
				flc_wsm[2] <= 0;
			else
				flc_wsm <= { flc_wacc, 2'b00 } | (flc_wsm + 1);
		end
		flc_wdel <= { flc_wdel[0], flc_wsm[2] };
	end

	// write cycles additionally wait two clocks after a buffer access;

	assign flc_brreq = (flc_wsm[2] == 1'b0);
	assign flc_bwreq = ({ flc_wsm[2], flc_wdel } == 3'b000);
	assign flc_wnext = flx_eack | flb_ack;
	assign flc_wacc = flc_xrph & flc_last;

	// request pi buffer access;
	// only during read or write data phases;
	// cannot use flc_col due to different inc time for reads and writes;
	// 32-bit word address increments on acks;
	// buf0 counts: 0..511, 1024..1039, index 0..63, 128..129;
	// buf1 counts: 512..1023, 1040..1055, index 64..127, 130..131;
	// NOTE(review): the index figures above look like 64-bit units;
	// flb_addr itself is formed as a 32-bit word address with flc_buf
	// on bit 7 and the spare area on bit 8 — confirm;

	wire flb_rwreq;			// pi buffer data request;
	wire flb_ecreq;			// pi buffer ecc correction request;
	wire flb_ecwr;			// ecc correction write phase;
	reg [8:0] flb_addr;		// flash pi buffer address;
	wire flb_addr_load;		// load initial pi buffer address;
	reg flb_addr_inc;		// increment pi buffer address;
	reg flb_w127;			// last word in data region;
	wire [8:2] ecc_addr;	// address of ecc byte to correct;

	assign flb_addr_load = flc_new;
	assign flb_rwreq = (flc_xrph | flc_xwph) & flc_wsm[2];
	assign flb_req = flb_rwreq | flb_ecreq;
	assign flb_write = flc_xeph? flb_ecwr : flc_rdph;

	// address is loaded one clock after flc_start, when flc_col is valid;
	// during ecc correction it follows the word to be corrected;
	// after the last data word it jumps to the spare words of the buffer;

	always @(posedge sysclk)
	begin
		flb_addr_inc <= flb_ack;
		if(flb_addr_load)
			flb_addr <= { 1'b0, flc_buf, flc_col[8:2] };
		else if(flc_xeph)
			flb_addr <= { 1'b0, flc_buf, ecc_addr[8:2] };
		else if(flb_addr_inc)
			flb_addr <= flb_w127? { 6'b100000, flc_buf, 2'd0 } : (flb_addr + 1);
		flb_w127 <= (flb_addr[6:0] == 7'd127);
	end

	// pi buffer data buffer;
	// holds word to be written to pi buffer;
	// holds word read from pi buffer;
	// latch bytes depending on column address;
	// the flash read byte is replicated on all four lanes and
	// flb_bwe picks the lane; buffer reads pass through the ecc xor;

	wire [7:0] flx_in;		// io bus read byte;
	wire [31:0] flb_data;	// buffer data;
	wire [31:0] flb_xor;	// ecc correction xor;
	reg flb_dsel;			// select buffer data or flash read data;
	reg [31:0] flb_out;		// write data buffer;
	reg [3:0] flb_bwe;		// buffer write enables;

	assign flx_in = flc_in[15:8];
	assign flb_data = flb_dsel? (flb_in ^ flb_xor) : { flx_in, flx_in, flx_in, flx_in };

	always @(posedge sysclk)
	begin
		if(flb_bwe[0])
			flb_out[31:24] <= flb_data[31:24];
		if(flb_bwe[1])
			flb_out[23:16] <= flb_data[23:16];
		if(flb_bwe[2])
			flb_out[15:8] <= flb_data[15:8];
		if(flb_bwe[3])
			flb_out[7:0] <= flb_data[7:0];
	end

	// flash output data mux;
	// select cmd, address bytes, or write data bytes;
	// force data outputs of other port to 0;

	reg [7:0] flb_byte;		// byte for write data cycle;
	wire [7:0] flc_wdata;	// flash write data;
	reg [7:0] flx_out;		// flash output data;
	reg [7:0] ecc_out;		// ecc output byte;
	reg ecc_sel;			// select ecc instead of buffer byte;

	// combinational byte lane mux, byte 0 is the most significant lane;
	// blocking assignments, this block infers no storage;

	always @(flc_col[1:0] or flb_out)
	begin
		case(flc_col[1:0])
			2'd0: flb_byte = flb_out[31:24];
			2'd1: flb_byte = flb_out[23:16];
			2'd2: flb_byte = flb_out[15:8];
			2'd3: flb_byte = flb_out[7:0];
		endcase
	end

	// address bit 8 is not sent in any address cycle;
	// it is latched in flc_half and or'ed into bit 0 of the command;

	assign flc_wdata = flc_pipe[0]? { flc_cmd[7:1], flc_cmd[0] | flc_half }
		: flc_pipe[1]? flc_addr[7:0]
		: flc_pipe[2]? flc_addr[16:9]
		: flc_pipe[3]? flc_addr[24:17]
		: flc_pipe[4]? { 3'd0, flc_addr[29:25] }
		: ecc_sel? ~ecc_out : flb_byte;

	always @(posedge sysclk)
	begin
		flx_out <= flc_wdata;
	end

	assign flc_out[15:8] = flx_out;
	assign flc_out[7:0] = 8'h00;

	// decode flash io cycle type;

	wire flc_cle;			// command latch cycle;
	wire flc_ale;			// address latch cycle;
	wire flc_we;			// write cycle;
	wire flc_re;			// read cycle;

	assign flc_cle = flc_pipe[0];
	assign flc_ale = |flc_pipe[4:1];
	assign flc_we = flc_cle | flc_ale | flc_xwph;
	assign flc_re = flc_xrph;

	// flash read/write cycle state machine;
	// flx_pipe is held at stage 0 until the bus is granted,
	// then shifts by one stage per clock while flc_bgnt is set;
	// flx_cle/flx_ale/flx_we/flx_re hold the cycle type, captured
	// while the request is pending and not yet granted;

	reg [7:0] flx_pipe;		// flash cycle pipe;
	reg flx_cle;			// command latch cycle;
	reg flx_ale;			// address latch cycle;
	reg flx_we;				// write cycle;
	reg flx_re;				// read cycle;

	always @(posedge sysclk)
	begin
		flx_pipe <= flc_bgnt? { flx_pipe[6:0], 1'b0 } : 8'd1;
		if(flc_breq & ~flc_bgnt) begin
			flx_cle <= flc_cle;
			flx_ale <= flc_ale;
			flx_we <= flc_we;
			flx_re <= flc_re;
		end
	end

	assign flc_brd = flx_re & flc_bgnt;

	// end of cycle decoder;
	// flc_conf bits are selects of 8->1 mux;
	// combinational, hence blocking assignments;

	wire [2:0] flc_tend;	// end time config bits;
	reg flx_tend;			// last cycle;

	assign flc_tend = flc_conf[30:28];

	always @(flc_tend or flx_pipe)
	begin
		case(flc_tend)
			0: flx_tend = flx_pipe[0];
			1: flx_tend = flx_pipe[1];
			2: flx_tend = flx_pipe[2];
			3: flx_tend = flx_pipe[3];
			4: flx_tend = flx_pipe[4];
			5: flx_tend = flx_pipe[5];
			6: flx_tend = flx_pipe[6];
			7: flx_tend = flx_pipe[7];
		endcase
	end

	// flx_eack is the unregistered end of cycle,
	// flx_ack follows one clock later;

	assign flx_eack = flx_tend;

	always @(posedge sysclk)
	begin
		flx_ack <= flx_tend;
	end

	// read sample time decoder;
	// flc_conf bits are selects of 8->1 mux;
	// combinational, hence blocking assignments;

	wire [2:0] flc_tdin;	// sample time config bits;
	reg flx_tdin;			// sample cycle;

	assign flc_tdin = flc_conf[26:24];

	always @(flc_tdin or flx_pipe)
	begin
		case(flc_tdin)
			0: flx_tdin = flx_pipe[0];
			1: flx_tdin = flx_pipe[1];
			2: flx_tdin = flx_pipe[2];
			3: flx_tdin = flx_pipe[3];
			4: flx_tdin = flx_pipe[4];
			5: flx_tdin = flx_pipe[5];
			6: flx_tdin = flx_pipe[6];
			7: flx_tdin = flx_pipe[7];
		endcase
	end

	reg flc_bre;			// io bus read enable;
	wire flb_ecen;			// enable buffer for ecc correction;
	reg flb_rack;			// pi buffer read ack;

	// byte write enables for flb_out;
	// a pi buffer read ack loads all four lanes,
	// a flash read loads only the lane selected by the column;

	always @(posedge sysclk)
	begin
		flc_bre <= flx_tdin & flx_re;
		flb_dsel <= flc_xwph | flc_xeph;
		flb_rack <= flb_ack & (flc_xwph | flb_ecen);
		flb_bwe[0] <= flb_rack | (flc_bre & (flc_col[1:0] == 2'd0));
		flb_bwe[1] <= flb_rack | (flc_bre & (flc_col[1:0] == 2'd1));
		flb_bwe[2] <= flb_rack | (flc_bre & (flc_col[1:0] == 2'd2));
		flb_bwe[3] <= flb_rack | (flc_bre & (flc_col[1:0] == 2'd3));
	end

	// decode active times of flash control signals;
	// simple and/or of flx_pipe with the flc_conf stage masks;

	wire flx_xle;			// cle/ale active times;
	wire flx_xwe;			// we active times;
	wire flx_xre;			// re active times;
	wire flx_xwp;			// flash is write-protected;

	assign flx_xle = flc_bgnt & |(flx_pipe & flc_conf[7:0]);
	assign flx_xwe = flc_bgnt & |(flx_pipe & flc_conf[15:8]);
	assign flx_xre = flc_bgnt & |(flx_pipe & flc_conf[23:16]);
	assign flx_xwp = reset | flc_conf[31];

	// flash cycle control;
	// output register in pad layer delays one clock;
	// we and re are forced inactive during reset;

	assign fl_cle = flx_cle & flx_xle;
	assign fl_ale = flx_ale & flx_xle;
	assign fl_we = ~reset & flx_we & flx_xwe;
	assign fl_re = ~reset & flx_re & flx_xre;
	assign fl_wp = flx_xwp;

	// device chip selects;
	// must be kept active for multi-cycle commands;
	// only one device can be active at a time;
	// fl_ce go through another flop in the pad layer;
	// ce is active low;
	// NOTE(review): fl_ce is driven active high here, the inversion
	// is presumably done in the pad layer — confirm;

	reg flc_mult;			// delayed flc_mcmd to line up with flc_busy;
	reg [3:0] fl_ce;		// module port chip enables;
	reg flc_tce;			// toshiba fix flop;
	wire ce_ena;			// enable ce;

	assign ce_ena = ~reset & (flc_mult | (flc_busy & ~flc_tce));

	// flc_tce captures flc_last on each read sample and then
	// removes the chip enable unless flc_mult keeps it active;

	always @(posedge sysclk)
	begin
		if(flc_busy == 1'b0)
			flc_tce <= 1'b0;
		else if(flc_bre)
			flc_tce <= flc_last;
		if(reset)
			flc_mult <= 0;
		else if(flc_start)
			flc_mult <= flc_mcmd;
		fl_ce[0] <= ce_ena & (flc_dev == 2'd0);
		fl_ce[1] <= ce_ena & (flc_dev == 2'd1);
		fl_ce[2] <= ce_ena & (flc_dev == 2'd2);
		fl_ce[3] <= ce_ena & (flc_dev == 2'd3);
	end

	// ecc logic;
	// select byte from output or input stream;
	// ecc_ena is set by flc_start with flc_ecc and held while busy;

	reg ecc_ena;			// enable ecc;
	reg [9:0] ecc_col;		// ecc column address;
	reg [7:0] ecc_byte;		// byte that we do ecc on;
	wire [7:0] ecc_ibyte;		// inverted ecc byte;
	wire ecc_bp;			// byte parity;

	always @(posedge sysclk)
	begin
		ecc_ena <= (flc_busy & ecc_ena) | (flc_start & flc_ecc);
		ecc_col <= flc_col;
		ecc_byte <= flc_xrph? flx_in : flx_out;
	end

	assign ecc_bp = ^ecc_byte;
	assign ecc_ibyte = ~ecc_byte;

	// generate column parity bits;
	// each pair splits the byte into two complementary bit groups;

	wire [2:0] ecc_cph;		// high column parity;
	wire [2:0] ecc_cpl;		// low column parity;

	assign ecc_cph[2] = ecc_byte[7] ^ ecc_byte[6] ^ ecc_byte[5] ^ ecc_byte[4];
	assign ecc_cpl[2] = ecc_byte[3] ^ ecc_byte[2] ^ ecc_byte[1] ^ ecc_byte[0];
	assign ecc_cph[1] = ecc_byte[7] ^ ecc_byte[6] ^ ecc_byte[3] ^ ecc_byte[2];
	assign ecc_cpl[1] = ecc_byte[5] ^ ecc_byte[4] ^ ecc_byte[1] ^ ecc_byte[0];
	assign ecc_cph[0] = ecc_byte[7] ^ ecc_byte[5] ^ ecc_byte[3] ^ ecc_byte[1];
	assign ecc_cpl[0] = ecc_byte[6] ^ ecc_byte[4] ^ ecc_byte[2] ^ ecc_byte[0];

	// decode ecc byte positions;
	// ecc of first half on 525..527;
	// ecc of second half on 520..522;
	// write selects decode one column ahead of the ecc byte because
	// ecc_sel and ecc_out are registered on the ack of the previous byte;
	// read selects decode the column of the ecc byte itself;

	wire [2:0] ecc0_wsel;		// ecc0 write selects;
	wire [2:0] ecc1_wsel;		// ecc1 write selects;
	wire [2:0] ecc0_rsel;		// ecc0 byte selects;
	wire [2:0] ecc1_rsel;		// ecc1 byte selects;

	assign ecc0_wsel[0] = ecc_col[9] & (ecc_col[3:0] == 4'd12);
	assign ecc0_wsel[1] = ecc_col[9] & (ecc_col[3:0] == 4'd13);
	assign ecc0_wsel[2] = ecc_col[9] & (ecc_col[3:0] == 4'd14);
	assign ecc1_wsel[0] = ecc_col[9] & (ecc_col[3:0] == 4'd7);
	assign ecc1_wsel[1] = ecc_col[9] & (ecc_col[3:0] == 4'd8);
	assign ecc1_wsel[2] = ecc_col[9] & (ecc_col[3:0] == 4'd9);

	assign ecc0_rsel[0] = ecc0_wsel[1];
	assign ecc0_rsel[1] = ecc0_wsel[2];
	assign ecc0_rsel[2] = ecc_col[9] & (ecc_col[3:0] == 4'd15);
	assign ecc1_rsel[0] = ecc1_wsel[1];
	assign ecc1_rsel[1] = ecc1_wsel[2];
	assign ecc1_rsel[2] = ecc_col[9] & (ecc_col[3:0] == 4'd10);

	always @(posedge sysclk)
	begin
		if( ~ecc_ena)
			ecc_sel <= 0;
		else if(flc_dack)
			ecc_sel <= |{ ecc0_wsel, ecc1_wsel };
	end

	// calculate ecc syndromes;
	// ecc1* calculates and holds syndrome of last half;
	// ecc0* captures ecc1* at end of first half;
	// detection is done after the 3 ecc bytes per area have been read;
	// correction is done after completion of page read;
	// the calculated ecc is xor'ed with read ecc as ecc bytes move in;
	// bits [2:0] of each syndrome are column parity,
	// bits [10:3] are row parity over byte index bits [7:0];

	wire ecc_clr;			// clear ecc syndrome;
	wire ecc_rack;			// ecc read ack;
	reg ecc_ack;			// ecc byte ack;
	reg [9:0] ecc_idx;		// byte index;
	reg ecc_next;			// next ecc step;
	reg [10:0] ecc0h;		// upper ecc syndrome of first half;
	reg [10:0] ecc0l;		// lower ecc syndrome of first half;
	reg [10:0] ecc1h;		// upper ecc syndrome of second half;
	reg [10:0] ecc1l;		// lower ecc syndrome of second half;
	reg ecc0_hold;			// hold calculated ecc0;
	reg [2:0] ecc0_en;		// read ecc0 byte xor enables;
	reg [2:0] ecc1_en;		// read ecc1 byte xor enables;

	assign ecc_clr = ~ecc_ena | ecc0_hold;
	assign ecc_rack = ecc_ack & flc_xrph & ecc_col[9];

	always @(posedge sysclk)
	begin
		ecc_ack <= flc_dack;
		if(ecc_ack)
			ecc_idx <= ecc_col;
		// only bytes of the data region (column bit 9 clear) are accumulated;
		ecc_next <= ecc_ack & ~ecc_col[9];
		// byte 255 was the last byte of the first half;
		ecc0_hold <= ecc_next & (ecc_idx == 10'd255);
		ecc0_en <= {3{ecc_rack}} & ecc0_rsel;
		ecc1_en <= {3{ecc_rack}} & ecc1_rsel;
		if(ecc_clr) begin
			ecc1l <= 11'd0;
			ecc1h <= 11'd0;
		end else if(ecc_next) begin
			// accumulate column parity and, per index bit,
			// the byte parity into the low or high row parity;
			ecc1l[2:0] <= ecc1l[2:0] ^ ecc_cpl[2:0];
			ecc1h[2:0] <= ecc1h[2:0] ^ ecc_cph[2:0];
			if(~ecc_idx[0])
				ecc1l[3] <= ecc1l[3] ^ ecc_bp;
			if(ecc_idx[0])
				ecc1h[3] <= ecc1h[3] ^ ecc_bp;
			if(~ecc_idx[1])
				ecc1l[4] <= ecc1l[4] ^ ecc_bp;
			if(ecc_idx[1])
				ecc1h[4] <= ecc1h[4] ^ ecc_bp;
			if(~ecc_idx[2])
				ecc1l[5] <= ecc1l[5] ^ ecc_bp;
			if(ecc_idx[2])
				ecc1h[5] <= ecc1h[5] ^ ecc_bp;
			if(~ecc_idx[3])
				ecc1l[6] <= ecc1l[6] ^ ecc_bp;
			if(ecc_idx[3])
				ecc1h[6] <= ecc1h[6] ^ ecc_bp;
			if(~ecc_idx[4])
				ecc1l[7] <= ecc1l[7] ^ ecc_bp;
			if(ecc_idx[4])
				ecc1h[7] <= ecc1h[7] ^ ecc_bp;
			if(~ecc_idx[5])
				ecc1l[8] <= ecc1l[8] ^ ecc_bp;
			if(ecc_idx[5])
				ecc1h[8] <= ecc1h[8] ^ ecc_bp;
			if(~ecc_idx[6])
				ecc1l[9] <= ecc1l[9] ^ ecc_bp;
			if(ecc_idx[6])
				ecc1h[9] <= ecc1h[9] ^ ecc_bp;
			if(~ecc_idx[7])
				ecc1l[10] <= ecc1l[10] ^ ecc_bp;
			if(ecc_idx[7])
				ecc1h[10] <= ecc1h[10] ^ ecc_bp;
		end else begin
			// fold the stored ecc bytes of the second half into ecc1;
			// stored bytes are inverted, so ecc_ibyte is used;
			if(ecc1_en[0]) begin
				ecc1h[6] <= ecc1h[6] ^ ecc_ibyte[7];
				ecc1l[6] <= ecc1l[6] ^ ecc_ibyte[6];
				ecc1h[5] <= ecc1h[5] ^ ecc_ibyte[5];
				ecc1l[5] <= ecc1l[5] ^ ecc_ibyte[4];
				ecc1h[4] <= ecc1h[4] ^ ecc_ibyte[3];
				ecc1l[4] <= ecc1l[4] ^ ecc_ibyte[2];
				ecc1h[3] <= ecc1h[3] ^ ecc_ibyte[1];
				ecc1l[3] <= ecc1l[3] ^ ecc_ibyte[0];
			end
			if(ecc1_en[1]) begin
				ecc1h[10] <= ecc1h[10] ^ ecc_ibyte[7];
				ecc1l[10] <= ecc1l[10] ^ ecc_ibyte[6];
				ecc1h[9] <= ecc1h[9] ^ ecc_ibyte[5];
				ecc1l[9] <= ecc1l[9] ^ ecc_ibyte[4];
				ecc1h[8] <= ecc1h[8] ^ ecc_ibyte[3];
				ecc1l[8] <= ecc1l[8] ^ ecc_ibyte[2];
				ecc1h[7] <= ecc1h[7] ^ ecc_ibyte[1];
				ecc1l[7] <= ecc1l[7] ^ ecc_ibyte[0];
			end
			if(ecc1_en[2]) begin
				ecc1h[2] <= ecc1h[2] ^ ecc_ibyte[7];
				ecc1l[2] <= ecc1l[2] ^ ecc_ibyte[6];
				ecc1h[1] <= ecc1h[1] ^ ecc_ibyte[5];
				ecc1l[1] <= ecc1l[1] ^ ecc_ibyte[4];
				ecc1h[0] <= ecc1h[0] ^ ecc_ibyte[3];
				ecc1l[0] <= ecc1l[0] ^ ecc_ibyte[2];
			end
		end
		if(ecc0_hold) begin
			// end of first half, move its syndrome into ecc0;
			ecc0h <= ecc1h;
			ecc0l <= ecc1l;
		end else begin
			// fold the stored ecc bytes of the first half into ecc0;
			if(ecc0_en[0]) begin
				ecc0h[6] <= ecc0h[6] ^ ecc_ibyte[7];
				ecc0l[6] <= ecc0l[6] ^ ecc_ibyte[6];
				ecc0h[5] <= ecc0h[5] ^ ecc_ibyte[5];
				ecc0l[5] <= ecc0l[5] ^ ecc_ibyte[4];
				ecc0h[4] <= ecc0h[4] ^ ecc_ibyte[3];
				ecc0l[4] <= ecc0l[4] ^ ecc_ibyte[2];
				ecc0h[3] <= ecc0h[3] ^ ecc_ibyte[1];
				ecc0l[3] <= ecc0l[3] ^ ecc_ibyte[0];
			end
			if(ecc0_en[1]) begin
				ecc0h[10] <= ecc0h[10] ^ ecc_ibyte[7];
				ecc0l[10] <= ecc0l[10] ^ ecc_ibyte[6];
				ecc0h[9] <= ecc0h[9] ^ ecc_ibyte[5];
				ecc0l[9] <= ecc0l[9] ^ ecc_ibyte[4];
				ecc0h[8] <= ecc0h[8] ^ ecc_ibyte[3];
				ecc0l[8] <= ecc0l[8] ^ ecc_ibyte[2];
				ecc0h[7] <= ecc0h[7] ^ ecc_ibyte[1];
				ecc0l[7] <= ecc0l[7] ^ ecc_ibyte[0];
			end
			if(ecc0_en[2]) begin
				ecc0h[2] <= ecc0h[2] ^ ecc_ibyte[7];
				ecc0l[2] <= ecc0l[2] ^ ecc_ibyte[6];
				ecc0h[1] <= ecc0h[1] ^ ecc_ibyte[5];
				ecc0l[1] <= ecc0l[1] ^ ecc_ibyte[4];
				ecc0h[0] <= ecc0h[0] ^ ecc_ibyte[3];
				ecc0l[0] <= ecc0l[0] ^ ecc_ibyte[2];
			end
		end
	end

	// ecc write mux;
	// inserted into output stream when ecc is enabled;
	// 3->1 mux to select appropriate ecc byte;
	// ecc_out is being inverted for odd parity;
	// byte 2 carries the column parity, its two low bits are 0;

	wire [7:0] ecc0_b0;		// ecc0 byte 0;
	wire [7:0] ecc0_b1;		// ecc0 byte 1;
	wire [7:0] ecc0_b2;		// ecc0 byte 2;
	wire [7:0] ecc0_out;	// ecc0 output byte;
	wire [7:0] ecc1_b0;		// ecc1 byte 0;
	wire [7:0] ecc1_b1;		// ecc1 byte 1;
	wire [7:0] ecc1_b2;		// ecc1 byte 2;
	wire [7:0] ecc1_out;	// ecc1 output byte;

	assign ecc0_b0 = {
		ecc0h[6], ecc0l[6], ecc0h[5], ecc0l[5],
		ecc0h[4], ecc0l[4], ecc0h[3], ecc0l[3] };
	assign ecc0_b1 = {
		ecc0h[10], ecc0l[10], ecc0h[9], ecc0l[9],
		ecc0h[8], ecc0l[8], ecc0h[7], ecc0l[7] };
	assign ecc0_b2 = {
		ecc0h[2], ecc0l[2], ecc0h[1], ecc0l[1],
		ecc0h[0], ecc0l[0], 2'b00 };
	assign ecc0_out = ({8{ecc0_wsel[0]}} & ecc0_b0)
		| ({8{ecc0_wsel[1]}} & ecc0_b1)
		| ({8{ecc0_wsel[2]}} & ecc0_b2);

	assign ecc1_b0 = {
		ecc1h[6], ecc1l[6], ecc1h[5], ecc1l[5],
		ecc1h[4], ecc1l[4], ecc1h[3], ecc1l[3] };
	assign ecc1_b1 = {
		ecc1h[10], ecc1l[10], ecc1h[9], ecc1l[9],
		ecc1h[8], ecc1l[8], ecc1h[7], ecc1l[7] };
	assign ecc1_b2 = {
		ecc1h[2], ecc1l[2], ecc1h[1], ecc1l[1],
		ecc1h[0], ecc1l[0], 2'b00 };
	assign ecc1_out = ({8{ecc1_wsel[0]}} & ecc1_b0)
		| ({8{ecc1_wsel[1]}} & ecc1_b1)
		| ({8{ecc1_wsel[2]}} & ecc1_b2);

	always @(posedge sysclk)
	begin
		if(flc_dack)
			ecc_out <= ecc0_out | ecc1_out;
	end

	// ecc error detection;
	// no error if all ecc bits are 0;
	// wallace tree takes care of no error case;

	wire [21:0] ecc0;		// ecc0 bits;
	wire [21:0] ecc1;		// ecc1 bits;

	assign ecc0 = { ecc0h, ecc0l };
	assign ecc1 = { ecc1h, ecc1l };

	// correctable single-bit error;
	// in data region if high and low syndromes are opposites;

	wire [10:0] ecc0_hlx;	// xor's of high and low syndrome;
	wire [10:0] ecc1_hlx;	// xor's of high and low syndrome;
	wire ecc0_sde;			// single-bit error in region 0;
	wire ecc1_sde;			// single-bit error in region 1;

	assign ecc0_hlx = ecc0h ^ ecc0l;
	assign ecc1_hlx = ecc1h ^ ecc1l;
	assign ecc0_sde = &ecc0_hlx;
	assign ecc1_sde = &ecc1_hlx;

	// correctable single-bit error;
	// in ecc region if only one bit is set;
	// implemented as 3 deep wallace tree;
	// also detects all bits 0;
	// exactly one bit set: no carry at any level and final sum is 1;
	// all bits 0: no carry and final sum is 0;

	wire [6:0] ecc0_0c;		// level 0 carries;
	wire [6:0] ecc0_0s;		// level 0 sum;
	wire [2:0] ecc0_1c;		// level 1 carries;
	wire [2:0] ecc0_1s;		// level 1 sum;
	wire ecc0_2c;			// level 2 carry;
	wire ecc0_2s;			// level 2 sum;
	wire ecc0_c;			// any of the carries is set;

	assign { ecc0_0c[0], ecc0_0s[0] } = ecc0[0] + ecc0[1] + ecc0[2];
	assign { ecc0_0c[1], ecc0_0s[1] } = ecc0[3] + ecc0[4] + ecc0[5];
	assign { ecc0_0c[2], ecc0_0s[2] } = ecc0[6] + ecc0[7] + ecc0[8];
	assign { ecc0_0c[3], ecc0_0s[3] } = ecc0[9] + ecc0[10] + ecc0[11];
	assign { ecc0_0c[4], ecc0_0s[4] } = ecc0[12] + ecc0[13] + ecc0[14];
	assign { ecc0_0c[5], ecc0_0s[5] } = ecc0[15] + ecc0[16] + ecc0[17];
	assign { ecc0_0c[6], ecc0_0s[6] } = ecc0[18] + ecc0[19] + ecc0[20];

	assign { ecc0_1c[0], ecc0_1s[0] } = ecc0_0s[0] + ecc0_0s[1] + ecc0_0s[2];
	assign { ecc0_1c[1], ecc0_1s[1] } = ecc0_0s[3] + ecc0_0s[4] + ecc0_0s[5];
	assign { ecc0_1c[2], ecc0_1s[2] } = ecc0_0s[6] + ecc0[21];

	assign { ecc0_2c, ecc0_2s } = ecc0_1s[0] + ecc0_1s[1] + ecc0_1s[2];
	assign ecc0_c = |{ ecc0_0c, ecc0_1c, ecc0_2c };

	wire [6:0] ecc1_0c;		// level 0 carries;
	wire [6:0] ecc1_0s;		// level 0 sum;
	wire [2:0] ecc1_1c;		// level 1 carries;
	wire [2:0] ecc1_1s;		// level 1 sum;
	wire ecc1_2c;			// level 2 carry;
	wire ecc1_2s;			// level 2 sum;
	wire ecc1_c;			// any of the carries is set;

	assign { ecc1_0c[0], ecc1_0s[0] } = ecc1[0] + ecc1[1] + ecc1[2];
	assign { ecc1_0c[1], ecc1_0s[1] } = ecc1[3] + ecc1[4] + ecc1[5];
	assign { ecc1_0c[2], ecc1_0s[2] } = ecc1[6] + ecc1[7] + ecc1[8];
	assign { ecc1_0c[3], ecc1_0s[3] } = ecc1[9] + ecc1[10] + ecc1[11];
	assign { ecc1_0c[4], ecc1_0s[4] } = ecc1[12] + ecc1[13] + ecc1[14];
	assign { ecc1_0c[5], ecc1_0s[5] } = ecc1[15] + ecc1[16] + ecc1[17];
	assign { ecc1_0c[6], ecc1_0s[6] } = ecc1[18] + ecc1[19] + ecc1[20];

	assign { ecc1_1c[0], ecc1_1s[0] } = ecc1_0s[0] + ecc1_0s[1] + ecc1_0s[2];
	assign { ecc1_1c[1], ecc1_1s[1] } = ecc1_0s[3] + ecc1_0s[4] + ecc1_0s[5];
	assign { ecc1_1c[2], ecc1_1s[2] } = ecc1_0s[6] + ecc1[21];

	assign { ecc1_2c, ecc1_2s } = ecc1_1s[0] + ecc1_1s[1] + ecc1_1s[2];
	assign ecc1_c = |{ ecc1_0c, ecc1_1c, ecc1_2c };

	// map ecc results into status;
	// do not correct single-bit errors in ecc region;
	// do not process ecc for first half if it was not read;
	// 00=ok, 01=dbe, 10=sbe-cor, 11=sbe-nocor;

	wire [1:0] ecc_cor;		// enable ecc correction per half;
	reg [1:0] ecc0_sts;		// region 0 ecc status;
	reg [1:0] ecc1_sts;		// region 1 ecc status;

	assign ecc_cor[0] = ecc_ena & flc_rdph & ~flc_half;
	assign ecc_cor[1] = ecc_ena & flc_rdph;

	always @(posedge sysclk)
	begin
		ecc0_sts[1] <= ecc_cor[0] & (ecc0_sde | (~ecc0_c & ecc0_2s));
		ecc0_sts[0] <= ecc_cor[0] & ~ecc0_sde & (ecc0_c | ecc0_2s);
		ecc1_sts[1] <= ecc_cor[1] & (ecc1_sde | (~ecc1_c & ecc1_2s));
		ecc1_sts[0] <= ecc_cor[1] & ~ecc1_sde & (ecc1_c | ecc1_2s);
	end

	// ecc correction state machine;
	// read pi buffer into flb_out, xor correction term, write back;
	// correct ecc1 first, because it is available first;
	// a half without a correctable data error skips its four states;
	// ecc_sm:
	//	0100	ecc1 query correction status;
	//	0101	ecc1 request pi buffer read;
	//	0110	ecc1 xor correction terms;
	//	0111	ecc1 request pi buffer write;
	//	1000	ecc0 query correction status;
	//	1001	ecc0 request pi buffer read;
	//	1010	ecc0 xor correction terms;
	//	1011	ecc0 request pi buffer write;
	//	11xx	done with correction;

	wire [1:0] ecc_sbe;		// correct single-bit errors in data region;
	wire [1:0] ecc_qry;		// correction query state;
	reg [3:0] ecc_sm;		// ecc correction state machine;
	wire ecc_sm_next;		// advance ecc state machine;

	assign ecc_sbe[0] = (ecc0_sts == 2'b10);
	assign ecc_sbe[1] = (ecc1_sts == 2'b10);
	assign ecc_qry[0] = ~ecc_sm[2] & (ecc_sm[1:0] == 2'b00);
	assign ecc_qry[1] = ~ecc_sm[3] & (ecc_sm[1:0] == 2'b00);

	always @(posedge sysclk)
	begin
		if( ~flc_xeph)
			ecc_sm <= 4'b0100;
		else if(ecc_qry[1] & ~ecc_sbe[1])
			ecc_sm[3:2] <= 2'b10;
		else if(ecc_qry[0] & ~ecc_sbe[0])
			ecc_sm[3:2] <= 2'b11;
		else if(ecc_sm_next)
			ecc_sm <= ecc_sm + 1;
	end

	assign flb_ecreq = ecc_sm[0];
	assign flb_ecwr = ecc_sm[1];
	assign flb_ecen = (ecc_sm[1:0] == 2'b01);
	assign flc_ecack = (ecc_sm == 4'b1100);

	// advance correction state machine;
	// for buffer requests, acks, and correction xors;

	assign ecc_sm_next = (ecc_qry[1] & ecc_sbe[1])
		| (ecc_qry[0] & ecc_sbe[0])
		| flb_ack
		| (ecc_sm[1:0] == 2'b10);

	// ecc correction terms;
	// only data region is being corrected;
	// put into registers due to loading;
	// the high syndrome gives the failing byte address [10:3]
	// and the failing bit within the byte [2:0];

	wire [7:0] ecc0_addr;	// address of byte to correct;
	wire [7:0] ecc1_addr;	// address of byte to correct;
	wire [2:0] ecc0_bit;	// bit to correct;
	wire [2:0] ecc1_bit;	// bit to correct;
	reg [2:0] ecc_bit;		// bit to correct;
	reg [1:0] ecc_bidx;		// byte index of 32-bit word;
	reg [7:0] ecc_xor;		// byte xor correction term;
	reg [3:0] ecc_ben;		// ecc correction byte enables;
	wire ecc_xen;			// ecc xor enable;

	assign ecc0_addr = ecc0h[10:3];
	assign ecc0_bit = ecc0h[2:0];
	assign ecc1_addr = ecc1h[10:3];
	assign ecc1_bit = ecc1h[2:0];
	assign ecc_xen = ecc_ena & flc_rdph & flc_xeph;

	// ecc_sm[2] is set while ecc1 is being processed;
	// ecc_xor is a one-hot bit mask, ecc_ben a one-hot byte lane select;

	always @(posedge sysclk)
	begin
		ecc_bit <= ecc_sm[2]? ecc1_bit : ecc0_bit;
		ecc_bidx <= ecc_sm[2]? ecc1_addr[1:0] : ecc0_addr[1:0];
		ecc_xor[0] <= (ecc_bit == 3'd0);
		ecc_xor[1] <= (ecc_bit == 3'd1);
		ecc_xor[2] <= (ecc_bit == 3'd2);
		ecc_xor[3] <= (ecc_bit == 3'd3);
		ecc_xor[4] <= (ecc_bit == 3'd4);
		ecc_xor[5] <= (ecc_bit == 3'd5);
		ecc_xor[6] <= (ecc_bit == 3'd6);
		ecc_xor[7] <= (ecc_bit == 3'd7);
		ecc_ben[0] <= ecc_xen & (ecc_bidx == 2'd0);
		ecc_ben[1] <= ecc_xen & (ecc_bidx == 2'd1);
		ecc_ben[2] <= ecc_xen & (ecc_bidx == 2'd2);
		ecc_ben[3] <= ecc_xen & (ecc_bidx == 2'd3);
	end

	// word address of the byte to correct;
	// bit 8 selects the second half while ecc1 is being processed;

	assign ecc_addr[8] = ecc_sm[2];
	assign ecc_addr[7:2] = ecc_sm[2]? ecc1_addr[7:2] : ecc0_addr[7:2];

	// expand ecc correction mask;
	// need mask for all four bytes of buffer width;
	// xor is done at write time to flb_out;

	assign flb_xor[31:24] = {8{ecc_ben[0]}} & ecc_xor;
	assign flb_xor[23:16] = {8{ecc_ben[1]}} & ecc_xor;
	assign flb_xor[15:8]  = {8{ecc_ben[2]}} & ecc_xor;
	assign flb_xor[7:0]   = {8{ecc_ben[3]}} & ecc_xor;

	// hold on to ecc status for return in ctrl register;
	// cleared one clock after flc_start, captured at end of correction;
	// flc_sbe covers both corrected and uncorrected single-bit errors;

	reg flc_sbe;			// correctable single-bit error;
	reg flc_dbe;			// uncorrectable error;

	always @(posedge sysclk)
	begin
		if(flc_new) begin
			flc_sbe <= 1'b0;
			flc_dbe <= 1'b0;
		end else if(flc_ecack) begin
			flc_sbe <= ecc0_sts[1] | ecc1_sts[1];
			flc_dbe <= (ecc0_sts == 2'b01) | (ecc1_sts == 2'b01);
		end
	end

endmodule