all.c 2.75 KB

Raw Blame History Permalink

/*

	C-code sketch of how RSP microcode will process a macroblock.

	Different versions will make different I/O and computational
	assumptions.

    Version #1 assumptions:
	Macroblocks are un-tokenized into a mbdata[NBLKS][64] area of 16-bit
	words in DMEM.  Which blocks are non-zero is in CBP in header.

	iquant and idct routines work on single blocks per call.

	No longer true:
	    motion-compensation and reference combining (F+B) occur
	    all at once in a separate routine. DMAs are assumed to
	    already be complete (results are inputs).
*/

/*
    Macroblock header: eight consecutive `short` fields.
    Field order is significant and must not be changed.
*/
typedef struct mb_hdr_struct {
	short mbtype;	    /* type flags; proc_mb() tests the MBTYPE_INTRA bit */
	short cbp;	    /* per the file header comment: which blocks are non-zero */
	short quant;	    /* quantiser; proc_mb() hands it to iquant_calc_new_qmat() */
	short mvfx;	    /* presumably forward motion vector, x -- TODO confirm */
	short mvfy;	    /* presumably forward motion vector, y -- TODO confirm */
	short mvbx;	    /* presumably backward motion vector, x -- TODO confirm */
	short mvby;	    /* presumably backward motion vector, y -- TODO confirm */
	short block_num;    /* bit layout sMMMMMMMMMMMbbbb:
				 s = sign (<0 implies new MB)
				 M = macroblock number (0-2047)
				 b = block number within MB */
} MB_header;

/*
    One macroblock's working set: its header, the coefficient data for
    each of its NBLKS blocks, and four reference-tile buffers.

    NBLKS, MC_YTILE_SIZE and MC_UVTILE_SIZE are not defined in this file.
*/
typedef struct mb_struct {
	MB_header header;			/* per-macroblock header */

	short mbdata[NBLKS][64];		/* 64 coefficients per block */

	/* "f" buffers -- presumably forward-reference tiles; TODO confirm */
	unsigned char fyref[MC_YTILE_SIZE];
	unsigned char fuvref[MC_UVTILE_SIZE];

	/* "b" buffers -- presumably backward-reference tiles; TODO confirm */
	unsigned char byref[MC_YTILE_SIZE];
	unsigned char buvref[MC_UVTILE_SIZE];
} MacroBlock;


/*
    Values that proc_mb() compares the current header against when
    deciding whether to call iquant_calc_new_qmat().  Both start at zero.
*/
static short prev_mbtype = 0;
static short prev_quant = 0;

/* Header that proc_mb() reads its quant and mbtype from. */
static MB_header mbh;

/*
    Process a Macroblock

    Works on the macroblock described by the file-scope header `mbh`:

      1. Recompute the inverse-quantisation matrix if the quantiser or
	 the intra/non-intra class differs from the values remembered
	 from the previous call, then remember the current values.
      2. For a non-intra macroblock, call mc_calc_dma().
      3. For every block whose bit is set in the coded-block pattern:
	 poll the DMA, inverse-quantise, inverse-DCT, and then either
	 combine with the reference (mc_dma_done_flag set) or save the
	 IDCT output for later (flag clear).
      4. Poll until mc_dma_done_flag is 1, then call recon_save().

    Returns 0 always; the value carries no information and exists only
    because the routine was originally declared with implicit int.

    NBLKS, MBTYPE_INTRA, mc_dma_done_flag and the iquant/idct/recon
    routines are not declared in this file.
*/
int proc_mb()
{
    short quant  = mbh.quant;
    short mbtype = mbh.mbtype;
    short cbp    = mbh.cbp;	/* which blocks of this MB carry data */
    int   intra  = mbtype & MBTYPE_INTRA;
    int   i;

    /* NOTE(review): the remembered values start at zero, so a first
       macroblock that is non-intra with quant 0 skips this call --
       confirm that a default matrix is set up elsewhere. */
    if( (intra != (prev_mbtype & MBTYPE_INTRA)) || (quant != prev_quant) ) {
	iquant_calc_new_qmat( quant, intra );
    }

    /* Remember what the current matrix was built for; without this the
       test above would always compare against the initial zeros. */
    prev_quant = quant;
    prev_mbtype = mbtype;

    if( intra == 0 ) {
	mc_calc_dma();
    }

    for( i = 0; i < NBLKS; i++ ) {
	if( cbp & (1 << i) ) {
	    /* NOTE(review): the first argument is i-1 (so -1 for block
	       0), kept exactly as in the original sketch -- confirm the
	       intended meaning. */
	    mc_check_dma( i-1, cbp );
	    iquant( i );
	    idct( i );
	    if( mc_dma_done_flag ) {
		mc_combine( i );
	    } else {
		idct_save( i );
	    }
	}
    }

    /* i == NBLKS here, so the poll is issued with NBLKS-1.
       NOTE(review): this spins until the flag reads exactly 1; confirm
       the flag is also set for intra macroblocks, for which
       mc_calc_dma() is never called. */
    while( mc_dma_done_flag != 1 ) {
	mc_check_dma( i-1, cbp );
    }

    recon_save();

    return 0;
}


/*
    MC can be done in many different ways.  Two key characteristics
    to try to optimize are: (1) minimize DMEM use, and (2) minimize
    time (typically DMA xfers, VReg I/O).

    To minimize DMA xfers, load horizontally adjacent tiles in 1 DMA
    (but this transfers more than the necessary amount of data). Also,
    UV MB data could be interleaved with Y MB data.  This gets 2 DMAs
    per reference direction instead of 8. Actually, UV doesn't need
    to be interleaved, it could just follow Y. But then, ALL of the
    UV tiles will be fetched, instead of just the necessary subsets.

    To minimize DMEM use, DMA should be scheduled to fit into recently
    vacated areas of DMEM.  For example, after For_Luma_mc is done, 18*16
    bytes are freed up, so load the Bak_Luma_ref_tiles here.

    To minimize DMEM-Vregs I/O, do For_mc and Bak_mc 4-lines at a time
    alternately, average, then write out to DMEM. After MC is done,
    IDCT results (still in Vregs) can be added to reference.
*/

/*
    Placeholder -- no implementation yet.

    proc_mb() calls this once per non-intra macroblock, before the
    per-block loop.  Presumably this is where the reference-tile DMAs
    are to be worked out and started -- TODO confirm when implemented.

    Returns 0 always.  The int return type matches the original
    implicit-int declaration; proc_mb() ignores the value.
*/
int mc_calc_dma()
{
    return 0;
}

/*
    Placeholder -- no implementation yet.

    proc_mb() calls this with two arguments, so the definition now
    accepts them; calling a zero-parameter definition with arguments is
    undefined behaviour.

    blk   value proc_mb() passes as its loop index minus one (-1 on the
	  first block); meaning to be defined by the implementation.
    cbp   coded-block pattern proc_mb() is iterating over.

    NOTE(review): proc_mb() spins on mc_dma_done_flag after its block
    loop, calling this routine each time round; an implementation
    presumably has to update that flag -- confirm.

    Returns 0 always.  The int return type matches the original
    implicit-int declaration; proc_mb() ignores the value.
*/
int mc_check_dma(int blk, int cbp)
{
    (void)blk;
    (void)cbp;
    return 0;
}

/*
    Placeholder -- no implementation yet.

    proc_mb() calls this with one argument (the block index) after
    idct() when mc_dma_done_flag is set, so the definition now accepts
    it; calling a zero-parameter definition with an argument is
    undefined behaviour.

    blk   index of the block proc_mb() has just run idct() on.

    Returns 0 always.  The int return type matches the original
    implicit-int declaration; proc_mb() ignores the value.
*/
int mc_combine(int blk)
{
    (void)blk;
    return 0;
}