nidct.c 8.63 KB
#include "nidct.h"
#include <assert.h>
#include <stdio.h>
#include <math.h>

static double A0[4][4];
static double A1[4][4];
static double C0[8][8];

static Int16 i16_A0[4][4];
static Int16 i16_A1[4][4];
static Int16 i16_C0[8][8];

static int initFlag = 0;

int CheckOverflow(Int64 val);
Int16 convert2sf(double in);

void init_A0(void) {
    
    A0[0][0]= 0.5*cos(M_PI/4.0); A0[0][1]=  0.5*cos(M_PI/4.0); A0[0][2]=  0.5*cos(M_PI/8.0); A0[0][3]=  0.5*sin(M_PI/8.0); 
    A0[1][0]= 0.5*cos(M_PI/4.0); A0[1][1]= -0.5*cos(M_PI/4.0); A0[1][2]=  0.5*sin(M_PI/8.0); A0[1][3]= -0.5*cos(M_PI/8.0);
    A0[2][0]= 0.5*cos(M_PI/4.0); A0[2][1]= -0.5*cos(M_PI/4.0); A0[2][2]= -0.5*sin(M_PI/8.0); A0[2][3]=  0.5*cos(M_PI/8.0);
    A0[3][0]= 0.5*cos(M_PI/4.0); A0[3][1]=  0.5*cos(M_PI/4.0); A0[3][2]= -0.5*cos(M_PI/8.0); A0[3][3]= -0.5*sin(M_PI/8.0);
}

void init_A1(void) {

    A1[0][0]= 0.5*sin(M_PI/16.0); A1[0][1]= 0.5*cos( (3.0*M_PI)/16.0 ); A1[0][2]= -0.5*sin((3.0*M_PI)/16.0);  A1[0][3]= -0.5*cos(M_PI/16.0);
    
    A1[1][0]= 0.5*cos(M_PI/4.0)*( cos(M_PI/16.0) - sin(M_PI/16.0) );
    A1[1][1]= 0.5*cos(M_PI/4.0)*( cos((3.0*M_PI)/16.0) - sin((3.0*M_PI)/16.0) );
    A1[1][2]= 0.5*cos(M_PI/4.0)*( -sin((3.0*M_PI)/16.0) - cos((3.0*M_PI)/16.0) );
    A1[1][3]= 0.5*cos(M_PI/4.0)*( cos(M_PI/16.0) + sin(M_PI/16.0) ); 
    
    A1[2][0]= 0.5*cos(M_PI/4.0)*( sin(M_PI/16.0) + cos(M_PI/16.0) ); 
    A1[2][1]= 0.5*cos(M_PI/4.0)*( -sin((3.0*M_PI)/16.0) - cos((3.0*M_PI)/16.0) );
    A1[2][2]= 0.5*cos(M_PI/4.0)*( sin((3.0*M_PI)/16.0) - cos((3.0*M_PI)/16.0) );
    A1[2][3]= 0.5*cos(M_PI/4.0)*( sin(M_PI/16.0) - cos(M_PI/16.0) ); 
    
    A1[3][0]= 0.5*cos(M_PI/16.0); A1[3][1]= 0.5*sin((3.0*M_PI)/16.0); A1[3][2]= 0.5*cos((3.0*M_PI)/16.0); A1[3][3]= 0.5*sin(M_PI/16.0);
}

void
init_C0(void) {
    int i;
    int	j;
    double km;
    
    for (i = 0; i < 8; i++) {
	for (j = 0; j < 8; j++) {
	    if (j == 0) {
		 km = 1.0 / sqrt(2.0);
	    } else {
		 km = 1.0;
	    }
	    C0[i][j] = 0.5 * km *  cos( ((2.0*(double)i + 1.0) * (double)j * M_PI)/16.0 );
	}
    }
  }

void
InitCoefs(void)
{
    int	i;
    int	j;
    

    /* Initialize the matrices and convert to 16-bit signed fraction data type */

    
    init_A0();
    init_A1();
    init_C0();
    
    /* convert matrices to signed fraction data types */
    
    for (i=0;i<4;i++) {
	for (j=0;j<4;j++) {
	    i16_A0[i][j] = convert2sf( 2*A0[i][j] );
	    i16_A1[i][j] = convert2sf( 2*A1[i][j] );
	}
    }
    for (i = 0; i < 8; i++) {
	for (j = 0; j < 8; j++) {
	    i16_C0[i][j] = convert2sf(C0[i][j]);
	}
    }

#ifdef PRINT_MAT
    printf("i16_A0 = {");
    for (i=0;i<4;i++) {
	printf("%s\n\t", (i!=0)?",":"");
	for (j=0;j<4;j++)
	    printf("%5d%s", i16_A0[i][j], (j==3)?"":", ");

	printf("%s", (j==3)?"":", ");
    };
    printf("};\n\n");

    printf("i16_A1 = {");
    for (i=0;i<4;i++) {
	printf("%s\n\t", (i!=0)?",":"");
	for (j=0;j<4;j++)
	    printf("%5d%s", i16_A1[i][j], (j==3)?"":", ");

	printf("%s",(j==3)?"":", ");
    };
    printf("};\n\n");
#endif
}


/*
   FIXED POINT Implementation of "Fast" Algorithm
*/

void new_fixed_fast_idct_8x1(Int16* in_ptr, Int16* out_ptr, int extraShift) {
    
    Int64 x0[4];
    Int64 x1[4];
    double fx0[4];
    double fx1[4];
    
    Int64 x10[4];
    Int64 x11[4];
    double fx10[4];
    double fx11[4];
    
    Int16 i16_x10[4];
    Int16 i16_x11[4];
    
    Int64 x20[8];
    Int64 test;
    Int64 save;
    double fx20[8];

    Int64  sum;
    double fsum;
    int i,j;
    int	shiftAmt;
    
    
    /* bit reversed addressing */
    
    x0[0] = in_ptr[0] ;	
    x0[1] = in_ptr[4] ;
    x0[2] = in_ptr[2] ;
    x0[3] = in_ptr[6] ;
    
    x1[0] = in_ptr[1] ;
    x1[1] = in_ptr[5] ;
    x1[2] = in_ptr[3] ;
    x1[3] = in_ptr[7] ;

#ifdef IN_FLOAT
    fx0[0] = (double)(in_ptr[0] / 16);	
    fx0[1] = (double)(in_ptr[4] / 16);
    fx0[2] = (double)(in_ptr[2] / 16);
    fx0[3] = (double)(in_ptr[6] / 16);
    
    fx1[0] = (double)(in_ptr[1] / 16);
    fx1[1] = (double)(in_ptr[5] / 16);
    fx1[2] = (double)(in_ptr[3] / 16);
    fx1[3] = (double)(in_ptr[7] / 16);
#endif    
    
    /* stage 1:  perform two (4x4)*(4x1) matrix multiplies */
    
    for (i=0;i<4;i++) {
	sum= 0;
#ifdef IN_FLOAT
	fsum = 0;
#endif    
	for (j=0;j<4;j++) {
	   sum += ((Int64)i16_A0[i][j] * (Int64)x0[j] );
#ifdef IN_FLOAT
	   fsum += A0[i][j] * fx0[j];
#endif    
	}
	x10[i] = sum;
#ifdef IN_FLOAT
	fx10[i] = fsum;
#endif    
    }
    
    
    for (i=0;i<4;i++) {
	sum= 0;
#ifdef IN_FLOAT
	fsum = 0;
#endif    
	for (j=0;j<4;j++) {
	    sum += ((Int64)i16_A1[i][j] * (Int64)x1[j] );
#ifdef IN_FLOAT
	   fsum += A1[i][j] * fx1[j];
#endif    
	}
	x11[i] = sum;
#ifdef IN_FLOAT
	fx11[i] = fsum;
#endif    
    }

    /*
       Round
    */
    shiftAmt = 15;
    for (i=0;i<4;i++) {
    
	x10[i] = DumbRound(x10[i], shiftAmt, Int64);
	x11[i] = DumbRound(x11[i], shiftAmt, Int64);
	 	
 	assert(CheckOverflow(x10[i]));
	assert(CheckOverflow(x11[i]));
   }
      
    /* stage 2: perform butterfly operatons */
    
    x20[0] = x10[0] + x11[3];   /* 0,7 bfly */
    x20[1] = x10[1] + x11[2];   /* 1,6 bfly */
    x20[2] = x10[2] + x11[1];   /* 2,5 bfly */
    x20[3] = x10[3] + x11[0];   /* 3,4 bfly */
    x20[4] = x10[3] - x11[0];   /* 3,4 bfly */
    x20[5] = x10[2] - x11[1];   /* 2,5 bfly */
    x20[6] = x10[1] - x11[2];   /* 1,6 bfly */
    x20[7] = x10[0] - x11[3];   /* 0,7 bfly */
    
    shiftAmt = 16 - shiftAmt + (extraShift?4:0);
    for (i = 0; i < 8; i++) {
        assert(CheckOverflow(x20[i]));
	x20[i] = Round(x20[i], shiftAmt, Int64);
	out_ptr[i] = x20[i];
    }

#ifdef IN_FLOAT
    fx20[0] = ((fx10[0] + fx11[3]) );   /* 0,7 bfly */
    fx20[1] = ((fx10[1] + fx11[2]) );   /* 1,6 bfly */
    fx20[2] = ((fx10[2] + fx11[1]) );   /* 2,5 bfly */
    fx20[3] = ((fx10[3] + fx11[0]) );   /* 3,4 bfly */
    fx20[4] = ((fx10[3] - fx11[0]) );   /* 3,4 bfly */
    fx20[5] = ((fx10[2] - fx11[1]) );   /* 2,5 bfly */
    fx20[6] = ((fx10[1] - fx11[2]) );   /* 1,6 bfly */
    fx20[7] = ((fx10[0] - fx11[3]) );   /* 0,7 bfly */
#endif
    
}


void fixed_direct_idct_8x1(Int16* in_ptr, Int16* out_ptr, int extraShift) {
    
    int n,m;
    Int64 sum;
    double fsum;
    int	shiftAmt;
    int fin_ptr[8];
    
    shiftAmt = 15 + (extraShift?4:0);
#ifdef IN_FLOAT
    for (n = 0; n < 8; n++) {
	fin_ptr[n] = ((double)in_ptr[n]) / 16;
    }
#endif
    for (n = 0; n < 8; n++) {
	sum = 0;
#ifdef IN_FLOAT
	fsum = 0;
#endif
	for (m = 0; m < 8; m++) {
	    sum += in_ptr[m] * i16_C0[n][m];
#ifdef IN_FLOAT
	    fsum += fin_ptr[m] * C0[n][m];
#endif
	}
	sum = DumbRound(sum, shiftAmt, Int64);
	assert(CheckOverflow(sum));
	out_ptr[n] = sum;
    }
}



Int16 convert2sf(double in) {
    Int32   ival;
    assert(fabs(in) < 1);
    if (in > 0) {
	in *= 0x8000;
	ival = (Int16)(in + 0.5);
    } else {
	in += 1;
	in *= 0x8000;
	ival = (Int16)(in + 0.5);
	ival |= 0xffff8000;		        /* set the sign bits */
    }
    assert(ival < ((Int16)(0x7fff)));
    assert(ival > ((Int16)(0x8000)));
    return ival;
}

void dprintres(double* x) {

    int i;
    printf("\n");
    for ( i=0; i< 8; i++) {
	printf("%lf  ",x[i]); 
    }
    printf("\n");
}

void iprintres(Int16* x) {

    int i;
    printf("\n");
    for ( i=0; i< 8; i++) {
	printf("%-8d  ",x[i]); 
    }
    printf("\n");
}

void printin(double* x) {
    
    int i;
    printf("\n");
    for ( i=0; i< 8; i++) {
	printf("%lf  ",x[i]); 
    }
}

CheckOverflow(Int64 val)
{
    if ((val & 0x8000) && ((val & 0xffffffffffff8000) != 0xffffffffffff8000)) {
	return 0;
    } else if ((!(val & 0x8000)) && ((val & 0xffffffffffff8000) != 0)) {
	return 0;
    }
    return 1;
}




void IDct8x8_fastInt16(Int16 block[8][8]) {
    
    Int16 Int16Block[8][8];
    Int16 Int16TempBlock[8][8];
    Int16 Int16Temp;
    int i,j;

    FILE *src;
    FILE *dst;

    if (!initFlag) {
        InitCoefs();
	initFlag = 1;
      }

    for (i=0;i<8;i++) 
       for (j=0;j<8;j++) 
            Int16Block[i][j] = block[j][i] << 4;
    
    
    /*  Perform Row-wise IDCT */
    
    for (i=0;i<8;i++) {
        new_fixed_fast_idct_8x1(&Int16Block[i][0] , &Int16TempBlock[i][0], FALSE);    }
    
    /*  Transpose the Block */
    
    for (i=0;i<8;i++) {
       for (j=0;j<8;j++) {
            Int16Block[i][j] = Int16TempBlock[j][i] ;
       }
    }
    
    /*  Perform Row-wise IDCT on the transposed Block */
    
    for (i=0;i<8;i++) {
        new_fixed_fast_idct_8x1(&Int16Block[i][0] , &Int16TempBlock[i][0], TRUE);
    }

    
    /*  Clamp */
    for (i=0;i<8;i++) {
        for (j=0;j<8;j++) {
            Int16Temp = Int16TempBlock[i][j];
            if ( Int16Temp > 255 ) {
                 Int16Temp = 255;
            }
            if (Int16Temp < -256) {
                 Int16Temp = -256;
            }
            block[i][j] = Int16Temp;       
        }
    }
    
}