/* vu.c (4.84 KB) */
/*
 * Vector Unit instruction Simulation
 */

#include <stdio.h>
#include "graphic.h"

/*
 * These macros make VU code look more symbolic. They assume the
 * following names are in scope wherever they are used:
 *	i	loop ctr for VU units
 *	d	dst reg
 *	s	src reg
 *	t	2nd src
 *	e	scalar element
 *	sft	post multiply/post accumulate shift
 *	sc	whether this is a scalar operation
 */

/* Lane i of the destination register d. */
#define VD	(vu.vregs[i][d])
#define	VDs	(vu.vregs[e][d])	/* scalar D */
/* Lane i of the first source register s. */
#define VS	(vu.vregs[i][s])
/* Second source: lane e of register t when sc is set, otherwise lane i. */
#define VT	(sc ? vu.vregs[e][t] : vu.vregs[i][t])
/* Accumulator of lane i. */
#define ACC	(vu.vacc[i])
#define	VCC	(vu.vcc)
/*
 * Shift x left by sft when sft is positive, otherwise right by -sft.
 * The argument is parenthesized so that an operand containing an
 * operator of lower precedence than the shift (e.g. a & b) is shifted
 * as a whole instead of having only its last term shifted.
 */
#define	SFT(x)	(sft > 0 ? (x) << sft : (x) >> -sft)

/* NOTE(review): not referenced by any code seen in this file - confirm before removing. */
static short	space;

/*
 * vinit: put the vector unit into its reset state.
 *
 * For every lane the accumulator and all VU_NREG registers are cleared,
 * register 1 is then set to 1, and finally the condition codes are
 * cleared.
 */
vinit()
{
	int	lane;

	for (lane = 0; lane < VU_NMAC; lane++) {
		int	reg;

		vu.vacc[lane] = 0;

		for (reg = 0; reg < VU_NREG; reg++) {
			vu.vregs[lane][reg] = 0;
		}

		/* XXX make sure SPECed as hardwired 1 */
		vu.vregs[lane][1] = 1;
	}

	vu.vcc = 0;
}

vload(int r, short *a)			/* load */
{
	int	i;

	/*
	 * register 0 = 0 read only
	 */

	if (r == 0) {
		fprintf(stderr, "vld: attemp to load register 0\n");
		exit(EXIT_FAILURE);
	}

	for (i=0; i<VU_NMAC; i++)
		vu.vregs[i][r] = *a++;
}

/*
 * vstore: copy register r out to memory, one short per lane.
 *
 *	r	register to read
 *	a	destination array; receives VU_NMAC shorts, a[n] taken
 *		from lane n
 */
vstore(int r, short *a)			/* store */
{
	int	lane;

	for (lane = 0; lane < VU_NMAC; lane++) {
		a[lane] = vu.vregs[lane][r];
	}
}

/*
 * vu_mul: per-lane multiply of register s by register t into the
 * accumulator and register d.
 *
 *	s, t		source registers (t is read from lane e when sc is set)
 *	e		scalar element used by VT when sc is set
 *	d		destination register
 *	double_prec	selects which of the two result forms below is used
 *	sft		shift applied by SFT(): left when positive, right otherwise
 *	sc		non-zero for a scalar second operand
 */
vu_mul(int s, int t, int e, int d, int double_prec, int sft, int sc)
{
	int	i;	/* lane index; the VD/VS/VT/ACC macros rely on this name */

	if (double_prec) {
		/* shifted product goes to the accumulator, low 15 bits to d */
		for (i = 0; i < VU_NMAC; i++) {
			ACC = SFT( mul16( VS, VT));
			VD = ACC & 0x7fff;
		}
	}
	else {
		/* raw product goes to the accumulator, shifted value to d */
		for (i = 0; i < VU_NMAC; i++) {
			if (sft != -15) {
				ACC =  mul16( VS, VT);
			}
			else {
				/* round if losing precision */
				ACC =  mul16( VS, VT) + (1<<(-sft-1));
			}
			VD = SFT(ACC);
		}
	}
}

/*
 * vu_mac: per-lane multiply-accumulate. Same operands as vu_mul, but
 * the product is added to the accumulator instead of replacing it, and
 * no rounding term is added.
 *
 *	s, t		source registers (t is read from lane e when sc is set)
 *	e		scalar element used by VT when sc is set
 *	d		destination register
 *	double_prec	selects which of the two result forms below is used
 *	sft		shift applied by SFT(): left when positive, right otherwise
 *	sc		non-zero for a scalar second operand
 */
vu_mac(int s, int t, int e, int d, int double_prec, int sft, int sc)
{
	int	i;	/* lane index; the VD/VS/VT/ACC macros rely on this name */

	if (double_prec) {
		/* accumulate the shifted product, low 15 bits of the sum to d */
		for (i = 0; i < VU_NMAC; i++) {
			ACC += SFT( mul16( VS, VT));
			VD = ACC & 0x7fff;
		}
	}
	else {
		/* accumulate the raw product, shifted sum to d */
		for (i = 0; i < VU_NMAC; i++) {
			ACC +=  mul16( VS, VT);
			VD = SFT(ACC);
		}
	}
}

/*
 * Empty placeholders: both take no arguments and do nothing.
 * NOTE(review): presumably the unsigned counterparts of vu_mul and
 * vu_mac, still to be implemented - confirm.
 */
vu_mulu() {}
vu_macu() {}

vu_div(int s, int t, int e, int d, int sc)	/* div, scalar T */
{
	float	r;
	short	i, f;

	if (VT == 0) {
		fprintf(stderr, "divide by 0\n");
/*
		exit(EXIT_FAILURE);
*/		return;
	}
/*
	r = 1/(float)VT;
*/
{
int n, m;
	n = VT;
	divide_seed( n, &m);
	r = (float)m * ((float)1/DIV_RECP_FRAC);
}
	ftos(r, &i, &f);
	f |= (r < 0) ? 0x8000 : 0;		/* do a signed fraction */
	VDs = f;
}

/*
 * vu_divd: double-word reciprocal, carried out as two calls that share
 * static state.
 *
 *   high != 0	stores VT << 15 as the upper part of the divisor and
 *		remembers the address of lane e of register d.
 *   high == 0	ORs VT into the saved divisor, computes the reciprocal
 *		with divide_seed() followed by divide_newt(), writes
 *		tmp >> 15 through the pointer remembered by the high
 *		call and tmp & 0x7fff into lane e of register d.
 *
 * A zero divisor is reported on stderr and nothing is written.
 *
 * NOTE(review): when sc is 0, VT expands to vu.vregs[i][t] and i here is
 * the static divisor, not a lane index - presumably callers always pass
 * sc != 0; confirm.
 * NOTE(review): h is only assigned by the high call; a low call that is
 * not preceded by a high call writes through a null pointer.
 */
vu_divd(int s, int t, int e, int d, int high, int sc)	/* double div */
{
	static	int	i;	/* divisor assembled across the two calls */
	double	r;		/* not used in this function */
	int	tmp;
	static short *h;	/* destination saved by the high call */

	if (high) {
		/* first call: latch the upper part and the result location */
		i = VT << 15;
		h = &(VDs);
	}
	else {
		/* second call: complete the divisor with the lower part */
		i |= VT;
		if (i == 0) {
			fprintf(stderr, "divide by 0\n");
/*
			exit(EXIT_FAILURE);
*/			return;
		}
/*
	tmp = (int)((double)0x40000000/(double)i);
*/
{
int n, m;
	n = i;
	divide_seed( n, &m);
/* used to be 30 bit fraction, divide seed is 31 bit fraction */
	divide_newt( n, m, &m);
	tmp = (int)((double)0x40000000 * 
		(float)m * ((float)1/DIV_RECP_FRAC));
}
		/* split the result: upper bits via h, low 15 bits to VDs */
		*h = tmp >> 15;
		VDs = tmp & 0x7fff;
	}
}

/*
 * vu_select: compare register s against register t and update VCC.
 *
 * Only SEL_CL has an effect. For each lane i it sets or clears two VCC
 * bits: bit 2*i reflects VS <= -VT and bit 2*i+1 reflects VS >= VT.
 * SEL_LT, SEL_LE, SEL_EQ, SEL_NE, SEL_GE, SEL_GT, SEL_MERGE and any
 * other code leave VCC untouched.
 *
 *	s, t	source registers (t is read from lane e when sc is set)
 *	e	scalar element; its own pair of VCC bits is cleared
 *	cc	SEL_* selector
 *	sc	non-zero for a scalar second operand
 */
vu_select(int s, int t, int e, int cc, int sc)
{
	int	i;	/* lane index; the VS/VT macros rely on this name */

	if (cc == SEL_CL) {
		for (i = 0; i < VU_NMAC; i++) {
			int	lo_bit = 1 << (2*i);
			int	hi_bit = 1 << (2*i+1);

			if (VS <= -VT)
				VCC |= lo_bit;
			else
				VCC &= ~lo_bit;

			if (VS >= VT)
				VCC |= hi_bit;
			else
				VCC &= ~hi_bit;
		}

		/*
		 * mask out the clip code that compares element e against
		 * itself; basically, avoid w <= w and w >= w
		 */
		VCC &= ~(0x3 << (2*e));
	}
}

/*-----------------------------------------------------------*/

/*
 * Interface routines to map simulator data structures into VU
 */

/*
 * vloadmtx: load a 4x4 matrix into eight consecutive registers
 * starting at r.
 *
 * Each matrix row is converted with ftos() and occupies two registers:
 * the integer parts first, then the fractions. Column n of the row
 * lands in lane n.
 */
vloadmtx(int r, struct Matrix *mtx)
{
	short	ipart[4], fpart[4];
	int	row, col;

	for (row = 0; row < 4; row++) {
		for (col = 0; col < 4; col++)
			ftos(mtx->m[row][col], &ipart[col], &fpart[col]);

		vload(r, ipart);
		vload(r + 1, fpart);
		r += 2;
	}
}

/*
 * vloaddc: load the sx/sy/sz and tx/ty/tz fields of an Image_space
 * into registers r and r+1.
 *
 * Register r receives the integer parts of (sx, sy, sz, 1.0) and
 * register r+1 the integer parts of (tx, ty, tz, 1.0). The fractions
 * produced by ftos() go to a scratch array that is never read.
 */
vloaddc(int r, struct Image_space *is)
{
	short	svec[4], tvec[4], scratch[4];

	ftos(is->sx, &svec[0], &scratch[0]);
	ftos(is->sy, &svec[1], &scratch[1]);
	ftos(is->sz, &svec[2], &scratch[2]);
	ftos(1.0, &svec[3], &scratch[3]);

	ftos(is->tx, &tvec[0], &scratch[0]);
	ftos(is->ty, &tvec[1], &scratch[1]);
	ftos(is->tz, &tvec[2], &scratch[2]);
	ftos(1.0, &tvec[3], &scratch[3]);

	vload(r, svec);
	vload(r + 1, tvec);
}

/*
 * convert double precision (float and int) to short routines
 */

/*
 * ftos: split f into a fixed-point pair with 15 fraction bits.
 *
 *	f	value to convert
 *	si	receives the signed integer part (floor of the rounded value)
 *	sf	receives the fraction, always in 0 .. 0x7fff
 *
 * The value is scaled by 0x8000 and rounded to the nearest integer
 * (halves round up), so that si + sf/0x8000 is the closest
 * representable value for negative inputs as well as positive ones.
 */
void ftos(float f, short *si, short *sf)
{
	double	scaled;
	int	i;

	scaled = (double)f * 0x8000 + 0.5;	/* scale up for 15 frac bits */

	/*
	 * The cast truncates toward zero, which rounds negative values
	 * the wrong way; step down by one whenever it rounded up so the
	 * result is floor(scaled).
	 */
	i = (int)scaled;
	if ((double)i > scaled)
		i--;

	*sf = i & 0x7fff;		/* get the fraction, positive number */
	*si = i >> 15;			/* get the int, signed number */
}

/*
 * itos: split an already-scaled integer into the short pair used by
 * the VU registers.
 *
 *	i	value carrying 15 fraction bits
 *	si	receives i shifted right by 15 (signed integer part)
 *	sf	receives the low 15 bits of i (non-negative fraction)
 */
void itos(int i, short *si, short *sf)
{
	const int	frac_bits = 15;
	const int	frac_mask = 0x7fff;

	*sf = (short)(i & frac_mask);
	*si = (short)(i >> frac_bits);
}

/*
 * stof: rebuild a float from an (integer, fraction) short pair.
 *
 *	si	integer part
 *	sf	fraction, weighted by 1/0x8000
 *	f	receives si + sf/0x8000
 */
void stof(short si, short sf, float *f)
{
	float	whole = (float)si;
	float	frac = (float)sf / (float)0x8000;

	*f = whole + frac;
}

/*
 * vprintmtx: print eight consecutive registers starting at r as a 4x4
 * floating point matrix.
 *
 * Each output row comes from a register pair (integer parts, then
 * fractions); lanes 0..3 of the pair give the four columns. Values are
 * printed with "%8.3f " and every row ends with a newline.
 */
void vprintmtx(int r)
{
	int	row, lane;

	for (row = 0; row < 4; row++) {
		int	ireg = r + 2*row;	/* integer parts; fractions are in ireg + 1 */

		for (lane = 0; lane < 4; lane++) {
			float	val;

			stof(vu.vregs[lane][ireg], vu.vregs[lane][ireg + 1], &val);
			printf("%8.3f ", val);
		}
		printf("\n");
	}
}