msp_light.txt 2.12 KB

light

/* ??? need better specular exponent. linear approximation? */
/* piecewise linear approximation: each segment is
	vmuls( cos, a, N, 0);		segment = cos * a + b
	vmacs( con1, b, N, segN);
	...
	vcmp_le( cos, c, N);		use seg when cos less than threshold
	vmerge( cos, segN, out);
   or about 4 clocks per segment (except first is cos and last is zero)
	so 5 segments is only 15 clocks + noop on out */

/* register usage */
	nx, ny, nz;	/* vectors of normal vector components */
	rx, ry, rz;	/* vectors of reflection vector components */
	mtxa, mtxb, mtxc;	/* rotation matrix */
	lit;		/* light vector */
	amb, dif, spc;	/* component colors weighted by contribution */
	con1, con2;	/* constants: con1 = 1, con2 = -2 (see the "+ 1" and "nz * -2" uses below) */
	cos;		/* cosine: first light . normal, later overwritten with light . reflect */
	or, og, ob;	/* output color */

/* load, can be parallel or reuse */
        vload( inputx, nx);
	/* repeat ny, nz */
        vload( matrixa, mtxa);
	/* repeat b, c */
        vload( light, lit);
	/* repeat amb, dif, spc, con1, con2 */
/* transform: rotate the normal by the matrix, one output component per
   three-term multiply-accumulate (x, y and z terms) */
	vmuls( nx, mtxa, 0, 0);
	vmacs( ny, mtxb, 0, 0);
	vmacs( nz, mtxc, 0, nx);	/* z term of the sum; result -> nx */
	/* repeat ny, nz */
/* compute reflection vector at infinity */
	/* 2 noop */
	vmuls( nz, con2, 0, rz);	/* nz * -2 */
/* dot light and normal */
	/* 3 noop for nx */ 
	vmuls( nz, lit, 2, 0);
	vmacs( ny, lit, 1, 0);
	vmacs( nx, lit, 0, cos);	/* cos = light . normal */
/* reflection  vector */
	/* 2 noop for rz */
	vmul( nx, rz, rx);		/* rx = nx * (nz * -2) */
	vmul( ny, rz, ry);		/* ry = ny * (nz * -2) */
	vmul( nz, rz, 0);
	vmacs( con1, con1, 1, rz);	/* nz * rz + 1 */
/* diffuse color */
	/* 2 noop for cos */
	/* NOTE(review): amb is loaded but never used in this sketch; the
	   con1 * dif terms below may have been meant to be con1 * amb
	   (ambient + cos * diffuse) -- confirm before relying on this */
	vmuls( cos, dif, 0, 0);
	vmacs( con1, dif, 0, or);
	vmuls( cos, dif, 1, 0);
	vmacs( con1, dif, 1, og);
	vmuls( cos, dif, 2, 0);
	vmacs( con1, dif, 2, ob);
/* dot light and reflect */
	vmuls( rx, lit, 0, 0);
	vmacs( ry, lit, 1, 0);
	vmacs( rz, lit, 2, cos);	/* cos is reused: now light . reflect */
/* specular exponent ?? */
	/* 8 noop for cos */
	vstore( cos);
	/* su does N times (load, add, load, store) table lookup */
	vload( cos);
/* specular color */
	/* 6 clocks */
/* store color */
	/* 6 noop for out */
	/* 3 clocks */ 

/* totals
	load	11	can be parallel 
	trans	 9
	refl	 5
	dif	 9
	exp	34?	with linear approx?
	spc	 9
	store	 3
	noop	23

	total	94	(NOTE: the rows above sum to 103 -- recheck which figure is stale)
or about 12 clocks per vertex if 8 in parallel
*/