msp_light.txt
2.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
light
/* ??? need better specular exponent. linear approximation? */
/* piecewise linear approximation: each segment is */
vmuls( cos, a, N, 0); /* segment = cos * a + b; */
vmacs( con1, b, N, segN);
...
vcmp_le( cos, c, N); /* use seg when cos less than threshold */
vmerge( cos, segN, out);
/* or about 4 clocks per segment (except first is cos and last is zero)
so 5 segments is only 15 clocks + noop on out */
/* register usage */
nx, ny, nz; /* vectors of normal vector components */
rx, ry, rz; /* vectors of reflection vector components */
mtxa, mtxb, mtxc; /* rotation matrix */
lit; /* light vector */
amb, dif, spc; /* component colors weighted by contribution */
con1, con2; /* constants 1, -2 */
cos; /* cosine */
or, og, ob; /* output color */
/* load, can be parallel or reuse */
vload( inputx, nx);
/* repeat ny, nz */
vload( matrixa, mtxa);
/* repeat b, c */
vload( light, lit);
/* repeat amb, dif, spc, con1, con2 */
/* transform */
vmuls( nx, mtxa, 0, 0);
vmacs( ny, mtxb, 0, 0);
vmacs( nz, mtxc, 0, nx);
/* repeat ny, nz */
/* compute reflection vector at infinity */
/* 2 noop */
vmuls( nz, con2, 0, rz); /* nz * -2 */
/* dot light and normal */
/* 3 noop for nx */
vmuls( nz, lit, 2, 0);
vmacs( ny, lit, 1, 0);
vmacs( nx, lit, 0, cos);
/* reflection vector */
/* 2 noop for rz */
vmul( nx, rz, rx);
vmul( ny, rz, ry);
vmul( nz, rz, 0);
vmacs( con1, con1, 1, rz); /* nz * rz + 1 */
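/* diffuse color (NOTE(review): amb is loaded but never used; the vmacs lines below likely intend con1 * amb, i.e. cos * dif + amb -- confirm) */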
/* 2 noop for cos */
vmuls( cos, dif, 0, 0);
vmacs( con1, dif, 0, or);
vmuls( cos, dif, 1, 0);
vmacs( con1, dif, 1, og);
vmuls( cos, dif, 2, 0);
vmacs( con1, dif, 2, ob);
/* dot light and reflect */
vmuls( rx, lit, 0, 0);
vmacs( ry, lit, 1, 0);
vmacs( rz, lit, 2, cos);
/* specular exponent ?? */
/* 8 noop for cos */
vstore( cos);
/* su does N times (load, add, load, store) table lookup */
vload( cos);
/* specular color */
/* 6 clocks */
/* store color */
/* 6 noop for out */
/* 3 clocks */
/* totals
load 11 can be parallel
trans 9
refl 5
dif 9
exp 34? with linear approx?
spc 9
store 3
noop 23
total 94 (NOTE(review): the items listed above sum to 103 -- confirm which figure is stale)
or about 12 clocks per vertex if 8 in parallel
*/