recovery.tcl
11.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
#
# Copyright (C) 1996-1998 by the Board of Trustees
# of Leland Stanford Junior University.
#
# This file is part of the SimOS distribution.
# See LICENSE file for terms of the license.
#
###
### Recovery statistics -- used for measuring recovery perf
###
### Dan Teodosiu, Apr 1996
### Revision history:
### - brought up to date (Dan Teodosiu, Sep. 1996)
### - small updates, dieset injection (DT, 02/97)
###
### Before sourcing this script, you should set the following variables:
### recoveryWatchOnly -- watch only, don't force recovery
### recoveryKickFast -- fast-kick recovery
### recoveryDebugRc -- add debugging annotations
### recoverySwitch -- switch modes:
### 0 -> off
### 1 -> when OS recovery starts
### 2 -> after OS recovery complete
### recoverySwitchTo -- EMBRA_PAGE, ..., EXIT
### recoveryDieSet -- dieset (optional, defaults to 0)
###
# On simulator entry, make the kernel's recovery code quiet: we don't
# want to waste time in printf while measuring recovery.
annotation set simos enter {
    # Only CPU 0 does the work; it clears the flag in every cell.
    if {$CPU == 0} {
        for {set i 0} {$i < $PARAM(HIVE.NumCells)} {incr i} {
            # Look up &verbose_recovery in cell $i's kernel and store 0
            # at that location.
            set vrp [symbol read "kernel$i:hive_recovery.c:&verbose_recovery"]
            set MEMORY($vrp) 0
        }
    }
}
# Global bookkeeping shared by the annotations below.
set rfn 0      ;# number of recovery_finished events seen so far
set rtrigd 0   ;# becomes 1 once recovery has been force-triggered
set nsted 0    ;# number of "cfe" exec's observed so far

# Map each CPU number to a cell number: CPUs are divided evenly, in
# order, among the cells.
set cpusPerCell [expr {$PARAM(CPU.Count) / $PARAM(HIVE.NumCells)}]
for {set i 0} {$i < $PARAM(CPU.Count)} {incr i} {
    set CPUtoCell($i) [expr {$i / $cpusPerCell}]
}
# getLS --
#   Read CPA->ls<c> from the kernel of cell n for every cell index c and
#   collect the indices whose entry has its low bit set.
#
# Arguments:
#   n   cell whose kernel image is queried
#
# Returns:
#   A list of cell indices in ascending order (empty if none match).
proc getLS {n} {
    global PARAM

    set liveCells ""
    set idx 0
    while {$idx < $PARAM(HIVE.NumCells)} {
        set entry [symbol read "kernel$n:hive.h:CPA->ls<$idx>"]
        if {($entry & 0x1) != 0} {
            lappend liveCells $idx
        }
        incr idx
    }
    return $liveCells
}
# printStats --
#   Write the recovery timing summary for one cell to the console.  All
#   values are differences between the cycle timestamps recorded by the
#   annotations in this file.
#
# Arguments:
#   i   cell number whose statistics are printed
proc printStats {i} {
    global PARAM rk ls lf rs rf rb1 ra1 rb2 ra2 frs fre ps pe recoveryWatchOnly

    console " RECOVERY stats for cell $i:\n"

    # Time from the recovery kick to the start of LSET.
    set kickToLset [expr {$ls($i) - $rk($i)}]
    console "\tLatency (kick->lset): $kickToLset\n"

    # Durations of the LSET and RECV phases, and LSET start to RECV end.
    set lsetTime [expr {$lf($i) - $ls($i)}]
    set recvTime [expr {$rf($i) - $rs($i)}]
    set totalTime [expr {$rf($i) - $ls($i)}]
    console "\tLSET: $lsetTime RECV: $recvTime Total Latency: $totalTime\n"

    # One latency per flood round, rounds numbered 1..NumCells.
    console "\t\tRound latencies: "
    for {set round 1} {$round <= $PARAM(HIVE.NumCells)} {incr round} {
        set roundTime [expr {$fre($i,$round) - $frs($i,$round)}]
        console "$roundTime "
    }

    set pingTime [expr {$pe($i) - $ps($i)}]
    console "\n\t\tFirst ping round latency: $pingTime\n"

    set b1Time [expr {$ra1($i) - $rb1($i)}]
    console "\tB1: Start: $rb1($i) Latency: $b1Time\n"

    set b2Time [expr {$ra2($i) - $rb2($i)}]
    console "\tB2: Start: $rb2($i) Latency: $b2Time\n"
}
# die --
#   Report an unexpected condition on the console, tagged with the
#   current CPU, its cell and the current cycle count.  Despite the
#   name, this does not terminate the simulation (see the note below).
#
# Arguments:
#   mess   text describing the problem
proc die {mess} {
    global CPU CPUtoCell CYCLES

    set cell $CPUtoCell($CPU)
    console "RECV STATS CPU=$CPU cell=$cell cyc=$CYCLES: $mess\n"
    ### Note: due to a bug in annotations code, an annotation may fire more
    ### than once on a given instruction, if the instruction takes an
    ### exception (s.a. a TLB miss). Thus, we only print a warning
    ### but do not exit here.
    ### exit
}
### Why this kludge? Because cpuEnter EMBRA_PAGE is currently broken if
### called from a PC annotation, so the switch is deferred to a cycle
### annotation instead.
set enterSimlPending 0

# enterSiml --
#   Schedule a switch to the CPU model named by $recoverySwitchTo.  The
#   switch is performed by a cycle annotation installed 10000 cycles
#   after the current cycle.  Only the first call schedules anything;
#   later calls are no-ops because enterSimlPending is set to 1.
#
# Arguments:
#   none (reads the globals CYCLES and recoverySwitchTo)
proc enterSiml {} {
    global CYCLES enterSimlPending recoverySwitchTo PARAM a0 pc ra

    if {$enterSimlPending == 0} {
        # Report the actual target mode.  The "$" used to be backslash-
        # escaped here, which printed the variable name instead.
        console "RECOVERY: preparing to switch back to $recoverySwitchTo...\n"
        annotation set cycle [expr {$CYCLES + 10000}] {
            console "RECOVERY: switching back to $recoverySwitchTo...\n"
            for {set i 0} {$i < $PARAM(CPU.Count)} {incr i} {
                console "CPU $i: a0=$a0, pc=$pc, ra=$ra\n"
            }
            cpuEnter $recoverySwitchTo
        }
        set enterSimlPending 1
    }
}
# Zero every per-cell timestamp before any annotation fires.  A value of
# 0 means "not recorded yet"; the annotations below test for it.
for {set i 0} {$i < $PARAM(HIVE.NumCells)} {incr i} {
    set rk($i) 0    ;# cycle at hive_kick_recovery_init:END
    set ls($i) 0    ;# cycle at hive_LSET:lset_started
    set lf($i) 0    ;# cycle at hive_LSET:lset_finished
    set rs($i) 0    ;# cycle at hive_RECV:recovery_started
    set rf($i) 0    ;# cycle at hive_RECV:recovery_finished
    set rb1($i) 0   ;# cycle at hive_RECV:recovery_before_1
    set ra1($i) 0   ;# cycle at hive_RECV:recovery_after_1
    set rb2($i) 0   ;# cycle at hive_RECV:recovery_before_2
    set ra2($i) 0   ;# cycle at hive_RECV:recovery_after_2
    # initialize flood-round times
    set ps($i) 0    ;# cycle at activate_ping:START
    set pe($i) 0    ;# cycle at activate_ping:END
    set rnd($i) 0   ;# current flood round, bumped at flip_rounds:START
    # rnd starts at 0, so round 0 is initialised as well as rounds
    # 1..NumCells.  Otherwise a flood annotation firing before the
    # first flip_rounds would read an unset array element and fail.
    for {set j 0} {$j <= $PARAM(HIVE.NumCells)} {incr j} {
        set frs($i,$j) 0
        set fre($i,$j) 0
    }
}
#
# Set the following procs MUSTRUN on CPU 0: PING, RECV, LSET
# This should save a lot of $ misses on Flash
#
# Done once, by CPU 0, on simulator entry, and only when recovery is
# being forced (recoveryWatchOnly == 0).
#
annotation set simos enter {
    if {$recoveryWatchOnly == 0 && $CPU == 0} {
        for {set i 0} {$i < $PARAM(HIVE.NumCells)} {incr i} {
            # proc<6> is PING
            symbol set "kernel$i:proc.h:proc<6>.p_mustrun" 0
            # proc<7> is LSET
            symbol set "kernel$i:proc.h:proc<7>.p_mustrun" 0
            # proc<8> is RECV
            symbol set "kernel$i:proc.h:proc<8>.p_mustrun" 0
        }
    }
}
# Recovery kick: remember the cycle of the first kick seen on each cell
# and, when recoverySwitch is 1, leave the current mode at this point.
annotation set pc kernel:hive_recovery.c:hive_kick_recovery_init:END {
    log "RECV $CPU: $CYCLES hive_kick_recovery_init:END\n"

    # Later kicks on the same cell keep the first timestamp.
    if {$rk($CPUtoCell($CPU)) == 0} {
        set rk($CPUtoCell($CPU)) $CYCLES
    }

    # recoverySwitch is optional; 1 means "switch when OS recovery starts".
    if {[info exists recoverySwitch] && $recoverySwitch == 1} {
        if {$recoverySwitchTo == "EXIT"} {
            exit
        } elseif {$PARAM(CPU.Model) != $recoverySwitchTo} {
            enterSiml
        }
    }
}
# Each annotation below logs the event and stores the current cycle in
# the per-cell array for that event.  A non-zero stored value means the
# event was already seen on this cell, which is reported through die.

# LSET phase begins.
annotation set pc kernel:hive_recovery.c:hive_LSET:lset_started {
    log "RECV $CPU: $CYCLES hive_LSET:lset_started\n"
    if {$ls($CPUtoCell($CPU)) != 0} {
        die "ls already set"
    }
    set ls($CPUtoCell($CPU)) $CYCLES
}

# LSET phase ends.
annotation set pc kernel:hive_recovery.c:hive_LSET:lset_finished {
    log "RECV $CPU: $CYCLES hive_recovery.c:hive_LSET:lset_finished\n"
    if {$lf($CPUtoCell($CPU)) != 0} {
        die "lf already set"
    }
    set lf($CPUtoCell($CPU)) $CYCLES
}

# RECV phase begins.
annotation set pc kernel:hive_recovery.c:hive_RECV:recovery_started {
    log "RECV $CPU: $CYCLES hive_RECV:recovery_started\n"
    if {$rs($CPUtoCell($CPU)) != 0} {
        die "rs already set"
    }
    set rs($CPUtoCell($CPU)) $CYCLES
}
# RECV phase ends on a cell: record the time, print that cell's
# statistics, and detect when every cell in the live set has finished.
annotation set pc kernel:hive_recovery.c:hive_RECV:recovery_finished {
    set cell $CPUtoCell($CPU)
    log "RECV $CPU: $CYCLES hive_RECV:recovery_finished\n"
    if {$rf($cell) != 0} {
        die "rf already set"
    }
    set rf($cell) $CYCLES
    incr rfn

    printStats $cell
    set lset [getLS $cell]
    console " RECOVERY LS($cell) is $lset\n"

    # Recovery is complete once the number of finished cells equals the
    # size of this cell's live set.
    if {$rfn == [llength $lset]} {
        console " RECOVERY: recovery complete on all live cells.\n"

        # recoverySwitch is optional; 2 means "switch after OS recovery
        # is complete".
        if {[info exists recoverySwitch] && $recoverySwitch == 2} {
            if {$recoverySwitchTo == "EXIT"} {
                exit
            } elseif {$PARAM(CPU.Model) != $recoverySwitchTo} {
                enterSiml
            }
        }

        if {$recoveryWatchOnly == 0} {
            ### comment the following line out if you don't wish to exit
            exit
        }
    }
}
# Timestamps around the two marked regions inside hive_RECV (reported
# by printStats as B1 and B2).  Each annotation logs the event, reports
# a repeated event through die, and stores the current cycle.

# Region 1 begins.
annotation set pc kernel:hive_recovery.c:hive_RECV:recovery_before_1 {
    log "RECV $CPU: $CYCLES hive_RECV:recovery_before_1\n"
    if {$rb1($CPUtoCell($CPU)) != 0} {
        die "rb1 already set"
    }
    set rb1($CPUtoCell($CPU)) $CYCLES
}

# Region 1 ends.
annotation set pc kernel:hive_recovery.c:hive_RECV:recovery_after_1 {
    log "RECV $CPU: $CYCLES hive_RECV:recovery_after_1\n"
    if {$ra1($CPUtoCell($CPU)) != 0} {
        die "ra1 already set"
    }
    set ra1($CPUtoCell($CPU)) $CYCLES
}

# Region 2 begins.
annotation set pc kernel:hive_recovery.c:hive_RECV:recovery_before_2 {
    log "RECV $CPU: $CYCLES hive_RECV:recovery_before_2\n"
    if {$rb2($CPUtoCell($CPU)) != 0} {
        die "rb2 already set"
    }
    set rb2($CPUtoCell($CPU)) $CYCLES
}

# Region 2 ends.
annotation set pc kernel:hive_recovery.c:hive_RECV:recovery_after_2 {
    log "RECV $CPU: $CYCLES hive_RECV:recovery_after_2\n"
    if {$ra2($CPUtoCell($CPU)) != 0} {
        die "ra2 already set"
    }
    set ra2($CPUtoCell($CPU)) $CYCLES
}
# Entry to activate_ping: store the cycle in ps (printStats reports
# pe - ps as the first ping round latency).
annotation set pc kernel:hive_recovery.c:activate_ping:START {
    log "RECV $CPU: $CYCLES activate_ping gen $a0\n"
    if {$ps($CPUtoCell($CPU)) != 0} {
        die "ps already set"
    }
    set ps($CPUtoCell($CPU)) $CYCLES
}

# Exit from activate_ping: store the cycle in pe.
annotation set pc kernel:hive_recovery.c:activate_ping:END {
    log "RECV $CPU: $CYCLES activate_ping END\n"
    if {$pe($CPUtoCell($CPU)) != 0} {
        die "pe already set"
    }
    set pe($CPUtoCell($CPU)) $CYCLES
}
# Flood-round timing.  rnd(cell) is the round currently in progress on
# a cell; frs/fre hold the start and end cycle of each round.

# Round start: taken from the first flood_send_message call with
# a0 == 0 in the current round.
annotation set pc kernel:hive_recovery.c:flood_send_message:START {
    set cell $CPUtoCell($CPU)
    if {$a0 == 0 && $frs($cell,$rnd($cell)) == 0} {
        log "RECV $CPU: $CYCLES starting round $rnd($cell)\n"
        set frs($cell,$rnd($cell)) $CYCLES
    }
}

# Round end: taken from the first time flood_round_complete exits with
# v0 == 1 in the current round.
annotation set pc kernel:hive_recovery.c:flood_round_complete:END {
    set cell $CPUtoCell($CPU)
    if {$v0 == 1 && $fre($cell,$rnd($cell)) == 0} {
        log "RECV $CPU: $CYCLES done round $rnd($cell)\n"
        set fre($cell,$rnd($cell)) $CYCLES
    }
}

# Advance the cell to its next round.
annotation set pc kernel:hive_recovery.c:flip_rounds:START {
    set cell $CPUtoCell($CPU)
    incr rnd($cell)
}
# Slow-kick mode: recovery is forced only after enough "cfe" processes
# have been exec'd.
if {$recoveryWatchOnly == 0 && $recoveryKickFast == 0} {
    ### the following triggers recovery on the next clock tick after
    ### all the compiles have started
    annotation set pc kernel::exece:END {
        # Count cfe exec's until recovery has been triggered once.
        if {$rtrigd == 0 && "$PROCESS($CPU)" == "cfe"} {
            console "RECV $CPU: cfe exec'd\n"
            incr nsted
            # Trigger once the count reaches the number of CPUs.
            if {$nsted >= $PARAM(CPU.Count)} {
                set vrp [symbol read "kernel0:hive_recovery.c:&force_recovery"]
                ### force recovery on next clock tick on cell 0
                set MEMORY($vrp) 1
                console "RECV TRIGGERED: $CYCLES\n"
                set rtrigd 1
            }
        }
    }
}
# Dieset injection: when recoveryDieSet has been set, it replaces the
# value SimosGetDieSet leaves in v0.
annotation set pc kernel:SIMMPasm.s:SimosGetDieSet:END {
    if {[info exists recoveryDieSet]} {
        # Smash v0 with dieset (bitmap of who's alive here).
        # This controls rebooting.
        set v0 $recoveryDieSet
    }
}
# Fast-kick mode: force recovery as soon as the simulator is entered,
# without waiting for any workload to start.
if {$recoveryWatchOnly == 0 && $recoveryKickFast != 0} {
    annotation set simos enter {
        # Only CPU 0 writes the flag.
        if {$CPU == 0} {
            set vrp [symbol read "kernel0:hive_recovery.c:&force_recovery"]
            ### force recovery on next clock tick on cell 0
            set MEMORY($vrp) 1
            console "RECV TRIGGERED: $CYCLES\n"
        }
    }
}
# Optional debugging annotations, installed only when recoveryDebugRc
# is non-zero.  Each one writes a line to the log when the named
# routine in hive_recovery.c is entered.
if {$recoveryDebugRc != 0} {
    annotation set pc kernel:hive_recovery.c:hivePingStub:START {
        set cell $CPUtoCell($CPU)
        log "RECV $CPU: $CYCLES hivePingStub\n"
        # Disabled: decode the incoming ping message and the matching
        # per-cell ping state, then log both.
        if {0} {
            set pm [symbol read "kernel0:hrpc.h:&((HrpcMessageType*)$a0)->m<0>"]
            set f [symbol read "kernel0:hive_recovery.c:((PingMessage*)$pm)->from"]
            set s [symbol read "kernel0:hive_recovery.c:((PingMessage*)$pm)->serial"]
            set r [symbol read "kernel0:hive_recovery.c:((PingMessage*)$pm)->round"]
            set g [symbol read "kernel0:hive_recovery.c:((PingMessage*)$pm)->gen"]
            set cp [symbol read "kernel$cell:hive_recovery.c:&(p.c<$f>)"]
            set cs [symbol read "kernel0:hive_recovery.c:((PingCell*)$cp)->p_serial"]
            set cr [symbol read "kernel0:hive_recovery.c:((PingCell*)$cp)->p_round"]
            log "RECV $CPU: (f=$f s=$s r=$r g=$g) ($cs $cr)\n"
        }
    }

    annotation set pc kernel:hive_recovery.c:send_ping_message:START {
        log "RECV $CPU: $CYCLES send_ping_message to $a0 is_ping $a1\n"
    }

    annotation set pc kernel:hive_recovery.c:flood_send_message:START {
        log "RECV $CPU: $CYCLES flood_send_message to $a0\n"
    }

    annotation set pc kernel:hive_recovery.c:hiveFloodStub:START {
        log "RECV $CPU: $CYCLES hiveFloodStub\n"
    }

    annotation set pc kernel:hive_recovery.c:flood_heartbeat:START {
        log "RECV $CPU: $CYCLES flood_heartbeat\n"
    }

    annotation set pc kernel:hive_recovery.c:flip_rounds:START {
        log "RECV $CPU: $CYCLES flip_rounds\n"
    }

    annotation set pc kernel:hive_recovery.c:activate_ping:START {
        log "RECV $CPU: $CYCLES activate_ping gen $a0\n"
    }

    annotation set pc kernel:hive_recovery.c:initialize_flood:START {
        log "RECV $CPU: $CYCLES initialize_flood\n"
    }

    annotation set pc kernel:hive_recovery.c:hive_flood:START {
        log "RECV $CPU: $CYCLES hive_flood\n"
    }

    annotation set pc kernel:hive_recovery.c:deactivate_ping:START {
        log "RECV $CPU: $CYCLES deactivate_ping\n"
    }

    annotation set pc kernel:hive_recovery.c:ping_set_cell_state:START {
        log "RECV $CPU: $CYCLES ping_set_cell_state [hex $a0] $a1\n"
    }
}
# Report on the console which settings this script was installed with.
console " RECOVERY installed:\n"
console " recoveryWatchOnly = $recoveryWatchOnly\n"
console " recoveryKickFast = $recoveryKickFast\n"
console " recoveryDebugRc = $recoveryDebugRc\n"
# The switch settings are optional and shown only when recoverySwitch
# is set.
if {[info exists recoverySwitch]} {
    console " recoverySwitch = $recoverySwitch\n"
    console " recoverySwitchTo = $recoverySwitchTo\n"
}