/* (extraction artifacts removed: an embedded file name, file size, and a
   line-number gutter from the viewer were not part of the original source) */
/*
* Copyright (C) 1996-1998 by the Board of Trustees
* of Leland Stanford Junior University.
*
* This file is part of the SimOS distribution.
* See LICENSE file for terms of the license.
*
*/
#include <asm.h>
#include <sys/regdef.h>
#define CACH_SD 0x3 /* secondary data cache */
#define C_HINV 0x10 /* hit invalidate (all) */
#define C_HWBINV 0x14 /* hit writeback inv. (d, sd) */
/* These labels are made external so that I can set backdoor annotations
 on synchronization entry and exit.
 NOTE(review): the names declared here (_Solo_Barrier_End_1, _Solo_Barrier_End_2,
 _Solo_Lock_End, _Solo_Unlock_End) do not match the labels actually defined
 below (__Barrier_End_1, __Barrier_End_2, __Lock_End, __Unlock_End) -- confirm
 which spelling the simulator's annotations expect. */
.globl _Solo_Barrier_End_1
.globl _Solo_Barrier_End_2
.globl _Solo_Lock_End
.globl _Solo_Unlock_End
/*
* This file contains support routines for the solo runtime. It is
* intended to be linked in with the program being run. Note that
* it should be linked after all the application code and before
* libc.
*/
/*
 * unsigned long long _LoadDouble(unsigned long long *addr) ;
 *
 * Perform a load double from the specified address. Assumes a
 * "broken" ld instruction.
 *
 */
/* Returns the 64-bit value at *a0 in the v0/v1 register pair. */
LEAF(_LoadDouble)
/* This assumes ld does a load into v0 and v1 */
/* NOTE(review): ldc2 is used in place of ld -- presumably the simulator
 intercepts this coprocessor-2 load as a 64-bit load into v0/v1 (matches
 the "broken ld" note in the header); confirm against the simulator */
ldc2 v0,(a0)
j ra
END(_LoadDouble)
/*
 * void _StoreDouble(unsigned long long *addr, unsigned long long value) ;
 *
 * Perform a store double to the specified address. Assumes a
 * "broken" sd instruction.
 *
 */
/* Stores the 64-bit value held in the a2/a3 register pair to *a0. */
LEAF(_StoreDouble)
/* This assumes sd does a store from a2 and a3 */
/* NOTE(review): sdc2 is used in place of sd -- presumably the simulator
 intercepts this coprocessor-2 store as a 64-bit store from a2/a3; confirm
 against the simulator */
sdc2 a2, (a0)
j ra
END(_StoreDouble)
/*
* void _Sync(void)
*
* Do a sync instruction
*
*/
LEAF(_Sync)
sync	# memory barrier: complete all prior loads/stores before continuing
j ra
END(_Sync)
/*
* void _Prefetch(char *addr)
*
* Prefetch an address
*/
LEAF(_Prefetch)
pref 0,(a0)	# hint 0 = prefetch for a load
j ra
END(_Prefetch)
/*
* void _PrefetchX(char *addr)
*
* Prefetch exclusive an address
*/
LEAF(_PrefetchX)
pref 1,(a0)	# hint 1 = prefetch for a store (exclusive ownership)
j ra
END(_PrefetchX)
/*
* void _FlushLine(char *addr)
*/
/* Invalidate (without writeback) the secondary-data-cache line holding a0. */
LEAF(_FlushLine)
.set noreorder
cache CACH_SD|C_HINV, 0(a0)	# hit-invalidate in the secondary data cache
.set reorder
j ra
END(_FlushLine)
/*
* void _LockEnter(unsigned int *lockPtr);
*
* Acquire a lock. Two different versions are provided. By default,
* the backoff version is used
*/
#if 0
/* Disabled variant: plain LL/SC test-and-test-and-set spin lock with no
 backoff. Its labels (_Locktry, __Lock_End) would clash with the live
 version below, but only one of the two #if arms is ever compiled. */
LEAF(_LockEnter) # This version provides no backoff
_Locktry:
ll a1,(a0)	# a1 = *lockPtr (load-linked)
bne a1,zero,_Locktry	# spin (read-only) while the lock is held
li a2,1
sc a2,(a0)	# try to claim the lock; a2 = 1 iff the SC succeeded
beq a2,zero,_Locktry	# SC failed -- start over
__Lock_End:
j ra
END(_LockEnter)
#endif
#if 1
/* void _LockEnter(unsigned int *lockPtr) -- LL/SC spin lock that backs off
 only after a failed store-conditional (i.e. on observed contention). */
LEAF(_LockEnter)
/* This version is a smarter way to do exponential backoff: I only backoff when
the SC fails, so that I'm spinning with low latency until I really experience
contention */
/* a3 tracks the number of retries -- used for exponential backoff */
move a2,a0	# keep lockPtr in a2; a0 is needed for the syscall argument
li a3,0 # The backoff indicator
_Locktry:
ll a1,(a2)	# a1 = *lockPtr (load-linked)
bne a1,zero,_Locktry	# spin (read-only) while the lock is held
_Lockgrab:
li a0,1
sc a0,(a2)	# try to claim the lock; a0 = 1 iff the SC succeeded
bne a0,zero,__Lock_End
/*# if SC succeeds, finish up*/
/* Hardcode "pause" system call to implement random backoff for LL --
similar to the CheckPlatform implementation below */
_LLBackoff:
li v0, 1029	# hardcoded "pause" syscall number
move a0, a3 # Copy current backoff time
syscall
add a3,a3,1 # Bump backoff time
b _Locktry
__Lock_End:
j ra
END(_LockEnter)
#endif
/*
 * void _Unlock(unsigned int *lockPtr);
 *
 * Release a lock.
 *
 */
LEAF(_Unlock)
sync	# make the critical section's stores visible before releasing
sw zero,(a0)	# *lockPtr = 0
__Unlock_End:
j ra
END(_Unlock)
/*
 * Object:
 * _Barrier(Barrier *barrier, int num_procs) ;
 *
 * This provides an efficient barrier
 *
 * Arguments:
 * a0 barrier address (count of processes in the barrier )
 * a0 + 4 Null byte
 * a1 Number of waiters at the barrier
 */
/*
 * These offsets assume that lock+0 and lock+4 are in the same
 * cache line while lock+128 is in a different cache line.
 */
#define COUNT_OFFSET 0
#define PREFETCH_OFFSET 4
#define GEN_OFFSET 128 /* On next cache line */
/* #define PREFETCH_ON_LL */
/*
 * _Barrier(Barrier *barrier, int num_procs)
 */
/* Sense-reversing barrier: arrivals atomically bump a counter (which wraps
 at 2*num_procs so it can be reused back-to-back); every num_procs-th
 arrival bumps the generation word, releasing the spinners. */
LEAF(_Barrier)
sync	# complete this process's memory operations before entering
/*
 * Read the generation number for this barrier. This memory
 * location will be changed when all have arrived.
 */
lw t0, GEN_OFFSET(a0)	# t0 = the generation I am waiting to see change
loop:
/*
 * count = LOAD_LINKED(counter);
 * if (count == 2*num_proc) count = 0;
 * STORE_CONDITIONAL(counter, count);
 */
#ifdef PREFETCH_ON_LL
sb zero, PREFETCH_OFFSET(a0)	# touch the line to bring it in exclusive first
#endif
ll v0, COUNT_OFFSET(a0)
addu v0, v0, 1	# count this arrival
mul a2, a1, 2
bne v0, a2, 1f
move v0, zero	# wrap the counter at 2*num_procs
1:
move t1, v0	# t1 = my arrival number (SC destroys v0)
sc v0, COUNT_OFFSET(a0)
beq v0, 0, loop	# SC failed -- redo the increment
/*
 * If this is the last process to arrive, release the rest.
 * if ((count == 0) || (count == num_proc)) goto release
 */
beq t1, 0, release
beq t1, a1, release
/*
 * while(my_gen == *cur_gen) continue;
 */
wait:
lw t1, GEN_OFFSET(a0)
beq t0, t1, wait	# spin until the last arrival bumps the generation
__Barrier_End_1:
j ra
/*
 * cur_gen++; // Release all waiters
 */
release:
addu t0, t0, 1
sw t0, GEN_OFFSET(a0)	# new generation value releases every waiter
__Barrier_End_2:
j ra
END(_Barrier)
/* I've hardcoded "sync" to be the syscall of choice. Its number is
 1036, and I hardcode its return value in mipsy. */
LEAF(_CheckPlatform)
li v0, 1036	# hardcoded "sync" syscall; mipsy supplies the return value (in v0)
syscall
j ra
END(_CheckPlatform)
/*
 * void _Exit(void)
 *
 * Terminate the program via the hardcoded exit system call (1001).
 */
LEAF(_Exit)
li v0, 1001	# hardcoded exit syscall number
syscall
j ra	# not reached if the syscall terminates the process
END(_Exit)
/*
* int _FetchAndStore(int *d, int val)
* Atomically store val into the memory pointed to by d, and return the value
* previously held by d
*/
/*
 * int _FetchAndStore(int *d, int val)
 *
 * In:   a0 = d, a1 = val
 * Out:  v0 = previous value of *d
 * Atomically (via LL/SC) store val into *d and return the old value,
 * backing off through the hardcoded "pause" syscall when the SC fails.
 * noreorder: the delay-slot layout below is deliberate.
 */
LEAF(_FetchAndStore)
.set noreorder
li a3,0 # The backoff indicator (retry count)
move a2,a0 # keep d in a2; a0 is needed for the syscall argument
pref 1,(a2) # Prefetch exclusive the lock destination
_FASTry:
add t1, a1, zero # t1 = val (SC destroys its source register)
ll t0,(a2) # Get old value
sc t1,(a2) # Store new value; t1 = 1 iff the SC succeeded
nop
bne t1,zero,_FASEnd # Leave if SC succeeded
/* Hardcode "pause" system call to implement random backoff for LL --
similar to the CheckPlatform implementation above. The li below sits in
the branch delay slot; harmless on the taken path since _FASEnd sets v0. */
## Exponential backoff
li v0, 1029
move a0, a3 # Copy current backoff time
syscall # Back off a random amount
add a3,a3,1 # Bump backoff time
b _FASTry
_FASEnd:
add v0, t0, zero # return old value
j ra
nop
END(_FetchAndStore)
.set reorder
/*
* int _CompareAndSwap(int *d, int old, int new)
* Atomically perform this operation:
* if (*d == old) { *d = new; return 1;} else { return 0; }
*/
/*
 * int _CompareAndSwap(int *d, int old, int new)
 *
 * In:   a0 = d, a1 = old, a2 = new
 * Out:  v0 = 1 if the swap happened, 0 if *d != old
 * Atomically (via LL/SC): if (*d == old) { *d = new; return 1; } return 0;
 * A failed SC (reservation lost, not a value mismatch) backs off through
 * the hardcoded "pause" syscall and retries.
 * noreorder: the delay-slot layout below is deliberate.
 */
LEAF(_CompareAndSwap)
.set noreorder
li a3,0 # The backoff indicator (retry count)
move t2,a0 # keep d in t2; a0 is needed for the syscall argument
pref 1,(t2) # Prefetch exclusive the lock destination
_CASTry:
add t1, a2, zero # t1 = new (SC destroys its source register)
ll t0,(t2) # Get old value
nop
bne t0, a1, _CASFail
# Check if *d == old
nop
sc t1,(t2) # Store new value; t1 = 1 iff the SC succeeded
nop
bne t1,zero,_CASEnd # Leave if SC succeeded
/* Hardcode "pause" system call to implement random backoff for LL --
similar to the CheckPlatform implementation above. The li below sits in
the branch delay slot; harmless on the taken path since _CASEnd sets v0. */
## Exponential backoff
li v0, 1029
move a0, a3 # Copy current backoff time
syscall # Back off a random amount
add a3,a3,1 # Bump backoff time
b _CASTry
_CASEnd:
li v0, 1 # Return success
j ra
nop
_CASFail:
add v0, zero, zero # Return failure
j ra
nop
END(_CompareAndSwap)
.set reorder
/*
 * void _CountDown(int n)
 *
 * In:  a0 = n
 * Busy-wait by decrementing the argument down to zero; returns
 * immediately when n <= 0 on entry.
 */
LEAF(_CountDown)
.set noreorder
beq a0, zero, _CountDone	# nothing to do for n == 0
nop	# branch delay slot
_CountLoop:
addu a0, a0, -1	# n--
bgtz a0, _CountLoop	# keep spinning while n > 0
nop	# branch delay slot
_CountDone:
j ra
nop	# delay slot
END(_CountDown)
.set reorder
/*
* Object:
* LibcStart()
*
* Set a marker for the start of the libc functions.
*
*/
/* Empty marker routine: its address marks where the libc functions begin
 (the simulator treats calls past this point specially -- see the warning
 comment below; this must remain the last function in the file). */
LEAF(LibcStart)
j ra
END(LibcStart);
/* WARNING - WARNING: LibcStart MUST be the last function in this
* file. If you add a function after this it will cause the
* the simulator to acquire the libc mutex when it is called.
*/