userflush.c
8.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
/*
* Copyright (C) 1996-1998 by the Board of Trustees
* of Leland Stanford Junior University.
*
* This file is part of the SimOS distribution.
* See LICENSE file for terms of the license.
*
*/
/* ****************************************************
* File to maintain cache consistency after DCG
*
* Highly platform-specific. Only sgi and alpha versions
* implemented so far.
* ****************************************************/
#include <stdio.h>
#ifdef sgi
#include <sys/cachectl.h>
#include <bstring.h>
#endif
#ifdef __alpha
#include <alpha/inst.h>
#include <machine/pal.h>
#endif
#include <errno.h>
#include <strings.h>
#include "sim_error.h"
#include "userflush.h"
extern int errno;
/* ************************************************************
* sgi version
* ************************************************************/
#ifdef sgi
/*
 * Geometry of the primary cache handled by the sgi code below, plus the
 * MIPS instruction words used to build the executable flush buffers.
 */
#define LOG2_CACHE_SIZE 15
#define CACHE_LINE_SIZE 16
/* Return instruction stored at the very end of each flush buffer. */
#define JMP_RA_INST 0x03e00008
/* add a0,a0,-CACHE_LINE_SIZE (the negative immediate is cut to 16 bits). */
#define ADDIU_LINESIZE_INT (0x24840000|((-CACHE_LINE_SIZE)&0xffff))
/* beq a0,zero,<offset>; the word offset is OR-ed into the low bits. */
#define BEQZ_A0_XXX_INST 0x10800000
#define CACHE_SIZE (1<<LOG2_CACHE_SIZE)
/*
 * Line-aligned offset of address _va inside a CACHE_SIZE window.
 * The argument is parenthesized so that an expression argument such as
 * CACHE_INDEX(a | b) is masked as a whole instead of being re-associated
 * by operator precedence.
 */
#define CACHE_INDEX(_va) ((_va) & ((CACHE_SIZE-1)&~(CACHE_LINE_SIZE-1)))
/*
 * Backing store for the two flush buffers.  Each array is 2*CACHE_SIZE+8
 * bytes so that a CACHE_SIZE-aligned region of CACHE_SIZE+8 bytes always
 * fits inside it (see usercacheinit).
 */
static char flushbuffermem[CACHE_SIZE*2+8];
static char flushbuffermem2[CACHE_SIZE*2+8];
/* Addresses of the aligned flush buffers, filled in by usercacheinit(). */
unsigned int flushbuffer;
unsigned int flushbuffer2;
/* Non-zero once usercacheflush() has run usercacheinit(). */
static int userflush_init = 0;
/*
 * Flush a range through the system cacheflush() routine instead of the
 * user-level flush buffers.
 *
 * addr, len, cache  passed to cacheflush() unchanged.
 *
 * Returns whatever cacheflush() returns.
 */
int slowcacheflush(void *addr, int len, int cache)
{
    int status;

    status = cacheflush(addr, len, cache);
    return status;
}

/* Defined further down in this file; declared here for earlier callers. */
int flushrange(unsigned int start, int nbytes, int cache);
/*
* This is a user level version of the cacheflush routine for mshade. Note
* that it only works on the R4000 & R4400 and it only flushes the
* primary caches.
*/
/*
 * Lay out one executable flush buffer inside `mem` and return its address.
 *
 * The buffer begins at the first CACHE_SIZE-aligned address above `mem`
 * and spans CACHE_SIZE+8 bytes.  It is zeroed first (NOPs); then the
 * first two words of every cache line become
 *      add a0,a0,-LSIZE
 *      beq a0,zero, ret
 * and the word at offset CACHE_SIZE receives the return instruction.
 */
static unsigned int build_flush_buffer(char *mem)
{
    unsigned int base;
    int *code;
    int off;

    base = (unsigned int)
        ((((unsigned int)mem) + CACHE_SIZE) & ~(CACHE_SIZE-1));
    code = (int *)base;

    bzero((char *) base, CACHE_SIZE+8); /* Fill with NOPs */

    for (off = 0; off < CACHE_SIZE; off += CACHE_LINE_SIZE) {
        int slot = off/sizeof(unsigned int);

        code[slot] = ADDIU_LINESIZE_INT;
        code[slot+1] =
            BEQZ_A0_XXX_INST|((CACHE_SIZE-(off+8))/sizeof(unsigned int));
    }
    code[CACHE_SIZE/sizeof(unsigned int)] = JMP_RA_INST;

    return base;
}

/*
 * Build both flush buffers and flush them from the caches so that the
 * freshly written instructions can be executed.  A failing flush is
 * reported with perror() and otherwise ignored.
 */
void usercacheinit(void) {
    flushbuffer  = build_flush_buffer(flushbuffermem);
    flushbuffer2 = build_flush_buffer(flushbuffermem2);

    if (slowcacheflush((void *)flushbuffer, CACHE_SIZE+8, BCACHE) < 0) {
        perror("flushing cache in userflush init");
    }
    if (cacheflush((void *)flushbuffer2, CACHE_SIZE+8, BCACHE) < 0) {
        perror("flushing cache in userflush init");
    }
}
/*
 * User-level cache flush for the range [addr, addr+nbytes).
 *
 * addr    start of the region to flush
 * nbytes  length of the region in bytes
 * cache   which cache(s) to flush; handed to flushrange() unchanged
 *
 * The flush buffers are built on the first call.  Always returns 0.
 */
int
usercacheflush(void *addr, int nbytes, int cache)
{
    unsigned int first_line, past_last;
    unsigned int index;

    /* One-time construction of the flush buffers. */
    if (!userflush_init) {
        usercacheinit();
        userflush_init = 1;
    }

    /*
     * Widen the range to whole cache lines: the start is rounded down
     * and the end is rounded up to a cache line boundary.
     */
    first_line = (unsigned int)addr & ~(CACHE_LINE_SIZE-1);
    past_last = ((unsigned int)addr+nbytes+CACHE_LINE_SIZE-1) &
        ~(CACHE_LINE_SIZE-1);
    nbytes = past_last - first_line;

    /*
     * Flushing more than the whole cache is never necessary, so a
     * range that large simply sweeps the entire cache.  Note this
     * assumes direct mapped i & d caches.
     */
    if (nbytes >= CACHE_SIZE) {
        flushrange(0, CACHE_SIZE, cache);
        return 0;
    }

    index = first_line%CACHE_SIZE;

    /* Common case: the range does not run past the end of the cache. */
    if (index + nbytes <= CACHE_SIZE) {
        flushrange(index, nbytes, cache);
        return 0;
    }

    /*
     * The range wraps around in the cache: flush the tail of the
     * cache first, then the remainder from offset 0.
     */
    (void) flushrange(index, CACHE_SIZE-index, cache);
    flushrange(0, nbytes-(CACHE_SIZE-index), cache);
    return 0;
}
/*
 * Circular record of the most recent flushrange() requests.  Each call
 * to flushrange() stores one entry.  Nothing in the visible part of this
 * file reads the log back -- presumably a debugging aid; confirm.
 */
struct {
/* Address inside the first flush buffer that the request started at. */
int start;
/* Matching address inside the second flush buffer. */
int start2;
/* Number of bytes the request covered. */
int nbytes;
} bufLog[1024];
/* Index of the next bufLog entry to write; flushrange() wraps it at 1024. */
int bufCount;
/*
 * Defined outside the visible part of this file.  flushrange() calls it
 * between its data-cache and instruction-cache passes.
 */
void SyncInstr(void);
/*
 * Flush `nbytes` worth of cache lines, beginning at cache offset `start`,
 * by touching the corresponding lines of both flush buffers.
 *
 * start   byte offset into the flush buffers; callers in this file pass
 *         a cache-line-aligned offset below CACHE_SIZE
 * nbytes  number of bytes to cover, walked in CACHE_LINE_SIZE steps
 * cache   DCACHE, ICACHE or BCACHE
 *
 * Every request is also recorded in bufLog.
 *
 * Returns the wrapping sum of the words loaded during the data pass
 * (0 when no data pass runs).  The sum is accumulated in an unsigned
 * variable: the loaded words are instruction encodings, and adding a few
 * of them in a signed int would overflow, which is undefined behaviour.
 */
int
flushrange(unsigned int start, int nbytes, int cache)
{
    int i;
    unsigned int x = 0;
    int start2 = start+flushbuffer2;

    start += flushbuffer;

    /* Remember this request in the circular log. */
    bufLog[bufCount].start = start;
    bufLog[bufCount].start2 = start2;
    bufLog[bufCount].nbytes = nbytes;
    bufCount = (bufCount+1)%1024;

    /*
     * Data side: load one word from each line of both buffers, then
     * store the same value back.
     */
    if (cache==DCACHE || cache==BCACHE) {
        for (i = 0; i < nbytes; i += CACHE_LINE_SIZE) {
            x += (unsigned int)((volatile int *)(start + i))[0];
            x += (unsigned int)((volatile int *)(start2 + i))[0];
            ((volatile int *)(start + i))[0] = ((volatile int *)(start + i))[0];
            ((volatile int *)(start2 + i))[0] = ((volatile int *)(start2 + i))[0];
        }
    }

    SyncInstr();

    /*
     * Instruction side: execute the code stored in both buffers,
     * entering at the requested offset with the byte count as argument.
     */
    if ((cache==ICACHE) || (cache==BCACHE)) {
        ((void(*)(int))(start))(nbytes);
        ((void(*)(int))(start2))(nbytes);
    }
    return (int)x;
}
/* This version only flushes one line of the I-cache and the D-cache.
* On the MIPS processors, inclusion of the I-cache is maintained
* but coherency is not maintained. Both I and D caches MUST be flushed
* if code is emitted at runtime.
*/
/*
 * Flush the one primary-cache line that `tca` maps to, from both the
 * instruction and the data cache.
 *
 * tca  address whose cache line is to be flushed; only its cache-index
 *      bits (see CACHE_INDEX) are used.
 *
 * Returns the sum of the two words loaded from the flush buffers.
 *
 * NOTE(review): unlike usercacheflush(), this routine does not call
 * usercacheinit() itself -- presumably the flush buffers must already
 * have been set up by the caller; confirm.
 */
int FlushOneLine(TCA tca) {
/* Line-aligned offset of tca within a CACHE_SIZE window. */
uint ca = CACHE_INDEX((uint)tca);
/* Addresses at that offset in each of the two flush buffers. */
uint addr1 = flushbuffer + ca;
uint addr2 = flushbuffer2 + ca;
int x=0;
/* flush from both the I and D caches */
/* Data side: load a word from each buffer line ... */
x +=((volatile int *)addr1)[0];
x +=((volatile int *)addr2)[0];
/* ... and store the same value back. */
((volatile int *)(addr1))[0] = ((volatile int *)(addr1))[0];
((volatile int *)(addr2))[0] = ((volatile int *)(addr2))[0];
/*
 * Instruction side: execute the code stored at that offset in each
 * buffer, passing one line's worth of bytes as the argument.
 */
((void(*)(int))addr1)(CACHE_LINE_SIZE);
((void(*)(int))addr2)(CACHE_LINE_SIZE);
return x;
}
#endif /* sgi */
/* **************************************************************
* alpha
* **************************************************************/
#ifdef __alpha
/* Size of the cache handled by the alpha code below, as a power of two. */
#define LOG2_CACHE_SIZE 13 /* 8kb */
#define CACHE_SIZE (1<<LOG2_CACHE_SIZE)
#define CACHE_LINE_SIZE 16 /* bytes */
/*
 * Host page size in bytes, and a helper that rounds an address up to the
 * next page boundary (an address already on a boundary is unchanged).
 */
#define HOST_PAGE_SIZE (1<<13)
#define NEXT_HOST_PAGE(_x) ( ((size_t)(_x)+HOST_PAGE_SIZE-1) & ~(HOST_PAGE_SIZE-1))
/*
 * Assemble a 32-bit instruction word: _opcode shifted to bit 26, _ra to
 * bit 21, _rb to bit 16, _jsr to bit 14, and _disp>>2 in the low 14 bits.
 * Depends on a U() macro and a uint32 type that are not defined in the
 * visible part of this file; nothing visible here uses CCj.
 */
#define CCj(_opcode, _jsr, _ra, _rb, _disp) (uint32)(\
U(_opcode)<<26 | U(_ra)<<21| \
U(_rb)<<16 | U(_jsr)<<14 | \
((U(_disp)>>2) & 0x3FFF) )
/* Register numbers used by usercacheinit() when building its instruction. */
#define REG_ZERO 31
#define REG_RA 26
/*
 * Backing store for the flush buffer.  It is one host page larger than
 * CACHE_SIZE so that a page-aligned region of CACHE_SIZE bytes always
 * fits inside it.
 */
static Inst buf_space[(CACHE_SIZE+HOST_PAGE_SIZE)/sizeof(Inst)];
/* Page-aligned start of the flush buffer within buf_space; set by usercacheinit(). */
static union alpha_instruction *flushbuf;
void usercacheinit(void)
{
int i;
union alpha_instruction inst;
inst.common.opcode = op_jsr;
inst.j_format.function = jsr_jsr;
inst.j_format.ra = REG_ZERO;
inst.j_format.rb = REG_RA;
inst.j_format.hint = 0;
flushbuf = (union alpha_instruction*)NEXT_HOST_PAGE(buf_space);
for (i=0;i<CACHE_SIZE/sizeof(Inst);i++) {
flushbuf[i] = inst;
}
usercacheflush(flushbuf,CACHE_SIZE,0);
}
/* *************************************************
* FlushOneLine. Inline function. Make sure to issue
* an "mb" instruction before calling it.
* *************************************************/
/*
 * Mask that turns an address into an offset inside the flush buffer:
 * keeps the bits below CACHE_SIZE and clears the low three, so the
 * offset is a multiple of 8.
 */
#define CACHE_SIZE_MASK ((CACHE_SIZE-1) & ~0x7)
/*
 * Touch the flush-buffer location that corresponds to `tca`: load it,
 * store the same value back, then execute the code stored there.
 *
 * tca  address whose low bits (CACHE_SIZE_MASK) select the buffer offset.
 *
 * Always returns 0.
 *
 * NOTE(review): declared `inline` without `static`; flushbuf must have
 * been set by usercacheinit() before this is called -- confirm callers.
 */
inline int FlushOneLine(TCA tca)
{
/* Address inside the flush buffer at the same offset as tca. */
uint64 addr = (uint64)flushbuf+ (CACHE_SIZE_MASK&(uint64)tca);
/* Data side: read the 64-bit word and write it straight back. */
volatile uint64 x = *(volatile uint64 *)addr;
*(volatile uint64*) addr = x;
/* Instruction side: call into the buffer at that address. */
((void(*)()) addr)();
return 0;
}
int usercacheflush(void *addr, int len, int cache)
{
int i;
/* asm ("call_pal 0x86");*/ /* imb */
#if 0
asm volatile ("mb"::);
for (i=0;i<=len;i+=CACHE_LINE_SIZE) {
FlushOneLine((TCA)((char*)addr+i));
}
#endif
asm ("call_pal 0x86"); /* imb */
for(i=0;i<12;i++) {
;
}
return 0;
}
/*
 * Alpha counterpart of the slow flush: caps the length at CACHE_SIZE
 * and hands the request to usercacheflush().
 *
 * Returns the result of usercacheflush().
 */
int slowcacheflush(void *addr, int len, int cache)
{
    int span = (len > CACHE_SIZE) ? CACHE_SIZE : len;

    /* No imb here: usercacheflush() already issues it. */
    return usercacheflush(addr, span, cache);
}
#endif /* __alpha */