bzero.s
2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
/* ------------------------------------------------------------------ */
/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
/* | Reserved. This software contains proprietary and confidential | */
/* | information of MIPS and its suppliers. Use, disclosure or | */
/* | reproduction is prohibited without the prior express written | */
/* | consent of MIPS. | */
/* ------------------------------------------------------------------ */
#ident "$Header: /root/leakn64/depot/rf/sw/bbplayer/libultra/monegi/libc/bzero.s,v 1.1.1.2 2002/10/29 08:06:43 blythe Exp $"
/*
* Copyright 1985 by MIPS Computer Systems, Inc.
*/
.weakext bzero, _bzero
.weakext blkclr, _blkclr
#include "sys/regdef.h"
#include "sys/asm.h"
#define NBPW 4
/*
* _bzero(dst, bcount)
* Zero block of memory
*
* Calculating MINZERO, assuming 50% cache-miss on non-loop code:
* Overhead =~ 18 instructions => 63 (81) cycles
* Byte zero =~ 16 (24) cycles/word for 08M44 (08V11)
* Word zero =~ 3 (6) cycles/word for 08M44 (08V11)
* If I-cache-miss nears 0, MINZERO ==> 4 bytes; otherwise, times are:
* breakeven (MEM) = 63 / (16 - 3) =~ 5 words
* breakeven (VME) = 81 / (24 - 6) =~ 4.5 words
* Since the overhead is pessimistic (worst-case alignment), and many calls
* will be for well-aligned data, and since Word-zeroing at least leaves
* the zero in the cache, we shade these values (18-20) down to 12
*/
#define MINZERO 12
/*
 * _bzero / _blkclr -- store zero bytes over a block of memory.
 *
 * In:   a0 = dst     address of the first byte to clear
 *       a1 = bcount  number of bytes to clear; it is tested with signed
 *                    compares, so a zero or negative count stores nothing
 * Out:  v0 is not written by this routine
 * Uses: v1 and a3 as scratch; a0 and a1 are advanced/consumed
 *
 * Strategy: counts below MINZERO go straight to the byte loop.  Otherwise
 * the head is word-aligned with one partial-word store, then the bulk is
 * cleared in 32-byte chunks, then whole words, then trailing bytes.
 *
 * NOTE(review): XLEAF comes from sys/asm.h (not shown); presumably it
 * exports _blkclr as a second name for this same entry point -- confirm.
 * NOTE(review): no ".set noreorder" is visible in this file, so the
 * comments below read the instructions in written order and presume the
 * assembler fills branch delay slots -- confirm against the build.
 */
LEAF(_bzero)
XLEAF(_blkclr)
/* v1 = -dst; its low bits become the distance to the next word boundary */
subu v1,zero,a0 # number of bytes til aligned
/* signed compare: short (or negative) counts skip alignment entirely */
blt a1,MINZERO,bytezero
/* v1 = (-dst) & (NBPW-1): 0..NBPW-1 bytes needed to reach alignment */
and v1,NBPW-1
/* take the head bytes out of the remaining count */
subu a1,v1
beq v1,zero,blkzero # already aligned
/*
 * One partial-word store clears the bytes from dst up to the next word
 * boundary; which of swl/swr does that depends on the byte order.
 * NOTE(review): if neither MIPSEB nor MIPSEL is defined, no store is
 * emitted and the unaligned head is left untouched -- confirm the build
 * always defines exactly one of them.
 */
#ifdef MIPSEB
swl zero,0(a0)
#endif
#ifdef MIPSEL
swr zero,0(a0)
#endif
/* step past the head bytes; a0 is word aligned from here on */
addu a0,v1
/*
 * Clear as many word-aligned 32-byte chunks as fit in the remaining count
 */
blkzero:
and a3,a1,~31 # 32 byte chunks
/* a1 keeps only the remainder below 32 bytes */
subu a1,a3
/* no full 32-byte chunk: go straight to the word loop */
beq a3,zero,wordzero
addu a3,a0 # dst endpoint
/*
 * Eight word stores per pass.  a0 is bumped by 32 midway, so the last
 * four stores use negative offsets to reach the upper half of the chunk.
 */
1: sw zero,0(a0)
sw zero,4(a0)
sw zero,8(a0)
sw zero,12(a0)
addu a0,32
sw zero,-16(a0)
sw zero,-12(a0)
sw zero,-8(a0)
sw zero,-4(a0)
bne a0,a3,1b
/*
 * Clear the remaining whole words (a1 is below 32 here)
 */
wordzero:
and a3,a1,~(NBPW-1) # word chunks
/* a1 keeps only the trailing 0..NBPW-1 bytes */
subu a1,a3
/* no whole word left: only trailing bytes (if any) remain */
beq a3,zero,bytezero
addu a3,a0 # dst endpoint
/* advance first, then store one word just behind the new a0 */
1: addu a0,NBPW
sw zero,-NBPW(a0)
bne a0,a3,1b
/*
 * Clear whatever is left one byte at a time.  This is also the direct
 * target for counts below MINZERO, where dst has not been aligned.
 */
bytezero:
/* signed test: nothing to do for a zero or negative count */
ble a1,zero,zerodone
addu a1,a0 # dst endpoint
/* advance first, then store one byte just behind the new a0 */
1: addu a0,1
sb zero,-1(a0)
bne a0,a1,1b
/* return to the caller */
zerodone:
j ra
.end _bzero