bcmp.s
2.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/* ------------------------------------------------------------------ */
/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
/* | Reserved. This software contains proprietary and confidential | */
/* | information of MIPS and its suppliers. Use, disclosure or | */
/* | reproduction is prohibited without the prior express written | */
/* | consent of MIPS. | */
/* ------------------------------------------------------------------ */
#ident "$Header"
/*
* Copyright 1985 by MIPS Computer Systems, Inc.
*/
/*
 * Build switch: with ISBCMP defined the routine below is emitted as _bcmp;
 * without it the same code is emitted as memcmp and returns an ordered
 * (-1 / 1) result on mismatch instead of just 1.
 */
#define ISBCMP
/* bcmp(s1, s2, n) */
#ifdef ISBCMP
/* presumably publishes bcmp as a weak alias for _bcmp -- confirm against the assembler manual */
.weakext bcmp, _bcmp
#else
/* memcmp is ansi defined so no weak symbol is needed */
#endif
#include "sys/regdef.h"
#include "sys/asm.h"
/*
* bcmp(src, dst, bcount)
*
* MINCMP is the minimum number of bytes for which it is worthwhile to
* try to align the compare into word transactions
*
* Calculating MINCMP
* Overhead =~ 15 instructions => 90 cycles
* Byte cmp =~ 38 cycles/word
* Word cmp =~ 17 cycles/word
* Breakeven =~ 16 bytes
*/
/* Counts below MINCMP bytes go straight to the byte-at-a-time loop. */
#define MINCMP 16
/* Word size in bytes; used both as the lw stride and (NBPW-1) as the alignment mask. */
#define NBPW 4
/*
 * _bcmp(src, dst, bcount)   [memcmp(src, dst, bcount) when ISBCMP is undefined]
 *
 * In:   a0 = src, a1 = dst, a2 = bcount.  bcount is tested with signed
 *       compares, so a count <= 0 reports "equal".
 * Out:  v0 = 0 when the two regions match.
 *       bcmp build:   v0 = 1 on any mismatch.
 *       memcmp build: v0 = 1 when the first differing byte of src is the
 *                     larger one (unsigned), -1 otherwise.
 * Uses: v0, v1, a0-a3, t8.  No stack, no calls.
 *
 * Strategy: short compares go byte by byte.  Otherwise, if src and dst
 * share the same alignment within a word, the leading partial word is
 * compared with lwl/lwr and the body with lw; if they do not, dst is
 * byte-stepped to a word boundary and the body compares an unaligned
 * lwl/lwr word from src with an aligned lw word from dst.  Whatever is
 * left over falls into the byte loop.
 *
 * NOTE(review): the instructions are written in program order and the
 * BDSLOT remarks suggest the assembler is expected to schedule branch
 * delay slots itself (reorder mode) -- confirm before adding
 * .set noreorder anywhere in this routine.
 */
#ifdef ISBCMP
LEAF(_bcmp)
#else
LEAF(memcmp)
#endif
	xor	v0,a0,a1		# address bits in which src and dst differ
	blt	a2,MINCMP,bytecmp	# too short, just byte cmp
	and	v0,NBPW-1		# nonzero => different offset within a word
	subu	t8,zero,a0		# number of bytes til aligned
	bne	v0,zero,unalgncmp	# src and dst not alignable
/*
 * src and dst can be simultaneously word aligned
 */
	and	t8,NBPW-1		# t8 = bytes up to the next word boundary
	subu	a2,t8			# those bytes are consumed below
	beq	t8,zero,wordcmp		# already aligned
	move	v0,v1			# lw[lr] dont clear target reg, so make
					# the untouched bytes of v0/v1 identical
#ifdef MIPSEB
	lwl	v0,0(a0)		# cmp unaligned portion
	lwl	v1,0(a1)
#endif
#ifdef MIPSEL
	lwr	v0,0(a0)		# cmp unaligned portion
	lwr	v1,0(a1)
#endif
	addu	a0,t8
	addu	a1,t8
	bne	v0,v1,cmpne
/*
 * word cmp loop
 */
wordcmp:
	and	a3,a2,~(NBPW-1)		# a3 = byte count covered by whole words
	subu	a2,a3			# a2 = trailing bytes left for bytecmp
	beq	a3,zero,bytecmp
	addu	a3,a0			# src1 endpoint
1:	lw	v0,0(a0)
	lw	v1,0(a1)
	addu	a0,NBPW			# 1st BDSLOT
	addu	a1,NBPW			# 2nd BDSLOT (asm doesnt move)
	bne	v0,v1,cmpne
	bne	a0,a3,1b		# at least one more word
	b	bytecmp
/*
 * deal with simultaneously unalignable cmp by aligning one src
 */
unalgncmp:
	subu	a3,zero,a1		# calc byte cnt to get src2 aligned
	and	a3,NBPW-1
	subu	a2,a3
	beq	a3,zero,partaligncmp	# already aligned
	addu	a3,a0			# src1 endpoint
1:	lbu	v0,0(a0)
	lbu	v1,0(a1)
	addu	a0,1
	addu	a1,1
	bne	v0,v1,cmpne
	bne	a0,a3,1b
/*
 * src unaligned, dst aligned loop
 */
partaligncmp:
	and	a3,a2,~(NBPW-1)		# a3 = byte count covered by whole words
	subu	a2,a3			# a2 = trailing bytes left for bytecmp
	beq	a3,zero,bytecmp
	addu	a3,a0			# src1 endpoint
1:
#ifdef MIPSEB
	lwl	v0,0(a0)		# assemble one unaligned word from src
	lwr	v0,3(a0)
#endif
#ifdef MIPSEL
	lwr	v0,0(a0)		# assemble one unaligned word from src
	lwl	v0,3(a0)
#endif
	lw	v1,0(a1)
	addu	a0,NBPW
	addu	a1,NBPW
	bne	v0,v1,cmpne
	bne	a0,a3,1b
/*
 * brute force byte cmp loop
 */
bytecmp:
	addu	a3,a2,a0		# src1 endpoint; BDSLOT
	ble	a2,zero,cmpdone		# nothing (or a negative count) left
1:	lbu	v0,0(a0)
	lbu	v1,0(a1)
	addu	a0,1
	addu	a1,1
	bne	v0,v1,cmpne
	bne	a0,a3,1b
cmpdone:
	move	v0,zero			# regions are equal
	j	ra
/*
 * Mismatch.  v0 holds the src datum and v1 the dst datum (a zero-extended
 * byte, a whole word, or a partial word whose unloaded bytes are equal in
 * both registers); a2 and a3 are dead here and reused as scratch.
 */
cmpne:
#ifndef ISBCMP
#ifdef MIPSEL
	/*
	 * Little-endian: the lowest-addressed byte sits in the least
	 * significant byte of the register, so a word-wide unsigned compare
	 * would order by the highest-addressed difference.  Walk up from
	 * the low byte to the first pair that differs instead; v0 != v1 on
	 * entry guarantees the walk terminates.
	 */
2:	and	a2,v0,0xff		# a2 = next src byte
	and	a3,v1,0xff		# a3 = next dst byte
	srl	v0,v0,8
	srl	v1,v1,8
	beq	a2,a3,2b
	sltu	a2,a3,a2		# a2 = (dst byte < src byte)
#else
	sltu	a2,v1,v0		# a2 = (dst < src); byte order == word order
#endif
	bne	a2,zero,9f
	li	v0,-1			# src sorts before dst
	j	ra
9:
#endif
	li	v0,1			# bcmp: different; memcmp: src sorts after dst
	j	ra
/* Close the entry under the same name that LEAF() opened it with. */
#ifdef ISBCMP
	.end	_bcmp
#else
	.end	memcmp
#endif