/******************************************************************************
* Compilation: javac Alphabet.java
* Execution: java Alphabet
* Dependencies: StdOut.java
*
* A data type for alphabets, for use with string-processing code
* that must convert between an alphabet of size R and the integers
* 0 through R-1.
*
* Warning: supports only the basic multilingual plane (BMP), i.e.,
* Unicode characters between U+0000 and U+FFFF.
*
******************************************************************************/
package edu.princeton.cs.algs4;
/**
 * The {@code Alphabet} class provides a data type for alphabets, for use with
 * string-processing code that must convert between the characters of an
 * alphabet of size <em>R</em> and the integers 0 through <em>R</em>-1.
 * <p>
 * Only the basic multilingual plane (BMP) is supported, i.e., Unicode
 * characters between U+0000 and U+FFFF (the values a Java {@code char} can hold).
 */
public class Alphabet {

    // Number of distinct char values (U+0000 through U+FFFF inclusive).
    // This is a compile-time constant, so it is safe to use from the
    // constructors that run while the static alphabets below are initialized.
    private static final int CHAR_RANGE = Character.MAX_VALUE + 1;

    /**
     * The binary alphabet { 0, 1 }.
     */
    public static final Alphabet BINARY = new Alphabet("01");

    /**
     * The octal alphabet { 0, 1, 2, 3, 4, 5, 6, 7 }.
     */
    public static final Alphabet OCTAL = new Alphabet("01234567");

    /**
     * The decimal alphabet { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }.
     */
    public static final Alphabet DECIMAL = new Alphabet("0123456789");

    /**
     * The hexadecimal alphabet { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F }.
     */
    public static final Alphabet HEXADECIMAL = new Alphabet("0123456789ABCDEF");

    /**
     * The DNA alphabet { A, C, G, T }.
     */
    public static final Alphabet DNA = new Alphabet("ACGT");

    /**
     * The lowercase alphabet { a, b, c, ..., z }.
     */
    public static final Alphabet LOWERCASE = new Alphabet("abcdefghijklmnopqrstuvwxyz");

    /**
     * The uppercase alphabet { A, B, C, ..., Z }.
     */
    public static final Alphabet UPPERCASE = new Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZ");

    /**
     * The protein alphabet { A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y }.
     */
    public static final Alphabet PROTEIN = new Alphabet("ACDEFGHIKLMNPQRSTVWY");

    /**
     * The base-64 alphabet (64 characters).
     */
    public static final Alphabet BASE64 = new Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");

    /**
     * The ASCII alphabet (0-127).
     */
    public static final Alphabet ASCII = new Alphabet(128);

    /**
     * The extended ASCII alphabet (0-255).
     */
    public static final Alphabet EXTENDED_ASCII = new Alphabet(256);

    /**
     * The Unicode 16 alphabet (0-65,535).
     */
    public static final Alphabet UNICODE16 = new Alphabet(65536);

    private final char[] alphabet;     // the characters in the alphabet, by index
    private final int[] inverse;       // char value -> index, or -1 if not in the alphabet
    private final int R;               // the radix of the alphabet

    /**
     * Initializes a new alphabet from the given set of characters.
     * The index of each character is its position in {@code alpha}.
     *
     * @param  alpha the set of characters
     * @throws IllegalArgumentException if {@code alpha} contains a repeated character
     */
    public Alphabet(String alpha) {
        alphabet = alpha.toCharArray();
        R = alpha.length();

        // one slot per possible char value, so that '\uFFFF' is a valid index
        inverse = new int[CHAR_RANGE];
        for (int i = 0; i < inverse.length; i++) {
            inverse[i] = -1;
        }

        // can't use char for the loop variable since R can be as big as 65,536
        for (int i = 0; i < R; i++) {
            char c = alphabet[i];
            // a slot that is already filled means the character occurred earlier
            if (inverse[c] != -1) {
                throw new IllegalArgumentException("Illegal alphabet: repeated character = '" + c + "'");
            }
            inverse[c] = i;
        }
    }

    /**
     * Initializes a new alphabet using characters 0 through R-1.
     * The lookup table has exactly R entries, so characters with value
     * R or greater fall outside of it.
     *
     * @param radix the number of characters in the alphabet (the radix R)
     */
    private Alphabet(int radix) {
        this.R = radix;
        alphabet = new char[R];
        inverse = new int[R];

        // can't use char since R can be as big as 65,536
        for (int i = 0; i < R; i++) {
            alphabet[i] = (char) i;
        }
        for (int i = 0; i < R; i++) {
            inverse[i] = i;
        }
    }

    /**
     * Initializes a new alphabet using characters 0 through 255.
     */
    public Alphabet() {
        this(256);
    }

    /**
     * Returns true if the argument is a character in this alphabet.
     *
     * @param  c the character
     * @return {@code true} if {@code c} is a character in this alphabet;
     *         {@code false} otherwise
     */
    public boolean contains(char c) {
        // radix-based alphabets only have R table entries, so check the bound first
        return c < inverse.length && inverse[c] != -1;
    }

    /**
     * Returns the number of characters in this alphabet (the radix).
     *
     * @return the number of characters in this alphabet
     * @deprecated Replaced by {@link #radix()}.
     */
    @Deprecated
    public int R() {
        return R;
    }

    /**
     * Returns the number of characters in this alphabet (the radix).
     *
     * @return the number of characters in this alphabet
     */
    public int radix() {
        return R;
    }

    /**
     * Returns the binary logarithm of the number of characters in this alphabet.
     *
     * @return the binary logarithm (rounded up) of the number of characters in this alphabet
     */
    public int lgR() {
        int lgR = 0;
        // count the bits needed to represent the largest index, R-1
        for (int t = R - 1; t >= 1; t /= 2) {
            lgR++;
        }
        return lgR;
    }

    /**
     * Returns the index corresponding to the argument character.
     *
     * @param  c the character
     * @return the index corresponding to the character {@code c}
     * @throws IllegalArgumentException unless {@code c} is a character in this alphabet
     */
    public int toIndex(char c) {
        if (c >= inverse.length || inverse[c] == -1) {
            throw new IllegalArgumentException("Character " + c + " not in alphabet");
        }
        return inverse[c];
    }

    /**
     * Returns the indices corresponding to the argument characters.
     *
     * @param  s the characters
     * @return the indices corresponding to the characters {@code s}
     * @throws IllegalArgumentException unless every character in {@code s}
     *         is a character in this alphabet
     */
    public int[] toIndices(String s) {
        char[] source = s.toCharArray();
        int[] target = new int[source.length];
        for (int i = 0; i < source.length; i++) {
            target[i] = toIndex(source[i]);
        }
        return target;
    }

    /**
     * Returns the character corresponding to the argument index.
     *
     * @param  index the index
     * @return the character corresponding to the index {@code index}
     * @throws IndexOutOfBoundsException unless {@code 0 <= index < R}
     */
    public char toChar(int index) {
        if (index < 0 || index >= R) {
            throw new IndexOutOfBoundsException("Alphabet index out of bounds");
        }
        return alphabet[index];
    }

    /**
     * Returns the characters corresponding to the argument indices.
     *
     * @param  indices the indices
     * @return the characters corresponding to the indices {@code indices}
     * @throws IndexOutOfBoundsException unless {@code 0 <= indices[i] < R}
     *         for every {@code i}
     */
    public String toChars(int[] indices) {
        StringBuilder s = new StringBuilder(indices.length);
        for (int i = 0; i < indices.length; i++) {
            s.append(toChar(indices[i]));
        }
        return s.toString();
    }

    /**
     * Unit tests the {@code Alphabet} data type.
     * Encodes three sample strings to indices, decodes them back,
     * and prints each decoded string.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        int[]  encoded1 = Alphabet.BASE64.toIndices("NowIsTheTimeForAllGoodMen");
        String decoded1 = Alphabet.BASE64.toChars(encoded1);
        StdOut.println(decoded1);

        int[]  encoded2 = Alphabet.DNA.toIndices("AACGAACGGTTTACCCCG");
        String decoded2 = Alphabet.DNA.toChars(encoded2);
        StdOut.println(decoded2);

        int[]  encoded3 = Alphabet.DECIMAL.toIndices("01234567890123456789");
        String decoded3 = Alphabet.DECIMAL.toChars(encoded3);
        StdOut.println(decoded3);
    }
}
/******************************************************************************
* Copyright 2002-2016, Robert Sedgewick and Kevin Wayne.
*
* This file is part of algs4.jar, which accompanies the textbook
*
* Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
* Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
* http://algs4.cs.princeton.edu
*
*
* algs4.jar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* algs4.jar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with algs4.jar. If not, see http://www.gnu.org/licenses.
******************************************************************************/