LZW.java
5.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/******************************************************************************
* Compilation: javac LZW.java
* Execution: java LZW - < input.txt (compress)
* Execution: java LZW + < input.txt (expand)
* Dependencies: BinaryIn.java BinaryOut.java
* Data files: http://algs4.cs.princeton.edu/55compression/abraLZW.txt
* http://algs4.cs.princeton.edu/55compression/ababLZW.txt
*
* Compress or expand binary input from standard input using LZW.
*
* WARNING: STARTING WITH ORACLE JAVA 6, UPDATE 7 the SUBSTRING
* METHOD TAKES TIME AND SPACE LINEAR IN THE SIZE OF THE EXTRACTED
* SUBSTRING (INSTEAD OF CONSTANT SPACE AND TIME AS IN EARLIER
* IMPLEMENTATIONS).
*
* See <a href = "http://java-performance.info/changes-to-string-java-1-7-0_06/">this article</a>
* for more details.
*
******************************************************************************/
package edu.princeton.cs.algs4;
/**
* The {@code LZW} class provides static methods for compressing
* and expanding a binary input using LZW compression over the 8-bit extended
* ASCII alphabet with 12-bit codewords.
* <p>
* For additional documentation,
* see <a href="http://algs4.cs.princeton.edu/55compress">Section 5.5</a> of
* <i>Algorithms, 4th Edition</i> by Robert Sedgewick and Kevin Wayne.
*
* @author Robert Sedgewick
* @author Kevin Wayne
*/
public class LZW {
private static final int R = 256; // number of input chars
private static final int L = 4096; // number of codewords = 2^W
private static final int W = 12; // codeword width
// Do not instantiate.
private LZW() { }
/**
* Reads a sequence of 8-bit bytes from standard input; compresses
* them using LZW compression with 12-bit codewords; and writes the results
* to standard output.
*/
public static void compress() {
String input = BinaryStdIn.readString();
TST<Integer> st = new TST<Integer>();
for (int i = 0; i < R; i++)
st.put("" + (char) i, i);
int code = R+1; // R is codeword for EOF
while (input.length() > 0) {
String s = st.longestPrefixOf(input); // Find max prefix match s.
BinaryStdOut.write(st.get(s), W); // Print s's encoding.
int t = s.length();
if (t < input.length() && code < L) // Add s to symbol table.
st.put(input.substring(0, t + 1), code++);
input = input.substring(t); // Scan past s in input.
}
BinaryStdOut.write(R, W);
BinaryStdOut.close();
}
/**
* Reads a sequence of bit encoded using LZW compression with
* 12-bit codewords from standard input; expands them; and writes
* the results to standard output.
*/
public static void expand() {
String[] st = new String[L];
int i; // next available codeword value
// initialize symbol table with all 1-character strings
for (i = 0; i < R; i++)
st[i] = "" + (char) i;
st[i++] = ""; // (unused) lookahead for EOF
int codeword = BinaryStdIn.readInt(W);
if (codeword == R) return; // expanded message is empty string
String val = st[codeword];
while (true) {
BinaryStdOut.write(val);
codeword = BinaryStdIn.readInt(W);
if (codeword == R) break;
String s = st[codeword];
if (i == codeword) s = val + val.charAt(0); // special case hack
if (i < L) st[i++] = val + s.charAt(0);
val = s;
}
BinaryStdOut.close();
}
/**
* Sample client that calls {@code compress()} if the command-line
* argument is "-" an {@code expand()} if it is "+".
*
* @param args the command-line arguments
*/
public static void main(String[] args) {
if (args[0].equals("-")) compress();
else if (args[0].equals("+")) expand();
else throw new IllegalArgumentException("Illegal command line argument");
}
}
/******************************************************************************
* Copyright 2002-2016, Robert Sedgewick and Kevin Wayne.
*
* This file is part of algs4.jar, which accompanies the textbook
*
* Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
* Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
* http://algs4.cs.princeton.edu
*
*
* algs4.jar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* algs4.jar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with algs4.jar. If not, see http://www.gnu.org/licenses.
******************************************************************************/