FrequencyCounter.java
4.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/******************************************************************************
* Compilation: javac FrequencyCounter.java
* Execution: java FrequencyCounter L < input.txt
* Dependencies: ST.java StdIn.java StdOut.java
* Data files: http://algs4.cs.princeton.edu/31elementary/tinyTale.txt
* http://algs4.cs.princeton.edu/31elementary/tale.txt
* http://algs4.cs.princeton.edu/31elementary/leipzig100K.txt
* http://algs4.cs.princeton.edu/31elementary/leipzig300K.txt
* http://algs4.cs.princeton.edu/31elementary/leipzig1M.txt
*
* Read in a list of words from standard input and print out
* the most frequently occurring word whose length is at least
* a given threshold.
*
* % java FrequencyCounter 1 < tinyTale.txt
* it 10
*
* % java FrequencyCounter 8 < tale.txt
* business 122
*
* % java FrequencyCounter 10 < leipzig1M.txt
* government 24763
*
*
******************************************************************************/
package edu.princeton.cs.algs4;
/**
* The {@code FrequencyCounter} class provides a client for
* reading in a sequence of words and printing a word (of at least
* a given length) that occurs most frequently. It is useful as
* a test client for various symbol table implementations.
* <p>
* For additional documentation, see <a href="http://algs4.cs.princeton.edu/31elementary">Section 3.1</a> of
* <i>Algorithms, 4th Edition</i> by Robert Sedgewick and Kevin Wayne.
*
* @author Robert Sedgewick
* @author Kevin Wayne
*/
public class FrequencyCounter {

    // Do not instantiate.
    private FrequencyCounter() { }

    /**
     * Reads in a command-line integer and sequence of words from
     * standard input and prints out a word (whose length is at least
     * the threshold) that occurs most frequently to standard output.
     * It also prints out the number of words whose length is at least
     * the threshold and the number of distinct such words.
     * <p>
     * If no word meets the threshold, the reported word is the empty
     * string and its count is 0.
     *
     * @param  args the command-line arguments; {@code args[0]} is the
     *         minimum word length
     * @throws IllegalArgumentException if no minimum length is supplied
     * @throws NumberFormatException if {@code args[0]} is not a valid integer
     */
    public static void main(String[] args) {
        // Fail with a usage hint instead of a bare ArrayIndexOutOfBoundsException.
        if (args == null || args.length == 0) {
            throw new IllegalArgumentException("usage: java FrequencyCounter minlen < input.txt");
        }
        int minlen = Integer.parseInt(args[0]);

        int distinct = 0, words = 0;
        ST<String, Integer> st = new ST<String, Integer>();

        // compute frequency counts
        while (!StdIn.isEmpty()) {
            String key = StdIn.readString();

            // words shorter than the threshold are ignored; length == minlen counts
            if (key.length() < minlen) continue;

            words++;
            if (st.contains(key)) {
                st.put(key, st.get(key) + 1);
            }
            else {
                st.put(key, 1);
                distinct++;
            }
        }

        // find a key with the highest frequency count; the running best is
        // tracked in locals so that no sentinel entry is added to the table
        String max = "";
        int maxCount = 0;
        for (String word : st.keys()) {
            int count = st.get(word);
            // strict comparison: on ties the first key seen in iteration order wins
            if (count > maxCount) {
                max = word;
                maxCount = count;
            }
        }

        StdOut.println(max + " " + maxCount);
        StdOut.println("distinct = " + distinct);
        StdOut.println("words    = " + words);
    }
}
/******************************************************************************
* Copyright 2002-2016, Robert Sedgewick and Kevin Wayne.
*
* This file is part of algs4.jar, which accompanies the textbook
*
* Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
* Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
* http://algs4.cs.princeton.edu
*
*
* algs4.jar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* algs4.jar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with algs4.jar. If not, see http://www.gnu.org/licenses.
******************************************************************************/