// KWIK.java
/******************************************************************************
* Compilation: javac KWIK.java
* Execution: java KWIK file.txt context
* Dependencies: StdIn.java StdOut.java In.java SuffixArray.java
* Data files: http://algs4.cs.princeton.edu/63suffix/tale.txt
* http://algs4.cs.princeton.edu/63suffix/mobydick.txt
*
* Keyword-in-context search.
*
* % java KWIK tale.txt 15
* majesty
* most gracious majesty king george th
* rnkeys and the majesty of the law fir
* on against the majesty of the people
* se them to his majestys chief secreta
* h lists of his majestys forces and of
*
* the worst
* w the best and the worst are known to y
* f them give me the worst first there th
* for in case of the worst is a friend in
* e roomdoor and the worst is over then a
* pect mr darnay the worst its the wisest
* is his brother the worst of a bad race
* ss in them for the worst of health for
* you have seen the worst of her agitati
* cumwented into the worst of luck buuust
* n your brother the worst of the bad rac
* full share in the worst of the day pla
* mes to himself the worst of the strife
* f times it was the worst of times it wa
* ould hope that the worst was over well
* urage business the worst will be over i
* clesiastics of the worst world worldly
*
******************************************************************************/
package edu.princeton.cs.algs4;
/**
 *  The {@code KWIK} class provides a {@link SuffixArray} client for computing
 *  all occurrences of a keyword in a given string, with surrounding context.
 *  This is known as <em>keyword-in-context search</em>.
 *  <p>
 *  For additional documentation,
 *  see <a href="http://algs4.cs.princeton.edu/63suffix">Section 6.3</a> of
 *  <i>Algorithms, 4th Edition</i> by Robert Sedgewick and Kevin Wayne.
 *
 *  @author Robert Sedgewick
 *  @author Kevin Wayne
 */
public class KWIK {

    // Do not instantiate.
    private KWIK() { }

    /**
     * Reads a string from a file specified as the first
     * command-line argument; reads an integer <em>k</em> specified as the
     * second command-line argument; then repeatedly processes
     * user queries (one per line of standard input), printing all
     * occurrences of the given query string in the text string with
     * <em>k</em> characters of surrounding context on either side.
     * A blank line is printed after the results of each query.
     *
     * @param  args the command-line arguments: the text file name followed
     *         by the non-negative context width
     * @throws IllegalArgumentException if fewer than two arguments are given,
     *         if the context width is not an integer, or if it is negative
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            throw new IllegalArgumentException("usage: java KWIK file.txt context");
        }

        // parse the context width before opening the file so that a bad
        // argument cannot leave the input stream open
        int context = Integer.parseInt(args[1]);
        if (context < 0) {
            throw new IllegalArgumentException("context must be non-negative: " + context);
        }

        // read in text, collapsing every run of whitespace into a single space
        String text;
        In in = new In(args[0]);
        try {
            text = in.readAll().replaceAll("\\s+", " ");
        }
        finally {
            // the file is no longer needed once the text is in memory
            in.close();
        }
        int n = text.length();

        // build suffix array
        SuffixArray sa = new SuffixArray(text);

        // find all occurrences of queries and give context
        while (StdIn.hasNextLine()) {
            String query = StdIn.readLine();

            // Scan forward from rank(query) and stop at the first suffix that
            // does not begin with the query (this relies on the suffix array
            // listing suffixes in sorted order, so matches are contiguous).
            for (int i = sa.rank(query); i < n; i++) {
                int start = sa.index(i);
                if (!text.startsWith(query, start)) break;

                // clamp the context window to the bounds of the text; the upper
                // bound is computed in long arithmetic so that a very large
                // context width cannot overflow int
                int lo = Math.max(0, start - context);
                int hi = (int) Math.min((long) n, (long) start + query.length() + context);
                StdOut.println(text.substring(lo, hi));
            }
            StdOut.println();
        }
    }
}
/******************************************************************************
* Copyright 2002-2016, Robert Sedgewick and Kevin Wayne.
*
* This file is part of algs4.jar, which accompanies the textbook
*
* Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
* Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
* http://algs4.cs.princeton.edu
*
*
* algs4.jar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* algs4.jar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with algs4.jar. If not, see http://www.gnu.org/licenses.
******************************************************************************/