Genome.java
3.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
/******************************************************************************
* Compilation: javac Genome.java
* Execution: java Genome - < input.txt (compress)
* Execution: java Genome + < input.txt (expand)
* Dependencies: Alphabet.java BinaryStdIn.java BinaryStdOut.java
* Data files: http://algs4.cs.princeton.edu/55compression/genomeTiny.txt
*
* Compress or expand a genomic sequence using a 2-bit code.
*
* % more genomeTiny.txt
* ATAGATGCATAGCGCATAGCTAGATGTGCTAGC
*
* % java Genome - < genomeTiny.txt | java Genome +
* ATAGATGCATAGCGCATAGCTAGATGTGCTAGC
*
******************************************************************************/
package edu.princeton.cs.algs4;
/**
* The {@code Genome} class provides static methods for compressing
* and expanding a genomic sequence using a 2-bit code.
* <p>
* For additional documentation,
* see <a href="http://algs4.cs.princeton.edu/55compression">Section 5.5</a> of
* <i>Algorithms, 4th Edition</i> by Robert Sedgewick and Kevin Wayne.
*
* @author Robert Sedgewick
* @author Kevin Wayne
*/
public class Genome {

    // Utility class with only static methods; do not instantiate.
    private Genome() { }

    /**
     * Reads a sequence of 8-bit extended ASCII characters over the alphabet
     * { A, C, T, G } from standard input; compresses them using two bits per
     * character; and writes the results to standard output.
     * <p>
     * The output starts with the number of characters in the input, followed
     * by one two-bit code per character. The code for a character is its
     * index in {@code Alphabet.DNA}.
     */
    public static void compress() {
        Alphabet dna = Alphabet.DNA;
        String s = BinaryStdIn.readString();
        int n = s.length();

        // Header: the character count, so that expand() knows how many
        // two-bit codes to read back.
        BinaryStdOut.write(n);

        // Write two-bit code for each char.
        for (int i = 0; i < n; i++) {
            int d = dna.toIndex(s.charAt(i));
            BinaryStdOut.write(d, 2);
        }
        BinaryStdOut.close();
    }

    /**
     * Reads a binary sequence from standard input; converts each two bits
     * to an 8-bit extended ASCII character over the alphabet { A, C, T, G };
     * and writes the results to standard output.
     * <p>
     * The input is expected to be in the format produced by
     * {@code compress()}: a character count followed by that many two-bit
     * codes.
     */
    public static void expand() {
        Alphabet dna = Alphabet.DNA;

        // Header written by compress(): number of encoded characters.
        int n = BinaryStdIn.readInt();

        // Read two bits; write char.
        for (int i = 0; i < n; i++) {
            char c = BinaryStdIn.readChar(2);
            BinaryStdOut.write(dna.toChar(c), 8);
        }
        BinaryStdOut.close();
    }

    /**
     * Sample client that calls {@code compress()} if the command-line
     * argument is "-" and {@code expand()} if it is "+".
     *
     * @param args the command-line arguments
     * @throws IllegalArgumentException if no command-line argument is given,
     *         or if the first argument is neither "-" nor "+"
     */
    public static void main(String[] args) {
        // Fail with a descriptive message instead of an
        // ArrayIndexOutOfBoundsException when the mode argument is missing.
        if (args == null || args.length == 0) {
            throw new IllegalArgumentException(
                "Missing command line argument: expected \"-\" (compress) or \"+\" (expand)");
        }

        String mode = args[0];
        if ("-".equals(mode)) {
            compress();
        } else if ("+".equals(mode)) {
            expand();
        } else {
            throw new IllegalArgumentException("Illegal command line argument");
        }
    }
}
/******************************************************************************
* Copyright 2002-2016, Robert Sedgewick and Kevin Wayne.
*
* This file is part of algs4.jar, which accompanies the textbook
*
* Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
* Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
* http://algs4.cs.princeton.edu
*
*
* algs4.jar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* algs4.jar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with algs4.jar. If not, see http://www.gnu.org/licenses.
******************************************************************************/