0
|
1 package filters;
|
|
2
|
|
3 import java.io.File;
|
|
4 import java.io.FileNotFoundException;
|
|
5 import java.io.FileWriter;
|
|
6 import java.io.IOException;
|
|
7 import java.util.Scanner;
|
|
8 import datastructures.ReadLine;
|
|
9
|
|
10 public class ReadFilter extends Filter{
|
|
11
|
|
12 File input, output;
|
|
13
|
|
14 /**
|
|
15 * Constructs a SamAdapter object. The output of an adaption is written to the
|
|
16 * given file outputFileName.
|
|
17 *
|
|
18 * @param inputFileName the name of the read alignment input file.
|
|
19 * @param outputFileName the name of the output file containing the reduced
|
|
20 * format of the read alignment file. It must ends with ".red".
|
|
21 */
|
|
22 public ReadFilter(String inputFileName, String outputFileName) {
|
|
23 super(inputFileName, outputFileName);
|
|
24 input = new File(inputFileName);
|
|
25 output = new File(outputFileName);
|
|
26 }
|
|
27
|
|
28 /**
|
|
29 * <P>
|
|
30 * Uses ReadLine zu reduce each line of the read alignment file to following format:<BR>
|
|
31 * <name> <chrom> <start> <end> (tab delimited).
|
|
32 * </P>
|
|
33 * In the following we list the 12 fields of the sam-alignment-file. We mark the fields we are
|
|
34 * interessted in with (!!):
|
|
35 * <PRE>
|
|
36 * 1. <QNAME> : Query pair NAME if paired; or Query NAME if unpaired (Ex: 6:105:18438:14421) (!!)
|
|
37 * 2. <FLAG> : bitwise FLAG a₀a₁a₂a₃a₄a₅a₆a₇a₈a₉a₁₀ (Ex: 0 forward, 16 reverse strand)
|
|
38 * a₀ : the read is paired in sequencing, (no matter whether it is mapped in a pair)
|
|
39 * a₁ : the read is mapped in a proper pair
|
|
40 * a₂ : the query sequence itself is unmapped
|
|
41 * a₃ : the mate is unmapped
|
|
42 * a₄ : strand of the query (0 for forward; 1 for reverse strand)
|
|
43 * a₅ : strand of the mate
|
|
44 * a₆ : the read is the first read in a pair
|
|
45 * a₇ : the read is the second read in a pair
|
|
46 * a₈ : the alignment is not primary
|
|
47 * a₉ : the read fails platform/vendor quality checks
|
|
48 * a₁₀: the read is either a PCR duplicate or an optical duplicate
|
|
49 * 3. <RNAME> : Reference sequence NAME (Ex: chr10) (!!)
|
|
50 * 4. <POS> : 1-based leftmost POSition/coordinate of the clipped sequence (Ex: 60041) (!!)
|
|
51 * 5. <MAPQ> : MAPping Quality (Ex: 0)
|
|
52 * (phred-scaled posterior probability that the mapping position of this read is incorrect)
|
|
53 * 6. <CIGAR> : extended CIGAR string (Ex: 150M)
|
|
54 * 7. <MRNM> : Mate Reference sequence NaMe; “=” if the same as <RNAME> (Ex:*)
|
|
55 * 8. <MPOS> : 1-based leftmost Mate POSition of the clipped sequence (Ex: 0)
|
|
56 * 9. <ISIZE> : inferred Insert SIZE (Ex: 0)
|
|
57 * 10. <SEQ> : query SEQuence; “=” for a match to the reference; n/N/. for ambiguity; cases are not maintained (!!)
|
|
58 * (Ex: TGTTGTTGTTATTTCTGAATGACATTTACTTTGCTGCTCTTTATTTTGCG
|
|
59 * TATTTAAAACTATTAGATCGTGTGATTATATTTGACAGGTCTTAATTGAC
|
|
60 * GCGCTGTTCAGCCCTTTGAGTTCGGTTGAGTTTTGTGTTGGAGAATTTTC)
|
|
61 * 11. <QUAL> : query QUALity; ASCII-33 gives the Phred base quality
|
|
62 * (Ex: /.8349-7:95@=8999;1:=;===AABD:=@A;>AD:E:9@==69<;@B3CBC@B8B;B89=8=3;@@@.:->>B?
|
|
63 * C4CBB8EDGDD8GDEEDEEE8EBA9B???=B;,8:+5;;A??>?#############################)
|
|
64 * 12. [<TAG>:<VTYPE>:<VALUE> [...]]: TAG/Value TYPE/match <VTYPE> (space allowed)
|
|
65 * (Ex: XT:A:R NM:i:2 X0:i:2 X1:i:0)
|
|
66 * </PRE>
|
|
67 */
|
|
68 public void filter() {
|
|
69 FileWriter fw = null;
|
|
70 Scanner s = null;
|
|
71
|
|
72 try {
|
|
73 s= new Scanner(input);
|
|
74 } catch (FileNotFoundException e) {
|
|
75 System.err.println("sam file not found");
|
|
76 e.printStackTrace();
|
|
77 }
|
|
78
|
|
79 try {
|
|
80 if(output == null){
|
|
81 output = new File(input.getName().
|
|
82 substring(0,input.getName().lastIndexOf("."))+".rsam");
|
|
83 }
|
|
84
|
|
85 fw = new FileWriter(output);
|
|
86
|
|
87 } catch (IOException e) {
|
|
88 System.err.println("Error generating rsam file");
|
|
89 e.printStackTrace();
|
|
90 }
|
|
91
|
|
92 String rawline;
|
|
93 ReadLine line = null;
|
|
94
|
|
95 do{
|
|
96 rawline = s.nextLine();
|
|
97 }while(rawline.startsWith("@"));
|
|
98
|
|
99 do{
|
|
100 try {
|
|
101 line = new ReadLine(rawline);
|
|
102 fw.write(line+"\r\n");
|
|
103 } catch (IOException e) {
|
|
104 System.err.println("Error writing reduced form of:\n"+rawline);
|
|
105 e.printStackTrace();
|
|
106 }
|
|
107 if(s.hasNextLine())
|
|
108 rawline = s.nextLine();
|
|
109 }while(s.hasNextLine());
|
|
110
|
|
111
|
|
112 try {
|
|
113 fw.write(line +"\r\n");
|
|
114 } catch (IOException e) {
|
|
115 System.err.println("Error writing reduced form of:\n"+line);
|
|
116 e.printStackTrace();
|
|
117 }
|
|
118
|
|
119 try {
|
|
120 fw.close();
|
|
121 } catch (IOException e) {
|
|
122 System.err.println("Error closing file");
|
|
123 e.printStackTrace();
|
|
124 }
|
|
125 s.close();
|
|
126
|
|
127 System.out.println("READS FILE:");
|
|
128 System.out.println(input.getAbsolutePath()+" reduced to "+
|
|
129 output.getAbsolutePath());
|
|
130 sort();
|
|
131 }
|
|
132
|
|
133
|
|
134 public void sort() {
|
|
135 Runtime rt = Runtime.getRuntime();
|
|
136 try {
|
|
137 String rawOutput = output.getAbsolutePath();
|
|
138 String outputName = output.getName();
|
|
139 String pathname = output.getParentFile().getAbsolutePath()+"/"+outputName+"Sorted";
|
|
140
|
|
141 output = new File(pathname);
|
|
142 String tmpD=output.getParentFile().getAbsolutePath();
|
|
143
|
|
144 if(!output.exists())output.createNewFile();
|
|
145 String command = "sort -k2,2 -k3n,3 -T "+tmpD+" "+rawOutput;
|
|
146 Process p = rt.exec(command);
|
|
147 Scanner ps = new Scanner(p.getInputStream());
|
|
148
|
|
149 FileWriter fw = new FileWriter(output);
|
|
150 while(ps.hasNextLine()){
|
|
151 String nextLine = ps.nextLine();
|
|
152 fw.write(nextLine+"\n");
|
|
153 }
|
|
154 fw.close();
|
|
155
|
|
156 Scanner psStdErr=new Scanner(p.getErrorStream());
|
|
157 while(psStdErr.hasNextLine()){
|
|
158 String errLine=psStdErr.nextLine();
|
|
159 System.out.println(errLine);
|
|
160 }
|
|
161
|
|
162 new File(rawOutput).delete();
|
|
163 new File(pathname).renameTo(new File(rawOutput));
|
|
164 System.out.println("Reduced file "+new File(rawOutput).getAbsolutePath()+" sorted\n");
|
|
165
|
|
166 } catch (IOException e1) {
|
|
167 e1.printStackTrace();
|
|
168 }
|
|
169 }
|
|
170
|
|
171
|
|
172 public String toString(){
|
|
173 return "ReadFilter";
|
|
174 }
|
|
175
|
|
176
|
|
177 }
|