comparison NGSrich_0.5.5/src/filters/ReadFilter.java @ 0:89ad0a9cca52 default tip

Uploaded
author pfrommolt
date Mon, 21 Nov 2011 08:12:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:89ad0a9cca52
1 package filters;
2
3 import java.io.File;
4 import java.io.FileNotFoundException;
5 import java.io.FileWriter;
6 import java.io.IOException;
7 import java.util.Scanner;
8 import datastructures.ReadLine;
9
10 public class ReadFilter extends Filter{
11
12 File input, output;
13
14 /**
15 * Constructs a SamAdapter object. The output of an adaption is written to the
16 * given file outputFileName.
17 *
18 * @param inputFileName the name of the read alignment input file.
19 * @param outputFileName the name of the output file containing the reduced
20 * format of the read alignment file. It must ends with ".red".
21 */
22 public ReadFilter(String inputFileName, String outputFileName) {
23 super(inputFileName, outputFileName);
24 input = new File(inputFileName);
25 output = new File(outputFileName);
26 }
27
28 /**
29 * <P>
30 * Uses ReadLine zu reduce each line of the read alignment file to following format:<BR>
31 * &#60;name> &#60;chrom> &#60;start> &#60;end> (tab delimited).
32 * </P>
33 * In the following we list the 12 fields of the sam-alignment-file. We mark the fields we are
34 * interessted in with (!!):
35 * <PRE>
36 * 1. &#60;QNAME> : Query pair NAME if paired; or Query NAME if unpaired (Ex: 6:105:18438:14421) (!!)
37 * 2. &#60;FLAG> : bitwise FLAG a₀a₁a₂a₃a₄a₅a₆a₇a₈a₉a₁₀ (Ex: 0 forward, 16 reverse strand)
38 * a₀ : the read is paired in sequencing, (no matter whether it is mapped in a pair)
39 * a₁ : the read is mapped in a proper pair
40 * a₂ : the query sequence itself is unmapped
41 * a₃ : the mate is unmapped
42 * a₄ : strand of the query (0 for forward; 1 for reverse strand)
43 * a₅ : strand of the mate
44 * a₆ : the read is the first read in a pair
45 * a₇ : the read is the second read in a pair
46 * a₈ : the alignment is not primary
47 * a₉ : the read fails platform/vendor quality checks
48 * a₁₀: the read is either a PCR duplicate or an optical duplicate
49 * 3. &#60;RNAME> : Reference sequence NAME (Ex: chr10) (!!)
50 * 4. &#60;POS> : 1-based leftmost POSition/coordinate of the clipped sequence (Ex: 60041) (!!)
51 * 5. &#60;MAPQ> : MAPping Quality (Ex: 0)
52 * (phred-scaled posterior probability that the mapping position of this read is incorrect)
53 * 6. &#60;CIGAR> : extended CIGAR string (Ex: 150M)
54 * 7. &#60;MRNM> : Mate Reference sequence NaMe; “=” if the same as &#60;RNAME> (Ex:*)
55 * 8. &#60;MPOS> : 1-based leftmost Mate POSition of the clipped sequence (Ex: 0)
56 * 9. &#60;ISIZE> : inferred Insert SIZE (Ex: 0)
57 * 10. &#60;SEQ> : query SEQuence; “=” for a match to the reference; n/N/. for ambiguity; cases are not maintained (!!)
58 * (Ex: TGTTGTTGTTATTTCTGAATGACATTTACTTTGCTGCTCTTTATTTTGCG
59 * TATTTAAAACTATTAGATCGTGTGATTATATTTGACAGGTCTTAATTGAC
60 * GCGCTGTTCAGCCCTTTGAGTTCGGTTGAGTTTTGTGTTGGAGAATTTTC)
61 * 11. &#60;QUAL> : query QUALity; ASCII-33 gives the Phred base quality
62 * (Ex: /.8349-7:95@=8999;1:=;===AABD:=@A;>AD:E:9@==69<;@B3CBC@B8B;B89=8=3;@@@.:->>B?
63 * C4CBB8EDGDD8GDEEDEEE8EBA9B???=B;,8:+5;;A??>?#############################)
64 * 12. [&#60;TAG>:&#60;VTYPE>:&#60;VALUE> [...]]: TAG/Value TYPE/match <VTYPE> (space allowed)
65 * (Ex: XT:A:R NM:i:2 X0:i:2 X1:i:0)
66 * </PRE>
67 */
68 public void filter() {
69 FileWriter fw = null;
70 Scanner s = null;
71
72 try {
73 s= new Scanner(input);
74 } catch (FileNotFoundException e) {
75 System.err.println("sam file not found");
76 e.printStackTrace();
77 }
78
79 try {
80 if(output == null){
81 output = new File(input.getName().
82 substring(0,input.getName().lastIndexOf("."))+".rsam");
83 }
84
85 fw = new FileWriter(output);
86
87 } catch (IOException e) {
88 System.err.println("Error generating rsam file");
89 e.printStackTrace();
90 }
91
92 String rawline;
93 ReadLine line = null;
94
95 do{
96 rawline = s.nextLine();
97 }while(rawline.startsWith("@"));
98
99 do{
100 try {
101 line = new ReadLine(rawline);
102 fw.write(line+"\r\n");
103 } catch (IOException e) {
104 System.err.println("Error writing reduced form of:\n"+rawline);
105 e.printStackTrace();
106 }
107 if(s.hasNextLine())
108 rawline = s.nextLine();
109 }while(s.hasNextLine());
110
111
112 try {
113 fw.write(line +"\r\n");
114 } catch (IOException e) {
115 System.err.println("Error writing reduced form of:\n"+line);
116 e.printStackTrace();
117 }
118
119 try {
120 fw.close();
121 } catch (IOException e) {
122 System.err.println("Error closing file");
123 e.printStackTrace();
124 }
125 s.close();
126
127 System.out.println("READS FILE:");
128 System.out.println(input.getAbsolutePath()+" reduced to "+
129 output.getAbsolutePath());
130 sort();
131 }
132
133
134 public void sort() {
135 Runtime rt = Runtime.getRuntime();
136 try {
137 String rawOutput = output.getAbsolutePath();
138 String outputName = output.getName();
139 String pathname = output.getParentFile().getAbsolutePath()+"/"+outputName+"Sorted";
140
141 output = new File(pathname);
142 String tmpD=output.getParentFile().getAbsolutePath();
143
144 if(!output.exists())output.createNewFile();
145 String command = "sort -k2,2 -k3n,3 -T "+tmpD+" "+rawOutput;
146 Process p = rt.exec(command);
147 Scanner ps = new Scanner(p.getInputStream());
148
149 FileWriter fw = new FileWriter(output);
150 while(ps.hasNextLine()){
151 String nextLine = ps.nextLine();
152 fw.write(nextLine+"\n");
153 }
154 fw.close();
155
156 Scanner psStdErr=new Scanner(p.getErrorStream());
157 while(psStdErr.hasNextLine()){
158 String errLine=psStdErr.nextLine();
159 System.out.println(errLine);
160 }
161
162 new File(rawOutput).delete();
163 new File(pathname).renameTo(new File(rawOutput));
164 System.out.println("Reduced file "+new File(rawOutput).getAbsolutePath()+" sorted\n");
165
166 } catch (IOException e1) {
167 e1.printStackTrace();
168 }
169 }
170
171
172 public String toString(){
173 return "ReadFilter";
174 }
175
176
177 }