Mercurial > repos > pfrommolt > ngsrich
comparison NGSrich_0.5.5/src/filters/ReadFilter.java @ 0:89ad0a9cca52 default tip
Uploaded
author | pfrommolt |
---|---|
date | Mon, 21 Nov 2011 08:12:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:89ad0a9cca52 |
---|---|
1 package filters; | |
2 | |
3 import java.io.File; | |
4 import java.io.FileNotFoundException; | |
5 import java.io.FileWriter; | |
6 import java.io.IOException; | |
7 import java.util.Scanner; | |
8 import datastructures.ReadLine; | |
9 | |
10 public class ReadFilter extends Filter{ | |
11 | |
12 File input, output; | |
13 | |
14 /** | |
15 * Constructs a SamAdapter object. The output of an adaption is written to the | |
16 * given file outputFileName. | |
17 * | |
18 * @param inputFileName the name of the read alignment input file. | |
19 * @param outputFileName the name of the output file containing the reduced | |
20 * format of the read alignment file. It must ends with ".red". | |
21 */ | |
22 public ReadFilter(String inputFileName, String outputFileName) { | |
23 super(inputFileName, outputFileName); | |
24 input = new File(inputFileName); | |
25 output = new File(outputFileName); | |
26 } | |
27 | |
28 /** | |
29 * <P> | |
30 * Uses ReadLine zu reduce each line of the read alignment file to following format:<BR> | |
31 * <name> <chrom> <start> <end> (tab delimited). | |
32 * </P> | |
33 * In the following we list the 12 fields of the sam-alignment-file. We mark the fields we are | |
34 * interessted in with (!!): | |
35 * <PRE> | |
36 * 1. <QNAME> : Query pair NAME if paired; or Query NAME if unpaired (Ex: 6:105:18438:14421) (!!) | |
37 * 2. <FLAG> : bitwise FLAG a₀a₁a₂a₃a₄a₅a₆a₇a₈a₉a₁₀ (Ex: 0 forward, 16 reverse strand) | |
38 * a₀ : the read is paired in sequencing, (no matter whether it is mapped in a pair) | |
39 * a₁ : the read is mapped in a proper pair | |
40 * a₂ : the query sequence itself is unmapped | |
41 * a₃ : the mate is unmapped | |
42 * a₄ : strand of the query (0 for forward; 1 for reverse strand) | |
43 * a₅ : strand of the mate | |
44 * a₆ : the read is the first read in a pair | |
45 * a₇ : the read is the second read in a pair | |
46 * a₈ : the alignment is not primary | |
47 * a₉ : the read fails platform/vendor quality checks | |
48 * a₁₀: the read is either a PCR duplicate or an optical duplicate | |
49 * 3. <RNAME> : Reference sequence NAME (Ex: chr10) (!!) | |
50 * 4. <POS> : 1-based leftmost POSition/coordinate of the clipped sequence (Ex: 60041) (!!) | |
51 * 5. <MAPQ> : MAPping Quality (Ex: 0) | |
52 * (phred-scaled posterior probability that the mapping position of this read is incorrect) | |
53 * 6. <CIGAR> : extended CIGAR string (Ex: 150M) | |
54 * 7. <MRNM> : Mate Reference sequence NaMe; “=” if the same as <RNAME> (Ex:*) | |
55 * 8. <MPOS> : 1-based leftmost Mate POSition of the clipped sequence (Ex: 0) | |
56 * 9. <ISIZE> : inferred Insert SIZE (Ex: 0) | |
57 * 10. <SEQ> : query SEQuence; “=” for a match to the reference; n/N/. for ambiguity; cases are not maintained (!!) | |
58 * (Ex: TGTTGTTGTTATTTCTGAATGACATTTACTTTGCTGCTCTTTATTTTGCG | |
59 * TATTTAAAACTATTAGATCGTGTGATTATATTTGACAGGTCTTAATTGAC | |
60 * GCGCTGTTCAGCCCTTTGAGTTCGGTTGAGTTTTGTGTTGGAGAATTTTC) | |
61 * 11. <QUAL> : query QUALity; ASCII-33 gives the Phred base quality | |
62 * (Ex: /.8349-7:95@=8999;1:=;===AABD:=@A;>AD:E:9@==69<;@B3CBC@B8B;B89=8=3;@@@.:->>B? | |
63 * C4CBB8EDGDD8GDEEDEEE8EBA9B???=B;,8:+5;;A??>?#############################) | |
64 * 12. [<TAG>:<VTYPE>:<VALUE> [...]]: TAG/Value TYPE/match <VTYPE> (space allowed) | |
65 * (Ex: XT:A:R NM:i:2 X0:i:2 X1:i:0) | |
66 * </PRE> | |
67 */ | |
68 public void filter() { | |
69 FileWriter fw = null; | |
70 Scanner s = null; | |
71 | |
72 try { | |
73 s= new Scanner(input); | |
74 } catch (FileNotFoundException e) { | |
75 System.err.println("sam file not found"); | |
76 e.printStackTrace(); | |
77 } | |
78 | |
79 try { | |
80 if(output == null){ | |
81 output = new File(input.getName(). | |
82 substring(0,input.getName().lastIndexOf("."))+".rsam"); | |
83 } | |
84 | |
85 fw = new FileWriter(output); | |
86 | |
87 } catch (IOException e) { | |
88 System.err.println("Error generating rsam file"); | |
89 e.printStackTrace(); | |
90 } | |
91 | |
92 String rawline; | |
93 ReadLine line = null; | |
94 | |
95 do{ | |
96 rawline = s.nextLine(); | |
97 }while(rawline.startsWith("@")); | |
98 | |
99 do{ | |
100 try { | |
101 line = new ReadLine(rawline); | |
102 fw.write(line+"\r\n"); | |
103 } catch (IOException e) { | |
104 System.err.println("Error writing reduced form of:\n"+rawline); | |
105 e.printStackTrace(); | |
106 } | |
107 if(s.hasNextLine()) | |
108 rawline = s.nextLine(); | |
109 }while(s.hasNextLine()); | |
110 | |
111 | |
112 try { | |
113 fw.write(line +"\r\n"); | |
114 } catch (IOException e) { | |
115 System.err.println("Error writing reduced form of:\n"+line); | |
116 e.printStackTrace(); | |
117 } | |
118 | |
119 try { | |
120 fw.close(); | |
121 } catch (IOException e) { | |
122 System.err.println("Error closing file"); | |
123 e.printStackTrace(); | |
124 } | |
125 s.close(); | |
126 | |
127 System.out.println("READS FILE:"); | |
128 System.out.println(input.getAbsolutePath()+" reduced to "+ | |
129 output.getAbsolutePath()); | |
130 sort(); | |
131 } | |
132 | |
133 | |
134 public void sort() { | |
135 Runtime rt = Runtime.getRuntime(); | |
136 try { | |
137 String rawOutput = output.getAbsolutePath(); | |
138 String outputName = output.getName(); | |
139 String pathname = output.getParentFile().getAbsolutePath()+"/"+outputName+"Sorted"; | |
140 | |
141 output = new File(pathname); | |
142 String tmpD=output.getParentFile().getAbsolutePath(); | |
143 | |
144 if(!output.exists())output.createNewFile(); | |
145 String command = "sort -k2,2 -k3n,3 -T "+tmpD+" "+rawOutput; | |
146 Process p = rt.exec(command); | |
147 Scanner ps = new Scanner(p.getInputStream()); | |
148 | |
149 FileWriter fw = new FileWriter(output); | |
150 while(ps.hasNextLine()){ | |
151 String nextLine = ps.nextLine(); | |
152 fw.write(nextLine+"\n"); | |
153 } | |
154 fw.close(); | |
155 | |
156 Scanner psStdErr=new Scanner(p.getErrorStream()); | |
157 while(psStdErr.hasNextLine()){ | |
158 String errLine=psStdErr.nextLine(); | |
159 System.out.println(errLine); | |
160 } | |
161 | |
162 new File(rawOutput).delete(); | |
163 new File(pathname).renameTo(new File(rawOutput)); | |
164 System.out.println("Reduced file "+new File(rawOutput).getAbsolutePath()+" sorted\n"); | |
165 | |
166 } catch (IOException e1) { | |
167 e1.printStackTrace(); | |
168 } | |
169 } | |
170 | |
171 | |
172 public String toString(){ | |
173 return "ReadFilter"; | |
174 } | |
175 | |
176 | |
177 } |