Mercurial > repos > pfrommolt > ngsrich
comparison NGSrich_0.5.5/src/converters/Read2Wig.java @ 0:89ad0a9cca52 default tip
Uploaded
author | pfrommolt |
---|---|
date | Mon, 21 Nov 2011 08:12:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:89ad0a9cca52 |
---|---|
1 package converters; | |
2 | |
3 import java.io.File; | |
4 import java.io.FileNotFoundException; | |
5 import java.io.FileWriter; | |
6 import java.io.IOException; | |
7 import java.util.Scanner; | |
8 | |
9 import datastructures.GenomeFrame; | |
10 import datastructures.ReadFrame; | |
11 import datastructures.ReducedReadLine; | |
12 import exceptions.ChromosomeFormatException; | |
13 import exceptions.ChromosomeNotFoundException; | |
14 import exceptions.RangeFormatException; | |
15 import exceptions.RangeLimitNotFoundException; | |
16 | |
17 import middlewares.Misc; | |
18 | |
19 /** | |
20 * <P>This is a converter class which converts a reduced alignment file with the | |
21 * following format:</P> | |
22 * <TABLE> | |
23 * <TR> | |
24 * <TD width= "140"><B>read-name</B></TD> | |
25 * <TD width= "200"><B>chromosome-name</B></TD> | |
26 * <TD width= "150"><B>start-position</B></TD> | |
27 * <TD width= "200"><B>end-position</B></TD> | |
28 * </TR> | |
29 * <TR height=""></TR> | |
30 * </TABLE> | |
31 * <P>to the wiggle-format. The wiggle format (WIG) allows the display of continuous- | |
32 * valued data in a track format and it is used to visualize the read enrichment | |
33 * with the <a href="http://genome.ucsc.edu/cgi-bin/hgGateway" target="_new">ucsc genome browser</a>. Click on the following link: | |
34 * <a href="http://genome.ucsc.edu/goldenPath/help/wiggle.html" target="_new"> | |
35 * http://genome.ucsc.edu/goldenPath/help/wiggle.html</a> for more information.</P> | |
36 * | |
37 * @author Ali Abdallah | |
38 * @version 06.01.2011 | |
39 * @since jdk 1.6.0 | |
40 */ | |
41 | |
42 public class Read2Wig { | |
43 | |
44 private static final int LENGTH = 1024; | |
45 private File alignFile; | |
46 private File outputFile; | |
47 private String outputDir; | |
48 private int gMin = Integer.MAX_VALUE; | |
49 private int gMax = Integer.MIN_VALUE; | |
50 private FileWriter tmpWigWriter; | |
51 | |
52 /** | |
53 * Constructs and initialzes a new Read2Wig object. | |
54 * Converts the read alignment file to a overall covrage wig file. | |
55 * | |
56 * @param alignFileName the name of the alignment file. | |
57 * @throws IOException | |
58 */ | |
59 public Read2Wig(String alignFileName,String outPrefix,String outputDir,String genome,String tmpDir) throws IOException{ | |
60 // Das File-Objekt zur Behandlung der Alignment-Datei erzeugen. | |
61 this.alignFile = new File(alignFileName); | |
62 this.outputDir = outputDir+Misc.slash(outputDir); | |
63 tmpWigWriter = new FileWriter(tmpDir+Misc.slash(tmpDir)+"tmpWig.wig"); | |
64 convert(alignFileName,this.outputDir,outPrefix); | |
65 try{ | |
66 wigToBigWig("hg19",tmpDir); | |
67 } | |
68 catch(Exception e){ | |
69 System.err.println("Converting wig file to a bigwig file failed. " + | |
70 "Check whether you have a 64-bit linux system!"); | |
71 } | |
72 } | |
73 | |
74 private void convert(String alignFileName, String outputDir,String outPrefix) throws FileNotFoundException, IOException { | |
75 Scanner s = new Scanner(this.alignFile); | |
76 computeExtremas(s); s.close(); | |
77 Scanner readScanner = new Scanner(this.alignFile); | |
78 alignFileName = Misc.prefix(alignFileName); | |
79 Scanner as = new Scanner(alignFileName); | |
80 as.useDelimiter("_"); | |
81 as.next(); | |
82 outputFile = new File(outputDir+outPrefix+".wig"); | |
83 FileWriter fw = new FileWriter(outputFile); | |
84 | |
85 annotationHeader(fw); | |
86 | |
87 ReadFrame readF = computeNextRead(readScanner); //Current read frame | |
88 GenomeFrame frame = new GenomeFrame(readF.start(), LENGTH); //Current base frame | |
89 String chrom = readF.chrom(); //Current chromosome | |
90 if(frame.contains(readF)){frame.updateHits(readF);} //Sum up all hits of curr read | |
91 writeHeader(fw, frame, readF); //Write header line | |
92 | |
93 while(true){ | |
94 while(readScanner.hasNextLine() && chrom.equals(readF.chrom()) && frame.contains(readF)){ | |
95 frame.updateHits(readF); readF = computeNextRead(readScanner); | |
96 } | |
97 while(readScanner.hasNextLine() && chrom.equals(readF.chrom()) && frame.overlaps(readF) && !frame.limitExceeded(readF)){ | |
98 frame.updateFrameFromRightEnd(readF); frame.updateHits(readF); readF = computeNextRead(readScanner); | |
99 } | |
100 if(!readScanner.hasNextLine()){break;} | |
101 else if(!chrom.equals(readF.chrom())){ | |
102 frame = new GenomeFrame(readF.start(), LENGTH); | |
103 writeHeader(fw, frame, readF); | |
104 chrom = readF.chrom(); | |
105 } | |
106 else if(!frame.overlaps(readF)){ | |
107 GenomeFrame last = frame; writeFrame(fw, last); | |
108 frame = new GenomeFrame(readF.start(), LENGTH); | |
109 writeFrameLeak(fw, last, frame, readF); | |
110 } | |
111 else if(frame.limitExceeded(readF)){ | |
112 writeFramePortion(fw, frame, readF.start()); frame.updateFrameFromBothEnds(readF, LENGTH); | |
113 } | |
114 } | |
115 fw.close(); | |
116 tmpWigWriter.close(); | |
117 readScanner.close(); | |
118 } | |
119 | |
120 private void wigToBigWig(String genome,String tmpDir){ | |
121 String scriptDir = Misc.binDir()+Misc.slash(Misc.binDir()); | |
122 String wig2bw = scriptDir + "../thirdparty/wigToBigWig"; | |
123 String fetchChromSizes = scriptDir + "fetchChromSizes"; | |
124 | |
125 try { | |
126 Process p = Runtime.getRuntime().exec("sh "+fetchChromSizes+" "+genome); | |
127 FileWriter fw = new FileWriter(outputDir+Misc.slash(outputDir)+genome+".chrom.sizes"); | |
128 Scanner s = new Scanner(p.getInputStream()); | |
129 while(s.hasNextLine()) | |
130 fw.write(s.nextLine()+"\r\n"); | |
131 fw.close(); | |
132 | |
133 Runtime.getRuntime().exec(wig2bw+" "+outputFile.getAbsolutePath() | |
134 +" "+outputDir+Misc.slash(outputDir)+genome+".chrom.sizes "+ | |
135 outputDir+Misc.slash(outputDir)+ | |
136 Misc.prefix(outputFile.getAbsolutePath())+".bw"); | |
137 new File(outputDir+Misc.slash(outputDir)+genome+".chrom.sizes").delete(); | |
138 new File(tmpDir+Misc.slash(tmpDir)+"tmpWig.wig").delete(); | |
139 } catch (IOException e) { | |
140 // TODO Auto-generated catch block | |
141 e.printStackTrace(); | |
142 } | |
143 } | |
144 | |
145 private void computeExtremas(Scanner s){ | |
146 String chrom = "Datei falsch formatiert."; | |
147 String oldChrom = null; | |
148 int start = -1; | |
149 int end = -1; | |
150 String line = s.nextLine(); | |
151 | |
152 oldChrom = chrom(line); start = start(line); end = end(line); | |
153 | |
154 while(s.hasNextLine()){ | |
155 line = s.nextLine(); | |
156 if(isHeader(line)){ | |
157 chrom = chrom(line); start = start(line); end = end(line); | |
158 if(gMin > start && chrom.equals(oldChrom)) | |
159 gMin = start; | |
160 | |
161 if(gMax < end && chrom.equals(oldChrom)) | |
162 gMax = end; | |
163 } | |
164 } | |
165 } | |
166 | |
167 private boolean isHeader(String line){ | |
168 return line.indexOf("chr")!=-1; | |
169 } | |
170 | |
171 private int start(String line){ | |
172 Scanner s = new Scanner(line); | |
173 s.next();s.next(); | |
174 return s.nextInt(); | |
175 } | |
176 | |
177 private int end(String line){ | |
178 Scanner s = new Scanner(line); | |
179 s.next();s.next();s.next(); | |
180 return s.nextInt(); | |
181 } | |
182 | |
183 private String chrom(String line){ | |
184 Scanner s = new Scanner(line); | |
185 s.next(); | |
186 return s.next(); | |
187 } | |
188 | |
189 private void writeFramePortion(FileWriter fw, GenomeFrame frame, int start2) throws IOException { | |
190 for(int base = frame.start(); base < start2; base++){ | |
191 fw.write(frame.getHit(base)+"\r\n"); | |
192 tmpWigWriter.write(frame.getHit(base)+"\r\n"); | |
193 } | |
194 } | |
195 | |
196 private void writeFrameLeak(FileWriter fw, | |
197 GenomeFrame last, GenomeFrame frame, ReadFrame read) | |
198 throws IOException { | |
199 if(frame.start()-last.end() > 50){ | |
200 writeHeader(fw, frame, read); | |
201 } | |
202 else{ | |
203 for(int i = 0; i < frame.start()-last.end()-1; i++){ | |
204 fw.write(0+"\r\n"); | |
205 tmpWigWriter.write(0+"\r\n"); | |
206 } | |
207 } | |
208 } | |
209 | |
210 private void annotationHeader(FileWriter fw) throws IOException{ | |
211 String browserLines = "browser position chr1:"+gMin+"-"+gMax+"\r\n"+ | |
212 "browser hide all"+"\r\n"+ | |
213 "browser pack refGene encodeRegions"+"\r\n"+ | |
214 "browser full altGraph"; | |
215 Scanner as = new Scanner(alignFile.getName()); | |
216 as.useDelimiter("_");as.next(); | |
217 String trackLine = "track type=wiggle_0 name=\""+as.next()+"\" " + | |
218 "description=\"Base read coverage\" visibility=full " + | |
219 "color=0,0,0 altColor=255,0,0 priority=20 " + | |
220 "autoScale=on"; | |
221 fw.write(browserLines+"\r\n"+trackLine+"\r\n"); | |
222 } | |
223 | |
224 private void writeHeader(FileWriter fw, GenomeFrame frame, ReadFrame read) | |
225 throws IOException { | |
226 fw.write("fixedStep chrom="+read.chrom() | |
227 +" start="+frame.start() | |
228 + " step=1\r\n"); | |
229 tmpWigWriter.write("fixedStep chrom="+read.chrom() | |
230 +" start="+frame.start() | |
231 + " step=1\r\n"); | |
232 } | |
233 | |
234 private void writeFrame(FileWriter fw, GenomeFrame frame) throws IOException { | |
235 // TODO Auto-generated method stub | |
236 for(int base = frame.start(); base <= frame.end(); base++){ | |
237 fw.write(frame.getHit(base)+"\r\n"); | |
238 tmpWigWriter.write(frame.getHit(base)+"\r\n"); | |
239 } | |
240 } | |
241 | |
242 private ReadFrame computeNextRead(Scanner afScanner) { | |
243 ReducedReadLine rl = null; | |
244 try { | |
245 rl = new ReducedReadLine(afScanner.nextLine()); | |
246 } catch (ChromosomeFormatException e) { | |
247 e.printStackTrace(); | |
248 } catch (ChromosomeNotFoundException e) { | |
249 e.printStackTrace(); | |
250 } catch (RangeFormatException e) { | |
251 e.printStackTrace(); | |
252 } catch (RangeLimitNotFoundException e) { | |
253 e.printStackTrace(); | |
254 } | |
255 return new ReadFrame(rl.name(), rl.chrom(), rl.start(), rl.end()); | |
256 } | |
257 | |
258 } | |
259 | |
260 | |
261 | |
262 | |
263 | |
264 | |
265 | |
266 | |
267 |