comparison NGSrich_0.5.5/src/converters/Read2Wig.java @ 0:89ad0a9cca52 default tip

Uploaded
author pfrommolt
date Mon, 21 Nov 2011 08:12:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:89ad0a9cca52
1 package converters;
2
3 import java.io.File;
4 import java.io.FileNotFoundException;
5 import java.io.FileWriter;
6 import java.io.IOException;
7 import java.util.Scanner;
8
9 import datastructures.GenomeFrame;
10 import datastructures.ReadFrame;
11 import datastructures.ReducedReadLine;
12 import exceptions.ChromosomeFormatException;
13 import exceptions.ChromosomeNotFoundException;
14 import exceptions.RangeFormatException;
15 import exceptions.RangeLimitNotFoundException;
16
17 import middlewares.Misc;
18
19 /**
20 * <P>This is a converter class, wich convert a reduced alignment file with the
21 * following format:</P>
22 * <TABLE>
23 * <TR>
24 * <TD width= "140"><B>read-name</B></TD>
25 * <TD width= "200"><B>chromosom-name</B></TD>
26 * <TD width= "150"><B>start-position</B></TD>
27 * <TD width= "200"><B>end-position</B></TD>
28 * </TR>
29 * <TR height=""></TR>
30 * </TABLE>
31 * <P>to the wiggle-format. The wiggle format (WIG) allows the display of continuous-
32 * valued data in a track format and it is used to visualize the read enrichment
33 * with the <a href="http://genome.ucsc.edu/cgi-bin/hgGateway" target="_new">ucsc genome browser</a>. Click on the following link:
34 * <a href="http://genome.ucsc.edu/goldenPath/help/wiggle.html" target="_new">
35 * http://genome.ucsc.edu/goldenPath/help/wiggle.html</a> for more information.</P>
36 *
37 * @author Ali Abdallah
38 * @version 06.01.2011
39 * @since jdk 1.6.0
40 */
41
42 public class Read2Wig {
43
44 private static final int LENGTH = 1024;
45 private File alignFile;
46 private File outputFile;
47 private String outputDir;
48 private int gMin = Integer.MAX_VALUE;
49 private int gMax = Integer.MIN_VALUE;
50 private FileWriter tmpWigWriter;
51
52 /**
53 * Constructs and initialzes a new Read2Wig object.
54 * Converts the read alignment file to a overall covrage wig file.
55 *
56 * @param alignFileName the name of the alignment file.
57 * @throws IOException
58 */
59 public Read2Wig(String alignFileName,String outPrefix,String outputDir,String genome,String tmpDir) throws IOException{
60 // Das File-Objekt zur Behandlung der Alignment-Datei erzeugen.
61 this.alignFile = new File(alignFileName);
62 this.outputDir = outputDir+Misc.slash(outputDir);
63 tmpWigWriter = new FileWriter(tmpDir+Misc.slash(tmpDir)+"tmpWig.wig");
64 convert(alignFileName,this.outputDir,outPrefix);
65 try{
66 wigToBigWig("hg19",tmpDir);
67 }
68 catch(Exception e){
69 System.err.println("Converting wig file to a bigwig file failed. " +
70 "Check whether you have a 64-bit linux system!");
71 }
72 }
73
74 private void convert(String alignFileName, String outputDir,String outPrefix) throws FileNotFoundException, IOException {
75 Scanner s = new Scanner(this.alignFile);
76 computeExtremas(s); s.close();
77 Scanner readScanner = new Scanner(this.alignFile);
78 alignFileName = Misc.prefix(alignFileName);
79 Scanner as = new Scanner(alignFileName);
80 as.useDelimiter("_");
81 as.next();
82 outputFile = new File(outputDir+outPrefix+".wig");
83 FileWriter fw = new FileWriter(outputFile);
84
85 annotationHeader(fw);
86
87 ReadFrame readF = computeNextRead(readScanner); //Current read frame
88 GenomeFrame frame = new GenomeFrame(readF.start(), LENGTH); //Current base frame
89 String chrom = readF.chrom(); //Current chromosome
90 if(frame.contains(readF)){frame.updateHits(readF);} //Sum up all hits of curr read
91 writeHeader(fw, frame, readF); //Write header line
92
93 while(true){
94 while(readScanner.hasNextLine() && chrom.equals(readF.chrom()) && frame.contains(readF)){
95 frame.updateHits(readF); readF = computeNextRead(readScanner);
96 }
97 while(readScanner.hasNextLine() && chrom.equals(readF.chrom()) && frame.overlaps(readF) && !frame.limitExceeded(readF)){
98 frame.updateFrameFromRightEnd(readF); frame.updateHits(readF); readF = computeNextRead(readScanner);
99 }
100 if(!readScanner.hasNextLine()){break;}
101 else if(!chrom.equals(readF.chrom())){
102 frame = new GenomeFrame(readF.start(), LENGTH);
103 writeHeader(fw, frame, readF);
104 chrom = readF.chrom();
105 }
106 else if(!frame.overlaps(readF)){
107 GenomeFrame last = frame; writeFrame(fw, last);
108 frame = new GenomeFrame(readF.start(), LENGTH);
109 writeFrameLeak(fw, last, frame, readF);
110 }
111 else if(frame.limitExceeded(readF)){
112 writeFramePortion(fw, frame, readF.start()); frame.updateFrameFromBothEnds(readF, LENGTH);
113 }
114 }
115 fw.close();
116 tmpWigWriter.close();
117 readScanner.close();
118 }
119
120 private void wigToBigWig(String genome,String tmpDir){
121 String scriptDir = Misc.binDir()+Misc.slash(Misc.binDir());
122 String wig2bw = scriptDir + "../thirdparty/wigToBigWig";
123 String fetchChromSizes = scriptDir + "fetchChromSizes";
124
125 try {
126 Process p = Runtime.getRuntime().exec("sh "+fetchChromSizes+" "+genome);
127 FileWriter fw = new FileWriter(outputDir+Misc.slash(outputDir)+genome+".chrom.sizes");
128 Scanner s = new Scanner(p.getInputStream());
129 while(s.hasNextLine())
130 fw.write(s.nextLine()+"\r\n");
131 fw.close();
132
133 Runtime.getRuntime().exec(wig2bw+" "+outputFile.getAbsolutePath()
134 +" "+outputDir+Misc.slash(outputDir)+genome+".chrom.sizes "+
135 outputDir+Misc.slash(outputDir)+
136 Misc.prefix(outputFile.getAbsolutePath())+".bw");
137 new File(outputDir+Misc.slash(outputDir)+genome+".chrom.sizes").delete();
138 new File(tmpDir+Misc.slash(tmpDir)+"tmpWig.wig").delete();
139 } catch (IOException e) {
140 // TODO Auto-generated catch block
141 e.printStackTrace();
142 }
143 }
144
145 private void computeExtremas(Scanner s){
146 String chrom = "Datei falsch formatiert.";
147 String oldChrom = null;
148 int start = -1;
149 int end = -1;
150 String line = s.nextLine();
151
152 oldChrom = chrom(line); start = start(line); end = end(line);
153
154 while(s.hasNextLine()){
155 line = s.nextLine();
156 if(isHeader(line)){
157 chrom = chrom(line); start = start(line); end = end(line);
158 if(gMin > start && chrom.equals(oldChrom))
159 gMin = start;
160
161 if(gMax < end && chrom.equals(oldChrom))
162 gMax = end;
163 }
164 }
165 }
166
167 private boolean isHeader(String line){
168 return line.indexOf("chr")!=-1;
169 }
170
171 private int start(String line){
172 Scanner s = new Scanner(line);
173 s.next();s.next();
174 return s.nextInt();
175 }
176
177 private int end(String line){
178 Scanner s = new Scanner(line);
179 s.next();s.next();s.next();
180 return s.nextInt();
181 }
182
183 private String chrom(String line){
184 Scanner s = new Scanner(line);
185 s.next();
186 return s.next();
187 }
188
189 private void writeFramePortion(FileWriter fw, GenomeFrame frame, int start2) throws IOException {
190 for(int base = frame.start(); base < start2; base++){
191 fw.write(frame.getHit(base)+"\r\n");
192 tmpWigWriter.write(frame.getHit(base)+"\r\n");
193 }
194 }
195
196 private void writeFrameLeak(FileWriter fw,
197 GenomeFrame last, GenomeFrame frame, ReadFrame read)
198 throws IOException {
199 if(frame.start()-last.end() > 50){
200 writeHeader(fw, frame, read);
201 }
202 else{
203 for(int i = 0; i < frame.start()-last.end()-1; i++){
204 fw.write(0+"\r\n");
205 tmpWigWriter.write(0+"\r\n");
206 }
207 }
208 }
209
210 private void annotationHeader(FileWriter fw) throws IOException{
211 String browserLines = "browser position chr1:"+gMin+"-"+gMax+"\r\n"+
212 "browser hide all"+"\r\n"+
213 "browser pack refGene encodeRegions"+"\r\n"+
214 "browser full altGraph";
215 Scanner as = new Scanner(alignFile.getName());
216 as.useDelimiter("_");as.next();
217 String trackLine = "track type=wiggle_0 name=\""+as.next()+"\" " +
218 "description=\"Base read coverage\" visibility=full " +
219 "color=0,0,0 altColor=255,0,0 priority=20 " +
220 "autoScale=on";
221 fw.write(browserLines+"\r\n"+trackLine+"\r\n");
222 }
223
224 private void writeHeader(FileWriter fw, GenomeFrame frame, ReadFrame read)
225 throws IOException {
226 fw.write("fixedStep chrom="+read.chrom()
227 +" start="+frame.start()
228 + " step=1\r\n");
229 tmpWigWriter.write("fixedStep chrom="+read.chrom()
230 +" start="+frame.start()
231 + " step=1\r\n");
232 }
233
234 private void writeFrame(FileWriter fw, GenomeFrame frame) throws IOException {
235 // TODO Auto-generated method stub
236 for(int base = frame.start(); base <= frame.end(); base++){
237 fw.write(frame.getHit(base)+"\r\n");
238 tmpWigWriter.write(frame.getHit(base)+"\r\n");
239 }
240 }
241
242 private ReadFrame computeNextRead(Scanner afScanner) {
243 ReducedReadLine rl = null;
244 try {
245 rl = new ReducedReadLine(afScanner.nextLine());
246 } catch (ChromosomeFormatException e) {
247 e.printStackTrace();
248 } catch (ChromosomeNotFoundException e) {
249 e.printStackTrace();
250 } catch (RangeFormatException e) {
251 e.printStackTrace();
252 } catch (RangeLimitNotFoundException e) {
253 e.printStackTrace();
254 }
255 return new ReadFrame(rl.name(), rl.chrom(), rl.start(), rl.end());
256 }
257
258 }
259
260
261
262
263
264
265
266
267