0
|
1 package converters;
|
|
2
|
|
3 import java.io.File;
|
|
4 import java.io.FileNotFoundException;
|
|
5 import java.io.FileWriter;
|
|
6 import java.io.IOException;
|
|
7 import java.util.Scanner;
|
|
8
|
|
9 import datastructures.GenomeFrame;
|
|
10 import datastructures.ReadFrame;
|
|
11 import datastructures.ReducedReadLine;
|
|
12 import exceptions.ChromosomeFormatException;
|
|
13 import exceptions.ChromosomeNotFoundException;
|
|
14 import exceptions.RangeFormatException;
|
|
15 import exceptions.RangeLimitNotFoundException;
|
|
16
|
|
17 import middlewares.Misc;
|
|
18
|
|
19 /**
|
|
20 * <P>This is a converter class, wich convert a reduced alignment file with the
|
|
21 * following format:</P>
|
|
22 * <TABLE>
|
|
23 * <TR>
|
|
24 * <TD width= "140"><B>read-name</B></TD>
|
|
25 * <TD width= "200"><B>chromosom-name</B></TD>
|
|
26 * <TD width= "150"><B>start-position</B></TD>
|
|
27 * <TD width= "200"><B>end-position</B></TD>
|
|
28 * </TR>
|
|
29 * <TR height=""></TR>
|
|
30 * </TABLE>
|
|
31 * <P>to the wiggle-format. The wiggle format (WIG) allows the display of continuous-
|
|
32 * valued data in a track format and it is used to visualize the read enrichment
|
|
33 * with the <a href="http://genome.ucsc.edu/cgi-bin/hgGateway" target="_new">ucsc genome browser</a>. Click on the following link:
|
|
34 * <a href="http://genome.ucsc.edu/goldenPath/help/wiggle.html" target="_new">
|
|
35 * http://genome.ucsc.edu/goldenPath/help/wiggle.html</a> for more information.</P>
|
|
36 *
|
|
37 * @author Ali Abdallah
|
|
38 * @version 06.01.2011
|
|
39 * @since jdk 1.6.0
|
|
40 */
|
|
41
|
|
42 public class Read2Wig {
|
|
43
|
|
44 private static final int LENGTH = 1024;
|
|
45 private File alignFile;
|
|
46 private File outputFile;
|
|
47 private String outputDir;
|
|
48 private int gMin = Integer.MAX_VALUE;
|
|
49 private int gMax = Integer.MIN_VALUE;
|
|
50 private FileWriter tmpWigWriter;
|
|
51
|
|
52 /**
|
|
53 * Constructs and initialzes a new Read2Wig object.
|
|
54 * Converts the read alignment file to a overall covrage wig file.
|
|
55 *
|
|
56 * @param alignFileName the name of the alignment file.
|
|
57 * @throws IOException
|
|
58 */
|
|
59 public Read2Wig(String alignFileName,String outPrefix,String outputDir,String genome,String tmpDir) throws IOException{
|
|
60 // Das File-Objekt zur Behandlung der Alignment-Datei erzeugen.
|
|
61 this.alignFile = new File(alignFileName);
|
|
62 this.outputDir = outputDir+Misc.slash(outputDir);
|
|
63 tmpWigWriter = new FileWriter(tmpDir+Misc.slash(tmpDir)+"tmpWig.wig");
|
|
64 convert(alignFileName,this.outputDir,outPrefix);
|
|
65 try{
|
|
66 wigToBigWig("hg19",tmpDir);
|
|
67 }
|
|
68 catch(Exception e){
|
|
69 System.err.println("Converting wig file to a bigwig file failed. " +
|
|
70 "Check whether you have a 64-bit linux system!");
|
|
71 }
|
|
72 }
|
|
73
|
|
74 private void convert(String alignFileName, String outputDir,String outPrefix) throws FileNotFoundException, IOException {
|
|
75 Scanner s = new Scanner(this.alignFile);
|
|
76 computeExtremas(s); s.close();
|
|
77 Scanner readScanner = new Scanner(this.alignFile);
|
|
78 alignFileName = Misc.prefix(alignFileName);
|
|
79 Scanner as = new Scanner(alignFileName);
|
|
80 as.useDelimiter("_");
|
|
81 as.next();
|
|
82 outputFile = new File(outputDir+outPrefix+".wig");
|
|
83 FileWriter fw = new FileWriter(outputFile);
|
|
84
|
|
85 annotationHeader(fw);
|
|
86
|
|
87 ReadFrame readF = computeNextRead(readScanner); //Current read frame
|
|
88 GenomeFrame frame = new GenomeFrame(readF.start(), LENGTH); //Current base frame
|
|
89 String chrom = readF.chrom(); //Current chromosome
|
|
90 if(frame.contains(readF)){frame.updateHits(readF);} //Sum up all hits of curr read
|
|
91 writeHeader(fw, frame, readF); //Write header line
|
|
92
|
|
93 while(true){
|
|
94 while(readScanner.hasNextLine() && chrom.equals(readF.chrom()) && frame.contains(readF)){
|
|
95 frame.updateHits(readF); readF = computeNextRead(readScanner);
|
|
96 }
|
|
97 while(readScanner.hasNextLine() && chrom.equals(readF.chrom()) && frame.overlaps(readF) && !frame.limitExceeded(readF)){
|
|
98 frame.updateFrameFromRightEnd(readF); frame.updateHits(readF); readF = computeNextRead(readScanner);
|
|
99 }
|
|
100 if(!readScanner.hasNextLine()){break;}
|
|
101 else if(!chrom.equals(readF.chrom())){
|
|
102 frame = new GenomeFrame(readF.start(), LENGTH);
|
|
103 writeHeader(fw, frame, readF);
|
|
104 chrom = readF.chrom();
|
|
105 }
|
|
106 else if(!frame.overlaps(readF)){
|
|
107 GenomeFrame last = frame; writeFrame(fw, last);
|
|
108 frame = new GenomeFrame(readF.start(), LENGTH);
|
|
109 writeFrameLeak(fw, last, frame, readF);
|
|
110 }
|
|
111 else if(frame.limitExceeded(readF)){
|
|
112 writeFramePortion(fw, frame, readF.start()); frame.updateFrameFromBothEnds(readF, LENGTH);
|
|
113 }
|
|
114 }
|
|
115 fw.close();
|
|
116 tmpWigWriter.close();
|
|
117 readScanner.close();
|
|
118 }
|
|
119
|
|
120 private void wigToBigWig(String genome,String tmpDir){
|
|
121 String scriptDir = Misc.binDir()+Misc.slash(Misc.binDir());
|
|
122 String wig2bw = scriptDir + "../thirdparty/wigToBigWig";
|
|
123 String fetchChromSizes = scriptDir + "fetchChromSizes";
|
|
124
|
|
125 try {
|
|
126 Process p = Runtime.getRuntime().exec("sh "+fetchChromSizes+" "+genome);
|
|
127 FileWriter fw = new FileWriter(outputDir+Misc.slash(outputDir)+genome+".chrom.sizes");
|
|
128 Scanner s = new Scanner(p.getInputStream());
|
|
129 while(s.hasNextLine())
|
|
130 fw.write(s.nextLine()+"\r\n");
|
|
131 fw.close();
|
|
132
|
|
133 Runtime.getRuntime().exec(wig2bw+" "+outputFile.getAbsolutePath()
|
|
134 +" "+outputDir+Misc.slash(outputDir)+genome+".chrom.sizes "+
|
|
135 outputDir+Misc.slash(outputDir)+
|
|
136 Misc.prefix(outputFile.getAbsolutePath())+".bw");
|
|
137 new File(outputDir+Misc.slash(outputDir)+genome+".chrom.sizes").delete();
|
|
138 new File(tmpDir+Misc.slash(tmpDir)+"tmpWig.wig").delete();
|
|
139 } catch (IOException e) {
|
|
140 // TODO Auto-generated catch block
|
|
141 e.printStackTrace();
|
|
142 }
|
|
143 }
|
|
144
|
|
145 private void computeExtremas(Scanner s){
|
|
146 String chrom = "Datei falsch formatiert.";
|
|
147 String oldChrom = null;
|
|
148 int start = -1;
|
|
149 int end = -1;
|
|
150 String line = s.nextLine();
|
|
151
|
|
152 oldChrom = chrom(line); start = start(line); end = end(line);
|
|
153
|
|
154 while(s.hasNextLine()){
|
|
155 line = s.nextLine();
|
|
156 if(isHeader(line)){
|
|
157 chrom = chrom(line); start = start(line); end = end(line);
|
|
158 if(gMin > start && chrom.equals(oldChrom))
|
|
159 gMin = start;
|
|
160
|
|
161 if(gMax < end && chrom.equals(oldChrom))
|
|
162 gMax = end;
|
|
163 }
|
|
164 }
|
|
165 }
|
|
166
|
|
167 private boolean isHeader(String line){
|
|
168 return line.indexOf("chr")!=-1;
|
|
169 }
|
|
170
|
|
171 private int start(String line){
|
|
172 Scanner s = new Scanner(line);
|
|
173 s.next();s.next();
|
|
174 return s.nextInt();
|
|
175 }
|
|
176
|
|
177 private int end(String line){
|
|
178 Scanner s = new Scanner(line);
|
|
179 s.next();s.next();s.next();
|
|
180 return s.nextInt();
|
|
181 }
|
|
182
|
|
183 private String chrom(String line){
|
|
184 Scanner s = new Scanner(line);
|
|
185 s.next();
|
|
186 return s.next();
|
|
187 }
|
|
188
|
|
189 private void writeFramePortion(FileWriter fw, GenomeFrame frame, int start2) throws IOException {
|
|
190 for(int base = frame.start(); base < start2; base++){
|
|
191 fw.write(frame.getHit(base)+"\r\n");
|
|
192 tmpWigWriter.write(frame.getHit(base)+"\r\n");
|
|
193 }
|
|
194 }
|
|
195
|
|
196 private void writeFrameLeak(FileWriter fw,
|
|
197 GenomeFrame last, GenomeFrame frame, ReadFrame read)
|
|
198 throws IOException {
|
|
199 if(frame.start()-last.end() > 50){
|
|
200 writeHeader(fw, frame, read);
|
|
201 }
|
|
202 else{
|
|
203 for(int i = 0; i < frame.start()-last.end()-1; i++){
|
|
204 fw.write(0+"\r\n");
|
|
205 tmpWigWriter.write(0+"\r\n");
|
|
206 }
|
|
207 }
|
|
208 }
|
|
209
|
|
210 private void annotationHeader(FileWriter fw) throws IOException{
|
|
211 String browserLines = "browser position chr1:"+gMin+"-"+gMax+"\r\n"+
|
|
212 "browser hide all"+"\r\n"+
|
|
213 "browser pack refGene encodeRegions"+"\r\n"+
|
|
214 "browser full altGraph";
|
|
215 Scanner as = new Scanner(alignFile.getName());
|
|
216 as.useDelimiter("_");as.next();
|
|
217 String trackLine = "track type=wiggle_0 name=\""+as.next()+"\" " +
|
|
218 "description=\"Base read coverage\" visibility=full " +
|
|
219 "color=0,0,0 altColor=255,0,0 priority=20 " +
|
|
220 "autoScale=on";
|
|
221 fw.write(browserLines+"\r\n"+trackLine+"\r\n");
|
|
222 }
|
|
223
|
|
224 private void writeHeader(FileWriter fw, GenomeFrame frame, ReadFrame read)
|
|
225 throws IOException {
|
|
226 fw.write("fixedStep chrom="+read.chrom()
|
|
227 +" start="+frame.start()
|
|
228 + " step=1\r\n");
|
|
229 tmpWigWriter.write("fixedStep chrom="+read.chrom()
|
|
230 +" start="+frame.start()
|
|
231 + " step=1\r\n");
|
|
232 }
|
|
233
|
|
234 private void writeFrame(FileWriter fw, GenomeFrame frame) throws IOException {
|
|
235 // TODO Auto-generated method stub
|
|
236 for(int base = frame.start(); base <= frame.end(); base++){
|
|
237 fw.write(frame.getHit(base)+"\r\n");
|
|
238 tmpWigWriter.write(frame.getHit(base)+"\r\n");
|
|
239 }
|
|
240 }
|
|
241
|
|
242 private ReadFrame computeNextRead(Scanner afScanner) {
|
|
243 ReducedReadLine rl = null;
|
|
244 try {
|
|
245 rl = new ReducedReadLine(afScanner.nextLine());
|
|
246 } catch (ChromosomeFormatException e) {
|
|
247 e.printStackTrace();
|
|
248 } catch (ChromosomeNotFoundException e) {
|
|
249 e.printStackTrace();
|
|
250 } catch (RangeFormatException e) {
|
|
251 e.printStackTrace();
|
|
252 } catch (RangeLimitNotFoundException e) {
|
|
253 e.printStackTrace();
|
|
254 }
|
|
255 return new ReadFrame(rl.name(), rl.chrom(), rl.start(), rl.end());
|
|
256 }
|
|
257
|
|
258 }
|
|
259
|
|
260
|
|
261
|
|
262
|
|
263
|
|
264
|
|
265
|
|
266
|
|
267
|