diff NGSrich_0.5.5/src/converters/Read2Wig.java @ 0:89ad0a9cca52 default tip

Uploaded
author pfrommolt
date Mon, 21 Nov 2011 08:12:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NGSrich_0.5.5/src/converters/Read2Wig.java	Mon Nov 21 08:12:19 2011 -0500
@@ -0,0 +1,267 @@
+package converters;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Scanner;
+
+import datastructures.GenomeFrame;
+import datastructures.ReadFrame;
+import datastructures.ReducedReadLine;
+import exceptions.ChromosomeFormatException;
+import exceptions.ChromosomeNotFoundException;
+import exceptions.RangeFormatException;
+import exceptions.RangeLimitNotFoundException;
+
+import middlewares.Misc;
+
+/**
+ * <P>This is a converter class which converts a reduced alignment file with the
+ * following format:</P>
+ * <TABLE>
+ * 	<TR>
+ * 		<TD width= "140"><B>read-name</B></TD>		
+ * 		<TD width= "200"><B>chromosome-name</B></TD>		
+ * 		<TD width= "150"><B>start-position</B></TD>		
+ *		<TD width= "200"><B>end-position</B></TD>
+ * </TR>
+ * <TR height=""></TR>
+ * </TABLE>
+ * <P>to the wiggle-format. The wiggle format (WIG) allows the display of continuous-
+ * valued data in a track format and it is used to visualize the read enrichment
+ * with the <a href="http://genome.ucsc.edu/cgi-bin/hgGateway" target="_new">ucsc genome browser</a>. Click on the following link: 
+ * <a href="http://genome.ucsc.edu/goldenPath/help/wiggle.html" target="_new">
+ * http://genome.ucsc.edu/goldenPath/help/wiggle.html</a> for more information.</P>
+ * 
+ * @author Ali Abdallah
+ * @version 06.01.2011
+ * @since jdk 1.6.0
+ */
+
+public class Read2Wig {
+	
+	/** Second constructor argument of every {@link GenomeFrame} created here (presumably the frame length in bases). */
+	private static final int LENGTH = 1024;
+	/** Genome assembly used when the caller passes no usable genome name. */
+	private static final String DEFAULT_GENOME = "hg19";
+	/** Line terminator used for every line this class writes. */
+	private static final String EOL = "\r\n";
+
+	private File alignFile;
+	private File outputFile;
+	/** Output directory, already suffixed with {@code Misc.slash(...)}. */
+	private String outputDir;
+	/** Smallest start position seen on the chromosome of the first line. */
+	private int gMin = Integer.MAX_VALUE;
+	/** Largest end position seen on the chromosome of the first line. */
+	private int gMax = Integer.MIN_VALUE;
+	/**
+	 * Second writer that receives the same fixedStep headers and values as the
+	 * main output, but not the browser/track annotation header. The file behind
+	 * it ("tmpWig.wig") is deleted again by {@link #wigToBigWig(String, String)}.
+	 */
+	private FileWriter tmpWigWriter;
+	
+	/**
+	 * Constructs and initializes a new Read2Wig object.
+	 * Converts the read alignment file to an overall coverage wig file and then
+	 * tries to convert that wig file to a bigWig file. A failing bigWig
+	 * conversion is only reported on standard error.
+	 * 
+	 * @param alignFileName the name of the alignment file.
+	 * @param outPrefix the name of the wig file without the ".wig" extension.
+	 * @param outputDir the directory the wig file is written to.
+	 * @param genome the genome assembly passed to fetchChromSizes; "hg19" is
+	 *               used when this is {@code null} or blank.
+	 * @param tmpDir the directory in which "tmpWig.wig" is created.
+	 * @throws IOException if the alignment file cannot be read, is empty or
+	 *                     contains a malformed line, or if writing fails.
+	 */
+    public Read2Wig(String alignFileName,String outPrefix,String outputDir,String genome,String tmpDir) throws IOException{
+		// Create the File object used to access the alignment file.
+		this.alignFile = new File(alignFileName);
+		this.outputDir = outputDir+Misc.slash(outputDir);
+		tmpWigWriter = new FileWriter(tmpDir+Misc.slash(tmpDir)+"tmpWig.wig");
+		convert(alignFileName,this.outputDir,outPrefix);
+		// Honour the caller's genome instead of always converting against hg19.
+		String assembly = DEFAULT_GENOME;
+		if(genome != null && genome.trim().length() > 0){
+			assembly = genome.trim();
+		}
+		try{
+		    wigToBigWig(assembly,tmpDir);
+		}
+		catch(Exception e){
+			System.err.println("Converting wig file to a bigwig file failed. " +
+					"Check whether you have a 64-bit linux system!");
+		}
+	}
+    
+	/**
+	 * Reads the alignment file twice: once to compute the position range for
+	 * the browser line and once to write the wig file. All writers and
+	 * scanners, including {@code tmpWigWriter}, are closed before returning,
+	 * also when an exception is thrown.
+	 * 
+	 * @param alignFileName unused; the file is read through {@code alignFile}.
+	 * @param outputDir directory prefix of the wig file.
+	 * @param outPrefix name of the wig file without the ".wig" extension.
+	 * @throws FileNotFoundException if the alignment file does not exist.
+	 * @throws IOException if the alignment file is empty or malformed, or if
+	 *                     writing fails.
+	 */
+    private void convert(String alignFileName, String outputDir,String outPrefix) throws FileNotFoundException, IOException {
+		try {
+			Scanner extremaScanner = new Scanner(this.alignFile);
+			try {
+				computeExtremas(extremaScanner);
+			} finally {
+				extremaScanner.close();
+			}
+
+			outputFile = new File(outputDir+outPrefix+".wig");
+			Scanner readScanner = new Scanner(this.alignFile);
+			try {
+				// Fail with a clear message before any output file is created.
+				if(!readScanner.hasNextLine()){
+					throw new IOException("Alignment file contains no reads: "+alignFile);
+				}
+				FileWriter fw = new FileWriter(outputFile);
+				try {
+					annotationHeader(fw);
+					writeCoverage(readScanner, fw);
+				} finally {
+					fw.close();
+				}
+			} finally {
+				readScanner.close();
+			}
+		} finally {
+			tmpWigWriter.close();
+		}
+	}
+
+	/**
+	 * Walks over the reads of the alignment file and writes fixedStep headers
+	 * and per-base values. The statement order of the original implementation
+	 * is kept unchanged because it depends on GenomeFrame methods that are not
+	 * visible from this class.
+	 * 
+	 * @param readScanner scanner positioned at the first read line.
+	 * @param fw writer of the wig file.
+	 * @throws IOException if a line is malformed or writing fails.
+	 */
+	private void writeCoverage(Scanner readScanner, FileWriter fw) throws IOException {
+		ReadFrame readF = computeNextRead(readScanner); //Current read frame
+		GenomeFrame frame = new GenomeFrame(readF.start(), LENGTH); //Current base frame
+		String chrom = readF.chrom(); //Current chromosome
+		// NOTE(review): when more lines follow, the first inner loop below calls
+		// updateHits for this same read again - looks like a double count of the
+		// first read; confirm against GenomeFrame.updateHits.
+		if(frame.contains(readF)){frame.updateHits(readF);} //Sum up all hits of curr read
+		writeHeader(fw, frame, readF); //Write header line
+		
+		while(true){
+			// Reads that lie inside the current frame.
+			while(readScanner.hasNextLine() && chrom.equals(readF.chrom()) && frame.contains(readF)){
+				frame.updateHits(readF);
+				readF = computeNextRead(readScanner);
+			}
+			// Reads that overlap the current frame without exceeding its limit.
+			while(readScanner.hasNextLine() && chrom.equals(readF.chrom()) && frame.overlaps(readF) && !frame.limitExceeded(readF)){
+				frame.updateFrameFromRightEnd(readF);
+				frame.updateHits(readF);
+				readF = computeNextRead(readScanner);
+			}
+			// NOTE(review): the loop ends as soon as the scanner is exhausted, so
+			// the read fetched last is not added and the current frame is not
+			// written here - confirm whether dropping the tail is intended.
+			if(!readScanner.hasNextLine()){break;}
+			else if(!chrom.equals(readF.chrom())){
+				// NOTE(review): the previous frame is replaced without a
+				// writeFrame call - confirm.
+				frame = new GenomeFrame(readF.start(), LENGTH);
+				writeHeader(fw, frame, readF);
+				chrom = readF.chrom();
+			}
+			else if(!frame.overlaps(readF)){
+				GenomeFrame last = frame;
+				writeFrame(fw, last);
+				frame = new GenomeFrame(readF.start(), LENGTH);
+				writeFrameLeak(fw, last, frame, readF);
+			}
+			else if(frame.limitExceeded(readF)){
+				writeFramePortion(fw, frame, readF.start());
+				frame.updateFrameFromBothEnds(readF, LENGTH);
+			}
+		}
+	}
+	
+	/**
+	 * Runs fetchChromSizes for the given genome, stores its output as
+	 * "&lt;genome&gt;.chrom.sizes" in the output directory and then runs
+	 * wigToBigWig on the wig file. Both child processes are awaited before the
+	 * chrom.sizes file and "tmpWig.wig" are deleted. Failures are reported on
+	 * standard error and never thrown as checked exceptions.
+	 * 
+	 * @param genome genome assembly name passed to fetchChromSizes.
+	 * @param tmpDir directory that contains "tmpWig.wig".
+	 */
+    private void wigToBigWig(String genome,String tmpDir){
+		String scriptDir = Misc.binDir()+Misc.slash(Misc.binDir());
+		String wig2bw = scriptDir + "../thirdparty/wigToBigWig";
+		String fetchChromSizes = scriptDir + "fetchChromSizes";
+		String chromSizes = outputDir+Misc.slash(outputDir)+genome+".chrom.sizes";
+	
+		try {
+			// Argument lists keep paths that contain blanks in one piece.
+			Process fetch = new ProcessBuilder("sh", fetchChromSizes, genome).start();
+			FileWriter fw = new FileWriter(chromSizes);
+			try {
+				Scanner s = new Scanner(fetch.getInputStream());
+				while(s.hasNextLine()){
+					fw.write(s.nextLine()+EOL);
+				}
+				s.close();
+			} finally {
+				fw.close();
+			}
+			discardRemaining(new Scanner(fetch.getErrorStream()));
+			reportExit("fetchChromSizes", fetch.waitFor());
+			
+			String bigWig = outputDir+Misc.slash(outputDir)+
+					Misc.prefix(outputFile.getAbsolutePath())+".bw";
+			ProcessBuilder builder = new ProcessBuilder(wig2bw,
+					outputFile.getAbsolutePath(), chromSizes, bigWig);
+			// Merge stderr into stdout so one reader can drain both.
+			builder.redirectErrorStream(true);
+			Process toBigWig = builder.start();
+			discardRemaining(new Scanner(toBigWig.getInputStream()));
+			// The converter must be finished before its input files are deleted.
+			reportExit("wigToBigWig", toBigWig.waitFor());
+		} catch (IOException e) {
+			System.err.println("Could not run the bigwig conversion: "+e);
+		} catch (InterruptedException e) {
+			Thread.currentThread().interrupt();
+			System.err.println("Interrupted while waiting for the bigwig conversion.");
+		} finally {
+			new File(chromSizes).delete();
+			new File(tmpDir+Misc.slash(tmpDir)+"tmpWig.wig").delete();
+		}
+	}
+
+	/** Reads and drops everything the scanner still delivers, then closes it. */
+	private static void discardRemaining(Scanner output){
+		try {
+			while(output.hasNextLine()){
+				output.nextLine();
+			}
+		} finally {
+			output.close();
+		}
+	}
+
+	/** Prints a message on standard error when a child process exited with a non-zero code. */
+	private static void reportExit(String tool, int exitCode){
+		if(exitCode != 0){
+			System.err.println(tool+" exited with code "+exitCode+".");
+		}
+	}
+
+	/**
+	 * Computes {@code gMin} and {@code gMax} over the first line and over
+	 * every later line that contains "chr" and names the same chromosome as
+	 * the first line. Leaves both values untouched when there are no lines.
+	 * 
+	 * @param s scanner positioned at the beginning of the alignment file.
+	 */
+	private void computeExtremas(Scanner s){
+		if(!s.hasNextLine()){
+			return;
+		}
+		String line = s.nextLine();
+		String firstChrom = chrom(line);
+		// The first line belongs to the range as well.
+		widenExtremas(start(line), end(line));
+		
+		while(s.hasNextLine()){
+			line = s.nextLine();
+			if(isHeader(line) && firstChrom.equals(chrom(line))){
+				widenExtremas(start(line), end(line));
+			}
+		}
+	}
+
+	/** Extends the range [gMin, gMax] so that it covers start and end. */
+	private void widenExtremas(int start, int end){
+		gMin = Math.min(gMin, start);
+		gMax = Math.max(gMax, end);
+	}
+	
+	/** Returns true if the line contains the text "chr". */
+	private boolean isHeader(String line){
+		return line.indexOf("chr")!=-1;
+	}
+	
+	/** Returns the third whitespace-separated token of the line as an int. */
+	private int start(String line){
+		return tokensAfter(line, 2).nextInt();
+	}
+	
+	/** Returns the fourth whitespace-separated token of the line as an int. */
+	private int end(String line){
+		return tokensAfter(line, 3).nextInt();
+	}
+	
+	/** Returns the second whitespace-separated token of the line. */
+	private String chrom(String line){
+		return tokensAfter(line, 1).next();
+	}
+
+	/** Returns a scanner over the line from which the first {@code skipped} tokens were consumed. */
+	private static Scanner tokensAfter(String line, int skipped){
+		Scanner tokens = new Scanner(line);
+		for(int i = 0; i < skipped; i++){
+			tokens.next();
+		}
+		return tokens;
+	}
+
+	/** Writes the same text to the wig file and to the temporary wig file. */
+	private void writeBoth(FileWriter fw, String text) throws IOException {
+		fw.write(text);
+		tmpWigWriter.write(text);
+	}
+
+	/**
+	 * Writes one value line per base from {@code frame.start()} up to, but not
+	 * including, {@code start2}.
+	 */
+	private void writeFramePortion(FileWriter fw, GenomeFrame frame, int start2) throws IOException {		
+		for(int base = frame.start(); base < start2; base++){
+			writeBoth(fw, frame.getHit(base)+EOL);
+		}
+	}
+
+	/**
+	 * Bridges the gap between two frames: a gap of more than 50 positions
+	 * starts a new fixedStep section, a smaller gap is filled with zero lines.
+	 */
+	private void writeFrameLeak(FileWriter fw, 
+								GenomeFrame last, GenomeFrame frame, ReadFrame read) 
+															throws IOException {
+		int gap = frame.start()-last.end();
+		if(gap > 50){
+			writeHeader(fw, frame, read);
+		}
+		else{
+			for(int i = 0; i < gap-1; i++){
+				writeBoth(fw, 0+EOL);
+			}
+		}
+	}
+
+	/**
+	 * Writes the browser lines and the track line to the wig file only. The
+	 * track name is the second "_"-separated token of the alignment file name,
+	 * or the whole file name if there is no second token.
+	 */
+	private void annotationHeader(FileWriter fw) throws IOException{
+		// NOTE(review): the chromosome is fixed to chr1 although gMin/gMax are
+		// taken from the chromosome of the first line - confirm.
+		String browserLines = 	"browser position chr1:"+gMin+"-"+gMax+EOL+
+								"browser hide all"+EOL+
+								"browser pack refGene encodeRegions"+EOL+
+								"browser full altGraph";
+		String trackName = alignFile.getName();
+		Scanner as = new Scanner(trackName);
+		as.useDelimiter("_");
+		if(as.hasNext()){
+			as.next();
+			if(as.hasNext()){
+				trackName = as.next();
+			}
+		}
+		String trackLine = 	"track type=wiggle_0 name=\""+trackName+"\" " +
+							"description=\"Base read coverage\" visibility=full " +
+							"color=0,0,0 altColor=255,0,0 priority=20 " +
+							"autoScale=on";
+		fw.write(browserLines+EOL+trackLine+EOL);
+	}
+
+	/** Writes a fixedStep declaration for the read's chromosome starting at {@code frame.start()}. */
+	private void writeHeader(FileWriter fw, GenomeFrame frame, ReadFrame read)
+			throws IOException {
+		writeBoth(fw, "fixedStep chrom="+read.chrom()
+				+" start="+frame.start()
+				+ " step=1"+EOL);
+	}
+	
+	/** Writes one value line per base from {@code frame.start()} to {@code frame.end()} inclusive. */
+	private void writeFrame(FileWriter fw, GenomeFrame frame) throws IOException {
+		for(int base = frame.start(); base <= frame.end(); base++){
+			writeBoth(fw, frame.getHit(base)+EOL);
+		}
+	}
+
+	/**
+	 * Parses the next line of the alignment file into a read frame.
+	 * 
+	 * @param afScanner scanner over the alignment file; must have a next line.
+	 * @return the read described by the next line.
+	 * @throws IOException if the line cannot be parsed; the parser's exception
+	 *                     is attached as the cause.
+	 */
+	private ReadFrame computeNextRead(Scanner afScanner) throws IOException {	
+		String line = afScanner.nextLine();
+		ReducedReadLine rl;
+		try {
+			rl = new ReducedReadLine(line);
+		} catch (ChromosomeFormatException e) {
+			throw malformedLine(line, e);
+		} catch (ChromosomeNotFoundException e) {
+			throw malformedLine(line, e);
+		} catch (RangeFormatException e) {
+			throw malformedLine(line, e);
+		} catch (RangeLimitNotFoundException e) {
+			throw malformedLine(line, e);
+		}				
+		return new ReadFrame(rl.name(), rl.chrom(), rl.start(), rl.end());
+	}
+
+	/** Builds the exception thrown for an alignment line that cannot be parsed. */
+	private static IOException malformedLine(String line, Throwable cause){
+		return new IOException("Malformed alignment line: "+line, cause);
+	}
+
+}
+
+
+
+
+
+
+
+
+