view NGSrich_0.5.5/src/converters/Read2Wig.java @ 0:89ad0a9cca52 default tip

Uploaded
author pfrommolt
date Mon, 21 Nov 2011 08:12:19 -0500
parents
children
line wrap: on
line source

package converters;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Scanner;

import datastructures.GenomeFrame;
import datastructures.ReadFrame;
import datastructures.ReducedReadLine;
import exceptions.ChromosomeFormatException;
import exceptions.ChromosomeNotFoundException;
import exceptions.RangeFormatException;
import exceptions.RangeLimitNotFoundException;

import middlewares.Misc;

/**
 * <P>This is a converter class which converts a reduced alignment file with the
 * following format:</P>
 * <TABLE>
 * 	<TR>
 * 		<TD width= "140"><B>read-name</B></TD>		
 * 		<TD width= "200"><B>chromosome-name</B></TD>
 * 		<TD width= "150"><B>start-position</B></TD>		
 *		<TD width= "200"><B>end-position</B></TD>
 * </TR>
 * <TR height=""></TR>
 * </TABLE>
 * <P>to the wiggle format. The wiggle format (WIG) allows the display of
 * continuous-valued data in a track format; here it is used to visualize the
 * read enrichment in the
 * <a href="http://genome.ucsc.edu/cgi-bin/hgGateway" target="_new">UCSC Genome Browser</a>.
 * See <a href="http://genome.ucsc.edu/goldenPath/help/wiggle.html" target="_new">
 * http://genome.ucsc.edu/goldenPath/help/wiggle.html</a> for more information.</P>
 * 
 * @author Ali Abdallah
 * @version 06.01.2011
 * @since jdk 1.6.0
 */

public class Read2Wig {

	/** Length argument handed to every newly created {@link GenomeFrame}. */
	private static final int LENGTH = 1024;
	/** Genome assembly used when the caller passes no genome name. */
	private static final String DEFAULT_GENOME = "hg19";

	/** The reduced alignment file that is converted. */
	private final File alignFile;
	/** The annotated wig file; assigned by {@link #convert}. */
	private File outputFile;
	/** Output directory as passed in, followed by {@code Misc.slash(outputDir)}. */
	private final String outputDir;
	/** Chromosome name found on the first line of the alignment file. */
	private String gChrom = "chr1";
	/** Smallest start position seen on {@link #gChrom}. */
	private int gMin = Integer.MAX_VALUE;
	/** Largest end position seen on {@link #gChrom}. */
	private int gMax = Integer.MIN_VALUE;
	/** Writer for {@code tmpWig.wig}, which receives the wig data without the annotation header. */
	private final FileWriter tmpWigWriter;

	/**
	 * Constructs and initializes a new Read2Wig object: converts the read
	 * alignment file into an overall coverage wig file and then tries to turn
	 * that wig file into a bigWig file.
	 * <p>A failure of the bigWig step is reported on {@code System.err} and
	 * does not make the constructor throw.</p>
	 *
	 * @param alignFileName the name of the alignment file.
	 * @param outPrefix file name (without extension) of the wig file that is
	 *        created inside {@code outputDir}.
	 * @param outputDir directory that receives the wig file.
	 * @param genome genome assembly name handed to the {@code fetchChromSizes}
	 *        script; {@code "hg19"} is used when it is {@code null} or blank.
	 * @param tmpDir directory in which the temporary {@code tmpWig.wig} file
	 *        is created.
	 * @throws IOException if the alignment file cannot be read, is empty or
	 *         contains a line that cannot be parsed, or if the wig file
	 *         cannot be written.
	 */
	public Read2Wig(String alignFileName,String outPrefix,String outputDir,String genome,String tmpDir) throws IOException{
		this.alignFile = new File(alignFileName);
		this.outputDir = outputDir+Misc.slash(outputDir);
		this.tmpWigWriter = new FileWriter(tmpDir+Misc.slash(tmpDir)+"tmpWig.wig");
		try{
			convert(alignFileName,this.outputDir,outPrefix);
		}
		finally{
			// Release the temporary writer even when the conversion fails.
			tmpWigWriter.close();
		}

		// The genome used to be hard-coded to "hg19"; honour the caller's
		// choice and only fall back to that value when none was given.
		String assembly = DEFAULT_GENOME;
		if(genome != null && genome.trim().length() > 0){
			assembly = genome.trim();
		}
		try{
			wigToBigWig(assembly,tmpDir);
		}
		catch(Exception e){
			System.err.println("Converting wig file to a bigwig file failed. " +
					"Check whether you have a 64-bit linux system!");
			System.err.println("Cause: "+e);
		}
	}

	/**
	 * Reads the alignment file twice: once to determine the coordinate range
	 * for the browser line, once to write the coverage values.
	 *
	 * @param alignFileName name of the alignment file (unused; the file is
	 *        read through {@link #alignFile}).
	 * @param outputDir directory prefix of the wig file.
	 * @param outPrefix file name of the wig file without the {@code .wig}
	 *        extension.
	 * @throws FileNotFoundException if the alignment file does not exist.
	 * @throws IOException if the alignment file is empty, a line cannot be
	 *         parsed or the wig file cannot be written.
	 */
	private void convert(String alignFileName, String outputDir,String outPrefix) throws FileNotFoundException, IOException {
		Scanner extremaScanner = new Scanner(this.alignFile);
		try{
			computeExtremas(extremaScanner);
		}
		finally{
			extremaScanner.close();
		}

		Scanner readScanner = new Scanner(this.alignFile);
		try{
			if(!readScanner.hasNextLine()){
				throw new IOException("Alignment file contains no reads: "+alignFile.getPath());
			}
			outputFile = new File(outputDir+outPrefix+".wig");
			FileWriter fw = new FileWriter(outputFile);
			try{
				annotationHeader(fw);

				ReadFrame readF = computeNextRead(readScanner); // current read
				GenomeFrame frame = new GenomeFrame(readF.start(), LENGTH); // current base frame
				String chrom = readF.chrom(); // current chromosome
				// NOTE(review): when the first read is contained in the frame it is
				// passed to updateHits here and again by the first pass of the inner
				// loop below - confirm that this is intended.
				if(frame.contains(readF)){frame.updateHits(readF);}
				writeHeader(fw, frame, readF);

				while(true){
					// Reads on the same chromosome that lie inside the frame.
					while(readScanner.hasNextLine() && chrom.equals(readF.chrom()) && frame.contains(readF)){
						frame.updateHits(readF);
						readF = computeNextRead(readScanner);
					}
					// Reads that overlap the frame without exceeding its limit.
					while(readScanner.hasNextLine() && chrom.equals(readF.chrom()) && frame.overlaps(readF) && !frame.limitExceeded(readF)){
						frame.updateFrameFromRightEnd(readF);
						frame.updateHits(readF);
						readF = computeNextRead(readScanner);
					}
					if(!readScanner.hasNextLine()){
						// NOTE(review): the loop ends here without calling updateHits
						// for the read fetched last and without writing the current
						// frame - confirm that dropping them is intended.
						break;
					}
					else if(!chrom.equals(readF.chrom())){
						// NOTE(review): the previous frame is replaced without being
						// written - confirm.
						frame = new GenomeFrame(readF.start(), LENGTH);
						writeHeader(fw, frame, readF);
						chrom = readF.chrom();
					}
					else if(!frame.overlaps(readF)){
						GenomeFrame last = frame;
						writeFrame(fw, last);
						frame = new GenomeFrame(readF.start(), LENGTH);
						writeFrameLeak(fw, last, frame, readF);
					}
					else if(frame.limitExceeded(readF)){
						writeFramePortion(fw, frame, readF.start());
						frame.updateFrameFromBothEnds(readF, LENGTH);
					}
				}
			}
			finally{
				fw.close();
			}
		}
		finally{
			readScanner.close();
		}
	}

	/**
	 * Fetches the chromosome sizes of {@code genome} with the
	 * {@code fetchChromSizes} script and runs {@code wigToBigWig} on the wig
	 * file. The chromosome sizes file and {@code tmpWig.wig} are deleted
	 * afterwards, whether or not the conversion succeeded.
	 *
	 * @param genome genome assembly name handed to {@code fetchChromSizes}.
	 * @param tmpDir directory that holds {@code tmpWig.wig}.
	 * @throws IOException if a tool cannot be started, is interrupted,
	 *         yields no chromosome sizes or exits with a non-zero status.
	 */
	private void wigToBigWig(String genome,String tmpDir) throws IOException{
		String scriptDir = Misc.binDir()+Misc.slash(Misc.binDir());
		String wig2bw = scriptDir + "../thirdparty/wigToBigWig";
		String fetchChromSizes = scriptDir + "fetchChromSizes";
		String chromSizesPath = outputDir+Misc.slash(outputDir)+genome+".chrom.sizes";

		try {
			// Arguments are passed as a list so that paths containing blanks
			// are not split into several arguments.
			Process fetch = new ProcessBuilder("sh", fetchChromSizes, genome).start();
			try{
				int sizeLines = 0;
				FileWriter fw = new FileWriter(chromSizesPath);
				try{
					Scanner s = new Scanner(fetch.getInputStream());
					try{
						while(s.hasNextLine()){
							fw.write(s.nextLine()+"\r\n");
							sizeLines++;
						}
					}
					finally{
						s.close();
					}
				}
				finally{
					fw.close();
				}
				waitFor(fetch, "fetchChromSizes");
				if(sizeLines == 0){
					throw new IOException("fetchChromSizes produced no chromosome sizes for genome "+genome);
				}
			}
			finally{
				fetch.destroy();
			}

			// NOTE(review): the annotated wig file is converted while
			// tmpWig.wig is only deleted - confirm which file is meant to be
			// the converter input.
			String bigWigPath = outputDir+Misc.slash(outputDir)+
					Misc.prefix(outputFile.getAbsolutePath())+".bw";
			ProcessBuilder builder = new ProcessBuilder(wig2bw,
					outputFile.getAbsolutePath(), chromSizesPath, bigWigPath);
			builder.redirectErrorStream(true);
			Process converter = builder.start();
			try{
				// Drain the merged output so the tool cannot block on a full pipe.
				StringBuilder toolOutput = new StringBuilder();
				Scanner out = new Scanner(converter.getInputStream());
				try{
					while(out.hasNextLine()){
						toolOutput.append(out.nextLine()).append('\n');
					}
				}
				finally{
					out.close();
				}
				// The input files must not be deleted before the tool is done.
				int status = waitFor(converter, "wigToBigWig");
				if(status != 0){
					throw new IOException("wigToBigWig exited with status "+status
							+": "+toolOutput.toString().trim());
				}
			}
			finally{
				converter.destroy();
			}
		}
		finally{
			new File(chromSizesPath).delete();
			new File(tmpDir+Misc.slash(tmpDir)+"tmpWig.wig").delete();
		}
	}

	/**
	 * Blocks until {@code process} has terminated.
	 *
	 * @param process the process to wait for.
	 * @param tool name used in the error message.
	 * @return the exit status of the process.
	 * @throws IOException if the waiting thread is interrupted; the interrupt
	 *         flag of the thread is restored before throwing.
	 */
	private static int waitFor(Process process, String tool) throws IOException{
		try{
			return process.waitFor();
		}
		catch(InterruptedException e){
			Thread.currentThread().interrupt();
			throw new IOException(tool+" was interrupted before it finished", e);
		}
	}

	/**
	 * Determines the chromosome of the first line and the smallest start and
	 * largest end position of all considered lines on that chromosome. The
	 * results are stored in {@link #gChrom}, {@link #gMin} and {@link #gMax}.
	 * Nothing is changed when the scanner has no lines.
	 *
	 * @param s scanner positioned at the beginning of the alignment file.
	 */
	private void computeExtremas(Scanner s){
		if(!s.hasNextLine()){
			return; // empty input is reported by convert()
		}
		String first = s.nextLine();
		String refChrom = chrom(first);
		gChrom = refChrom;
		// The first line counts as well; it used to be left out.
		gMin = Math.min(gMin, start(first));
		gMax = Math.max(gMax, end(first));

		while(s.hasNextLine()){
			String line = s.nextLine();
			if(isHeader(line) && refChrom.equals(chrom(line))){
				gMin = Math.min(gMin, start(line));
				gMax = Math.max(gMax, end(line));
			}
		}
	}

	/**
	 * Tells whether {@code line} contains the text {@code "chr"}. Despite the
	 * name, {@link #computeExtremas} uses this to select the lines it parses.
	 */
	private boolean isHeader(String line){
		return line.indexOf("chr")!=-1;
	}

	/** Returns the third whitespace-separated token of {@code line} as an int. */
	private int start(String line){
		Scanner s = new Scanner(line);
		try{
			s.next();s.next();
			return s.nextInt();
		}
		finally{
			s.close();
		}
	}

	/** Returns the fourth whitespace-separated token of {@code line} as an int. */
	private int end(String line){
		Scanner s = new Scanner(line);
		try{
			s.next();s.next();s.next();
			return s.nextInt();
		}
		finally{
			s.close();
		}
	}

	/** Returns the second whitespace-separated token of {@code line}. */
	private String chrom(String line){
		Scanner s = new Scanner(line);
		try{
			s.next();
			return s.next();
		}
		finally{
			s.close();
		}
	}

	/** Writes {@code text} to the wig file and to the temporary wig file. */
	private void emit(FileWriter fw, String text) throws IOException {
		fw.write(text);
		tmpWigWriter.write(text);
	}

	/**
	 * Writes the hit counts of {@code frame} from its start up to, but not
	 * including, position {@code start2}.
	 */
	private void writeFramePortion(FileWriter fw, GenomeFrame frame, int start2) throws IOException {
		for(int base = frame.start(); base < start2; base++){
			emit(fw, frame.getHit(base)+"\r\n");
		}
	}

	/**
	 * Bridges the gap between two consecutive frames: a gap of more than 50
	 * starts a new fixedStep section, a smaller one is filled with zeros.
	 */
	private void writeFrameLeak(FileWriter fw,
								GenomeFrame last, GenomeFrame frame, ReadFrame read)
															throws IOException {
		int gap = frame.start()-last.end();
		if(gap > 50){
			writeHeader(fw, frame, read);
		}
		else{
			for(int i = 0; i < gap-1; i++){
				emit(fw, 0+"\r\n");
			}
		}
	}

	/**
	 * Writes the browser lines and the track line to the wig file only (not
	 * to the temporary wig file). The browser position uses the chromosome
	 * and range found by {@link #computeExtremas}.
	 */
	private void annotationHeader(FileWriter fw) throws IOException{
		String browserLines = 	"browser position "+gChrom+":"+gMin+"-"+gMax+"\r\n"+
								"browser hide all"+"\r\n"+
								"browser pack refGene encodeRegions"+"\r\n"+
								"browser full altGraph";
		String trackLine = 	"track type=wiggle_0 name=\""+trackName()+"\" " +
							"description=\"Base read coverage\" visibility=full " +
							"color=0,0,0 altColor=255,0,0 priority=20 " +
							"autoScale=on";
		fw.write(browserLines+"\r\n"+trackLine+"\r\n");
	}

	/**
	 * Returns the second {@code '_'}-separated part of the alignment file
	 * name, or the whole file name when there is no second part.
	 */
	private String trackName(){
		String fileName = alignFile.getName();
		Scanner parts = new Scanner(fileName);
		try{
			parts.useDelimiter("_");
			if(parts.hasNext()){
				parts.next();
				if(parts.hasNext()){
					return parts.next();
				}
			}
			return fileName;
		}
		finally{
			parts.close();
		}
	}

	/**
	 * Writes a fixedStep declaration line for the chromosome of {@code read},
	 * starting at the start of {@code frame}, with step 1.
	 */
	private void writeHeader(FileWriter fw, GenomeFrame frame, ReadFrame read)
			throws IOException {
		emit(fw, "fixedStep chrom="+read.chrom()
				+" start="+frame.start()
				+ " step=1\r\n");
	}

	/** Writes the hit counts of all positions from the start to the end of {@code frame}. */
	private void writeFrame(FileWriter fw, GenomeFrame frame) throws IOException {
		for(int base = frame.start(); base <= frame.end(); base++){
			emit(fw, frame.getHit(base)+"\r\n");
		}
	}

	/**
	 * Reads the next line of the alignment file and turns it into a read frame.
	 *
	 * @param afScanner scanner over the alignment file; must have a next line.
	 * @return the read described by the line.
	 * @throws IOException if the line cannot be parsed; the parser's
	 *         exception is attached as the cause.
	 */
	private ReadFrame computeNextRead(Scanner afScanner) throws IOException {
		String line = afScanner.nextLine();
		ReducedReadLine rl;
		try {
			rl = new ReducedReadLine(line);
		} catch (ChromosomeFormatException e) {
			throw malformedLine(line, e);
		} catch (ChromosomeNotFoundException e) {
			throw malformedLine(line, e);
		} catch (RangeFormatException e) {
			throw malformedLine(line, e);
		} catch (RangeLimitNotFoundException e) {
			throw malformedLine(line, e);
		}
		return new ReadFrame(rl.name(), rl.chrom(), rl.start(), rl.end());
	}

	/** Builds the exception reported for an alignment line that cannot be parsed. */
	private static IOException malformedLine(String line, Throwable cause){
		return new IOException("Malformed alignment line: \""+line+"\"", cause);
	}

}