Mercurial > repos > pfrommolt > ngsrich
view NGSrich_0.5.5/src/converters/Read2Wig.java @ 0:89ad0a9cca52 default tip
Uploaded
author | pfrommolt |
---|---|
date | Mon, 21 Nov 2011 08:12:19 -0500 |
parents | |
children |
line wrap: on
line source
package converters;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Scanner;

import datastructures.GenomeFrame;
import datastructures.ReadFrame;
import datastructures.ReducedReadLine;
import exceptions.ChromosomeFormatException;
import exceptions.ChromosomeNotFoundException;
import exceptions.RangeFormatException;
import exceptions.RangeLimitNotFoundException;
import middlewares.Misc;

/**
 * <P>This is a converter class, which converts a reduced alignment file with the
 * following format:</P>
 * <TABLE>
 * <TR>
 * <TD width= "140"><B>read-name</B></TD>
 * <TD width= "200"><B>chromosome-name</B></TD>
 * <TD width= "150"><B>start-position</B></TD>
 * <TD width= "200"><B>end-position</B></TD>
 * </TR>
 * <TR height=""></TR>
 * </TABLE>
 * <P>to the wiggle-format. The wiggle format (WIG) allows the display of continuous-
 * valued data in a track format and it is used to visualize the read enrichment
 * with the <a href="http://genome.ucsc.edu/cgi-bin/hgGateway" target="_new">ucsc genome browser</a>. Click on the following link:
 * <a href="http://genome.ucsc.edu/goldenPath/help/wiggle.html" target="_new">
 * http://genome.ucsc.edu/goldenPath/help/wiggle.html</a> for more information.</P>
 *
 * @author Ali Abdallah
 * @version 06.01.2011
 * @since jdk 1.6.0
 */
public class Read2Wig {

    /** Length handed to every newly created {@link GenomeFrame}. */
    private static final int LENGTH = 1024;

    /** Genome assembly used when the caller supplies no genome name. */
    private static final String DEFAULT_GENOME = "hg19";

    /** The reduced alignment file that is converted. */
    private final File alignFile;

    /** The generated wig file; set by {@link #convert}. */
    private File outputFile;

    /** Output directory, with the separator returned by {@code Misc.slash} appended. */
    private final String outputDir;

    /** Smallest start position collected by {@link #computeExtremas}. */
    private int gMin = Integer.MAX_VALUE;

    /** Largest end position collected by {@link #computeExtremas}. */
    private int gMax = Integer.MIN_VALUE;

    /** Writer for the temporary wig copy (same data lines, no browser/track header). */
    private final FileWriter tmpWigWriter;

    /**
     * Constructs and initializes a new Read2Wig object.
     * Converts the read alignment file to an overall coverage wig file and then
     * tries to convert that wig file to bigWig. A failure of the bigWig step is
     * reported on standard error but does not abort construction.
     *
     * @param alignFileName the name of the alignment file.
     * @param outPrefix     file name (without extension) of the wig file to create.
     * @param outputDir     directory the wig file is written to.
     * @param genome        genome assembly whose chromosome sizes are fetched for the
     *                      bigWig conversion; {@code null} or blank selects "hg19".
     * @param tmpDir        directory for the temporary wig copy.
     * @throws IOException if the alignment file is empty, cannot be read or parsed,
     *                     or an output file cannot be written.
     */
    public Read2Wig(String alignFileName, String outPrefix, String outputDir,
            String genome, String tmpDir) throws IOException {
        // Create the File object used to handle the alignment file.
        this.alignFile = new File(alignFileName);
        this.outputDir = outputDir + Misc.slash(outputDir);
        tmpWigWriter = new FileWriter(tmpDir + Misc.slash(tmpDir) + "tmpWig.wig");
        convert(alignFileName, this.outputDir, outPrefix);

        // The genome argument used to be ignored in favour of a hard-coded "hg19".
        String assembly = (genome == null || genome.trim().length() == 0)
                ? DEFAULT_GENOME : genome.trim();
        try {
            wigToBigWig(assembly, tmpDir);
        } catch (Exception e) {
            // Deliberately best effort: the wig file itself has already been written.
            System.err.println("Converting wig file to a bigwig file failed. "
                    + "Check whether you have a 64-bit linux system!");
            System.err.println("Cause: " + e);
        }
    }

    /**
     * Reads the alignment file twice: once to collect the browser position range
     * and once to write the wig file (and its temporary copy).
     *
     * @param alignFileName unused; kept so the signature stays stable.
     * @param outputDir     output directory including its trailing separator.
     * @param outPrefix     file name (without extension) of the wig file.
     * @throws FileNotFoundException if the alignment file does not exist.
     * @throws IOException           if the alignment file is empty or malformed, or
     *                               writing fails.
     */
    private void convert(String alignFileName, String outputDir, String outPrefix)
            throws FileNotFoundException, IOException {
        try {
            Scanner extremaScanner = new Scanner(this.alignFile);
            try {
                if (!extremaScanner.hasNextLine()) {
                    throw new IOException("Alignment file contains no reads: "
                            + alignFile.getPath());
                }
                computeExtremas(extremaScanner);
            } finally {
                extremaScanner.close();
            }

            outputFile = new File(outputDir + outPrefix + ".wig");
            Scanner readScanner = new Scanner(this.alignFile);
            try {
                FileWriter fw = new FileWriter(outputFile);
                try {
                    annotationHeader(fw);
                    writeCoverage(readScanner, fw);
                } finally {
                    fw.close();
                }
            } finally {
                readScanner.close();
            }
        } finally {
            // Always release the temporary writer, even when conversion fails.
            tmpWigWriter.close();
        }
    }

    /**
     * Walks over all reads and writes the fixedStep sections.
     * The statement order is intentionally identical to the historic implementation.
     *
     * @param readScanner scanner positioned at the first read line.
     * @param fw          writer of the wig file.
     * @throws IOException if a line cannot be parsed or writing fails.
     */
    private void writeCoverage(Scanner readScanner, FileWriter fw) throws IOException {
        ReadFrame readF = computeNextRead(readScanner);                // current read
        GenomeFrame frame = new GenomeFrame(readF.start(), LENGTH);    // current base frame
        String chrom = readF.chrom();                                  // current chromosome

        // NOTE(review): the first read is passed to updateHits here and again in the
        // first inner loop below, so it looks like it is counted twice - confirm
        // against GenomeFrame.updateHits before changing.
        if (frame.contains(readF)) {
            frame.updateHits(readF);
        }
        writeHeader(fw, frame, readF);

        while (true) {
            // Reads lying inside the current frame.
            while (readScanner.hasNextLine() && chrom.equals(readF.chrom())
                    && frame.contains(readF)) {
                frame.updateHits(readF);
                readF = computeNextRead(readScanner);
            }
            // Reads overlapping the frame's end while the frame may still be extended.
            while (readScanner.hasNextLine() && chrom.equals(readF.chrom())
                    && frame.overlaps(readF) && !frame.limitExceeded(readF)) {
                frame.updateFrameFromRightEnd(readF);
                frame.updateHits(readF);
                readF = computeNextRead(readScanner);
            }

            if (!readScanner.hasNextLine()) {
                // NOTE(review): the current frame is not written and the read fetched
                // last is never passed to updateHits - presumably the final region is
                // missing from the output; verify.
                break;
            } else if (!chrom.equals(readF.chrom())) {
                // NOTE(review): the previous chromosome's last frame is replaced
                // without a writeFrame call - verify whether that is intended.
                frame = new GenomeFrame(readF.start(), LENGTH);
                writeHeader(fw, frame, readF);
                chrom = readF.chrom();
            } else if (!frame.overlaps(readF)) {
                GenomeFrame last = frame;
                writeFrame(fw, last);
                frame = new GenomeFrame(readF.start(), LENGTH);
                writeFrameLeak(fw, last, frame, readF);
            } else if (frame.limitExceeded(readF)) {
                writeFramePortion(fw, frame, readF.start());
                frame.updateFrameFromBothEnds(readF, LENGTH);
            }
        }
    }

    /**
     * Converts the generated wig file to bigWig by running the bundled
     * {@code fetchChromSizes} script and the {@code wigToBigWig} binary, both
     * located relative to {@code Misc.binDir()}.
     *
     * <p>Both processes are waited for before the intermediate files are removed;
     * previously the chromosome sizes file was deleted right after the converter
     * had been started.</p>
     *
     * @param genome genome assembly passed to {@code fetchChromSizes}.
     * @param tmpDir directory holding the temporary wig copy, which is deleted.
     * @throws IOException if a process cannot be started, the wait is interrupted,
     *                     or {@code wigToBigWig} exits with a non-zero status.
     */
    private void wigToBigWig(String genome, String tmpDir) throws IOException {
        String binDir = Misc.binDir();
        String scriptDir = binDir + Misc.slash(binDir);
        String wig2bw = scriptDir + "../thirdparty/wigToBigWig";
        String fetchChromSizes = scriptDir + "fetchChromSizes";

        String outDir = outputDir + Misc.slash(outputDir);
        File chromSizes = new File(outDir + genome + ".chrom.sizes");
        File tmpWig = new File(tmpDir + Misc.slash(tmpDir) + "tmpWig.wig");
        String bigWigPath = outDir + Misc.prefix(outputFile.getAbsolutePath()) + ".bw";

        try {
            // Argument lists instead of one command string: paths with blanks survive.
            Process fetch = new ProcessBuilder("sh", fetchChromSizes, genome).start();
            FileWriter sizesWriter = new FileWriter(chromSizes);
            try {
                Scanner sizes = new Scanner(fetch.getInputStream());
                try {
                    while (sizes.hasNextLine()) {
                        sizesWriter.write(sizes.nextLine() + "\r\n");
                    }
                } finally {
                    sizes.close();
                }
            } finally {
                sizesWriter.close();
            }
            // Exit status is not enforced here; an unusable sizes file makes the
            // converter below fail, which is reported.
            awaitExit(fetch, "fetchChromSizes");

            ProcessBuilder builder = new ProcessBuilder(wig2bw,
                    outputFile.getAbsolutePath(), chromSizes.getPath(), bigWigPath);
            // Merge stderr into stdout so one reader can drain everything and the
            // child can never block on a full pipe.
            builder.redirectErrorStream(true);
            Process conversion = builder.start();
            String toolOutput = drain(conversion);
            int exitCode = awaitExit(conversion, "wigToBigWig");
            if (exitCode != 0) {
                throw new IOException("wigToBigWig exited with status " + exitCode
                        + ": " + toolOutput.trim());
            }
        } finally {
            // Remove the intermediate files only after the converter has finished.
            chromSizes.delete();
            tmpWig.delete();
        }
    }

    /** Reads the (merged) output of a process until end of stream and returns it. */
    private static String drain(Process process) {
        StringBuilder output = new StringBuilder();
        Scanner out = new Scanner(process.getInputStream());
        try {
            while (out.hasNextLine()) {
                output.append(out.nextLine()).append('\n');
            }
        } finally {
            out.close();
        }
        return output.toString();
    }

    /** Waits for a process to end and returns its exit status. */
    private static int awaitExit(Process process, String description) throws IOException {
        try {
            return process.waitFor();
        } catch (InterruptedException e) {
            // Preserve the interrupt for callers further up the stack.
            Thread.currentThread().interrupt();
            process.destroy();
            throw new IOException("Interrupted while waiting for " + description, e);
        }
    }

    /**
     * Collects the smallest start and the largest end position over the first line
     * and all later lines that contain "chr" and name the same chromosome as the
     * first line. The result is stored in {@code gMin} and {@code gMax}.
     *
     * <p>The first line is now included; it used to be parsed but never compared.</p>
     *
     * @param s scanner over the alignment file; must have at least one line.
     */
    private void computeExtremas(Scanner s) {
        String line = s.nextLine();
        String firstChrom = chrom(line);
        gMin = Math.min(gMin, start(line));
        gMax = Math.max(gMax, end(line));

        while (s.hasNextLine()) {
            line = s.nextLine();
            if (isHeader(line)) {
                String chrom = chrom(line);
                int start = start(line);
                int end = end(line);
                if (chrom.equals(firstChrom)) {
                    gMin = Math.min(gMin, start);
                    gMax = Math.max(gMax, end);
                }
            }
        }
    }

    /** Returns whether the line contains the text "chr". */
    private boolean isHeader(String line) {
        return line.indexOf("chr") != -1;
    }

    /** Returns the third whitespace-separated token of the line as an int. */
    private int start(String line) {
        return intAfter(line, 2);
    }

    /** Returns the fourth whitespace-separated token of the line as an int. */
    private int end(String line) {
        return intAfter(line, 3);
    }

    /** Returns the second whitespace-separated token of the line. */
    private String chrom(String line) {
        Scanner s = new Scanner(line);
        try {
            s.next();
            return s.next();
        } finally {
            s.close();
        }
    }

    /** Skips {@code skipped} tokens of the line and returns the next one as an int. */
    private int intAfter(String line, int skipped) {
        Scanner s = new Scanner(line);
        try {
            for (int i = 0; i < skipped; i++) {
                s.next();
            }
            return s.nextInt();
        } finally {
            s.close();
        }
    }

    /**
     * Writes the hits of all bases from the frame start up to, but excluding,
     * {@code start2}.
     */
    private void writeFramePortion(FileWriter fw, GenomeFrame frame, int start2)
            throws IOException {
        for (int base = frame.start(); base < start2; base++) {
            emit(fw, frame.getHit(base) + "\r\n");
        }
    }

    /**
     * Bridges the gap between two consecutive frames: a gap of more than 50 starts
     * a new fixedStep section, a smaller gap is filled with zero lines.
     */
    private void writeFrameLeak(FileWriter fw, GenomeFrame last, GenomeFrame frame,
            ReadFrame read) throws IOException {
        int gap = frame.start() - last.end();
        if (gap > 50) {
            writeHeader(fw, frame, read);
        } else {
            for (int i = 0; i < gap - 1; i++) {
                emit(fw, 0 + "\r\n");
            }
        }
    }

    /**
     * Writes the browser lines and the track line to the wig file only (the
     * temporary copy gets no annotation header).
     */
    private void annotationHeader(FileWriter fw) throws IOException {
        String browserLines = "browser position chr1:" + gMin + "-" + gMax + "\r\n"
                + "browser hide all" + "\r\n"
                + "browser pack refGene encodeRegions" + "\r\n"
                + "browser full altGraph";
        String trackLine = "track type=wiggle_0 name=\"" + trackName() + "\" "
                + "description=\"Base read coverage\" visibility=full "
                + "color=0,0,0 altColor=255,0,0 priority=20 "
                + "autoScale=on";
        fw.write(browserLines + "\r\n" + trackLine + "\r\n");
    }

    /**
     * Returns the second "_"-separated token of the alignment file name, or the
     * whole file name when there is no such token (this used to raise a
     * {@code NoSuchElementException}).
     */
    private String trackName() {
        String fileName = alignFile.getName();
        Scanner tokens = new Scanner(fileName);
        try {
            tokens.useDelimiter("_");
            if (!tokens.hasNext()) {
                return fileName;
            }
            tokens.next();
            return tokens.hasNext() ? tokens.next() : fileName;
        } finally {
            tokens.close();
        }
    }

    /** Writes a fixedStep declaration line for the read's chromosome and the frame start. */
    private void writeHeader(FileWriter fw, GenomeFrame frame, ReadFrame read)
            throws IOException {
        emit(fw, "fixedStep chrom=" + read.chrom() + " start=" + frame.start()
                + " step=1\r\n");
    }

    /** Writes the hits of every base of the frame, start and end inclusive. */
    private void writeFrame(FileWriter fw, GenomeFrame frame) throws IOException {
        for (int base = frame.start(); base <= frame.end(); base++) {
            emit(fw, frame.getHit(base) + "\r\n");
        }
    }

    /** Writes the same text to the wig file and to the temporary wig copy. */
    private void emit(FileWriter fw, String text) throws IOException {
        fw.write(text);
        tmpWigWriter.write(text);
    }

    /**
     * Parses the next line of the alignment file into a read frame.
     *
     * @param afScanner scanner over the alignment file; must have a next line.
     * @return the read described by the line.
     * @throws IOException if the line cannot be parsed; the parser's exception is
     *                     attached as cause (this used to end in a
     *                     {@code NullPointerException}).
     */
    private ReadFrame computeNextRead(Scanner afScanner) throws IOException {
        String line = afScanner.nextLine();
        ReducedReadLine rl;
        try {
            rl = new ReducedReadLine(line);
        } catch (ChromosomeFormatException e) {
            throw malformedLine(line, e);
        } catch (ChromosomeNotFoundException e) {
            throw malformedLine(line, e);
        } catch (RangeFormatException e) {
            throw malformedLine(line, e);
        } catch (RangeLimitNotFoundException e) {
            throw malformedLine(line, e);
        }
        return new ReadFrame(rl.name(), rl.chrom(), rl.start(), rl.end());
    }

    /** Builds the exception reported for an alignment line that cannot be parsed. */
    private IOException malformedLine(String line, Throwable cause) {
        return new IOException("Malformed line in alignment file "
                + alignFile.getPath() + ": \"" + line + "\"", cause);
    }
}