Mercurial > repos > pfrommolt > ngsrich
diff NGSrich_0.5.5/src/converters/Read2Wig.java @ 0:89ad0a9cca52 default tip
Uploaded
author | pfrommolt |
---|---|
date | Mon, 21 Nov 2011 08:12:19 -0500 |
parents | |
children |
line wrap: on
line diff
// NGSrich_0.5.5/src/converters/Read2Wig.java
// (added in changeset 0:89ad0a9cca52, Mon Nov 21 08:12:19 2011 -0500)
package converters;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.Scanner;

import datastructures.GenomeFrame;
import datastructures.ReadFrame;
import datastructures.ReducedReadLine;
import exceptions.ChromosomeFormatException;
import exceptions.ChromosomeNotFoundException;
import exceptions.RangeFormatException;
import exceptions.RangeLimitNotFoundException;

import middlewares.Misc;

/**
 * <P>This is a converter class which converts a reduced alignment file with the
 * following format:</P>
 * <TABLE>
 *   <TR>
 *     <TD width="140"><B>read-name</B></TD>
 *     <TD width="200"><B>chromosome-name</B></TD>
 *     <TD width="150"><B>start-position</B></TD>
 *     <TD width="200"><B>end-position</B></TD>
 *   </TR>
 * </TABLE>
 * <P>to the wiggle format. The wiggle format (WIG) allows the display of
 * continuous-valued data in a track format and it is used to visualize the read
 * enrichment with the
 * <a href="http://genome.ucsc.edu/cgi-bin/hgGateway" target="_new">UCSC genome browser</a>.
 * See <a href="http://genome.ucsc.edu/goldenPath/help/wiggle.html" target="_new">
 * http://genome.ucsc.edu/goldenPath/help/wiggle.html</a> for more information.</P>
 *
 * <P>All work is done by the constructor: it writes {@code <outPrefix>.wig}
 * into the output directory, mirrors the data lines into a temporary
 * {@code tmpWig.wig}, and finally tries to run the external
 * {@code wigToBigWig} tool on the result.</P>
 *
 * @author Ali Abdallah
 * @version 06.01.2011
 * @since jdk 1.6.0
 */
public class Read2Wig {

    /** Initial width, in bases, of every genome frame. */
    private static final int LENGTH = 1024;
    /** Gaps between two frames up to this many bases are zero-filled instead of starting a new fixedStep section. */
    private static final int MAX_ZERO_FILL_GAP = 50;
    /** Genome build used when the caller supplies none. */
    private static final String DEFAULT_GENOME = "hg19";
    /** Line terminator used for every line this class writes. */
    private static final String EOL = "\r\n";
    /** Name of the temporary wig copy created inside the temp directory. */
    private static final String TMP_WIG_NAME = "tmpWig.wig";

    private final File alignFile;
    private File outputFile;
    private final String outputDir;
    /** Smallest start / largest end seen on the first chromosome of the alignment file. */
    private int gMin = Integer.MAX_VALUE;
    private int gMax = Integer.MIN_VALUE;
    private final FileWriter tmpWigWriter;

    /**
     * Constructs and initializes a new Read2Wig object and converts the read
     * alignment file to an overall coverage wig file. Afterwards a bigWig
     * conversion is attempted; a failure of that step is reported on
     * {@code System.err} but does not make the constructor fail.
     *
     * @param alignFileName the name of the alignment file.
     * @param outPrefix     file name (without extension) of the wig file to create.
     * @param outputDir     directory receiving the wig file.
     * @param genome        genome build handed to {@code fetchChromSizes};
     *                      {@code null} or blank falls back to {@code hg19}.
     * @param tmpDir        directory receiving the temporary wig copy.
     * @throws IOException if the alignment file cannot be read or parsed, or
     *                     the wig file cannot be written.
     */
    public Read2Wig(String alignFileName, String outPrefix, String outputDir,
            String genome, String tmpDir) throws IOException {
        // Create the File object used to access the alignment file.
        this.alignFile = new File(alignFileName);
        this.outputDir = outputDir + Misc.slash(outputDir);
        this.tmpWigWriter = new FileWriter(tmpDir + Misc.slash(tmpDir) + TMP_WIG_NAME);
        convert(alignFileName, this.outputDir, outPrefix);

        // The genome argument used to be ignored in favour of a hard-coded "hg19".
        String build = DEFAULT_GENOME;
        if (genome != null && genome.trim().length() > 0) {
            build = genome.trim();
        }
        try {
            wigToBigWig(build, tmpDir);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // keep the interrupt visible to callers
            reportBigWigFailure(e);
        } catch (Exception e) {
            // Best effort: the wig file itself has already been written.
            reportBigWigFailure(e);
        }
    }

    /** Prints the bigWig failure hint together with the underlying cause. */
    private static void reportBigWigFailure(Exception e) {
        System.err.println("Converting wig file to a bigwig file failed. " +
                "Check whether you have a 64-bit linux system!");
        System.err.println("Cause: " + e);
    }

    /**
     * Writes the wig file: first the browser/track annotation, then the
     * per-base coverage. All writers and scanners are closed even when the
     * conversion fails half-way.
     *
     * @param alignFileName name of the alignment file (kept for signature
     *                      compatibility; the file itself is taken from the field).
     * @param outputDir     output directory, already terminated by a separator.
     * @param outPrefix     file name (without extension) of the wig file.
     */
    private void convert(String alignFileName, String outputDir, String outPrefix)
            throws FileNotFoundException, IOException {
        Scanner readScanner = null;
        FileWriter fw = null;
        try {
            Scanner extremaScanner = new Scanner(this.alignFile);
            try {
                computeExtremas(extremaScanner);
            } finally {
                extremaScanner.close();
            }

            readScanner = new Scanner(this.alignFile);
            outputFile = new File(outputDir + outPrefix + ".wig");
            fw = new FileWriter(outputFile);

            annotationHeader(fw);
            writeCoverage(fw, readScanner);
        } finally {
            if (readScanner != null) {
                readScanner.close();
            }
            try {
                if (fw != null) {
                    fw.close();
                }
            } finally {
                tmpWigWriter.close();
            }
        }
    }

    /**
     * Streams all reads through a sliding {@link GenomeFrame} and writes the
     * accumulated hits. Every read is counted exactly once, and the frame in
     * progress is flushed both when the chromosome changes and at end of input.
     */
    private void writeCoverage(FileWriter fw, Scanner readScanner) throws IOException {
        ReadFrame readF = computeNextRead(readScanner);              // current read
        if (readF == null) {
            return;                                                  // no reads at all
        }
        GenomeFrame frame = new GenomeFrame(readF.start(), LENGTH);  // current base frame
        String chrom = readF.chrom();                                // current chromosome
        writeHeader(fw, frame, readF);

        while (true) {
            // Reads lying completely inside the frame.
            while (readF != null && chrom.equals(readF.chrom()) && frame.contains(readF)) {
                frame.updateHits(readF);
                readF = computeNextRead(readScanner);
            }
            // Reads sticking out on the right while the frame may still grow.
            while (readF != null && chrom.equals(readF.chrom())
                    && frame.overlaps(readF) && !frame.limitExceeded(readF)) {
                frame.updateFrameFromRightEnd(readF);
                frame.updateHits(readF);
                readF = computeNextRead(readScanner);
            }

            if (readF == null) {
                writeFrame(fw, frame);                               // flush the final frame
                break;
            } else if (!chrom.equals(readF.chrom())) {
                writeFrame(fw, frame);                               // flush before switching chromosome
                frame = new GenomeFrame(readF.start(), LENGTH);
                writeHeader(fw, frame, readF);
                chrom = readF.chrom();
            } else if (!frame.overlaps(readF)) {
                GenomeFrame last = frame;
                writeFrame(fw, last);
                frame = new GenomeFrame(readF.start(), LENGTH);
                writeFrameLeak(fw, last, frame, readF);
            } else if (frame.limitExceeded(readF)) {
                writeFramePortion(fw, frame, readF.start());
                frame.updateFrameFromBothEnds(readF, LENGTH);
            }
        }
    }

    /**
     * Fetches the chromosome sizes of {@code genome} and runs the external
     * {@code wigToBigWig} tool on the wig file. Both helper processes are
     * awaited before the temporary files are removed.
     *
     * @param genome genome build passed to the {@code fetchChromSizes} script.
     * @param tmpDir directory holding the temporary wig copy.
     * @throws IOException          if a process cannot be started or the
     *                              converter exits with a non-zero status.
     * @throws InterruptedException if the thread is interrupted while waiting.
     */
    private void wigToBigWig(String genome, String tmpDir)
            throws IOException, InterruptedException {
        String scriptDir = Misc.binDir() + Misc.slash(Misc.binDir());
        String wig2bw = scriptDir + "../thirdparty/wigToBigWig";
        String fetchChromSizes = scriptDir + "fetchChromSizes";
        String outBase = outputDir + Misc.slash(outputDir);

        File chromSizes = new File(outBase + genome + ".chrom.sizes");
        File tmpWig = new File(tmpDir + Misc.slash(tmpDir) + TMP_WIG_NAME);
        try {
            // Argument lists instead of one command string: paths may contain blanks.
            Process fetch = new ProcessBuilder("sh", fetchChromSizes, genome).start();
            Scanner fetched = new Scanner(fetch.getInputStream());
            try {
                FileWriter sizesWriter = new FileWriter(chromSizes);
                try {
                    while (fetched.hasNextLine()) {
                        sizesWriter.write(fetched.nextLine() + EOL);
                    }
                } finally {
                    sizesWriter.close();
                }
            } finally {
                fetched.close();
            }
            fetch.waitFor();

            Process converter = new ProcessBuilder(
                    wig2bw,
                    outputFile.getAbsolutePath(),
                    chromSizes.getPath(),
                    outBase + Misc.prefix(outputFile.getAbsolutePath()) + ".bw")
                    .redirectErrorStream(true)
                    .start();
            // Consume the tool's output so it can never block on a full pipe.
            forwardToStderr(converter.getInputStream());
            int exitCode = converter.waitFor();
            if (exitCode != 0) {
                throw new IOException("wigToBigWig exited with status " + exitCode);
            }
        } finally {
            // Only now is it safe to remove the converter's input files.
            chromSizes.delete();
            tmpWig.delete();
        }
    }

    /** Copies everything readable from {@code in} to {@code System.err}, then closes it. */
    private static void forwardToStderr(InputStream in) {
        Scanner lines = new Scanner(in);
        try {
            while (lines.hasNextLine()) {
                System.err.println(lines.nextLine());
            }
        } finally {
            lines.close();
        }
    }

    /**
     * Determines {@code gMin} and {@code gMax}: the smallest start and the
     * largest end among all reads on the chromosome of the first read. The
     * first read itself is included; an empty file leaves both untouched.
     */
    private void computeExtremas(Scanner s) {
        String firstChrom = null;
        while (s.hasNextLine()) {
            String line = s.nextLine();
            if (firstChrom == null) {
                if (line.trim().length() == 0) {
                    continue;
                }
                firstChrom = chrom(line);
            } else if (!isHeader(line) || !firstChrom.equals(chrom(line))) {
                continue;
            }
            gMin = Math.min(gMin, start(line));
            gMax = Math.max(gMax, end(line));
        }
    }

    /** Returns whether {@code line} contains the text {@code "chr"} anywhere. */
    private boolean isHeader(String line) {
        return line.indexOf("chr") != -1;
    }

    /** Returns the third whitespace-separated token of {@code line} as an int. */
    private int start(String line) {
        Scanner s = skipTokens(line, 2);
        try {
            return s.nextInt();
        } finally {
            s.close();
        }
    }

    /** Returns the fourth whitespace-separated token of {@code line} as an int. */
    private int end(String line) {
        Scanner s = skipTokens(line, 3);
        try {
            return s.nextInt();
        } finally {
            s.close();
        }
    }

    /** Returns the second whitespace-separated token of {@code line}. */
    private String chrom(String line) {
        Scanner s = skipTokens(line, 1);
        try {
            return s.next();
        } finally {
            s.close();
        }
    }

    /** Opens a scanner on {@code line} positioned behind its first {@code count} tokens. */
    private static Scanner skipTokens(String line, int count) {
        Scanner s = new Scanner(line);
        for (int i = 0; i < count; i++) {
            s.next();
        }
        return s;
    }

    /** Writes {@code text} to the wig file and to the temporary wig copy. */
    private void emit(FileWriter fw, String text) throws IOException {
        fw.write(text);
        tmpWigWriter.write(text);
    }

    /** Writes the hits of all bases from the frame start up to, but excluding, {@code start2}. */
    private void writeFramePortion(FileWriter fw, GenomeFrame frame, int start2)
            throws IOException {
        for (int base = frame.start(); base < start2; base++) {
            emit(fw, frame.getHit(base) + EOL);
        }
    }

    /**
     * Bridges the gap between two consecutive frames: a gap of more than
     * {@code MAX_ZERO_FILL_GAP} bases starts a new fixedStep section, a
     * smaller one is filled with zero lines.
     */
    private void writeFrameLeak(FileWriter fw,
            GenomeFrame last, GenomeFrame frame, ReadFrame read)
            throws IOException {
        int gap = frame.start() - last.end();
        if (gap > MAX_ZERO_FILL_GAP) {
            writeHeader(fw, frame, read);
        } else {
            for (int i = 0; i < gap - 1; i++) {
                emit(fw, 0 + EOL);
            }
        }
    }

    /**
     * Writes the browser and track lines to the wig file only. The track is
     * named after the second {@code _}-separated part of the alignment file
     * name, or after the whole file name when there is no such part.
     */
    private void annotationHeader(FileWriter fw) throws IOException {
        // NOTE(review): the position is always reported on chr1 although gMin/gMax
        // are taken from the first chromosome found in the file - confirm intent.
        String browserLines = "browser position chr1:" + gMin + "-" + gMax + EOL +
                "browser hide all" + EOL +
                "browser pack refGene encodeRegions" + EOL +
                "browser full altGraph";

        String trackName = alignFile.getName();
        Scanner nameParts = new Scanner(alignFile.getName());
        try {
            nameParts.useDelimiter("_");
            if (nameParts.hasNext()) {
                nameParts.next();
                if (nameParts.hasNext()) {
                    trackName = nameParts.next();
                }
            }
        } finally {
            nameParts.close();
        }

        String trackLine = "track type=wiggle_0 name=\"" + trackName + "\" " +
                "description=\"Base read coverage\" visibility=full " +
                "color=0,0,0 altColor=255,0,0 priority=20 " +
                "autoScale=on";
        fw.write(browserLines + EOL + trackLine + EOL);
    }

    /** Starts a new fixedStep section at the start of {@code frame} on the chromosome of {@code read}. */
    private void writeHeader(FileWriter fw, GenomeFrame frame, ReadFrame read)
            throws IOException {
        emit(fw, "fixedStep chrom=" + read.chrom()
                + " start=" + frame.start()
                + " step=1" + EOL);
    }

    /** Writes the hits of every base of {@code frame}, start and end inclusive. */
    private void writeFrame(FileWriter fw, GenomeFrame frame) throws IOException {
        for (int base = frame.start(); base <= frame.end(); base++) {
            emit(fw, frame.getHit(base) + EOL);
        }
    }

    /**
     * Reads the next non-blank line of the alignment file and turns it into a
     * read frame.
     *
     * @return the next read, or {@code null} when the input is exhausted.
     * @throws IOException if the line cannot be parsed; the parser's exception
     *                     is attached as the cause.
     */
    private ReadFrame computeNextRead(Scanner afScanner) throws IOException {
        while (afScanner.hasNextLine()) {
            String line = afScanner.nextLine();
            if (line.trim().length() == 0) {
                continue;
            }
            ReducedReadLine rl;
            try {
                rl = new ReducedReadLine(line);
            } catch (ChromosomeFormatException e) {
                throw malformed(line, e);
            } catch (ChromosomeNotFoundException e) {
                throw malformed(line, e);
            } catch (RangeFormatException e) {
                throw malformed(line, e);
            } catch (RangeLimitNotFoundException e) {
                throw malformed(line, e);
            }
            return new ReadFrame(rl.name(), rl.chrom(), rl.start(), rl.end());
        }
        return null;
    }

    /** Builds the exception reported for an alignment line that cannot be parsed. */
    private static IOException malformed(String line, Exception cause) {
        return new IOException("Malformed alignment line: " + line, cause);
    }

}