diff java-genomics-toolkit/src/edu/unc/genomics/wigmath/WigMathTool.java @ 0:1daf3026d231

Upload alpha version
author timpalpant
date Mon, 13 Feb 2012 21:55:55 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/java-genomics-toolkit/src/edu/unc/genomics/wigmath/WigMathTool.java	Mon Feb 13 21:55:55 2012 -0500
@@ -0,0 +1,170 @@
+package edu.unc.genomics.wigmath;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.log4j.Logger;
+
+import com.beust.jcommander.Parameter;
+
+import edu.unc.genomics.CommandLineTool;
+import edu.unc.genomics.io.WigFile;
+import edu.unc.genomics.io.WigFileException;
+
+/**
+ * Abstract class for writing programs to do computation on Wig files
+ * Concrete subclasses must implement the compute method
+ * 
+ * @author timpalpant
+ *
+ */
+public abstract class WigMathTool extends CommandLineTool {
+	
+	private static final Logger log = Logger.getLogger(WigMathTool.class);
+	
+	public static final int DEFAULT_CHUNK_SIZE = 500_000;
+	
+	// TODO: Variable resolution output
+	
+	@Parameter(names = {"-o", "--output"}, description = "Output file", required = true)
+	public Path outputFile;
+	
+	protected List<WigFile> inputs = new ArrayList<WigFile>();
+	
+	public void addInputFile(WigFile wig) {
+		inputs.add(wig);
+	}
+	
+	/**
+	 * Setup the computation. Should add all input Wig files
+	 * with addInputFile() during setup
+	 */
+	public abstract void setup();
+	
+	/**
+	 * Do the computation on a chunk and return the results
+	 * Must return (stop-start+1) values
+	 * 
+	 * @param chr
+	 * @param start
+	 * @param stop
+	 * @return the results of the computation for this chunk
+	 * @throws IOException
+	 * @throws WigFileException
+	 */
+	public abstract float[] compute(String chr, int start, int stop)
+			 throws IOException, WigFileException;
+	
+	@Override
+	public void run() throws IOException {
+		log.debug("Executing setup operations");
+		setup();
+		
+		log.debug("Processing files and writing result to disk");
+		try (BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) {
+			// Write the Wig header
+			writer.write("track type=wiggle_0");
+			writer.newLine();
+			
+			Set<String> chromosomes = getCommonChromosomes(inputs);
+			log.debug("Found " + chromosomes.size() + " chromosomes in common between all inputs");
+			for (String chr : chromosomes) {
+				int start = getMaxChrStart(inputs, chr);
+				int stop = getMinChrStop(inputs, chr);
+				log.debug("Processing chromosome " + chr + " region " + start + "-" + stop);
+				
+				// Write the chromosome header to output
+				writer.write("fixedStep chrom="+chr+" start="+start+" step=1 span=1");
+				writer.newLine();
+				
+				// Process the chromosome in chunks
+				int bp = start;
+				while (bp < stop) {
+					int chunkStart = bp;
+					int chunkStop = Math.min(bp+DEFAULT_CHUNK_SIZE-1, stop);
+					int expectedLength = chunkStop - chunkStart + 1;
+					log.debug("Processing chunk "+chr+":"+chunkStart+"-"+chunkStop);
+					
+					float[] result = null;
+					try {
+						result = compute(chr, chunkStart, chunkStop);
+					} catch (WigFileException e) {
+						log.fatal("Wig file error while processing chunk " + chr + " region " + start + "-" + stop);
+						e.printStackTrace();
+						throw new RuntimeException("Wig file error while processing chunk " + chr + " region " + start + "-" + stop);
+					}
+					
+					if (result.length != expectedLength) {
+						log.error("Expected result length="+expectedLength+", got="+result.length);
+						throw new RuntimeException("Result is not the expected length!");
+					}
+	
+					for (int i = 0; i < result.length; i++) {
+						writer.write(Float.toString(result[i]));
+						writer.newLine();
+					}
+					
+					bp = chunkStop + 1;
+				}
+			}
+		}
+		
+		for (WigFile wig : inputs) {
+			wig.close();
+		}
+	}
+	
+	public int getMaxChrStart(List<WigFile> wigs, String chr) {
+		int max = -1;
+		for (WigFile wig : wigs) {
+			if (wig.getChrStart(chr) > max) {
+				max = wig.getChrStart(chr);
+			}
+		}
+		
+		return max;
+	}
+	
+	public int getMinChrStop(List<WigFile> wigs, String chr) {
+		if (wigs.size() == 0) {
+			return -1;
+		}
+		
+		int min = Integer.MAX_VALUE;
+		for (WigFile wig : wigs) {
+			if (wig.getChrStop(chr) < min) {
+				min = wig.getChrStop(chr);
+			}
+		}
+		
+		return min;
+	}
+	
+	public Set<String> getCommonChromosomes(List<WigFile> wigs) {
+		if (wigs.size() == 0) {
+			return new HashSet<String>();
+		}
+		
+		Set<String> chromosomes = wigs.get(0).chromosomes();
+		Iterator<String> it = chromosomes.iterator();
+		while(it.hasNext()) {
+			String chr = it.next();
+			for (WigFile wig : wigs) {
+				if (!wig.includes(chr)) {
+					it.remove();
+					break;
+				}
+			}
+		}
+
+		return chromosomes;
+	}
+}