diff java-genomics-toolkit/src/edu/unc/genomics/ngs/PowerSpectrum.java @ 0:1daf3026d231

Upload alpha version
author timpalpant
date Mon, 13 Feb 2012 21:55:55 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/java-genomics-toolkit/src/edu/unc/genomics/ngs/PowerSpectrum.java	Mon Feb 13 21:55:55 2012 -0500
@@ -0,0 +1,106 @@
+package edu.unc.genomics.ngs;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Iterator;
+
+import org.apache.log4j.Logger;
+import org.broad.igv.bbfile.WigItem;
+
+import com.beust.jcommander.Parameter;
+
+import edu.emory.mathcs.jtransforms.fft.FloatFFT_1D;
+import edu.unc.genomics.CommandLineTool;
+import edu.unc.genomics.Interval;
+import edu.unc.genomics.io.IntervalFile;
+import edu.unc.genomics.io.WigFile;
+import edu.unc.genomics.io.WigFileException;
+
+public class PowerSpectrum extends CommandLineTool {
+	
+	private static final Logger log = Logger.getLogger(PowerSpectrum.class);
+
+	@Parameter(names = {"-i", "--input"}, description = "Input file (Wig)", required = true)
+	public WigFile inputFile;
+	@Parameter(names = {"-l", "--loci"}, description = "Genomic loci (Bed format)", required = true)
+	public IntervalFile<? extends Interval> loci;
+	@Parameter(names = {"-o", "--output"}, description = "Output file (tabular)", required = true)
+	public Path outputFile;
+		
+	/**
+	 * Computes the power spectrum from FFT data
+	 * taking into accound even/odd length arrays
+	 * refer to JTransforms documentation for layout of the FFT data
+	 * @param f
+	 * @return
+	 */
+	private float[] abs2(float[] f) {
+		int n = f.length;
+		float[] ps = new float[n/2+1];
+		// DC component
+		ps[0] = (f[0]*f[0]) / (n*n); 
+		
+		// Even
+		if (n % 2 == 0) {
+			for (int k = 1; k < n/2; k++) {
+				ps[k] = f[2*k]*f[2*k] + f[2*k+1]*f[2*k+1];
+			}
+			ps[n/2] = f[1]*f[1];
+		// Odd
+		} else {
+			for (int k = 1; k < (n-1)/2; k++) {
+				ps[k] = f[2*k]*f[2*k] + f[2*k+1]*f[2*k+1];
+			}
+			
+			ps[(n-1)/2] = f[n-1]*f[n-1] + f[1]*f[1];
+		}
+		
+		return ps;
+	}
+	
+	public void run() throws IOException {		
+		try (BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) {
+			log.debug("Computing power spectrum for each window");
+			int skipped = 0;
+			for (Interval interval : loci) {
+				Iterator<WigItem> wigIter;
+				try {
+					wigIter = inputFile.query(interval);
+				} catch (IOException | WigFileException e) {
+					log.debug("Skipping interval: " + interval.toString());
+					skipped++;
+					continue;
+				}
+				
+				float[] data = WigFile.flattenData(wigIter, interval.getStart(), interval.getStop());
+				// Compute the power spectrum
+				FloatFFT_1D fft = new FloatFFT_1D(data.length);
+				fft.realForward(data);
+				float[] ps = abs2(data);
+				// and normalize the power spectrum
+				float sum = 0;
+				for (int i = 1; i < ps.length; i++) {
+					sum += ps[i];
+				}
+				for (int i = 1; i < ps.length; i++) {
+					ps[i] /= sum;
+				}
+	
+				writer.write(interval.toBed());
+				for (int i = 1; i < Math.min(ps.length, 40); i++) {
+					writer.write("\t"+ps[i]);
+				}
+				writer.newLine();
+			}
+			
+			log.info("Skipped " + skipped + " intervals");
+		}		
+	}
+	
+	public static void main(String[] args) {
+		new PowerSpectrum().instanceMain(args);
+	}
+}
\ No newline at end of file