annotate src/edu/unc/genomics/wigmath/WigMathTool.java @ 2:e16016635b2a

Uploaded
author timpalpant
date Mon, 13 Feb 2012 22:12:06 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
1 package edu.unc.genomics.wigmath;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
2
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
3 import java.io.BufferedWriter;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
4 import java.io.IOException;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
5 import java.nio.charset.Charset;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
6 import java.nio.file.Files;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
7 import java.nio.file.Path;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
8 import java.util.ArrayList;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
9 import java.util.HashSet;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
10 import java.util.Iterator;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
11 import java.util.List;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
12 import java.util.Set;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
13
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
14 import org.apache.log4j.Logger;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
15
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
16 import com.beust.jcommander.Parameter;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
17
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
18 import edu.unc.genomics.CommandLineTool;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
19 import edu.unc.genomics.io.WigFile;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
20 import edu.unc.genomics.io.WigFileException;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
21
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
22 /**
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
23 * Abstract class for writing programs to do computation on Wig files
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
24 * Concrete subclasses must implement the compute method
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
25 *
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
26 * @author timpalpant
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
27 *
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
28 */
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
29 public abstract class WigMathTool extends CommandLineTool {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
30
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
31 private static final Logger log = Logger.getLogger(WigMathTool.class);
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
32
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
33 public static final int DEFAULT_CHUNK_SIZE = 500_000;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
34
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
35 // TODO: Variable resolution output
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
36
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
37 @Parameter(names = {"-o", "--output"}, description = "Output file", required = true)
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
38 public Path outputFile;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
39
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
40 protected List<WigFile> inputs = new ArrayList<WigFile>();
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
41
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
42 public void addInputFile(WigFile wig) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
43 inputs.add(wig);
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
44 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
45
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
46 /**
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
47 * Setup the computation. Should add all input Wig files
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
48 * with addInputFile() during setup
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
49 */
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
50 public abstract void setup();
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
51
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
52 /**
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
53 * Do the computation on a chunk and return the results
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
54 * Must return (stop-start+1) values
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
55 *
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
56 * @param chr
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
57 * @param start
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
58 * @param stop
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
59 * @return the results of the computation for this chunk
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
60 * @throws IOException
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
61 * @throws WigFileException
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
62 */
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
63 public abstract float[] compute(String chr, int start, int stop)
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
64 throws IOException, WigFileException;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
65
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
66 @Override
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
67 public void run() throws IOException {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
68 log.debug("Executing setup operations");
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
69 setup();
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
70
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
71 log.debug("Processing files and writing result to disk");
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
72 try (BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
73 // Write the Wig header
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
74 writer.write("track type=wiggle_0");
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
75 writer.newLine();
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
76
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
77 Set<String> chromosomes = getCommonChromosomes(inputs);
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
78 log.debug("Found " + chromosomes.size() + " chromosomes in common between all inputs");
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
79 for (String chr : chromosomes) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
80 int start = getMaxChrStart(inputs, chr);
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
81 int stop = getMinChrStop(inputs, chr);
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
82 log.debug("Processing chromosome " + chr + " region " + start + "-" + stop);
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
83
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
84 // Write the chromosome header to output
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
85 writer.write("fixedStep chrom="+chr+" start="+start+" step=1 span=1");
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
86 writer.newLine();
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
87
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
88 // Process the chromosome in chunks
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
89 int bp = start;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
90 while (bp < stop) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
91 int chunkStart = bp;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
92 int chunkStop = Math.min(bp+DEFAULT_CHUNK_SIZE-1, stop);
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
93 int expectedLength = chunkStop - chunkStart + 1;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
94 log.debug("Processing chunk "+chr+":"+chunkStart+"-"+chunkStop);
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
95
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
96 float[] result = null;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
97 try {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
98 result = compute(chr, chunkStart, chunkStop);
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
99 } catch (WigFileException e) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
100 log.fatal("Wig file error while processing chunk " + chr + " region " + start + "-" + stop);
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
101 e.printStackTrace();
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
102 throw new RuntimeException("Wig file error while processing chunk " + chr + " region " + start + "-" + stop);
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
103 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
104
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
105 if (result.length != expectedLength) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
106 log.error("Expected result length="+expectedLength+", got="+result.length);
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
107 throw new RuntimeException("Result is not the expected length!");
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
108 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
109
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
110 for (int i = 0; i < result.length; i++) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
111 writer.write(Float.toString(result[i]));
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
112 writer.newLine();
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
113 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
114
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
115 bp = chunkStop + 1;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
116 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
117 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
118 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
119
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
120 for (WigFile wig : inputs) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
121 wig.close();
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
122 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
123 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
124
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
125 public int getMaxChrStart(List<WigFile> wigs, String chr) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
126 int max = -1;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
127 for (WigFile wig : wigs) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
128 if (wig.getChrStart(chr) > max) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
129 max = wig.getChrStart(chr);
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
130 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
131 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
132
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
133 return max;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
134 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
135
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
136 public int getMinChrStop(List<WigFile> wigs, String chr) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
137 if (wigs.size() == 0) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
138 return -1;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
139 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
140
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
141 int min = Integer.MAX_VALUE;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
142 for (WigFile wig : wigs) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
143 if (wig.getChrStop(chr) < min) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
144 min = wig.getChrStop(chr);
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
145 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
146 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
147
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
148 return min;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
149 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
150
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
151 public Set<String> getCommonChromosomes(List<WigFile> wigs) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
152 if (wigs.size() == 0) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
153 return new HashSet<String>();
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
154 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
155
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
156 Set<String> chromosomes = wigs.get(0).chromosomes();
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
157 Iterator<String> it = chromosomes.iterator();
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
158 while(it.hasNext()) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
159 String chr = it.next();
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
160 for (WigFile wig : wigs) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
161 if (!wig.includes(chr)) {
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
162 it.remove();
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
163 break;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
164 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
165 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
166 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
167
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
168 return chromosomes;
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
169 }
e16016635b2a Uploaded
timpalpant
parents:
diff changeset
170 }