annotate java-genomics-toolkit/src/edu/unc/genomics/wigmath/WigMathTool.java @ 0:1daf3026d231

Upload alpha version
author timpalpant
date Mon, 13 Feb 2012 21:55:55 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
1 package edu.unc.genomics.wigmath;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
2
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
3 import java.io.BufferedWriter;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
4 import java.io.IOException;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
5 import java.nio.charset.Charset;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
6 import java.nio.file.Files;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
7 import java.nio.file.Path;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
8 import java.util.ArrayList;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
9 import java.util.HashSet;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
10 import java.util.Iterator;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
11 import java.util.List;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
12 import java.util.Set;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
13
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
14 import org.apache.log4j.Logger;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
15
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
16 import com.beust.jcommander.Parameter;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
17
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
18 import edu.unc.genomics.CommandLineTool;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
19 import edu.unc.genomics.io.WigFile;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
20 import edu.unc.genomics.io.WigFileException;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
21
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
22 /**
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
23 * Abstract class for writing programs to do computation on Wig files
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
24 * Concrete subclasses must implement the compute method
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
25 *
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
26 * @author timpalpant
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
27 *
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
28 */
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
29 public abstract class WigMathTool extends CommandLineTool {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
30
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
31 private static final Logger log = Logger.getLogger(WigMathTool.class);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
32
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
33 public static final int DEFAULT_CHUNK_SIZE = 500_000;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
34
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
35 // TODO: Variable resolution output
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
36
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
37 @Parameter(names = {"-o", "--output"}, description = "Output file", required = true)
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
38 public Path outputFile;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
39
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
40 protected List<WigFile> inputs = new ArrayList<WigFile>();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
41
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
42 public void addInputFile(WigFile wig) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
43 inputs.add(wig);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
44 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
45
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
46 /**
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
47 * Setup the computation. Should add all input Wig files
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
48 * with addInputFile() during setup
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
49 */
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
50 public abstract void setup();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
51
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
52 /**
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
53 * Do the computation on a chunk and return the results
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
54 * Must return (stop-start+1) values
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
55 *
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
56 * @param chr
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
57 * @param start
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
58 * @param stop
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
59 * @return the results of the computation for this chunk
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
60 * @throws IOException
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
61 * @throws WigFileException
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
62 */
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
63 public abstract float[] compute(String chr, int start, int stop)
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
64 throws IOException, WigFileException;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
65
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
66 @Override
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
67 public void run() throws IOException {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
68 log.debug("Executing setup operations");
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
69 setup();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
70
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
71 log.debug("Processing files and writing result to disk");
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
72 try (BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
73 // Write the Wig header
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
74 writer.write("track type=wiggle_0");
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
75 writer.newLine();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
76
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
77 Set<String> chromosomes = getCommonChromosomes(inputs);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
78 log.debug("Found " + chromosomes.size() + " chromosomes in common between all inputs");
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
79 for (String chr : chromosomes) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
80 int start = getMaxChrStart(inputs, chr);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
81 int stop = getMinChrStop(inputs, chr);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
82 log.debug("Processing chromosome " + chr + " region " + start + "-" + stop);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
83
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
84 // Write the chromosome header to output
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
85 writer.write("fixedStep chrom="+chr+" start="+start+" step=1 span=1");
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
86 writer.newLine();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
87
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
88 // Process the chromosome in chunks
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
89 int bp = start;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
90 while (bp < stop) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
91 int chunkStart = bp;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
92 int chunkStop = Math.min(bp+DEFAULT_CHUNK_SIZE-1, stop);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
93 int expectedLength = chunkStop - chunkStart + 1;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
94 log.debug("Processing chunk "+chr+":"+chunkStart+"-"+chunkStop);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
95
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
96 float[] result = null;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
97 try {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
98 result = compute(chr, chunkStart, chunkStop);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
99 } catch (WigFileException e) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
100 log.fatal("Wig file error while processing chunk " + chr + " region " + start + "-" + stop);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
101 e.printStackTrace();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
102 throw new RuntimeException("Wig file error while processing chunk " + chr + " region " + start + "-" + stop);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
103 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
104
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
105 if (result.length != expectedLength) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
106 log.error("Expected result length="+expectedLength+", got="+result.length);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
107 throw new RuntimeException("Result is not the expected length!");
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
108 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
109
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
110 for (int i = 0; i < result.length; i++) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
111 writer.write(Float.toString(result[i]));
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
112 writer.newLine();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
113 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
114
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
115 bp = chunkStop + 1;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
116 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
117 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
118 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
119
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
120 for (WigFile wig : inputs) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
121 wig.close();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
122 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
123 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
124
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
125 public int getMaxChrStart(List<WigFile> wigs, String chr) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
126 int max = -1;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
127 for (WigFile wig : wigs) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
128 if (wig.getChrStart(chr) > max) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
129 max = wig.getChrStart(chr);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
130 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
131 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
132
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
133 return max;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
134 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
135
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
136 public int getMinChrStop(List<WigFile> wigs, String chr) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
137 if (wigs.size() == 0) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
138 return -1;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
139 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
140
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
141 int min = Integer.MAX_VALUE;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
142 for (WigFile wig : wigs) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
143 if (wig.getChrStop(chr) < min) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
144 min = wig.getChrStop(chr);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
145 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
146 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
147
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
148 return min;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
149 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
150
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
151 public Set<String> getCommonChromosomes(List<WigFile> wigs) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
152 if (wigs.size() == 0) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
153 return new HashSet<String>();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
154 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
155
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
156 Set<String> chromosomes = wigs.get(0).chromosomes();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
157 Iterator<String> it = chromosomes.iterator();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
158 while(it.hasNext()) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
159 String chr = it.next();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
160 for (WigFile wig : wigs) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
161 if (!wig.includes(chr)) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
162 it.remove();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
163 break;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
164 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
165 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
166 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
167
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
168 return chromosomes;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
169 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
170 }