comparison src/edu/unc/genomics/wigmath/WigMathTool.java @ 2:e16016635b2a

Uploaded
author timpalpant
date Mon, 13 Feb 2012 22:12:06 -0500
parents
children
comparison
equal deleted inserted replaced
1:a54db233ee3d 2:e16016635b2a
1 package edu.unc.genomics.wigmath;
2
3 import java.io.BufferedWriter;
4 import java.io.IOException;
5 import java.nio.charset.Charset;
6 import java.nio.file.Files;
7 import java.nio.file.Path;
8 import java.util.ArrayList;
9 import java.util.HashSet;
10 import java.util.Iterator;
11 import java.util.List;
12 import java.util.Set;
13
14 import org.apache.log4j.Logger;
15
16 import com.beust.jcommander.Parameter;
17
18 import edu.unc.genomics.CommandLineTool;
19 import edu.unc.genomics.io.WigFile;
20 import edu.unc.genomics.io.WigFileException;
21
22 /**
23 * Abstract class for writing programs to do computation on Wig files
24 * Concrete subclasses must implement the compute method
25 *
26 * @author timpalpant
27 *
28 */
29 public abstract class WigMathTool extends CommandLineTool {
30
31 private static final Logger log = Logger.getLogger(WigMathTool.class);
32
33 public static final int DEFAULT_CHUNK_SIZE = 500_000;
34
35 // TODO: Variable resolution output
36
37 @Parameter(names = {"-o", "--output"}, description = "Output file", required = true)
38 public Path outputFile;
39
40 protected List<WigFile> inputs = new ArrayList<WigFile>();
41
42 public void addInputFile(WigFile wig) {
43 inputs.add(wig);
44 }
45
46 /**
47 * Setup the computation. Should add all input Wig files
48 * with addInputFile() during setup
49 */
50 public abstract void setup();
51
52 /**
53 * Do the computation on a chunk and return the results
54 * Must return (stop-start+1) values
55 *
56 * @param chr
57 * @param start
58 * @param stop
59 * @return the results of the computation for this chunk
60 * @throws IOException
61 * @throws WigFileException
62 */
63 public abstract float[] compute(String chr, int start, int stop)
64 throws IOException, WigFileException;
65
66 @Override
67 public void run() throws IOException {
68 log.debug("Executing setup operations");
69 setup();
70
71 log.debug("Processing files and writing result to disk");
72 try (BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) {
73 // Write the Wig header
74 writer.write("track type=wiggle_0");
75 writer.newLine();
76
77 Set<String> chromosomes = getCommonChromosomes(inputs);
78 log.debug("Found " + chromosomes.size() + " chromosomes in common between all inputs");
79 for (String chr : chromosomes) {
80 int start = getMaxChrStart(inputs, chr);
81 int stop = getMinChrStop(inputs, chr);
82 log.debug("Processing chromosome " + chr + " region " + start + "-" + stop);
83
84 // Write the chromosome header to output
85 writer.write("fixedStep chrom="+chr+" start="+start+" step=1 span=1");
86 writer.newLine();
87
88 // Process the chromosome in chunks
89 int bp = start;
90 while (bp < stop) {
91 int chunkStart = bp;
92 int chunkStop = Math.min(bp+DEFAULT_CHUNK_SIZE-1, stop);
93 int expectedLength = chunkStop - chunkStart + 1;
94 log.debug("Processing chunk "+chr+":"+chunkStart+"-"+chunkStop);
95
96 float[] result = null;
97 try {
98 result = compute(chr, chunkStart, chunkStop);
99 } catch (WigFileException e) {
100 log.fatal("Wig file error while processing chunk " + chr + " region " + start + "-" + stop);
101 e.printStackTrace();
102 throw new RuntimeException("Wig file error while processing chunk " + chr + " region " + start + "-" + stop);
103 }
104
105 if (result.length != expectedLength) {
106 log.error("Expected result length="+expectedLength+", got="+result.length);
107 throw new RuntimeException("Result is not the expected length!");
108 }
109
110 for (int i = 0; i < result.length; i++) {
111 writer.write(Float.toString(result[i]));
112 writer.newLine();
113 }
114
115 bp = chunkStop + 1;
116 }
117 }
118 }
119
120 for (WigFile wig : inputs) {
121 wig.close();
122 }
123 }
124
125 public int getMaxChrStart(List<WigFile> wigs, String chr) {
126 int max = -1;
127 for (WigFile wig : wigs) {
128 if (wig.getChrStart(chr) > max) {
129 max = wig.getChrStart(chr);
130 }
131 }
132
133 return max;
134 }
135
136 public int getMinChrStop(List<WigFile> wigs, String chr) {
137 if (wigs.size() == 0) {
138 return -1;
139 }
140
141 int min = Integer.MAX_VALUE;
142 for (WigFile wig : wigs) {
143 if (wig.getChrStop(chr) < min) {
144 min = wig.getChrStop(chr);
145 }
146 }
147
148 return min;
149 }
150
151 public Set<String> getCommonChromosomes(List<WigFile> wigs) {
152 if (wigs.size() == 0) {
153 return new HashSet<String>();
154 }
155
156 Set<String> chromosomes = wigs.get(0).chromosomes();
157 Iterator<String> it = chromosomes.iterator();
158 while(it.hasNext()) {
159 String chr = it.next();
160 for (WigFile wig : wigs) {
161 if (!wig.includes(chr)) {
162 it.remove();
163 break;
164 }
165 }
166 }
167
168 return chromosomes;
169 }
170 }