0
|
1 package edu.unc.genomics.wigmath;
|
|
2
|
|
3 import java.io.BufferedWriter;
|
|
4 import java.io.IOException;
|
|
5 import java.nio.charset.Charset;
|
|
6 import java.nio.file.Files;
|
|
7 import java.nio.file.Path;
|
|
8 import java.util.ArrayList;
|
|
9 import java.util.HashSet;
|
|
10 import java.util.Iterator;
|
|
11 import java.util.List;
|
|
12 import java.util.Set;
|
|
13
|
|
14 import org.apache.log4j.Logger;
|
|
15
|
|
16 import com.beust.jcommander.Parameter;
|
|
17
|
|
18 import edu.unc.genomics.CommandLineTool;
|
|
19 import edu.unc.genomics.io.WigFile;
|
|
20 import edu.unc.genomics.io.WigFileException;
|
|
21
|
|
22 /**
|
|
23 * Abstract class for writing programs to do computation on Wig files
|
|
24 * Concrete subclasses must implement the compute method
|
|
25 *
|
|
26 * @author timpalpant
|
|
27 *
|
|
28 */
|
|
29 public abstract class WigMathTool extends CommandLineTool {
|
|
30
|
|
31 private static final Logger log = Logger.getLogger(WigMathTool.class);
|
|
32
|
|
33 public static final int DEFAULT_CHUNK_SIZE = 500_000;
|
|
34
|
|
35 // TODO: Variable resolution output
|
|
36
|
|
37 @Parameter(names = {"-o", "--output"}, description = "Output file", required = true)
|
|
38 public Path outputFile;
|
|
39
|
|
40 protected List<WigFile> inputs = new ArrayList<WigFile>();
|
|
41
|
|
42 public void addInputFile(WigFile wig) {
|
|
43 inputs.add(wig);
|
|
44 }
|
|
45
|
|
46 /**
|
|
47 * Setup the computation. Should add all input Wig files
|
|
48 * with addInputFile() during setup
|
|
49 */
|
|
50 public abstract void setup();
|
|
51
|
|
52 /**
|
|
53 * Do the computation on a chunk and return the results
|
|
54 * Must return (stop-start+1) values
|
|
55 *
|
|
56 * @param chr
|
|
57 * @param start
|
|
58 * @param stop
|
|
59 * @return the results of the computation for this chunk
|
|
60 * @throws IOException
|
|
61 * @throws WigFileException
|
|
62 */
|
|
63 public abstract float[] compute(String chr, int start, int stop)
|
|
64 throws IOException, WigFileException;
|
|
65
|
|
66 @Override
|
|
67 public void run() throws IOException {
|
|
68 log.debug("Executing setup operations");
|
|
69 setup();
|
|
70
|
|
71 log.debug("Processing files and writing result to disk");
|
|
72 try (BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) {
|
|
73 // Write the Wig header
|
|
74 writer.write("track type=wiggle_0");
|
|
75 writer.newLine();
|
|
76
|
|
77 Set<String> chromosomes = getCommonChromosomes(inputs);
|
|
78 log.debug("Found " + chromosomes.size() + " chromosomes in common between all inputs");
|
|
79 for (String chr : chromosomes) {
|
|
80 int start = getMaxChrStart(inputs, chr);
|
|
81 int stop = getMinChrStop(inputs, chr);
|
|
82 log.debug("Processing chromosome " + chr + " region " + start + "-" + stop);
|
|
83
|
|
84 // Write the chromosome header to output
|
|
85 writer.write("fixedStep chrom="+chr+" start="+start+" step=1 span=1");
|
|
86 writer.newLine();
|
|
87
|
|
88 // Process the chromosome in chunks
|
|
89 int bp = start;
|
|
90 while (bp < stop) {
|
|
91 int chunkStart = bp;
|
|
92 int chunkStop = Math.min(bp+DEFAULT_CHUNK_SIZE-1, stop);
|
|
93 int expectedLength = chunkStop - chunkStart + 1;
|
|
94 log.debug("Processing chunk "+chr+":"+chunkStart+"-"+chunkStop);
|
|
95
|
|
96 float[] result = null;
|
|
97 try {
|
|
98 result = compute(chr, chunkStart, chunkStop);
|
|
99 } catch (WigFileException e) {
|
|
100 log.fatal("Wig file error while processing chunk " + chr + " region " + start + "-" + stop);
|
|
101 e.printStackTrace();
|
|
102 throw new RuntimeException("Wig file error while processing chunk " + chr + " region " + start + "-" + stop);
|
|
103 }
|
|
104
|
|
105 if (result.length != expectedLength) {
|
|
106 log.error("Expected result length="+expectedLength+", got="+result.length);
|
|
107 throw new RuntimeException("Result is not the expected length!");
|
|
108 }
|
|
109
|
|
110 for (int i = 0; i < result.length; i++) {
|
|
111 writer.write(Float.toString(result[i]));
|
|
112 writer.newLine();
|
|
113 }
|
|
114
|
|
115 bp = chunkStop + 1;
|
|
116 }
|
|
117 }
|
|
118 }
|
|
119
|
|
120 for (WigFile wig : inputs) {
|
|
121 wig.close();
|
|
122 }
|
|
123 }
|
|
124
|
|
125 public int getMaxChrStart(List<WigFile> wigs, String chr) {
|
|
126 int max = -1;
|
|
127 for (WigFile wig : wigs) {
|
|
128 if (wig.getChrStart(chr) > max) {
|
|
129 max = wig.getChrStart(chr);
|
|
130 }
|
|
131 }
|
|
132
|
|
133 return max;
|
|
134 }
|
|
135
|
|
136 public int getMinChrStop(List<WigFile> wigs, String chr) {
|
|
137 if (wigs.size() == 0) {
|
|
138 return -1;
|
|
139 }
|
|
140
|
|
141 int min = Integer.MAX_VALUE;
|
|
142 for (WigFile wig : wigs) {
|
|
143 if (wig.getChrStop(chr) < min) {
|
|
144 min = wig.getChrStop(chr);
|
|
145 }
|
|
146 }
|
|
147
|
|
148 return min;
|
|
149 }
|
|
150
|
|
151 public Set<String> getCommonChromosomes(List<WigFile> wigs) {
|
|
152 if (wigs.size() == 0) {
|
|
153 return new HashSet<String>();
|
|
154 }
|
|
155
|
|
156 Set<String> chromosomes = wigs.get(0).chromosomes();
|
|
157 Iterator<String> it = chromosomes.iterator();
|
|
158 while(it.hasNext()) {
|
|
159 String chr = it.next();
|
|
160 for (WigFile wig : wigs) {
|
|
161 if (!wig.includes(chr)) {
|
|
162 it.remove();
|
|
163 break;
|
|
164 }
|
|
165 }
|
|
166 }
|
|
167
|
|
168 return chromosomes;
|
|
169 }
|
|
170 }
|