2
|
1 package edu.unc.genomics.ngs;
|
|
2
|
|
3 import java.io.BufferedWriter;
|
|
4 import java.io.IOException;
|
|
5 import java.nio.charset.Charset;
|
|
6 import java.nio.file.Files;
|
|
7 import java.nio.file.Path;
|
|
8 import java.nio.file.Paths;
|
|
9 import java.util.ArrayList;
|
|
10 import java.util.Iterator;
|
|
11 import java.util.List;
|
|
12
|
|
13 import org.apache.commons.lang3.StringUtils;
|
|
14 import org.apache.commons.math.stat.descriptive.SummaryStatistics;
|
|
15 import org.apache.log4j.Logger;
|
|
16 import org.broad.igv.bbfile.WigItem;
|
|
17
|
|
18 import com.beust.jcommander.Parameter;
|
|
19
|
|
20 import edu.unc.genomics.CommandLineTool;
|
|
21 import edu.unc.genomics.Interval;
|
|
22 import edu.unc.genomics.io.IntervalFile;
|
|
23 import edu.unc.genomics.io.WigFile;
|
|
24 import edu.unc.genomics.io.WigFileException;
|
|
25
|
|
26 public class IntervalStats extends CommandLineTool {
|
|
27
|
|
28 private static final Logger log = Logger.getLogger(IntervalStats.class);
|
|
29
|
|
30 @Parameter(description = "Input files", required = true)
|
|
31 public List<String> inputFiles = new ArrayList<String>();
|
|
32 @Parameter(names = {"-l", "--loci"}, description = "Loci file (Bed)", required = true)
|
|
33 public IntervalFile<? extends Interval> lociFile;
|
|
34 @Parameter(names = {"-o", "--output"}, description = "Output file", required = true)
|
|
35 public Path outputFile;
|
|
36
|
|
37 private List<WigFile> wigs = new ArrayList<>();
|
|
38
|
|
39 @Override
|
|
40 public void run() throws IOException {
|
|
41 log.debug("Initializing input Wig file(s)");
|
|
42 for (String inputFile : inputFiles) {
|
|
43 try {
|
|
44 WigFile wig = WigFile.autodetect(Paths.get(inputFile));
|
|
45 wigs.add(wig);
|
|
46 } catch (WigFileException e) {
|
|
47 log.error("Error initializing Wig input file: " + inputFile);
|
|
48 e.printStackTrace();
|
|
49 throw new RuntimeException("Error initializing Wig input file: " + inputFile);
|
|
50 }
|
|
51 }
|
|
52
|
|
53 log.debug("Initializing output file");
|
|
54 int count = 0, skipped = 0;
|
|
55 try (BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) {
|
|
56 writer.write("#Chr\tStart\tStop\tID\tValue\tStrand");
|
|
57 for (String inputFile : inputFiles) {
|
|
58 Path p = Paths.get(inputFile);
|
|
59 writer.write("\t" + p.getFileName().toString());
|
|
60 }
|
|
61 writer.newLine();
|
|
62
|
|
63 log.debug("Iterating over all intervals and computing statistics");
|
|
64 SummaryStatistics stats = new SummaryStatistics();
|
|
65 for (Interval interval : lociFile) {
|
|
66 List<Double> means = new ArrayList<>(wigs.size());
|
|
67 for (WigFile wig : wigs) {
|
|
68 stats.clear();
|
|
69 try {
|
|
70 Iterator<WigItem> result = wig.query(interval);
|
|
71 while(result.hasNext()) {
|
|
72 WigItem item = result.next();
|
|
73 for (int i = item.getStartBase(); i <= item.getEndBase(); i++) {
|
|
74 stats.addValue(item.getWigValue());
|
|
75 }
|
|
76 }
|
|
77 means.add(stats.getMean());
|
|
78 } catch (WigFileException e) {
|
|
79 means.add(Double.NaN);
|
|
80 skipped++;
|
|
81 }
|
|
82 }
|
|
83
|
|
84 writer.write(interval.toBed() + "\t" + StringUtils.join(means, "\t"));
|
|
85 writer.newLine();
|
|
86 count++;
|
|
87 }
|
|
88 }
|
|
89
|
|
90 lociFile.close();
|
|
91 for (WigFile wig : wigs) {
|
|
92 wig.close();
|
|
93 }
|
|
94 log.info(count + " intervals processed");
|
|
95 log.info(skipped + " interval skipped");
|
|
96 }
|
|
97
|
|
98 public static void main(String[] args) {
|
|
99 new IntervalStats().instanceMain(args);
|
|
100 }
|
|
101
|
|
102 }
|