0
|
1 package edu.unc.genomics.nucleosomes;
|
|
2
|
|
3 import java.io.BufferedWriter;
|
|
4 import java.io.IOException;
|
|
5 import java.nio.charset.Charset;
|
|
6 import java.nio.file.Files;
|
|
7 import java.nio.file.Path;
|
|
8 import java.util.ArrayList;
|
|
9 import java.util.Collections;
|
|
10 import java.util.HashMap;
|
|
11 import java.util.List;
|
|
12 import java.util.Map;
|
|
13
|
|
14 import org.apache.log4j.Logger;
|
|
15
|
|
16 import com.beust.jcommander.Parameter;
|
|
17
|
|
18 import edu.unc.genomics.CommandLineTool;
|
|
19 import edu.unc.genomics.Interval;
|
|
20 import edu.unc.genomics.ReadablePathValidator;
|
|
21 import edu.unc.genomics.io.IntervalFile;
|
|
22
|
|
23 public class FindBoundaryNucleosomes extends CommandLineTool {
|
|
24
|
|
25 private static final Logger log = Logger.getLogger(FindBoundaryNucleosomes.class);
|
|
26
|
|
27 @Parameter(names = {"-i", "--input"}, description = "Input file (nucleosome calls)", required = true, validateWith = ReadablePathValidator.class)
|
|
28 public Path inputFile;
|
|
29 @Parameter(names = {"-l", "--loci"}, description = "Boundary loci (Bed format)", required = true)
|
|
30 public IntervalFile<? extends Interval> lociFile;
|
|
31 @Parameter(names = {"-o", "--output"}, description = "Output file", required = true)
|
|
32 public Path outputFile;
|
|
33
|
|
34 private Map<String,List<NucleosomeCall>> nucs = new HashMap<>();
|
|
35
|
|
36 private List<NucleosomeCall> getIntervalNucleosomes(Interval i) {
|
|
37 List<NucleosomeCall> intervalNucs = new ArrayList<>();
|
|
38 for (NucleosomeCall call : nucs.get(i.getChr())) {
|
|
39 if (call.getDyad() >= i.low() && call.getDyad() <= i.high()) {
|
|
40 intervalNucs.add(call);
|
|
41 }
|
|
42 }
|
|
43
|
|
44 return intervalNucs;
|
|
45 }
|
|
46
|
|
47 @Override
|
|
48 public void run() throws IOException {
|
|
49 log.debug("Initializing input file");
|
|
50 NucleosomeCallsFile nucsFile = new NucleosomeCallsFile(inputFile);
|
|
51 log.debug("Loading all nucleosomes");
|
|
52 for (NucleosomeCall nuc : nucsFile) {
|
|
53 if (nuc == null) continue;
|
|
54 if (!nucs.containsKey(nuc.getChr())) {
|
|
55 nucs.put(nuc.getChr(), new ArrayList<NucleosomeCall>());
|
|
56 }
|
|
57 nucs.get(nuc.getChr()).add(nuc);
|
|
58 }
|
|
59 nucsFile.close();
|
|
60
|
|
61 log.debug("Initializing output file");
|
|
62 int skipped = 0;
|
|
63 try (BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) {
|
|
64 log.debug("Finding boundary nucleosomes for each interval");
|
|
65 NucleosomeCall.DyadComparator comparator = new NucleosomeCall.DyadComparator();
|
|
66 for (Interval interval : lociFile) {
|
|
67 writer.write(interval.toBed());
|
|
68
|
|
69 // Get all of the nucleosomes within this interval
|
|
70 List<NucleosomeCall> intervalNucs = getIntervalNucleosomes(interval);
|
|
71
|
|
72 if (intervalNucs.size() > 0) {
|
|
73 // Sort the list by nucleosome position
|
|
74 Collections.sort(intervalNucs, comparator);
|
|
75 if (interval.isCrick()) {
|
|
76 Collections.reverse(intervalNucs);
|
|
77 }
|
|
78
|
|
79 int fivePrime = intervalNucs.get(0).getDyad();
|
|
80 int threePrime = intervalNucs.get(intervalNucs.size()-1).getDyad();
|
|
81 writer.write("\t"+fivePrime+"\t"+threePrime);
|
|
82 } else {
|
|
83 skipped++;
|
|
84 writer.write("\tNA\tNA");
|
|
85 }
|
|
86
|
|
87 writer.newLine();
|
|
88 }
|
|
89 }
|
|
90
|
|
91 lociFile.close();
|
|
92 log.info("Skipped "+skipped+" intervals with 0 nucleosomes");
|
|
93 }
|
|
94
|
|
95 public static void main(String[] args) {
|
|
96 new FindBoundaryNucleosomes().instanceMain(args);
|
|
97 }
|
|
98 } |