annotate java-genomics-toolkit/src/edu/unc/genomics/nucleosomes/FindBoundaryNucleosomes.java @ 0:1daf3026d231

Upload alpha version
author timpalpant
date Mon, 13 Feb 2012 21:55:55 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
1 package edu.unc.genomics.nucleosomes;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
2
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
3 import java.io.BufferedWriter;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
4 import java.io.IOException;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
5 import java.nio.charset.Charset;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
6 import java.nio.file.Files;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
7 import java.nio.file.Path;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
8 import java.util.ArrayList;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
9 import java.util.Collections;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
10 import java.util.HashMap;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
11 import java.util.List;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
12 import java.util.Map;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
13
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
14 import org.apache.log4j.Logger;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
15
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
16 import com.beust.jcommander.Parameter;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
17
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
18 import edu.unc.genomics.CommandLineTool;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
19 import edu.unc.genomics.Interval;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
20 import edu.unc.genomics.ReadablePathValidator;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
21 import edu.unc.genomics.io.IntervalFile;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
22
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
23 public class FindBoundaryNucleosomes extends CommandLineTool {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
24
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
25 private static final Logger log = Logger.getLogger(FindBoundaryNucleosomes.class);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
26
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
27 @Parameter(names = {"-i", "--input"}, description = "Input file (nucleosome calls)", required = true, validateWith = ReadablePathValidator.class)
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
28 public Path inputFile;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
29 @Parameter(names = {"-l", "--loci"}, description = "Boundary loci (Bed format)", required = true)
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
30 public IntervalFile<? extends Interval> lociFile;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
31 @Parameter(names = {"-o", "--output"}, description = "Output file", required = true)
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
32 public Path outputFile;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
33
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
34 private Map<String,List<NucleosomeCall>> nucs = new HashMap<>();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
35
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
36 private List<NucleosomeCall> getIntervalNucleosomes(Interval i) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
37 List<NucleosomeCall> intervalNucs = new ArrayList<>();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
38 for (NucleosomeCall call : nucs.get(i.getChr())) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
39 if (call.getDyad() >= i.low() && call.getDyad() <= i.high()) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
40 intervalNucs.add(call);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
41 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
42 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
43
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
44 return intervalNucs;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
45 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
46
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
47 @Override
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
48 public void run() throws IOException {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
49 log.debug("Initializing input file");
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
50 NucleosomeCallsFile nucsFile = new NucleosomeCallsFile(inputFile);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
51 log.debug("Loading all nucleosomes");
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
52 for (NucleosomeCall nuc : nucsFile) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
53 if (nuc == null) continue;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
54 if (!nucs.containsKey(nuc.getChr())) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
55 nucs.put(nuc.getChr(), new ArrayList<NucleosomeCall>());
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
56 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
57 nucs.get(nuc.getChr()).add(nuc);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
58 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
59 nucsFile.close();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
60
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
61 log.debug("Initializing output file");
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
62 int skipped = 0;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
63 try (BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
64 log.debug("Finding boundary nucleosomes for each interval");
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
65 NucleosomeCall.DyadComparator comparator = new NucleosomeCall.DyadComparator();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
66 for (Interval interval : lociFile) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
67 writer.write(interval.toBed());
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
68
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
69 // Get all of the nucleosomes within this interval
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
70 List<NucleosomeCall> intervalNucs = getIntervalNucleosomes(interval);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
71
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
72 if (intervalNucs.size() > 0) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
73 // Sort the list by nucleosome position
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
74 Collections.sort(intervalNucs, comparator);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
75 if (interval.isCrick()) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
76 Collections.reverse(intervalNucs);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
77 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
78
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
79 int fivePrime = intervalNucs.get(0).getDyad();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
80 int threePrime = intervalNucs.get(intervalNucs.size()-1).getDyad();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
81 writer.write("\t"+fivePrime+"\t"+threePrime);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
82 } else {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
83 skipped++;
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
84 writer.write("\tNA\tNA");
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
85 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
86
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
87 writer.newLine();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
88 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
89 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
90
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
91 lociFile.close();
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
92 log.info("Skipped "+skipped+" intervals with 0 nucleosomes");
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
93 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
94
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
95 public static void main(String[] args) {
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
96 new FindBoundaryNucleosomes().instanceMain(args);
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
97 }
1daf3026d231 Upload alpha version
timpalpant
parents:
diff changeset
98 }