Mercurial > repos > timpalpant > java_genomics_toolkit
comparison java-genomics-toolkit/src/edu/unc/genomics/nucleosomes/FindBoundaryNucleosomes.java @ 0:1daf3026d231
Upload alpha version
author | timpalpant |
---|---|
date | Mon, 13 Feb 2012 21:55:55 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1daf3026d231 |
---|---|
1 package edu.unc.genomics.nucleosomes; | |
2 | |
3 import java.io.BufferedWriter; | |
4 import java.io.IOException; | |
5 import java.nio.charset.Charset; | |
6 import java.nio.file.Files; | |
7 import java.nio.file.Path; | |
8 import java.util.ArrayList; | |
9 import java.util.Collections; | |
10 import java.util.HashMap; | |
11 import java.util.List; | |
12 import java.util.Map; | |
13 | |
14 import org.apache.log4j.Logger; | |
15 | |
16 import com.beust.jcommander.Parameter; | |
17 | |
18 import edu.unc.genomics.CommandLineTool; | |
19 import edu.unc.genomics.Interval; | |
20 import edu.unc.genomics.ReadablePathValidator; | |
21 import edu.unc.genomics.io.IntervalFile; | |
22 | |
23 public class FindBoundaryNucleosomes extends CommandLineTool { | |
24 | |
25 private static final Logger log = Logger.getLogger(FindBoundaryNucleosomes.class); | |
26 | |
27 @Parameter(names = {"-i", "--input"}, description = "Input file (nucleosome calls)", required = true, validateWith = ReadablePathValidator.class) | |
28 public Path inputFile; | |
29 @Parameter(names = {"-l", "--loci"}, description = "Boundary loci (Bed format)", required = true) | |
30 public IntervalFile<? extends Interval> lociFile; | |
31 @Parameter(names = {"-o", "--output"}, description = "Output file", required = true) | |
32 public Path outputFile; | |
33 | |
34 private Map<String,List<NucleosomeCall>> nucs = new HashMap<>(); | |
35 | |
36 private List<NucleosomeCall> getIntervalNucleosomes(Interval i) { | |
37 List<NucleosomeCall> intervalNucs = new ArrayList<>(); | |
38 for (NucleosomeCall call : nucs.get(i.getChr())) { | |
39 if (call.getDyad() >= i.low() && call.getDyad() <= i.high()) { | |
40 intervalNucs.add(call); | |
41 } | |
42 } | |
43 | |
44 return intervalNucs; | |
45 } | |
46 | |
47 @Override | |
48 public void run() throws IOException { | |
49 log.debug("Initializing input file"); | |
50 NucleosomeCallsFile nucsFile = new NucleosomeCallsFile(inputFile); | |
51 log.debug("Loading all nucleosomes"); | |
52 for (NucleosomeCall nuc : nucsFile) { | |
53 if (nuc == null) continue; | |
54 if (!nucs.containsKey(nuc.getChr())) { | |
55 nucs.put(nuc.getChr(), new ArrayList<NucleosomeCall>()); | |
56 } | |
57 nucs.get(nuc.getChr()).add(nuc); | |
58 } | |
59 nucsFile.close(); | |
60 | |
61 log.debug("Initializing output file"); | |
62 int skipped = 0; | |
63 try (BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) { | |
64 log.debug("Finding boundary nucleosomes for each interval"); | |
65 NucleosomeCall.DyadComparator comparator = new NucleosomeCall.DyadComparator(); | |
66 for (Interval interval : lociFile) { | |
67 writer.write(interval.toBed()); | |
68 | |
69 // Get all of the nucleosomes within this interval | |
70 List<NucleosomeCall> intervalNucs = getIntervalNucleosomes(interval); | |
71 | |
72 if (intervalNucs.size() > 0) { | |
73 // Sort the list by nucleosome position | |
74 Collections.sort(intervalNucs, comparator); | |
75 if (interval.isCrick()) { | |
76 Collections.reverse(intervalNucs); | |
77 } | |
78 | |
79 int fivePrime = intervalNucs.get(0).getDyad(); | |
80 int threePrime = intervalNucs.get(intervalNucs.size()-1).getDyad(); | |
81 writer.write("\t"+fivePrime+"\t"+threePrime); | |
82 } else { | |
83 skipped++; | |
84 writer.write("\tNA\tNA"); | |
85 } | |
86 | |
87 writer.newLine(); | |
88 } | |
89 } | |
90 | |
91 lociFile.close(); | |
92 log.info("Skipped "+skipped+" intervals with 0 nucleosomes"); | |
93 } | |
94 | |
95 public static void main(String[] args) { | |
96 new FindBoundaryNucleosomes().instanceMain(args); | |
97 } | |
98 } |