Mercurial > repos > timpalpant > java_genomics_toolkit
comparison java-genomics-toolkit/src/edu/unc/genomics/nucleosomes/FindBoundaryNucleosomes.java @ 0:1daf3026d231
Upload alpha version
| author | timpalpant |
|---|---|
| date | Mon, 13 Feb 2012 21:55:55 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1daf3026d231 |
|---|---|
| 1 package edu.unc.genomics.nucleosomes; | |
| 2 | |
| 3 import java.io.BufferedWriter; | |
| 4 import java.io.IOException; | |
| 5 import java.nio.charset.Charset; | |
| 6 import java.nio.file.Files; | |
| 7 import java.nio.file.Path; | |
| 8 import java.util.ArrayList; | |
| 9 import java.util.Collections; | |
| 10 import java.util.HashMap; | |
| 11 import java.util.List; | |
| 12 import java.util.Map; | |
| 13 | |
| 14 import org.apache.log4j.Logger; | |
| 15 | |
| 16 import com.beust.jcommander.Parameter; | |
| 17 | |
| 18 import edu.unc.genomics.CommandLineTool; | |
| 19 import edu.unc.genomics.Interval; | |
| 20 import edu.unc.genomics.ReadablePathValidator; | |
| 21 import edu.unc.genomics.io.IntervalFile; | |
| 22 | |
| 23 public class FindBoundaryNucleosomes extends CommandLineTool { | |
| 24 | |
| 25 private static final Logger log = Logger.getLogger(FindBoundaryNucleosomes.class); | |
| 26 | |
| 27 @Parameter(names = {"-i", "--input"}, description = "Input file (nucleosome calls)", required = true, validateWith = ReadablePathValidator.class) | |
| 28 public Path inputFile; | |
| 29 @Parameter(names = {"-l", "--loci"}, description = "Boundary loci (Bed format)", required = true) | |
| 30 public IntervalFile<? extends Interval> lociFile; | |
| 31 @Parameter(names = {"-o", "--output"}, description = "Output file", required = true) | |
| 32 public Path outputFile; | |
| 33 | |
| 34 private Map<String,List<NucleosomeCall>> nucs = new HashMap<>(); | |
| 35 | |
| 36 private List<NucleosomeCall> getIntervalNucleosomes(Interval i) { | |
| 37 List<NucleosomeCall> intervalNucs = new ArrayList<>(); | |
| 38 for (NucleosomeCall call : nucs.get(i.getChr())) { | |
| 39 if (call.getDyad() >= i.low() && call.getDyad() <= i.high()) { | |
| 40 intervalNucs.add(call); | |
| 41 } | |
| 42 } | |
| 43 | |
| 44 return intervalNucs; | |
| 45 } | |
| 46 | |
| 47 @Override | |
| 48 public void run() throws IOException { | |
| 49 log.debug("Initializing input file"); | |
| 50 NucleosomeCallsFile nucsFile = new NucleosomeCallsFile(inputFile); | |
| 51 log.debug("Loading all nucleosomes"); | |
| 52 for (NucleosomeCall nuc : nucsFile) { | |
| 53 if (nuc == null) continue; | |
| 54 if (!nucs.containsKey(nuc.getChr())) { | |
| 55 nucs.put(nuc.getChr(), new ArrayList<NucleosomeCall>()); | |
| 56 } | |
| 57 nucs.get(nuc.getChr()).add(nuc); | |
| 58 } | |
| 59 nucsFile.close(); | |
| 60 | |
| 61 log.debug("Initializing output file"); | |
| 62 int skipped = 0; | |
| 63 try (BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) { | |
| 64 log.debug("Finding boundary nucleosomes for each interval"); | |
| 65 NucleosomeCall.DyadComparator comparator = new NucleosomeCall.DyadComparator(); | |
| 66 for (Interval interval : lociFile) { | |
| 67 writer.write(interval.toBed()); | |
| 68 | |
| 69 // Get all of the nucleosomes within this interval | |
| 70 List<NucleosomeCall> intervalNucs = getIntervalNucleosomes(interval); | |
| 71 | |
| 72 if (intervalNucs.size() > 0) { | |
| 73 // Sort the list by nucleosome position | |
| 74 Collections.sort(intervalNucs, comparator); | |
| 75 if (interval.isCrick()) { | |
| 76 Collections.reverse(intervalNucs); | |
| 77 } | |
| 78 | |
| 79 int fivePrime = intervalNucs.get(0).getDyad(); | |
| 80 int threePrime = intervalNucs.get(intervalNucs.size()-1).getDyad(); | |
| 81 writer.write("\t"+fivePrime+"\t"+threePrime); | |
| 82 } else { | |
| 83 skipped++; | |
| 84 writer.write("\tNA\tNA"); | |
| 85 } | |
| 86 | |
| 87 writer.newLine(); | |
| 88 } | |
| 89 } | |
| 90 | |
| 91 lociFile.close(); | |
| 92 log.info("Skipped "+skipped+" intervals with 0 nucleosomes"); | |
| 93 } | |
| 94 | |
| 95 public static void main(String[] args) { | |
| 96 new FindBoundaryNucleosomes().instanceMain(args); | |
| 97 } | |
| 98 } |
