Mercurial > repos > timpalpant > java_genomics_toolkit
comparison java-genomics-toolkit/src/edu/unc/genomics/nucleosomes/GreedyCaller.java @ 0:1daf3026d231
Upload alpha version
author | timpalpant |
---|---|
date | Mon, 13 Feb 2012 21:55:55 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1daf3026d231 |
---|---|
1 package edu.unc.genomics.nucleosomes; | |
2 | |
3 import java.io.BufferedWriter; | |
4 import java.io.IOException; | |
5 import java.nio.charset.Charset; | |
6 import java.nio.file.Files; | |
7 import java.nio.file.Path; | |
8 import java.util.Iterator; | |
9 | |
10 import org.apache.log4j.Logger; | |
11 import org.broad.igv.bbfile.WigItem; | |
12 | |
13 import com.beust.jcommander.Parameter; | |
14 import edu.unc.genomics.CommandLineTool; | |
15 import edu.unc.genomics.CommandLineToolException; | |
16 import edu.unc.genomics.ReadablePathValidator; | |
17 import edu.unc.genomics.io.WigFile; | |
18 import edu.unc.genomics.io.WigFileException; | |
19 import edu.unc.utils.SortUtils; | |
20 | |
21 public class GreedyCaller extends CommandLineTool { | |
22 | |
23 private static final Logger log = Logger.getLogger(GreedyCaller.class); | |
24 | |
25 private static final int CHUNK_SIZE = 500_000; | |
26 | |
27 @Parameter(names = {"-d", "--dyads"}, description = "Dyad counts file", required = true, validateWith = ReadablePathValidator.class) | |
28 public WigFile dyadsFile; | |
29 @Parameter(names = {"-s", "--smoothed"}, description = "Smoothed dyad counts file", required = true, validateWith = ReadablePathValidator.class) | |
30 public WigFile smoothedDyadsFile; | |
31 @Parameter(names = {"-n", "--size"}, description = "Nucleosome size (bp)") | |
32 public int nucleosomeSize = 147; | |
33 @Parameter(names = {"-o", "--output"}, description = "Output file", required = true) | |
34 public Path outputFile; | |
35 | |
36 public void run() throws IOException { | |
37 int halfNuc = nucleosomeSize / 2; | |
38 | |
39 try (BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) { | |
40 for (String chr : smoothedDyadsFile.chromosomes()) { | |
41 log.debug("Processing chromosome "+chr); | |
42 int chunkStart = smoothedDyadsFile.getChrStart(chr); | |
43 int chrStop = smoothedDyadsFile.getChrStop(chr); | |
44 while (chunkStart < chrStop) { | |
45 int chunkStop = chunkStart + CHUNK_SIZE; | |
46 int paddedStart = chunkStart - nucleosomeSize; | |
47 int paddedStop = chunkStop + nucleosomeSize; | |
48 log.debug("Processing chunk "+chunkStart+"-"+chunkStop); | |
49 | |
50 log.debug("Loading data and sorting"); | |
51 Iterator<WigItem> dyadsIter; | |
52 Iterator<WigItem> smoothedIter; | |
53 try { | |
54 dyadsIter = dyadsFile.query(chr, paddedStart, paddedStop); | |
55 smoothedIter = smoothedDyadsFile.query(chr, paddedStart, paddedStop); | |
56 } catch (IOException | WigFileException e) { | |
57 e.printStackTrace(); | |
58 throw new CommandLineToolException("Error loading data from Wig file"); | |
59 } | |
60 | |
61 float[] dyads = WigFile.flattenData(dyadsIter, paddedStart, paddedStop); | |
62 float[] smoothed = WigFile.flattenData(smoothedIter, paddedStart, paddedStop); | |
63 int[] sortedIndices = SortUtils.indexSort(smoothed); | |
64 | |
65 // Proceed through the data in descending order | |
66 log.debug("Calling nucleosomes"); | |
67 for (int j = sortedIndices.length; j >= 0; j++) { | |
68 int i = sortedIndices[j]; | |
69 int dyad = paddedStart + i; | |
70 | |
71 if (smoothed[i] > 0) { | |
72 int nucStart = Math.max(1, dyad-halfNuc); | |
73 int nucStop = Math.min(dyad+halfNuc, chrStop); | |
74 NucleosomeCall call = new NucleosomeCall(chr, nucStart, nucStop); | |
75 call.setDyad(dyad); | |
76 | |
77 double occupancy = 0; | |
78 double weightedSum = 0; | |
79 double smoothedSum = 0; | |
80 double sumOfSquares = 0; | |
81 for (int bp = nucStart; bp <= nucStop; bp++) { | |
82 occupancy += dyads[bp-paddedStart]; | |
83 weightedSum += bp * dyads[bp-paddedStart]; | |
84 smoothedSum += smoothed[bp-paddedStart]; | |
85 sumOfSquares += bp * bp * dyads[bp-paddedStart]; | |
86 } | |
87 call.setOccupancy(occupancy); | |
88 | |
89 if (occupancy > 0) { | |
90 call.setDyadMean(Math.round(weightedSum/occupancy)); | |
91 call.setConditionalPosition(smoothed[i] / smoothedSum); | |
92 double variance = (sumOfSquares - weightedSum*call.getDyadMean()) / occupancy; | |
93 call.setDyadStdev(Math.sqrt(variance)); | |
94 | |
95 // Only write nucleosomes within the current chunk to disk | |
96 if (chunkStart <= dyad && dyad <= chunkStop) { | |
97 writer.write(call.toString()); | |
98 writer.newLine(); | |
99 } | |
100 | |
101 // Don't allow nucleosome calls overlapping this nucleosome | |
102 int low = Math.max(i-nucleosomeSize, 0); | |
103 int high = Math.min(i-nucleosomeSize, paddedStop-1); | |
104 for (int k = low; k <= high; k++) { | |
105 smoothed[k] = 0; | |
106 } | |
107 } | |
108 } | |
109 } | |
110 | |
111 chunkStart = chunkStop + 1; | |
112 } | |
113 } | |
114 } | |
115 } | |
116 | |
117 public static void main(String[] args) { | |
118 new GreedyCaller().instanceMain(args); | |
119 } | |
120 } |