annotate NGSrich_0.5.5/src/_main/EnrichmentStatsComputer.java @ 0:89ad0a9cca52 default tip

Uploaded
author pfrommolt
date Mon, 21 Nov 2011 08:12:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
1 package _main;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
2
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
3 import java.io.File;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
4 import java.io.FileNotFoundException;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
5 import java.io.FileWriter;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
6 import java.io.IOException;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
7 import java.util.Scanner;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
8
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
9 import org.jdom.Element;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
10
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
11 import datastructures.ReducedReadLine;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
12 import datastructures.TargetLine;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
13 import exceptions.ChromosomeFormatException;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
14 import exceptions.ChromosomeNotFoundException;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
15 import exceptions.NullOrNegativeRangeException;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
16 import exceptions.RangeFormatException;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
17 import exceptions.RangeLimitNotFoundException;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
18
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
19 import middlewares.GeneExtractor;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
20 import middlewares.HitsCounter;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
21 import middlewares.Misc;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
22 import middlewares.ReadCounter;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
23 import middlewares.XMLSummaryFileBuilder;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
24
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
25 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
26 * This class is for computation of the coverage, BED coverage and statistics
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
27 * output files.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
28 *
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
29 * @author Ali Abdallah
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
30 * @version 02.02.2011
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
31 * @since jdk 1.6.0
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
32 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
33
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
34 public class EnrichmentStatsComputer {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
35
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
36 /* ***********************************************************************
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
37 * FIELDS OF THE CLASS
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
38 * ***********************************************************************/
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
39 private String
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
40 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
41 * 4-column alignment file sorted by alignment subject and position.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
42 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
43 align,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
44 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
45 * 3-column target file sorted by subject and target starting position.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
46 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
47 target,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
48 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
49 * Annotation file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
50 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
51 genome,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
52 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
53 * Statistics file in the xml-format.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
54 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
55 summary,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
56 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
57 * Coverage file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
58 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
59 details,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
60 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
61 * Coverage BED file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
62 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
63 targetCS,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
64 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
65 * The absolute name of the read alignment file without the extension.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
66 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
67 prefix,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
68
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
69 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
70 * The directory under which temporary data are saved (temporary directory).
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
71 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
72 tmpDir,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
73 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
74 * The directory of the computed results (output directory).
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
75 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
76 outDir;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
77
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
78
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
79 private Scanner
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
80 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
81 * Scanner of the read alignment file (sam format).
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
82 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
83 rScanner,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
84 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
85 * Scanner of the genome annotation file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
86 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
87 gScanner,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
88 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
89 * Scanner of the file of targets.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
90 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
91 tScanner;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
92
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
93 private FileWriter
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
94 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
95 * Writer used to write the computed statistics into the statistics file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
96 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
97 summaryWriter,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
98 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
99 * Writer used to write coverage data at base-level into the coverage file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
100 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
101 detailsWriter,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
102 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
103 * Writer used to write the summarized coverage data at target level into
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
104 * the coverage BED file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
105 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
106 targetCSWriter;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
107
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
108 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
109 * Counter used to counts the coverage frequencies at different levels.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
110 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
111 private HitsCounter hc;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
112 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
113 * Counter used to count both: Reads exactly on the targets or overlapping
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
114 * the targets and Reads doing this in a scope of 100 resp. 200 bases from
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
115 * both end.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
116 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
117 private ReadCounter rc;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
118
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
119 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
120 * A Gene Extractor used to extract the genes overlapping the specified
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
121 * targets.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
122 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
123 private GeneExtractor ge;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
124
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
125 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
126 * Counter for the reads.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
127 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
128 private int numReads = 0,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
129 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
130 * Counter for all target bases.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
131 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
132 targetBases = 0,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
133 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
134 * Counter for the bases in a specific target.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
135 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
136 currTargetBases = 0;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
137
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
138 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
139 * The average Coverage of the targets.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
140 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
141 private double avCov = 0;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
142 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
143 * The average Coverage of the current observed target.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
144 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
145 private double curAvCov = 0;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
146 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
147 * Flag indicating the end of the read file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
148 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
149 private boolean endReads = false;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
150 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
151 * Flag indicates if the coverage line was written or not.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
152 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
153 private boolean covWritten = false;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
154
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
155 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
156 * Contains always the current observers read line from the sam file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
157 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
158 private ReducedReadLine rLine;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
159
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
160
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
161 private int rlength;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
162
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
163 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
164 * ID's of the leaf-tags in the XML summary file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
165 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
166 private static final int
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
167 from30xPERC = 28, from30xBASES = 27,from20xPERC = 26, from20xBASES = 25,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
168 from10xPERC = 24, from10xBASES = 23,from5xPERC = 22, from5xBASES = 21,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
169 from1xPERC = 20, from1xBASES = 19, OV200P = 18, OV_200 = 17,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
170 OV100P = 16, OV_100 = 15, OV_PERC = 14, OV = 13, ON200P = 12,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
171 ON_200 = 11, ON100P = 10, ON_100 = 9, ON_PERC = 8, ON = 7,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
172 STANDARD_ERROR = 6, AVERAGE_COVERAGE = 5, TARGET_BASES = 4,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
173 NUMBER_OF_READS = 3, READ_LENGTH = 2, SAMPLE_NAME = 1, SAMPLE_NUMBER = 0;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
174
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
175 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
176 * Constructs a EnrichmentStatsComputer object.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
177 *
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
178 * @param prefix the name of the sam file without its extension.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
179 * @param proc the number of the process running the software.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
180 * @param tmpDir the temporary directory
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
181 * @param outDir the output directory.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
182 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
183 public EnrichmentStatsComputer(String prefix, String proc, String tmpDir,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
184 String outDir) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
185 super();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
186
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
187 this.prefix = prefix;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
188
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
189 // Setting the paths.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
190 this.tmpDir = tmpDir + Misc.slash(tmpDir);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
191 this.outDir = outDir + Misc.slash(outDir);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
192 this.align = this.tmpDir + "NGSrich_" + prefix + "_" + proc+ ".txt";
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
193 this.target = this.tmpDir + "NGSrich_target_" + proc + ".txt";
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
194 this.genome = this.tmpDir + "NGSrich_genome_" + proc + ".txt";
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
195 this.details = this.tmpDir + "coverage_" + proc + ".txt";
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
196 this.summary = this.outDir + "data/" + prefix + "_enrichment.xml";
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
197 this.targetCS = this.outDir + "data/" + prefix + "_enrichment.bed";
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
198
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
199 // Constructing and initializing various objects.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
200 try {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
201 init();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
202 } catch (ChromosomeFormatException e) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
203 e.printStackTrace();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
204 } catch (ChromosomeNotFoundException e) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
205 e.printStackTrace();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
206 } catch (RangeFormatException e) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
207 e.printStackTrace();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
208 } catch (RangeLimitNotFoundException e) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
209 e.printStackTrace();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
210 } catch (IOException e) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
211 e.printStackTrace();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
212 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
213 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
214
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
215 public void init() throws IOException, ChromosomeFormatException,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
216 ChromosomeNotFoundException, RangeFormatException,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
217 RangeLimitNotFoundException {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
218 hc = new HitsCounter();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
219 rc = new ReadCounter();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
220 rScanner = new Scanner(new File(align));
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
221 tScanner = new Scanner(new File(target));
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
222 gScanner = new Scanner(new File(genome));
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
223 File sum = new File(summary);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
224 if (!sum.exists()) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
225 sum.createNewFile();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
226 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
227 summaryWriter = new FileWriter(sum);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
228 detailsWriter = new FileWriter(new File(details));
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
229 targetCSWriter = new FileWriter(new File(targetCS));
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
230 rLine = new ReducedReadLine(rScanner.nextLine());
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
231 rlength = (rLine.isForwardRead())?rLine.end()-rLine.start()+1
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
232 :rLine.start()-rLine.end()+1;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
233 numReads++;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
234 ge = new GeneExtractor(genome);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
235 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
236
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
237 private void finit() throws IOException {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
238 summaryWriter.close();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
239 rScanner.close();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
240 tScanner.close();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
241 gScanner.close();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
242 detailsWriter.close();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
243 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
244
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
245 /* ***********************************************************************
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
246 * MAIN COMPUTATION
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
247 * ***********************************************************************/
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
248 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
249 * The main computation method.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
250 *
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
251 * @throws IOException if the coverage file or the summary file can't be
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
252 * written.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
253 * @throws NullOrNegativeRangeException
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
254 * @throws RangeLimitNotFoundException
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
255 * @throws ChromosomeNotFoundException
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
256 * @throws ChromosomeFormatException
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
257 * @throws RangeFormatException
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
258 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
259 public void compute() throws IOException,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
260 RangeFormatException,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
261 ChromosomeFormatException,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
262 ChromosomeNotFoundException,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
263 RangeLimitNotFoundException,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
264 NullOrNegativeRangeException {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
265 // run over all targets.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
266 while (tScanner.hasNextLine()) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
267 // Set a flag indicating if the target file and the read file are
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
268 // at the same chromosome level. (Standard value: false)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
269 boolean chromAlreadyReached = false;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
270 // Read the next target.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
271 TargetLine tLine = new TargetLine(tScanner.nextLine());
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
272 // Save the number of target bases.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
273 currTargetBases = tLine.end() - tLine.start() + 1;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
274 // Initialize the array of the number if hits on each base of the
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
275 // target.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
276 int[] bases = new int[currTargetBases];
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
277 // Update the current total number of observed target bases.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
278 targetBases += currTargetBases;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
279 // Initialize the average coverage of the current target.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
280 curAvCov = 0;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
281 // Set a flag indicating if the coverage data for the current target
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
282 // are already written or not.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
283 covWritten = false;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
284 // Find the gene overlapping the current target by using the
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
285 // logarithmic time gene extractor.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
286 String currTargetGene = ge.extractGene(tLine);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
287
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
288 // Stop if end of the alignment file is reached.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
289 while (!endReads) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
290 // Otherwise.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
291 // Check if the alignment file and the target file are at the
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
292 // same chromosome level.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
293 if (rLine.chrom().equals(tLine.chrom())) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
294 chromAlreadyReached = true;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
295 // Target not exceeded yet.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
296 if (!readExceededTarget(rLine, tLine)) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
297 // Increment "on target" and "overlapping" values.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
298 incOnTargetValues(rLine, tLine);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
299 incOverlapsValues(rLine, tLine, bases);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
300 } else {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
301 // Compute the average coverage of the current target
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
302 // and update the target hits values at the 5 different
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
303 // levels.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
304 curAvCov = updateTargetHitsValues(bases);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
305 // Write the coverage data for the current target.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
306 writeCoverageLines(currTargetBases, curAvCov, tLine
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
307 + "", bases, currTargetGene);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
308 // Break the while-loop, since all next reads belong
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
309 // to next targets.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
310 break;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
311 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
312 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
313 // otherwise
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
314 else {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
315 // alignment took already place beforehand.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
316 if (!covWritten && chromAlreadyReached) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
317 curAvCov = updateTargetHitsValues(bases);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
318 writeCoverageLines(currTargetBases, curAvCov, tLine
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
319 + "", bases, currTargetGene);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
320 break;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
321 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
322 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
323 // If exists, get the next read for the next target and
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
324 // increment the number of reads by one.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
325 if (rScanner.hasNextLine()) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
326 rLine = new ReducedReadLine(rScanner.nextLine());
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
327 numReads++;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
328 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
329 // otherwise (no reads anymore).
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
330 else {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
331 endReads = true;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
332 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
333 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
334 // Reads completely processed but last coverage line not written.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
335 if (endReads && covWritten == false) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
336 curAvCov = updateTargetHitsValues(bases);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
337 writeCoverageLines(currTargetBases, curAvCov, tLine + "",
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
338 bases, currTargetGene);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
339 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
340 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
341 // Count the remaining reads.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
342 while (rScanner.hasNextLine()) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
343 rScanner.nextLine();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
344 numReads++;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
345 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
346 targetCSWriter.close();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
347 System.out.println("Target BED coverage file created ("+targetCS+")");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
348 System.out.println("Per-base coverage file created ("+details+")");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
349
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
350 // Self explanatory.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
351 double sd = Math.sqrt(var(targetBases, avCov));
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
352 writeSummary(numReads, targetBases, avCov, sd);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
353
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
354 System.out.println("Summary statistics file created ("+summary+")");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
355 finit();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
356 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
357
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
358 /* ***********************************************************************
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
359 * AUXILIARY METHODS
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
360 * ***********************************************************************/
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
361 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
362 * Updates the hits values at 5 different levels (1x, 5x, 10x, 20x, 30x) and
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
363 * the average target coverage.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
364 *
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
365 * @param bases the hits on the bases of the current target.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
366 * @return the average coverage of the current target.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
367 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
368 private double updateTargetHitsValues(int[] bases) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
369 double curAvCov = 0;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
370 for(int i = 0; i < bases.length; i++) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
371 curAvCov += bases[i];
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
372 hc.updateCurrentHits(bases[i]);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
373 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
374 hc.updateHits();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
375 avCov += curAvCov;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
376 return curAvCov;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
377 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
378
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
379 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
380 * Writes the summary file containing overall statistics for the observed
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
381 * sample.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
382 *
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
383 * @param numReads the total number of reads.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
384 * @param targetBases the total number of target bases.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
385 * @param avCov the average target coverage.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
386 * @param sd the standard deviation of the target coverage.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
387 *
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
388 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
389 private void writeSummary(int numReads, int targetBases, double avCov,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
390 double sd){
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
391
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
392 int x1 = hc.getNextLevelHits();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
393 int x5 = hc.getNextLevelHits();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
394 int x10 = hc.getNextLevelHits();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
395 int x20 = hc.getNextLevelHits();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
396 int x30 = hc.getNextLevelHits();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
397
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
398 XMLSummaryFileBuilder xbf =
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
399 new XMLSummaryFileBuilder(summary, prefix);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
400 Element[] tags = xbf.getLeafs();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
401 tags[SAMPLE_NUMBER].setText("1");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
402 tags[SAMPLE_NAME].setText(prefix);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
403 tags[READ_LENGTH].setText(rlength+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
404 tags[NUMBER_OF_READS].setText(numReads+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
405 tags[TARGET_BASES].setText(targetBases+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
406 int d = 2;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
407 tags[AVERAGE_COVERAGE].setText(dfloor(avCov / targetBases, d)+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
408 tags[STANDARD_ERROR].setText(dfloor(sd, d)+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
409
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
410 tags[ON].setText(rc.on()+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
411 tags[ON_PERC].setText(dfloor((rc.on()/(double)numReads)*100, d)+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
412 tags[ON_100].setText(rc.on100()+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
413 tags[ON100P].setText(dfloor((rc.on100()/(double)numReads)*100, d)+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
414 tags[ON_200].setText(rc.on200()+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
415 tags[ON200P].setText(dfloor((rc.on200()/(double)numReads)*100, d)+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
416
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
417 tags[OV].setText(rc.over()+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
418 tags[OV_PERC].setText(dfloor((rc.over()/(double)numReads)*100, d)+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
419 tags[OV_100].setText(rc.over100()+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
420 tags[OV100P].setText(dfloor((rc.over100()/(double)numReads)*100, d)+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
421 tags[OV_200].setText(rc.over200()+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
422 tags[OV200P].setText(dfloor((rc.over200()/(double)numReads)*100, d)+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
423
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
424 tags[from1xBASES].setText(x1+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
425 tags[from1xPERC].setText(dfloor((x1/(double)targetBases)*100, d)+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
426 tags[from5xBASES].setText(x5+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
427 tags[from5xPERC].setText(dfloor((x5/(double)targetBases)*100, d)+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
428 tags[from10xBASES].setText(x10+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
429 tags[from10xPERC].setText(dfloor((x10/(double)targetBases)*100, d)+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
430 tags[from20xBASES].setText(x20+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
431 tags[from20xPERC].setText(dfloor((x20/(double)targetBases)*100, d)+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
432 tags[from30xBASES].setText(x30+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
433 tags[from30xPERC].setText(dfloor((x30/(double)targetBases)*100, d)+"");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
434
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
435 xbf.writeXMLFile(summary);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
436 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
437
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
438 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
439 * Computes the variance of the target coverage.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
440 *
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
441 * @param targetBases the total number of target bases.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
442 * @param avCov the average target coverage.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
443 * @return The variance of the target coverage.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
444 * @throws FileNotFoundException if the coverage BED file can't be found.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
445 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
446 private double var(int targetBases, double avCov)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
447 throws FileNotFoundException {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
448
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
449 Scanner mdScanner = new Scanner(new File(targetCS));
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
450 int n = 0;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
451 double X = avCov / targetBases;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
452 double sum = 0;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
453
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
454 while (mdScanner.hasNextLine()) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
455 String mdLine = mdScanner.nextLine();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
456 String[] mdFields = mdLine.split("\t");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
457 double X_i = Double.parseDouble(mdFields[4]);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
458 sum += (X_i - X) * (X_i - X);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
459 n += 1;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
460 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
461
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
462 sum /= n - 1;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
463 return sum;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
464 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
465
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
466 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
467 * Writes detailed coverage data to the coverage file and a coverage line
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
468 * to the coverage BED file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
469 *
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
470 * @param currTargetBases number of bases of the current target.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
471 * @param curAvCov the average coverage on the specified target tLine.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
472 * @param tLine the target line.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
473 * @param bases the hits on the bases of the target tLine.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
474 * @param currTargetGene the gene overlapping the current target.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
475 * @throws IOException if the writer fails to write to coverage line.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
476 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
477 private void writeCoverageLines(int currTargetBases, double curAvCov,
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
478 String tLine, int[] bases, String currTargetGene)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
479 throws IOException {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
480 detailsWriter.write(tLine + "\r\n");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
481 for(int i = 0; i < bases.length; i++)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
482 detailsWriter.write(bases[i] + "\n");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
483 targetCSWriter.write(tLine + "\t" + currTargetGene + "\t"
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
484 + dfloor((curAvCov / (double)currTargetBases), 2) + "\t"
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
485 + dfloor((hc.getNextLevelCurrentHits() / (double)currTargetBases)*100, 2)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
486 + "\t"
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
487 + dfloor((hc.getNextLevelCurrentHits() / (double)currTargetBases)*100, 2)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
488 + "\t"
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
489 + dfloor((hc.getNextLevelCurrentHits() / (double)currTargetBases)*100, 2)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
490 + "\t"
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
491 + dfloor((hc.getNextLevelCurrentHits() / (double)currTargetBases)*100, 2)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
492 + "\t"
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
493 + dfloor((hc.getNextLevelCurrentHits() / (double)currTargetBases)*100, 2)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
494 + "\r\n");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
495 covWritten = true;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
496 hc.resetCurrentHits();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
497 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
498
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
499 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
500 * Cut decimal places greater than the specified one.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
501 * @param number the number to be cut.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
502 * @param decimalPlace the decimalPlace after which the cut-operation
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
503 * take place.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
504 * @return the cut number.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
505 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
506 public static double dfloor(double number, int decimalPlace) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
507 return Math.floor(number * Math.pow(10, decimalPlace))
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
508 / Math.pow(10, decimalPlace);
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
509 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
510
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
511 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
512 * Checks if the read exceeded the target.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
513 * @param rl read line.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
514 * @param tl target line.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
515 * @return true if the read starts right to the end position of the target
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
516 * and false otherwise.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
517 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
518 public static boolean readExceededTarget(ReducedReadLine rl, TargetLine tl) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
519 return !(rl.start() <= tl.end() || rl.end() <= tl.end());
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
520 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
521
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
522 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
523 * <pre>
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
524 * 1. Increments the number of Reads, overlapping targets within a scope of
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
525 * 0, 100 and 200 bases from both ends, by one.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
526 * 2. Increments the number of hits on the hitted bases by one.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
527 * </pre>
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
528 * @param rl the read line.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
529 * @param tl the target line.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
530 * @param bases the base hits array of the target.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
531 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
532 private void incOverlapsValues(ReducedReadLine rl, TargetLine tl, int[] bases) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
533 // Forward reads.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
534 if (rl.isForwardRead()) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
535 if (rl.end() >= tl.start() && rl.start() <= tl.end()) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
536 // incrementing the number of his on the hitted bases by one.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
537 int start = Math.max(0, rl.start() - tl.start());
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
538 int stop = Math.min(bases.length - 1, rl.end() - tl.start());
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
539 for (int i = start; i <= stop; i++)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
540 bases[i]++;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
541 // incrementing the number of Reads overlapping targets by one.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
542 rc.incOver();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
543 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
544 if (tl.start() - 100 <= rl.end() && rl.start() <= tl.end() + 100)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
545 // incrementing the number of Reads overlapping targets(+/-100)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
546 // by one.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
547 rc.incOver100();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
548 if (tl.start() - 100 <= rl.end() && rl.start() <= tl.end() + 100)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
549 // incrementing the number of Reads overlapping targets(+/-200)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
550 // by one.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
551 rc.incOver200();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
552 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
553 // Reverse reads.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
554 else {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
555 if (rl.start() >= tl.start() && rl.end() <= tl.end()) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
556 int start = Math.max(0, rl.end() - tl.start());
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
557 int stop = Math.min(bases.length - 1, rl.start() - tl.start());
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
558 for (int i = start; i <= stop; i++) bases[i]++;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
559 rc.incOver();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
560 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
561 if (rl.start() >= tl.start() - 100 && rl.end() <= tl.end() + 100)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
562 rc.incOver100();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
563 if (rl.start() >= tl.start() - 100 && rl.end() <= tl.end() + 100)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
564 rc.incOver200();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
565 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
566 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
567
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
568 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
569 * Increments the number of Reads on targets within a scope of 0, 100 and
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
570 * 200 bases from both ends, by one.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
571 * @param rl the read line.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
572 * @param tl the target line.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
573 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
574 private void incOnTargetValues(ReducedReadLine rl, TargetLine tl) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
575 // Forward read.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
576 if (rl.isForwardRead()) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
577 if ((tl.start() <= rl.start()) && (rl.end() <= tl.end()))
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
578 rc.incOn();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
579 if ((tl.start()-100 <= rl.start()) && (rl.end() <= tl.end()+100))
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
580 rc.incOn100();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
581 if ((tl.start()-200 <= rl.start()) && (rl.end() <= tl.end()+200))
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
582 rc.incOn200();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
583 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
584 // Reverse read.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
585 else {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
586 if (tl.start() <= rl.end() && rl.start() <= tl.end())
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
587 rc.incOn();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
588 if (tl.start()-100 <= rl.end() && rl.start()<= tl.end()+100)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
589 rc.incOn100();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
590 if (tl.start()-200 <= rl.end() && rl.start() <= tl.end()+200)
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
591 rc.incOn200();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
592 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
593 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
594
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
595 }