annotate NGSrich_0.5.5/src/middlewares/GeneExtractor.java @ 0:89ad0a9cca52 default tip

Uploaded
author pfrommolt
date Mon, 21 Nov 2011 08:12:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
1 package middlewares;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
2 import java.io.File;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
3 import java.io.FileNotFoundException;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
4 import java.util.Scanner;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
5
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
6 import datastructures.AVLTree;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
7 import datastructures.AnnotationLine;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
8 import datastructures.TargetLine;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
9
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
10 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
11 *
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
12 * Generates an AVL tree for fast extraction of genes (logarithmic time).
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
13 *
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
14 * @author Ali Abdallah
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
15 * @version 0.4.5, 14.07.2011
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
16 * @since jdk 1.6.0
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
17 *
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
18 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
19
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
20 public class GeneExtractor {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
21
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
22 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
23 * The path of the genome annotation file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
24 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
25 String genomeAnnotation;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
26
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
27 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
28 * The avl tree representing the genome annotation file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
29 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
30 AVLTree genesTree;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
31
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
32 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
33 * The scanner scanning the genome file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
34 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
35 Scanner s;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
36
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
37 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
38 * Constructs the avl-tree based on the genome annotation file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
39 * @param genomeAnnotation the genome annotation file.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
40 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
41 public GeneExtractor(String genomeAnnotation) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
42 genesTree = new AVLTree();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
43 this.genomeAnnotation = genomeAnnotation;
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
44 try {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
45 s = new Scanner(new File(genomeAnnotation));
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
46 } catch (FileNotFoundException e) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
47 e.printStackTrace();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
48 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
49 while (s.hasNextLine()) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
50 genesTree.insert(new AnnotationLine(s.nextLine()));
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
51 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
52 s.close();
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
53 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
54
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
55 /**
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
56 * Search the tree for a gene overlapping the specified target.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
57 *
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
58 * @param tl the target line of the current target.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
59 * @return the gene overlapping the specified target, if it exists and
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
60 * "unknown" otherwise.
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
61 */
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
62 public String extractGene(TargetLine tl) {
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
63 AnnotationLine a =
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
64 (AnnotationLine) genesTree.find(new AnnotationLine
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
65 ("*", tl.chrom(), "dummy", tl.start(), tl.end()));
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
66 return ((a != null) ? a.gene() : "unknown");
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
67 }
89ad0a9cca52 Uploaded
pfrommolt
parents:
diff changeset
68 }