Mercurial > repos > iuc > ngsutils_bam_filter
comparison ngsutils/support/regions.py @ 2:7a68005de299 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ngsutils commit 9a243c616a4a3156347e38fdb5f35863ae5133f9
author | iuc |
---|---|
date | Sun, 27 Nov 2016 15:01:21 -0500 |
parents | 4e4e4093d65d |
children |
comparison
equal
deleted
inserted
replaced
1:8187a729d9f4 | 2:7a68005de299 |
---|---|
2 ''' | 2 ''' |
3 Simple genomic ranges. You can define chrom:start-end ranges, then ask if a | 3 Simple genomic ranges. You can define chrom:start-end ranges, then ask if a |
4 particular genomic coordinate maps to any of those ranges. This is less- | 4 particular genomic coordinate maps to any of those ranges. This is less- |
5 efficient than an R-Tree, but easier to code. | 5 efficient than an R-Tree, but easier to code. |
6 ''' | 6 ''' |
7 | |
7 def __init__(self, name): | 8 def __init__(self, name): |
8 self.ranges = {} | 9 self.ranges = {} |
9 self.name = name | 10 self.name = name |
10 | 11 |
11 def add_range(self, chrom, strand, start, end): | 12 def add_range(self, chrom, strand, start, end): |
12 if not chrom in self.ranges: | 13 if chrom not in self.ranges: |
13 self.ranges[chrom] = {} | 14 self.ranges[chrom] = {} |
14 | 15 |
15 bin = start / 100000 | 16 bin = start / 100000 |
16 if not bin in self.ranges[chrom]: | 17 if bin not in self.ranges[chrom]: |
17 self.ranges[chrom][bin] = [] | 18 self.ranges[chrom][bin] = [] |
18 self.ranges[chrom][bin].insert(0, (start, end, strand)) | 19 self.ranges[chrom][bin].insert(0, (start, end, strand)) |
19 | 20 |
20 if (end / 100000) != bin: | 21 if (end / 100000) != bin: |
21 for bin in xrange(bin + 1, (end / 100000) + 1): | 22 for bin in xrange(bin + 1, (end / 100000) + 1): |
22 if not bin in self.ranges[chrom]: | 23 if bin not in self.ranges[chrom]: |
23 self.ranges[chrom][bin] = [] | 24 self.ranges[chrom][bin] = [] |
24 self.ranges[chrom][bin].insert(0, (start, end, strand)) | 25 self.ranges[chrom][bin].insert(0, (start, end, strand)) |
25 | 26 |
26 def get_tag(self, chrom, strand, pos, ignore_strand=False): | 27 def get_tag(self, chrom, strand, pos, ignore_strand=False): |
27 ''' | 28 ''' |
28 returns (region, is_reverse_orientation) | 29 returns (region, is_reverse_orientation) |
29 ''' | 30 ''' |
30 if not chrom in self.ranges: | 31 if chrom not in self.ranges: |
31 return None, False | 32 return None, False |
32 bin = pos / 100000 | 33 bin = pos / 100000 |
33 if not bin in self.ranges[chrom]: | 34 if bin not in self.ranges[chrom]: |
34 return None, False | 35 return None, False |
35 for start, end, r_strand in self.ranges[chrom][bin]: | 36 for start, end, r_strand in self.ranges[chrom][bin]: |
36 if pos >= start and pos <= end: | 37 if pos >= start and pos <= end: |
37 if ignore_strand or strand == r_strand: | 38 if ignore_strand or strand == r_strand: |
38 return self.name, False | 39 return self.name, False |
52 utr_3 = RangeMatch('utr-3') | 53 utr_3 = RangeMatch('utr-3') |
53 introns = RangeMatch('intron') | 54 introns = RangeMatch('intron') |
54 promoters = RangeMatch('promoter') | 55 promoters = RangeMatch('promoter') |
55 | 56 |
56 for gene in gtf.genes: | 57 for gene in gtf.genes: |
57 if valid_chroms and not gene.chrom in valid_chroms: | 58 if valid_chroms and gene.chrom not in valid_chroms: |
58 continue | 59 continue |
59 if gene.strand == '+': | 60 if gene.strand == '+': |
60 promoters.add_range(gene.chrom, gene.strand, gene.start - 2000, gene.start) | 61 promoters.add_range(gene.chrom, gene.strand, gene.start - 2000, gene.start) |
61 else: | 62 else: |
62 promoters.add_range(gene.chrom, gene.strand, gene.end, gene.end + 2000) | 63 promoters.add_range(gene.chrom, gene.strand, gene.end, gene.end + 2000) |
76 for start, end in transcript.exons: | 77 for start, end in transcript.exons: |
77 if last_end: | 78 if last_end: |
78 introns.add_range(gene.chrom, gene.strand, last_end, start) | 79 introns.add_range(gene.chrom, gene.strand, last_end, start) |
79 exons.add_range(gene.chrom, gene.strand, start, end) | 80 exons.add_range(gene.chrom, gene.strand, start, end) |
80 last_end = end | 81 last_end = end |
81 | |
82 | 82 |
83 self.regions.append(coding) | 83 self.regions.append(coding) |
84 self.regions.append(utr_5) | 84 self.regions.append(utr_5) |
85 self.regions.append(utr_3) | 85 self.regions.append(utr_3) |
86 self.regions.append(exons) | 86 self.regions.append(exons) |
104 self.counts['mitochondrial'] = 0 | 104 self.counts['mitochondrial'] = 0 |
105 | 105 |
106 def add_read(self, read, chrom): | 106 def add_read(self, read, chrom): |
107 if read.is_unmapped: | 107 if read.is_unmapped: |
108 return | 108 return |
109 | 109 |
110 if self.only_first_fragment and read.is_paired and not read.is_read1: | 110 if self.only_first_fragment and read.is_paired and not read.is_read1: |
111 return | 111 return |
112 | 112 |
113 tag = None | 113 tag = None |
114 is_rev = False | 114 is_rev = False |