comparison ngsutils/support/regions.py @ 2:7a68005de299 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ngsutils commit 9a243c616a4a3156347e38fdb5f35863ae5133f9
author iuc
date Sun, 27 Nov 2016 15:01:21 -0500
parents 4e4e4093d65d
children
comparison
equal deleted inserted replaced
1:8187a729d9f4 2:7a68005de299
2 ''' 2 '''
3 Simple genomic ranges. You can define chrom:start-end ranges, then ask if a 3 Simple genomic ranges. You can define chrom:start-end ranges, then ask if a
4 particular genomic coordinate maps to any of those ranges. This is less- 4 particular genomic coordinate maps to any of those ranges. This is less-
5 efficient than an R-Tree, but easier to code. 5 efficient than an R-Tree, but easier to code.
6 ''' 6 '''
7
7 def __init__(self, name): 8 def __init__(self, name):
8 self.ranges = {} 9 self.ranges = {}
9 self.name = name 10 self.name = name
10 11
11 def add_range(self, chrom, strand, start, end): 12 def add_range(self, chrom, strand, start, end):
12 if not chrom in self.ranges: 13 if chrom not in self.ranges:
13 self.ranges[chrom] = {} 14 self.ranges[chrom] = {}
14 15
15 bin = start / 100000 16 bin = start / 100000
16 if not bin in self.ranges[chrom]: 17 if bin not in self.ranges[chrom]:
17 self.ranges[chrom][bin] = [] 18 self.ranges[chrom][bin] = []
18 self.ranges[chrom][bin].insert(0, (start, end, strand)) 19 self.ranges[chrom][bin].insert(0, (start, end, strand))
19 20
20 if (end / 100000) != bin: 21 if (end / 100000) != bin:
21 for bin in xrange(bin + 1, (end / 100000) + 1): 22 for bin in xrange(bin + 1, (end / 100000) + 1):
22 if not bin in self.ranges[chrom]: 23 if bin not in self.ranges[chrom]:
23 self.ranges[chrom][bin] = [] 24 self.ranges[chrom][bin] = []
24 self.ranges[chrom][bin].insert(0, (start, end, strand)) 25 self.ranges[chrom][bin].insert(0, (start, end, strand))
25 26
26 def get_tag(self, chrom, strand, pos, ignore_strand=False): 27 def get_tag(self, chrom, strand, pos, ignore_strand=False):
27 ''' 28 '''
28 returns (region, is_reverse_orientation) 29 returns (region, is_reverse_orientation)
29 ''' 30 '''
30 if not chrom in self.ranges: 31 if chrom not in self.ranges:
31 return None, False 32 return None, False
32 bin = pos / 100000 33 bin = pos / 100000
33 if not bin in self.ranges[chrom]: 34 if bin not in self.ranges[chrom]:
34 return None, False 35 return None, False
35 for start, end, r_strand in self.ranges[chrom][bin]: 36 for start, end, r_strand in self.ranges[chrom][bin]:
36 if pos >= start and pos <= end: 37 if pos >= start and pos <= end:
37 if ignore_strand or strand == r_strand: 38 if ignore_strand or strand == r_strand:
38 return self.name, False 39 return self.name, False
52 utr_3 = RangeMatch('utr-3') 53 utr_3 = RangeMatch('utr-3')
53 introns = RangeMatch('intron') 54 introns = RangeMatch('intron')
54 promoters = RangeMatch('promoter') 55 promoters = RangeMatch('promoter')
55 56
56 for gene in gtf.genes: 57 for gene in gtf.genes:
57 if valid_chroms and not gene.chrom in valid_chroms: 58 if valid_chroms and gene.chrom not in valid_chroms:
58 continue 59 continue
59 if gene.strand == '+': 60 if gene.strand == '+':
60 promoters.add_range(gene.chrom, gene.strand, gene.start - 2000, gene.start) 61 promoters.add_range(gene.chrom, gene.strand, gene.start - 2000, gene.start)
61 else: 62 else:
62 promoters.add_range(gene.chrom, gene.strand, gene.end, gene.end + 2000) 63 promoters.add_range(gene.chrom, gene.strand, gene.end, gene.end + 2000)
76 for start, end in transcript.exons: 77 for start, end in transcript.exons:
77 if last_end: 78 if last_end:
78 introns.add_range(gene.chrom, gene.strand, last_end, start) 79 introns.add_range(gene.chrom, gene.strand, last_end, start)
79 exons.add_range(gene.chrom, gene.strand, start, end) 80 exons.add_range(gene.chrom, gene.strand, start, end)
80 last_end = end 81 last_end = end
81
82 82
83 self.regions.append(coding) 83 self.regions.append(coding)
84 self.regions.append(utr_5) 84 self.regions.append(utr_5)
85 self.regions.append(utr_3) 85 self.regions.append(utr_3)
86 self.regions.append(exons) 86 self.regions.append(exons)
104 self.counts['mitochondrial'] = 0 104 self.counts['mitochondrial'] = 0
105 105
106 def add_read(self, read, chrom): 106 def add_read(self, read, chrom):
107 if read.is_unmapped: 107 if read.is_unmapped:
108 return 108 return
109 109
110 if self.only_first_fragment and read.is_paired and not read.is_read1: 110 if self.only_first_fragment and read.is_paired and not read.is_read1:
111 return 111 return
112 112
113 tag = None 113 tag = None
114 is_rev = False 114 is_rev = False