Mercurial > repos > artbio > small_rna_maps
comparison small_rna_maps.py @ 26:376fae7c9f32 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 32eba59fa52705ae74fd9fe65f6f55be783bfc74
author | artbio |
---|---|
date | Sun, 14 Apr 2019 14:43:44 -0400 |
parents | 07aa8f928d4b |
children | fe1a9cfaf5c3 |
comparison
equal
deleted
inserted
replaced
25:07aa8f928d4b | 26:376fae7c9f32 |
---|---|
9 def Parser(): | 9 def Parser(): |
10 the_parser = argparse.ArgumentParser() | 10 the_parser = argparse.ArgumentParser() |
11 the_parser.add_argument('--inputs', dest='inputs', required=True, | 11 the_parser.add_argument('--inputs', dest='inputs', required=True, |
12 nargs='+', help='list of input BAM files') | 12 nargs='+', help='list of input BAM files') |
13 the_parser.add_argument('--minsize', dest='minsize', type=int, | 13 the_parser.add_argument('--minsize', dest='minsize', type=int, |
14 default=0, help='minimal size of reads') | 14 default=19, help='minimal size of reads') |
15 the_parser.add_argument('--maxsize', dest='maxsize', type=int, | 15 the_parser.add_argument('--maxsize', dest='maxsize', type=int, |
16 default=10000, help='maximal size of reads') | 16 default=29, help='maximal size of reads') |
17 the_parser.add_argument('--cluster', dest='cluster', type=int, | 17 the_parser.add_argument('--cluster', dest='cluster', type=int, |
18 default=0, help='clustering distance') | 18 default=0, help='clustering distance') |
19 the_parser.add_argument('--sample_names', dest='sample_names', | 19 the_parser.add_argument('--sample_names', dest='sample_names', |
20 required=True, nargs='+', | 20 required=True, nargs='+', |
21 help='list of sample names') | 21 help='list of sample names') |
61 else: | 61 else: |
62 self.nostrand = True | 62 self.nostrand = True |
63 self.bam_object = pysam.AlignmentFile(bam_file, 'rb') | 63 self.bam_object = pysam.AlignmentFile(bam_file, 'rb') |
64 self.chromosomes = dict(zip(self.bam_object.references, | 64 self.chromosomes = dict(zip(self.bam_object.references, |
65 self.bam_object.lengths)) | 65 self.bam_object.lengths)) |
66 self.map_dict = self.create_map(self.bam_object, self.minsize, | 66 self.map_dict = self.create_map(self.bam_object, self.nostrand) |
67 self.maxsize, self.nostrand) | |
68 if self.cluster: | 67 if self.cluster: |
69 self.map_dict = self.tile_map(self.map_dict, self.cluster) | 68 self.map_dict = self.tile_map(self.map_dict, self.cluster) |
70 | 69 |
71 def create_map(self, bam_object, minsize, maxsize, nostrand=False): | 70 def create_map(self, bam_object, nostrand=False): |
72 ''' | 71 ''' |
73 Returns a map_dictionary {(chromosome,read_position,polarity): | 72 Returns a map_dictionary {(chromosome,read_position,polarity): |
74 [read_length, ...]} | 73 [read_length, ...]} |
75 ''' | 74 ''' |
76 map_dictionary = defaultdict(list) | 75 map_dictionary = defaultdict(list) |
292 Dataset, Chromosome, Chrom_length, <category (size)>, <some value> | 291 Dataset, Chromosome, Chrom_length, <category (size)>, <some value> |
293 from a dictionary of sizes: {chrom: {polarity: {size: nbre of reads}}} | 292 from a dictionary of sizes: {chrom: {polarity: {size: nbre of reads}}} |
294 out is an *open* file handler | 293 out is an *open* file handler |
295 ''' | 294 ''' |
296 for chrom in sorted(sizedic): | 295 for chrom in sorted(sizedic): |
297 sizes = sizedic[chrom]['F'].keys() | 296 sizes = range(self.minsize, self.maxsize+1) |
298 sizes.extend(sizedic[chrom]['R'].keys()) | |
299 strandness = defaultdict(int) | 297 strandness = defaultdict(int) |
300 sizeness = defaultdict(int) | 298 sizeness = defaultdict(int) |
301 for polarity in sizedic[chrom]: | 299 for polarity in sizedic[chrom]: |
302 for size in range(min(sizes), max(sizes)+1): | 300 for size in sizes: |
303 try: | 301 strandness[polarity] += sizedic[chrom][polarity][size] |
304 strandness[polarity] += sizedic[chrom][polarity][size] | |
305 except KeyError: | |
306 pass | |
307 sizeness[size] += sizedic[chrom][polarity][size] | 302 sizeness[size] += sizedic[chrom][polarity][size] |
308 Strandbias = strandness['F'] + strandness['R'] | 303 Strandbias = strandness['F'] + strandness['R'] |
309 if Strandbias: | 304 if Strandbias: |
310 Strandbias = strandness['F'] / float(Strandbias) | 305 Strandbias = strandness['F'] / float(Strandbias) |
311 else: | 306 else: |
316 if StDev: | 311 if StDev: |
317 sizeness[size] = (sizeness[size] - Mean) / StDev | 312 sizeness[size] = (sizeness[size] - Mean) / StDev |
318 else: | 313 else: |
319 sizeness[size] = 0 | 314 sizeness[size] = 0 |
320 for polarity in sorted(sizedic[chrom]): | 315 for polarity in sorted(sizedic[chrom]): |
321 for size in range(min(sizes), max(sizes)+1): | 316 for size in sizes: |
322 try: | 317 try: |
323 line = [self.sample_name, chrom, polarity, size, | 318 line = [self.sample_name, chrom, polarity, size, |
324 sizedic[chrom][polarity][size], | 319 sizedic[chrom][polarity][size], |
325 Strandbias, sizeness[size]] | 320 Strandbias, sizeness[size]] |
326 except KeyError: | 321 except KeyError: |