annotate small_rna_maps.py @ 25:07aa8f928d4b draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
author artbio
date Wed, 10 Apr 2019 06:38:33 -0400
parents e75a10eba0a6
children 376fae7c9f32
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
1 import argparse
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
2 from collections import defaultdict
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
3
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
4 import numpy
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
5
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
6 import pysam
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
7
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
8
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
9 def Parser():
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
10 the_parser = argparse.ArgumentParser()
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
11 the_parser.add_argument('--inputs', dest='inputs', required=True,
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
12 nargs='+', help='list of input BAM files')
8
1827b74f872b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit e4588eb6c329e4516e9bcfa084a383be81b55c60
artbio
parents: 5
diff changeset
13 the_parser.add_argument('--minsize', dest='minsize', type=int,
1827b74f872b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit e4588eb6c329e4516e9bcfa084a383be81b55c60
artbio
parents: 5
diff changeset
14 default=0, help='minimal size of reads')
1827b74f872b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit e4588eb6c329e4516e9bcfa084a383be81b55c60
artbio
parents: 5
diff changeset
15 the_parser.add_argument('--maxsize', dest='maxsize', type=int,
1827b74f872b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit e4588eb6c329e4516e9bcfa084a383be81b55c60
artbio
parents: 5
diff changeset
16 default=10000, help='maximal size of reads')
9
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
17 the_parser.add_argument('--cluster', dest='cluster', type=int,
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
18 default=0, help='clustering distance')
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
19 the_parser.add_argument('--sample_names', dest='sample_names',
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
20 required=True, nargs='+',
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
21 help='list of sample names')
18
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
22 the_parser.add_argument('--bed', dest='bed', required=False,
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
23 help='Name of bed output must be specified\
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
24 if --cluster option used')
20
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
25 the_parser.add_argument('--bed_skipsize', dest='bed_skipsize',
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
26 required=False, type=int, default=1,
18
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
27 help='Skip clusters of size equal or less than\
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
28 specified integer in the bed output. \
20
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
29 Default = 0, not skipping')
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
30 the_parser.add_argument('--bed_skipdensity', dest='bed_skipdensity',
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
31 required=False, type=float, default=0,
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
32 help='Skip clusters of density equal or less than\
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
33 specified float number in the bed output. \
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
34 Default = 0, not skipping')
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
35 the_parser.add_argument('--bed_skipcounts', dest='bed_skipcounts',
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
36 required=False, type=int, default=1,
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
37 help='Skip clusters of size equal or less than\
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
38 specified integer in the bed output. \
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
39 Default = 0, not skipping')
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
40 the_parser.add_argument('--outputs', nargs='+', action='store',
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
41 help='list of two output paths (only two)')
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
42 the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store',
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
43 help='list of 2 plot methods (only two) among:\
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
44 Counts, Max, Mean, Median, Coverage and Size')
17
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
45 the_parser.add_argument('--nostrand', action='store_true',
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
46 help='Consider reads regardless their polarity')
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
47
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
48 args = the_parser.parse_args()
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
49 return args
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
50
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
51
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
52 class Map:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
53
17
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
54 def __init__(self, bam_file, sample, minsize, maxsize, cluster, nostrand):
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
55 self.sample_name = sample
8
1827b74f872b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit e4588eb6c329e4516e9bcfa084a383be81b55c60
artbio
parents: 5
diff changeset
56 self.minsize = minsize
1827b74f872b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit e4588eb6c329e4516e9bcfa084a383be81b55c60
artbio
parents: 5
diff changeset
57 self.maxsize = maxsize
9
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
58 self.cluster = cluster
17
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
59 if not nostrand:
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
60 self.nostrand = False
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
61 else:
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
62 self.nostrand = True
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
63 self.bam_object = pysam.AlignmentFile(bam_file, 'rb')
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
64 self.chromosomes = dict(zip(self.bam_object.references,
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
65 self.bam_object.lengths))
8
1827b74f872b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit e4588eb6c329e4516e9bcfa084a383be81b55c60
artbio
parents: 5
diff changeset
66 self.map_dict = self.create_map(self.bam_object, self.minsize,
17
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
67 self.maxsize, self.nostrand)
9
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
68 if self.cluster:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
69 self.map_dict = self.tile_map(self.map_dict, self.cluster)
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
70
17
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
71 def create_map(self, bam_object, minsize, maxsize, nostrand=False):
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
72 '''
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
73 Returns a map_dictionary {(chromosome,read_position,polarity):
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
74 [read_length, ...]}
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
75 '''
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
76 map_dictionary = defaultdict(list)
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
77 for chrom in self.chromosomes:
9
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
78 # get empty value for start and end of each chromosome
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
79 map_dictionary[(chrom, 1, 'F')] = []
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
80 map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = []
17
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
81 if not nostrand:
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
82 for read in bam_object.fetch(chrom):
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
83 positions = read.positions # a list of covered positions
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
84 if read.is_reverse:
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
85 map_dictionary[(chrom, positions[-1]+1, 'R')].append(
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
86 read.query_alignment_length)
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
87 else:
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
88 map_dictionary[(chrom, positions[0]+1, 'F')].append(
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
89 read.query_alignment_length)
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
90 else:
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
91 for read in bam_object.fetch(chrom):
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
92 positions = read.positions # a list of covered positions
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
93 if read.is_reverse:
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
94 map_dictionary[(chrom, positions[-1]+1, 'F')].append(
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
95 read.query_alignment_length)
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
96 else:
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
97 map_dictionary[(chrom, positions[0]+1, 'F')].append(
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
98 read.query_alignment_length)
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
99 return map_dictionary
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
100
9
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
101 def grouper(self, iterable, clust_distance):
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
102 prev = None
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
103 group = []
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
104 for item in iterable:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
105 if not prev or item - prev <= clust_distance:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
106 group.append(item)
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
107 else:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
108 yield group
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
109 group = [item]
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
110 prev = item
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
111 if group:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
112 yield group
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
113
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
114 def tile_map(self, map_dic, clust_distance):
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
115 '''
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
116 takes a map_dictionary {(chromosome,read_position,polarity):
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
117 [read_length, ...]}
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
118 and returns a map_dictionary with structure:
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
119 {(chromosome,read_position,polarity):
20
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
120 [*counts*, [start_clust, end_clust]]}
9
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
121 '''
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
122 clustered_dic = defaultdict(list)
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
123 for chrom in self.chromosomes:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
124 F_chrom_coord = []
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
125 R_chrom_coord = []
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
126 for key in map_dic:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
127 if key[0] == chrom and key[2] == 'F':
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
128 F_chrom_coord.append(key[1])
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
129 elif key[0] == chrom and key[2] == 'R':
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
130 R_chrom_coord.append(key[1])
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
131 F_chrom_coord = list(set(F_chrom_coord))
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
132 R_chrom_coord = list(set(R_chrom_coord))
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
133 F_chrom_coord.sort()
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
134 R_chrom_coord.sort()
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
135 F_clust_values = [i for i in self.grouper(F_chrom_coord,
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
136 clust_distance)]
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
137 F_clust_keys = [(i[-1]+i[0])/2 for i in F_clust_values]
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
138 R_clust_values = [i for i in self.grouper(R_chrom_coord,
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
139 clust_distance)]
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
140 R_clust_keys = [(i[-1]+i[0])/2 for i in R_clust_values]
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
141 # now 2 dictionnaries (F and R) with structure:
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
142 # {centered_coordinate: [coord1, coord2, coord3, ..]}
9
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
143 F_clust_dic = dict(zip(F_clust_keys, F_clust_values))
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
144 R_clust_dic = dict(zip(R_clust_keys, R_clust_values))
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
145 for centcoor in F_clust_dic:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
146 accumulator = []
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
147 for coor in F_clust_dic[centcoor]:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
148 accumulator.extend(map_dic[(chrom, coor, 'F')])
24
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
149 '''
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
150 compute the offset of the cluster due to
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
151 size of reads
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
152 '''
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
153 last = sorted(F_clust_dic[centcoor])[-1]
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
154 try:
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
155 margin = max(map_dic[(chrom, last, 'F')]) - 1
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
156 except ValueError:
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
157 margin = 0
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
158 clustered_dic[(chrom, centcoor, 'F')] = [len(accumulator), [
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
159 F_clust_dic[centcoor][0],
24
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
160 F_clust_dic[centcoor][-1] + margin]]
9
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
161 for centcoor in R_clust_dic:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
162 accumulator = []
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
163 for coor in R_clust_dic[centcoor]:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
164 accumulator.extend(map_dic[(chrom, coor, 'R')])
24
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
165 '''
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
166 compute the offset of the cluster due to
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
167 size of reads
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
168 '''
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
169 first = sorted(R_clust_dic[centcoor])[0]
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
170 try:
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
171 margin = max(map_dic[(chrom, first, 'R')]) - 1
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
172 except ValueError:
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
173 margin = 0
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
174 clustered_dic[(chrom, centcoor, 'R')] = [len(accumulator), [
24
e75a10eba0a6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 0293b0f9b07302ba0b71e7877be1ea7a7ed04718
artbio
parents: 20
diff changeset
175 R_clust_dic[centcoor][0] - margin,
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
176 R_clust_dic[centcoor][-1]]]
9
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
177 return clustered_dic
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
178
3
ed8b0142538d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 7b2ceb05489c27ddb769c38fdec56274108a6fa1
artbio
parents: 2
diff changeset
179 def compute_readcount(self, map_dictionary, out):
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
180 '''
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
181 takes a map_dictionary as input and writes
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
182 a readmap_dictionary {(chromosome,read_position,polarity):
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
183 number_of_reads}
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
184 in an open file handler out
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
185 '''
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
186 readmap_dictionary = dict()
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
187 for key in map_dictionary:
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
188 readmap_dictionary[key] = len(map_dictionary[key])
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
189 self.write_table(readmap_dictionary, out)
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
190
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
191 def compute_max(self, map_dictionary, out):
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
192 '''
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
193 takes a map_dictionary as input and writes
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
194 a max_dictionary {(chromosome,read_position,polarity):
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
195 max_of_number_of_read_at_any_position}
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
196 Not clear this function is still required
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
197 '''
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
198 merge_keylist = [(i[0], 0) for i in map_dictionary.keys()]
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
199 max_dictionary = dict(merge_keylist)
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
200 for key in map_dictionary:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
201 if len(map_dictionary[key]) > max_dictionary[key[0]]:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
202 max_dictionary[key[0]] = len(map_dictionary[key])
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
203 self.write_table(max_dictionary, out)
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
204
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
205 def compute_mean(self, map_dictionary, out):
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
206 '''
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
207 takes a map_dictionary as input and returns
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
208 a mean_dictionary {(chromosome,read_position,polarity):
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
209 mean_value_of_reads}
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
210 '''
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
211 mean_dictionary = dict()
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
212 for key in map_dictionary:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
213 if len(map_dictionary[key]) == 0:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
214 mean_dictionary[key] = 0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
215 else:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
216 mean_dictionary[key] = round(numpy.mean(map_dictionary[key]),
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
217 1)
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
218 self.write_table(mean_dictionary, out)
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
219
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
220 def compute_median(self, map_dictionary, out):
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
221 '''
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
222 takes a map_dictionary as input and returns
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
223 a mean_dictionary {(chromosome,read_position,polarity):
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
224 mean_value_of_reads}
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
225 '''
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
226 median_dictionary = dict()
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
227 for key in map_dictionary:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
228 if len(map_dictionary[key]) == 0:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
229 median_dictionary[key] = 0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
230 else:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
231 median_dictionary[key] = numpy.median(map_dictionary[key])
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
232 self.write_table(median_dictionary, out)
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
233
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
234 def compute_coverage(self, map_dictionary, out, quality=15):
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
235 '''
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
236 takes a map_dictionary as input and returns
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
237 a coverage_dictionary {(chromosome,read_position,polarity):
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
238 coverage}
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
239 '''
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
240 coverage_dictionary = dict()
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
241 for chrom in self.chromosomes:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
242 coverage_dictionary[(chrom, 1, 'F')] = 0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
243 coverage_dictionary[(chrom, self.chromosomes[chrom], 'F')] = 0
4
a6b9a081064b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit b58cb36616bf351278e57ec1949e9ebf3c3cdff1
artbio
parents: 3
diff changeset
244 for read in self.bam_object.fetch(chrom):
a6b9a081064b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit b58cb36616bf351278e57ec1949e9ebf3c3cdff1
artbio
parents: 3
diff changeset
245 positions = read.positions # a list of covered positions
a6b9a081064b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit b58cb36616bf351278e57ec1949e9ebf3c3cdff1
artbio
parents: 3
diff changeset
246 for pos in positions:
a6b9a081064b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit b58cb36616bf351278e57ec1949e9ebf3c3cdff1
artbio
parents: 3
diff changeset
247 if not map_dictionary[(chrom, pos+1, 'F')]:
a6b9a081064b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit b58cb36616bf351278e57ec1949e9ebf3c3cdff1
artbio
parents: 3
diff changeset
248 map_dictionary[(chrom, pos+1, 'F')] = []
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
249 for key in map_dictionary:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
250 coverage = self.bam_object.count_coverage(
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
251 reference=key[0],
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
252 start=key[1]-1,
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
253 end=key[1],
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
254 quality_threshold=quality)
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
255 """ Add the 4 coverage values """
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
256 coverage = [sum(x) for x in zip(*coverage)]
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
257 coverage_dictionary[key] = coverage[0]
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
258 self.write_table(coverage_dictionary, out)
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
259
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
260 def compute_size(self, map_dictionary, out):
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
261 '''
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
262 Takes a map_dictionary and returns a dictionary of sizes:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
263 {chrom: {polarity: {size: nbre of reads}}}
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
264 '''
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
265 size_dictionary = defaultdict(lambda: defaultdict(
1
40972a8dfab9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit ad60e6655aabe30246043d95f14646b2527c9255
artbio
parents: 0
diff changeset
266 lambda: defaultdict(int)))
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
267 # to track empty chromosomes
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
268 for chrom in self.chromosomes:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
269 if self.bam_object.count(chrom) == 0:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
270 size_dictionary[chrom]['F'][10] = 0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
271 for key in map_dictionary:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
272 for size in map_dictionary[key]:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
273 size_dictionary[key[0]][key[2]][size] += 1
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
274 self.write_size_table(size_dictionary, out)
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
275
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
276 def write_table(self, mapdict, out):
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
277 '''
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
278 Writer of a tabular file
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
279 Dataset, Chromosome, Chrom_length, Coordinate, Polarity,
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
280 <some mapped value>
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
281 out is an *open* file handler
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
282 '''
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
283 for key in sorted(mapdict):
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
284 line = [self.sample_name, key[0], self.chromosomes[key[0]],
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
285 key[1], key[2], mapdict[key]]
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
286 line = [str(i) for i in line]
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
287 out.write('\t'.join(line) + '\n')
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
288
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
289 def write_size_table(self, sizedic, out):
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
290 '''
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
291 Writer of a tabular file
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
292 Dataset, Chromosome, Chrom_length, <category (size)>, <some value>
25
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
293 from a dictionary of sizes: {chrom: {polarity: {size: nbre of reads}}}
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
294 out is an *open* file handler
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
295 '''
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
296 for chrom in sorted(sizedic):
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
297 sizes = sizedic[chrom]['F'].keys()
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
298 sizes.extend(sizedic[chrom]['R'].keys())
25
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
299 strandness = defaultdict(int)
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
300 sizeness = defaultdict(int)
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
301 for polarity in sizedic[chrom]:
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
302 for size in range(min(sizes), max(sizes)+1):
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
303 try:
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
304 strandness[polarity] += sizedic[chrom][polarity][size]
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
305 except KeyError:
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
306 pass
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
307 sizeness[size] += sizedic[chrom][polarity][size]
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
308 Strandbias = strandness['F'] + strandness['R']
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
309 if Strandbias:
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
310 Strandbias = strandness['F'] / float(Strandbias)
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
311 else:
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
312 Strandbias = 2
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
313 Mean = numpy.mean(sizeness.values())
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
314 StDev = numpy.std(sizeness.values())
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
315 for size in sizeness:
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
316 if StDev:
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
317 sizeness[size] = (sizeness[size] - Mean) / StDev
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
318 else:
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
319 sizeness[size] = 0
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
320 for polarity in sorted(sizedic[chrom]):
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
321 for size in range(min(sizes), max(sizes)+1):
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
322 try:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
323 line = [self.sample_name, chrom, polarity, size,
25
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
324 sizedic[chrom][polarity][size],
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
325 Strandbias, sizeness[size]]
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
326 except KeyError:
25
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
327 line = [self.sample_name, chrom, polarity, size, 0,
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
328 Strandbias, sizeness[size]]
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
329 line = [str(i) for i in line]
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
330 out.write('\t'.join(line) + '\n')
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
331
20
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
332 def write_cluster_table(self, clustered_dic, out, bedpath):
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
333 '''
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
334 Writer of a tabular file
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
335 Dataset, Chromosome, Chrom_length, Coordinate, Polarity,
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
336 <some mapped value>
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
337 out is an *open* file handler
18
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
338 bed is an a file handler internal to the function
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
339 '''
20
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
340 def filterCluster(size, count, density):
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
341 if size < args.bed_skipsize:
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
342 return False
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
343 if count < args.bed_skipcounts:
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
344 return False
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
345 if density <= args.bed_skipdensity:
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
346 return False
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
347 return True
18
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
348 bed = open(bedpath, 'w')
20
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
349 clusterid = 0
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
350 for key in sorted(clustered_dic):
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
351 start = clustered_dic[key][1][0]
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
352 end = clustered_dic[key][1][1]
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
353 size = end - start + 1
20
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
354 read_count = clustered_dic[key][0]
18
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
355 if self.nostrand:
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
356 polarity = '.'
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
357 elif key[2] == 'F':
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
358 polarity = '+'
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
359 else:
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
360 polarity = '-'
20
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
361 density = float(read_count) / size
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
362 line = [self.sample_name, key[0], self.chromosomes[key[0]],
20
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
363 key[1], key[2], read_count,
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
364 str(start) + "-" + str(end), str(size), str(density)]
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
365 line = [str(i) for i in line]
20
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
366 out.write('\t'.join(line) + '\n')
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
367 if filterCluster(size, read_count, density):
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
368 clusterid += 1
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
369 name = 'cluster_' + str(clusterid)
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
370 bedline = [key[0], str(start-1), str(end), name,
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
371 str(read_count), polarity, str(density)]
18
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
372 bed.write('\t'.join(bedline) + '\n')
20
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
373 print("number of reported clusters:", clusterid)
18
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
374 bed.close()
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
375
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
376
17
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
377 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster,
20
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
378 nostrand, bedfile=None, bed_skipsize=0):
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
379 for method, output in zip(methods, outputs):
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
380 out = open(output, 'w')
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
381 if method == 'Size':
25
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
382 header = ["Dataset", "Chromosome", "Polarity", method, "Counts",
07aa8f928d4b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit a4aa17f675a9caaca0859fee210fae6ada74460f
artbio
parents: 24
diff changeset
383 "Strandness", "z-score"]
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
384 elif cluster:
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
385 header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate",
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
386 "Polarity", method, "Start-End", "Cluster Size",
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
387 "density"]
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
388 else:
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
389 header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate",
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
390 "Polarity", method]
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
391 out.write('\t'.join(header) + '\n')
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
392 for input, sample in zip(inputs, samples):
17
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
393 mapobj = Map(input, sample, minsize, maxsize, cluster, nostrand)
3
ed8b0142538d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 7b2ceb05489c27ddb769c38fdec56274108a6fa1
artbio
parents: 2
diff changeset
394 token = {"Counts": mapobj.compute_readcount,
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
395 "Max": mapobj.compute_max,
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
396 "Mean": mapobj.compute_mean,
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
397 "Median": mapobj.compute_median,
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
398 "Coverage": mapobj.compute_coverage,
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
399 "Size": mapobj.compute_size,
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
400 "cluster": mapobj.write_cluster_table}
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
401 if cluster:
20
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
402 token["cluster"](mapobj.map_dict, out, bedfile)
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
403 else:
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
404 token[method](mapobj.map_dict, out)
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
405 out.close()
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
406
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
407
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
408 if __name__ == "__main__":
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
409 args = Parser()
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
410 # if identical sample names
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
411 if len(set(args.sample_names)) != len(args.sample_names):
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
412 args.sample_names = [name + '_' + str(i) for
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
413 i, name in enumerate(args.sample_names)]
8
1827b74f872b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit e4588eb6c329e4516e9bcfa084a383be81b55c60
artbio
parents: 5
diff changeset
414 main(args.inputs, args.sample_names, args.plot_methods, args.outputs,
20
de7fbcb1348c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
artbio
parents: 19
diff changeset
415 args.minsize, args.maxsize, args.cluster, args.nostrand, args.bed)