annotate small_rna_maps.py @ 19:f33afecac67a draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit ee61e232fbde2e4f9b222607ba928bceaf271289
author artbio
date Thu, 22 Nov 2018 20:05:32 -0500
parents 2c95c899d0a4
children de7fbcb1348c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
1 import argparse
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
2 from collections import defaultdict
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
3
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
4 import numpy
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
5
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
6 import pysam
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
7
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
8
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
def Parser():
    '''
    Build the command-line interface and parse the process arguments.

    Returns the argparse.Namespace produced by parse_args(), with the
    attributes: inputs, minsize, maxsize, cluster, sample_names, bed,
    bed_skipcluster, outputs, plot_methods and nostrand.
    argparse exits the process (SystemExit) when a required option
    (--inputs, --sample_names) is missing or a value cannot be converted.
    '''
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument('--inputs', dest='inputs', required=True,
                            nargs='+', help='list of input BAM files')
    the_parser.add_argument('--minsize', dest='minsize', type=int,
                            default=0, help='minimal size of reads')
    the_parser.add_argument('--maxsize', dest='maxsize', type=int,
                            default=10000, help='maximal size of reads')
    the_parser.add_argument('--cluster', dest='cluster', type=int,
                            default=0, help='clustering distance')
    the_parser.add_argument('--sample_names', dest='sample_names',
                            required=True, nargs='+',
                            help='list of sample names')
    # Multi-line help texts are written as adjacent string literals so that
    # no source indentation ends up embedded in the message.
    the_parser.add_argument('--bed', dest='bed', required=False,
                            help='Name of bed output must be specified '
                                 'if --cluster option used')
    # The documented default must match the declared one (default=0).
    the_parser.add_argument('--bed_skipcluster', dest='bed_skipcluster',
                            required=False, type=int, default=0,
                            help='Skip clusters of size equal or less than '
                                 'specified integer in the bed output. '
                                 'Default = 0')
    the_parser.add_argument('--outputs', nargs='+', action='store',
                            help='list of two output paths (only two)')
    the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store',
                            help='list of 2 plot methods (only two) among: '
                                 'Counts, Max, Mean, Median, Coverage '
                                 'and Size')
    the_parser.add_argument('--nostrand', action='store_true',
                            help='Consider reads regardless their polarity')

    args = the_parser.parse_args()
    return args
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
40
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
41
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
42 class Map:
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
43
17
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
def __init__(self, bam_file, sample, minsize, maxsize, cluster, nostrand):
    '''
    Open bam_file with pysam and build the read map of one sample.

    Stores sample, minsize, maxsize and cluster on the instance, keeps
    nostrand as a real boolean, then fills self.map_dict with the result
    of create_map(). When cluster is truthy, self.map_dict is replaced by
    the result of tile_map() applied to it with that clustering distance.
    '''
    self.sample_name = sample
    self.minsize, self.maxsize = minsize, maxsize
    self.cluster = cluster
    # any truthy/falsy value is normalised to True/False
    self.nostrand = bool(nostrand)
    self.bam_object = pysam.AlignmentFile(bam_file, 'rb')
    # pairs each entry of bam_object.references with the entry of
    # bam_object.lengths found at the same index
    reference_names = self.bam_object.references
    reference_lengths = self.bam_object.lengths
    self.chromosomes = dict(zip(reference_names, reference_lengths))
    self.map_dict = self.create_map(
        self.bam_object, self.minsize, self.maxsize, self.nostrand)
    if self.cluster:
        self.map_dict = self.tile_map(self.map_dict, self.cluster)
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
60
17
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
def create_map(self, bam_object, minsize, maxsize, nostrand=False):
    '''
    Returns a map_dictionary {(chromosome,read_position,polarity):
                              [read_length, ...]}

    For every chromosome of self.chromosomes, empty 'F' entries are
    created at coordinate 1 and at the chromosome length, then each read
    returned by bam_object.fetch(chromosome) appends its
    query_alignment_length to one key:
      - forward read: (chromosome, first covered position + 1, 'F')
      - reverse read: (chromosome, last covered position + 1, 'R'),
        or polarity 'F' when nostrand is true.
    NOTE(review): minsize and maxsize are accepted but not used in this
    method - confirm whether size filtering is expected to happen here.
    '''
    map_dictionary = defaultdict(list)
    for chrom, chrom_length in self.chromosomes.items():
        # placeholders for the start and the end of each chromosome
        map_dictionary[(chrom, 1, 'F')] = []
        map_dictionary[(chrom, chrom_length, 'F')] = []
        for read in bam_object.fetch(chrom):
            covered = read.positions  # a list of covered positions
            if read.is_reverse:
                anchor = covered[-1] + 1
                # reverse reads are filed under 'F' when strand is ignored
                polarity = 'F' if nostrand else 'R'
            else:
                anchor = covered[0] + 1
                polarity = 'F'
            map_dictionary[(chrom, anchor, polarity)].append(
                read.query_alignment_length)
    return map_dictionary
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
90
9
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
def grouper(self, iterable, clust_distance):
    '''
    Yields lists of consecutive items of iterable, starting a new list
    each time the difference between an item and the item just before it
    is greater than clust_distance.

    iterable: numbers, expected in increasing order (items are only
              compared with their immediate predecessor)
    clust_distance: largest gap allowed between two neighbours of a group
    Nothing is yielded for an empty iterable.
    '''
    prev = None
    group = []
    for item in iterable:
        # "prev is None" rather than "not prev": a previous item equal to
        # 0 is falsy and would otherwise force the next item into the
        # current group whatever its distance.
        if prev is None or item - prev <= clust_distance:
            group.append(item)
        else:
            yield group
            group = [item]
        prev = item
    if group:
        yield group
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
103
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
104 def tile_map(self, map_dic, clust_distance):
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
105 '''
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
106 takes a map_dictionary {(chromosome,read_position,polarity):
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
107 [read_length, ...]}
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
108 and returns a map_dictionary with structure:
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
109 {(chromosome,read_position,polarity):
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
110 ([read_length, ...], [start_clust, end_clust])}
9
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
111 '''
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
112 clustered_dic = defaultdict(list)
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
113 for chrom in self.chromosomes:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
114 F_chrom_coord = []
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
115 R_chrom_coord = []
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
116 for key in map_dic:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
117 if key[0] == chrom and key[2] == 'F':
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
118 F_chrom_coord.append(key[1])
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
119 elif key[0] == chrom and key[2] == 'R':
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
120 R_chrom_coord.append(key[1])
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
121 F_chrom_coord = list(set(F_chrom_coord))
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
122 R_chrom_coord = list(set(R_chrom_coord))
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
123 F_chrom_coord.sort()
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
124 R_chrom_coord.sort()
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
125 F_clust_values = [i for i in self.grouper(F_chrom_coord,
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
126 clust_distance)]
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
127 F_clust_keys = [(i[-1]+i[0])/2 for i in F_clust_values]
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
128 R_clust_values = [i for i in self.grouper(R_chrom_coord,
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
129 clust_distance)]
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
130 R_clust_keys = [(i[-1]+i[0])/2 for i in R_clust_values]
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
131 # now 2 dictionnaries (F and R) with structure:
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
132 # {centered_coordinate: [coord1, coord2, coord3, ..]}
9
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
133 F_clust_dic = dict(zip(F_clust_keys, F_clust_values))
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
134 R_clust_dic = dict(zip(R_clust_keys, R_clust_values))
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
135 for centcoor in F_clust_dic:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
136 accumulator = []
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
137 for coor in F_clust_dic[centcoor]:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
138 accumulator.extend(map_dic[(chrom, coor, 'F')])
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
139 clustered_dic[(chrom, centcoor, 'F')] = [len(accumulator), [
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
140 F_clust_dic[centcoor][0],
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
141 F_clust_dic[centcoor][-1]]]
9
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
142 for centcoor in R_clust_dic:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
143 accumulator = []
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
144 for coor in R_clust_dic[centcoor]:
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
145 accumulator.extend(map_dic[(chrom, coor, 'R')])
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
146 clustered_dic[(chrom, centcoor, 'R')] = [len(accumulator), [
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
147 R_clust_dic[centcoor][0],
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
148 R_clust_dic[centcoor][-1]]]
9
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
149 return clustered_dic
3ea75c573429 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 6199193c7fe2cb56403eea8af0b40d44f7311fd5
artbio
parents: 8
diff changeset
150
3
ed8b0142538d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 7b2ceb05489c27ddb769c38fdec56274108a6fa1
artbio
parents: 2
diff changeset
def compute_readcount(self, map_dictionary, out):
    '''
    Count the entries stored under every key of map_dictionary and
    write the resulting table with self.write_table.

    map_dictionary: {(chromosome, read_position, polarity): [values]}
        only the length of each list is used here
    out: an *open* file handler, handed over to self.write_table
    '''
    readmap_dictionary = {
        key: len(reads) for key, reads in map_dictionary.items()}
    self.write_table(readmap_dictionary, out)
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
162
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
def compute_max(self, map_dictionary, out):
    '''
    Write, for every key of map_dictionary, the highest number of
    reads found at any single key of the same chromosome.

    map_dictionary: {(chromosome, read_position, polarity): [values]}
    out: an *open* file handler, handed over to self.write_table

    The table passed to self.write_table has the structure
    {(chromosome, read_position, polarity):
         max_of_number_of_read_at_any_position}
    It must be keyed by the full tuples because write_table indexes
    every key as (chromosome, coordinate, polarity); a table keyed by
    the chromosome name alone cannot be written by it.
    Not clear this function is still required
    '''
    # First pass: highest read count observed per chromosome.
    chrom_max = {}
    for key in map_dictionary:
        chrom = key[0]
        chrom_max[chrom] = max(chrom_max.get(chrom, 0),
                               len(map_dictionary[key]))
    # Second pass: report that maximum under each original key.
    max_dictionary = {key: chrom_max[key[0]] for key in map_dictionary}
    self.write_table(max_dictionary, out)
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
176
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
def compute_mean(self, map_dictionary, out):
    '''
    Compute the mean of the values stored under every key of
    map_dictionary and write the table with self.write_table.

    map_dictionary: {(chromosome, read_position, polarity): [values]}
    out: an *open* file handler, handed over to self.write_table

    The mean is rounded to one decimal; a key holding an empty list is
    reported as 0 instead of being averaged.
    '''
    mean_dictionary = {}
    for key, values in map_dictionary.items():
        mean_dictionary[key] = (
            round(numpy.mean(values), 1) if len(values) else 0)
    self.write_table(mean_dictionary, out)
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
191
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
def compute_median(self, map_dictionary, out):
    '''
    Compute the median of the values stored under every key of
    map_dictionary and write the table with self.write_table.

    map_dictionary: {(chromosome, read_position, polarity): [values]}
    out: an *open* file handler, handed over to self.write_table

    A key holding an empty list is reported as 0; the median itself is
    not rounded.
    '''
    median_dictionary = {}
    for key, values in map_dictionary.items():
        median_dictionary[key] = (
            numpy.median(values) if len(values) else 0)
    self.write_table(median_dictionary, out)
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
205
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
def compute_coverage(self, map_dictionary, out, quality=15):
    '''
    Compute a coverage value for every key of map_dictionary and write
    the table with self.write_table.

    map_dictionary: {(chromosome, read_position, polarity): [values]}
        it is modified in place: every position covered by a read gets
        a (chromosome, position + 1, 'F') key (with an empty list when
        nothing is stored there yet)
    out: an *open* file handler, handed over to self.write_table
    quality: forwarded to count_coverage as quality_threshold

    The written table is {(chromosome, read_position, polarity):
    coverage}; the first and last coordinate of every chromosome are
    present with a coverage of 0 unless they are keys of
    map_dictionary.
    '''
    coverage_dictionary = {}
    for chrom in self.chromosomes:
        first_key = (chrom, 1, 'F')
        last_key = (chrom, self.chromosomes[chrom], 'F')
        coverage_dictionary[first_key] = 0
        coverage_dictionary[last_key] = 0
        for read in self.bam_object.fetch(chrom):
            # read.positions: a list of covered positions
            for pos in read.positions:
                covered_key = (chrom, pos + 1, 'F')
                # Indexing a missing key relies on map_dictionary
                # supplying a default (a plain dict raises KeyError).
                if not map_dictionary[covered_key]:
                    map_dictionary[covered_key] = []
    for key in map_dictionary:
        # Query a window of a single position: [key[1]-1, key[1])
        per_base_counts = self.bam_object.count_coverage(
            reference=key[0],
            start=key[1] - 1,
            end=key[1],
            quality_threshold=quality)
        # Add the 4 coverage values found at that position
        coverage_dictionary[key] = sum(
            counts[0] for counts in per_base_counts)
    self.write_table(coverage_dictionary, out)
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
231
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
def compute_size(self, map_dictionary, out):
    '''
    Tally the values of map_dictionary per chromosome and polarity and
    write the table with self.write_size_table.

    map_dictionary: {(chromosome, read_position, polarity): [sizes]}
    out: an *open* file handler, handed over to self.write_size_table

    The table has the structure
    {chrom: {polarity: {size: nbre of reads}}}
    '''
    def polarity_table():
        return defaultdict(lambda: defaultdict(int))

    size_dictionary = defaultdict(polarity_table)
    # to track empty chromosomes: give them a single size-10 entry
    # with a count of 0 so that they still show up in the table
    for chrom in self.chromosomes:
        if self.bam_object.count(chrom) != 0:
            continue
        size_dictionary[chrom]['F'][10] = 0
    for key, sizes in map_dictionary.items():
        per_size_counts = size_dictionary[key[0]][key[2]]
        for size in sizes:
            per_size_counts[size] += 1
    self.write_size_table(size_dictionary, out)
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
247
2
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
def write_table(self, mapdict, out):
    '''
    Writer of a tabular file, one tab-separated line per key of
    mapdict, in sorted key order. Columns:
    Dataset, Chromosome, Chrom_length, Coordinate, Polarity,
    <some mapped value>

    mapdict: {(chromosome, coordinate, polarity): value}
    out is an *open* file handler
    '''
    for key in sorted(mapdict):
        chrom = key[0]
        fields = (self.sample_name, chrom, self.chromosomes[chrom],
                  key[1], key[2], mapdict[key])
        out.write('\t'.join(str(field) for field in fields) + '\n')
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
260
507383cce5a8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit edbb53cb13b52bf8e71c562fa8acc2c3be2fb270
artbio
parents: 1
diff changeset
def write_size_table(self, sizedic, out):
    '''
    Writer of a tabular file, one tab-separated line per chromosome,
    polarity and size. Columns:
    Dataset, Chromosome, Polarity, <category (size)>, <some value>

    sizedic: {chrom: {polarity: {size: nbre of reads}}}
    out is an *open* file handler

    For each chromosome, every size between the smallest and the
    largest one seen on either polarity is written for each polarity;
    sizes without reads are written with a value of 0.
    '''
    for chrom in sorted(sizedic):
        # list() is required: dict.keys() is a view without extend()
        # on Python 3. Both polarities are indexed on purpose, so that
        # a defaultdict materialises 'F' and 'R' and both get written.
        sizes = list(sizedic[chrom]['F'])
        sizes.extend(sizedic[chrom]['R'])
        if not sizes:
            # nothing recorded: min()/max() would raise ValueError
            continue
        smallest, largest = min(sizes), max(sizes)
        for polarity in sorted(sizedic[chrom]):
            counts = sizedic[chrom][polarity]
            for size in range(smallest, largest + 1):
                # .get() reports missing sizes as 0 without inserting
                # new keys in the caller's dictionary
                line = [self.sample_name, chrom, polarity, size,
                        counts.get(size, 0)]
                out.write('\t'.join(str(i) for i in line) + '\n')
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
279
18
2c95c899d0a4 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
artbio
parents: 17
diff changeset
def write_cluster_table(self, clustered_dic, out, bedpath, skip):
    '''
    Writer of a tabular file, one line per cluster:
    Dataset, Chromosome, Chrom_length, Coordinate, Polarity,
    <some mapped value>, Start-End, Cluster Size, density
    and of a BED file restricted to the clusters whose size is
    strictly greater than skip.

    clustered_dic maps (chromosome, coordinate, polarity) keys to
    entries read as entry[0] (the mapped value) and entry[1][0],
    entry[1][1] (cluster start and end)
    out is an *open* file handler, it is not closed here
    bedpath is the path of the BED file; it is opened in 'w' mode (any
    previous content is lost) and closed by this function. If bedpath
    is None, no BED file is written
    skip is the cluster size above which a cluster is reported in the
    BED file
    '''
    # The BED output is optional: a None path only disables it
    bed = open(bedpath, 'w') if bedpath is not None else None
    try:
        for key in sorted(clustered_dic):
            entry = clustered_dic[key]
            value = entry[0]
            start = entry[1][0]
            end = entry[1][1]
            # start and end are both counted in the cluster size
            size = end - start + 1
            if self.nostrand:
                polarity = '.'
            elif key[2] == 'F':
                polarity = '+'
            else:
                polarity = '-'
            density = float(value) / size
            line = [self.sample_name, key[0], self.chromosomes[key[0]],
                    key[1], key[2], value,
                    str(start) + "-" + str(end), str(size), str(density)]
            line = [str(i) for i in line]
            if bed is not None and size > skip:
                # start-1: presumably shifts a 1-based start to the
                # 0-based start expected in BED files - TODO confirm
                bedline = [key[0], str(start-1), str(end), 'cluster',
                           str(value), polarity]
                bed.write('\t'.join(bedline) + '\n')
            # the tabular line is written whatever the cluster size
            out.write('\t'.join(line) + '\n')
    finally:
        # release the BED handle even if a cluster entry is malformed
        if bed is not None:
            bed.close()
16
600e2498bd21 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
artbio
parents: 15
diff changeset
310
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
311
17
b28dcd4051e8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
artbio
parents: 16
diff changeset
def main(inputs, samples, methods, outputs, minsize, maxsize, cluster,
         nostrand, bedfile=None, bed_skipcluster=0):
    '''
    Write one tabular file per (method, output) pair.

    Each output starts with a header line, then receives the lines
    produced for every (input, sample) pair by a Map object built from
    that pair with minsize, maxsize, cluster and nostrand.
    When cluster is true, the write_cluster_table method of the Map
    object is called whatever the method name, with bedfile and
    bed_skipcluster as extra arguments; otherwise the Map method
    registered under the method name is called.
    '''
    for method, output in zip(methods, outputs):
        # NOTE(review): the 'Size' header is kept even when cluster is
        # true, although the cluster writer is then used - confirm
        if method == 'Size':
            header = ["Dataset", "Chromosome", "Polarity", method, "Counts"]
        elif cluster:
            header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate",
                      "Polarity", method, "Start-End", "Cluster Size",
                      "density"]
        else:
            header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate",
                      "Polarity", method]
        # the with block closes the output even if a Map or a writer fails
        with open(output, 'w') as out:
            out.write('\t'.join(header) + '\n')
            for input_, sample in zip(inputs, samples):
                mapobj = Map(input_, sample, minsize, maxsize, cluster,
                             nostrand)
                token = {"Counts": mapobj.compute_readcount,
                         "Max": mapobj.compute_max,
                         "Mean": mapobj.compute_mean,
                         "Median": mapobj.compute_median,
                         "Coverage": mapobj.compute_coverage,
                         "Size": mapobj.compute_size,
                         "cluster": mapobj.write_cluster_table}
                if cluster:
                    # NOTE(review): bedfile is opened in 'w' mode by each
                    # call, so only the last call's clusters remain in it
                    token["cluster"](mapobj.map_dict, out, bedfile,
                                     bed_skipcluster)
                else:
                    token[method](mapobj.map_dict, out)
0
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
341
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
342
6d48150495e3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
if __name__ == "__main__":
    args = Parser()
    # When at least one sample name is repeated, every name gets its
    # index in the list appended as a suffix
    names = args.sample_names
    if len(names) != len(set(names)):
        args.sample_names = ['_'.join((name, str(rank)))
                             for rank, name in enumerate(names)]
    main(args.inputs, args.sample_names, args.plot_methods, args.outputs,
         args.minsize, args.maxsize, args.cluster, args.nostrand, args.bed,
         args.bed_skipcluster)