annotate signature.py @ 13:124f404b0fe7 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit bc672cf9d5fe9c1f7eaf02abdc906cf2c1763668
author artbio
date Sat, 25 Feb 2023 09:54:28 +0000
parents 8d3ca9652a5b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
1 import argparse
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
2 from collections import defaultdict
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
3
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
4 import numpy
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
5
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
6 import pysam
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
7
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
8
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
def Parser():
    '''
    Builds the command line interface of the tool and returns the parsed
    arguments (an argparse.Namespace read from sys.argv).
    '''
    # (flag, value type, help text), registered in this exact order
    option_specs = (
        ('--input', str, "bam alignment file"),
        ('--minquery', int,
         "Minimum readsize of query reads (nt) - must be an integer"),
        ('--maxquery', int,
         "Maximum readsize of query reads (nt) - must be an integer"),
        ('--mintarget', int,
         "Minimum readsize of target reads (nt) - must be an integer"),
        ('--maxtarget', int,
         "Maximum readsize of target reads (nt) - must be an integer"),
        ('--minscope', int,
         "Minimum overlap analyzed (nt) - must be an integer"),
        ('--maxscope', int,
         "Maximum overlap analyzed (nt) - must be an integer"),
        ('--output_h', str, "h-signature dataframe"),
        ('--output_z', str, "z-signature dataframe"),
    )
    cli = argparse.ArgumentParser()
    for flag, value_type, description in option_specs:
        # "store" is argparse's default action, spelled out for clarity
        cli.add_argument(flag, action="store", type=value_type,
                         help=description)
    return cli.parse_args()
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
39
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
40
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
41 class Map:
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
42
7
07771982ef9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents: 0
diff changeset
def __init__(self, bam_file, minquery=23, maxquery=29, mintarget=23,
             maxtarget=29, minscope=1, maxscope=19, output_h='',
             output_z=''):
    '''
    Opens bam_file, maps its reads and writes the two signature tables.

    bam_file: path of the bam alignment file, opened with pysam.
    minquery/maxquery: inclusive size bounds stored in self.query_range.
    mintarget/maxtarget: inclusive size bounds stored in
        self.target_range.
    minscope/maxscope: inclusive overlap bounds stored in self.scope.
    output_h: path that receives the string returned by
        compute_signature_h().
    output_z: path that receives the string returned by
        compute_signature_pairs().
    '''
    self.bam_object = pysam.AlignmentFile(bam_file, 'rb')
    self.query_range = range(minquery, maxquery + 1)
    self.target_range = range(mintarget, maxtarget + 1)
    self.scope = range(minscope, maxscope + 1)
    # The context manager guarantees that both output handles are closed,
    # even when one of the computations below raises.
    with open(output_h, 'w') as self.H, open(output_z, 'w') as self.Z:
        self.chromosomes = dict(zip(self.bam_object.references,
                                    self.bam_object.lengths))
        self.map_dict = self.create_map(self.bam_object)
        self.query_positions = self.compute_query_positions()
        self.Z.write(self.compute_signature_pairs())
        self.H.write(self.compute_signature_h())
    # NOTE(review): self.bam_object is left open as in the original code;
    # confirm no later method reads it before closing it here.
0
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
60
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
def create_map(self, bam_object):
    '''
    Indexes the reads of bam_object by position and strand.

    Returns a defaultdict(list) of the form
    {(chromosome, read_position, polarity): [read_length, ...]}
    where polarity is 'F' (position = reference_start + 1) or
    'R' (position = reference_end).
    '''
    lengths_by_site = defaultdict(list)
    # every chromosome gets an empty forward entry at its first and last
    # coordinate, before any read is registered
    for name, chrom_length in self.chromosomes.items():
        lengths_by_site[(name, 1, 'F')] = []
        lengths_by_site[(name, chrom_length, 'F')] = []
    for name in self.chromosomes:
        for alignment in bam_object.fetch(name):
            if alignment.is_reverse:
                site = (name, alignment.reference_end, 'R')
            else:
                site = (name, alignment.reference_start + 1, 'F')
            lengths_by_site[site].append(alignment.query_alignment_length)
    return lengths_by_site
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
80
7
07771982ef9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents: 0
diff changeset
def compute_query_positions(self):
    '''
    Returns a defaultdict(list) {chromosome: [coordinate, ...]} holding,
    sorted and without duplicates, every forward ('F') coordinate of
    self.map_dict for which at least one reverse ('R') read is recorded
    at coordinate + i - 1, for some i in self.scope.

    This method does not filter on read size.
    '''
    map_dict = self.map_dict
    scope = self.scope
    all_query_positions = defaultdict(list)
    for chrom, coord, pol in map_dict:
        if pol != 'F':
            continue
        # .get() never inserts: probing with map_dict[...] would add an
        # empty list to the defaultdict for every absent reverse key,
        # growing it while it is being iterated.
        if any(map_dict.get((chrom, coord + i - 1, 'R')) for i in scope):
            all_query_positions[chrom].append(coord)
    for chrom in all_query_positions:
        all_query_positions[chrom] = sorted(set(all_query_positions[chrom]))
    return all_query_positions
07771982ef9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents: 0
diff changeset
97
07771982ef9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents: 0
diff changeset
def countpairs(self, uppers, lowers):
    '''
    Counts the pairs that can be formed between the read sizes in uppers
    and the read sizes in lowers.

    A pair needs one size in self.query_range and the other size in
    self.target_range. Each size of lowers is used at most once, and each
    size of uppers takes the first still available partner. The input
    lists are left untouched.
    '''
    queries = self.query_range
    targets = self.target_range

    def in_any_range(size):
        return size in queries or size in targets

    def can_pair(up_size, down_size):
        return (up_size in queries and down_size in targets) or (
            up_size in targets and down_size in queries)

    available = [size for size in lowers if in_any_range(size)]
    pair_count = 0
    for up_size in uppers:
        if not in_any_range(up_size):
            continue
        partner = next((index for index, down_size in enumerate(available)
                        if can_pair(up_size, down_size)), None)
        if partner is not None:
            # consume the partner so that it cannot be paired again
            del available[partner]
            pair_count += 1
    return pair_count
07771982ef9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents: 0
diff changeset
114
07771982ef9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents: 0
diff changeset
def compute_signature_pairs(self):
    '''
    Counts, for each chromosome and each overlap of self.scope, the read
    pairs found between the forward reads at a query position and the
    reverse reads at position + overlap - 1 (see countpairs), adds an
    'all_chromosomes' row with the per-overlap totals, and returns the
    table as formatted by self.stringify_table.
    '''
    overlaps = self.scope
    pair_counts = defaultdict(dict)
    # start every chromosome at zero so that it is reported even when it
    # has no query position
    for name in self.chromosomes:
        for size in overlaps:
            pair_counts[name][size] = 0
    for name, positions in self.query_positions.items():
        for position in positions:
            for size in overlaps:
                forward_sizes = self.map_dict[name, position, 'F']
                reverse_sizes = self.map_dict[name, position + size - 1,
                                              'R']
                pair_counts[name][size] += self.countpairs(forward_sizes,
                                                           reverse_sizes)
    # totals over all chromosomes, one overlap at a time
    for size in overlaps:
        pair_counts['all_chromosomes'][size] = sum(
            counts[size] for name, counts in pair_counts.items()
            if name != 'all_chromosomes')
    return self.stringify_table(pair_counts)
07771982ef9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents: 0
diff changeset
136
07771982ef9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents: 0
diff changeset
def signature_tables(self):
    '''
    Build the per-chromosome read count tables used by the signature
    computations.

    Every key of self.map_dict is indexed as (chromosome, position,
    polarity) and maps to an iterable of read sizes. Positions are kept
    as-is when the polarity is 'F' and negated otherwise. A read is
    counted in the query table when its size is in self.query_range and
    in the target table when its size is in self.target_range (a size
    may be in both).

    Returns a tuple (Query_table, Target_table) of defaultdict(dict)
    objects shaped {chromosome: {signed_position: number_of_reads}}.
    '''
    query_sizes = self.query_range
    target_sizes = self.target_range
    Query_table = defaultdict(dict)
    Target_table = defaultdict(dict)
    for key, sizes in self.map_dict.items():
        for size in sizes:
            is_query = size in query_sizes
            is_target = size in target_sizes
            if not (is_query or is_target):
                continue
            chrom = key[0]
            # reads that are not forward get a negative coordinate
            signed_position = key[1] if key[2] == 'F' else -key[1]
            if is_query:
                query_counts = Query_table[chrom]
                query_counts[signed_position] = \
                    query_counts.get(signed_position, 0) + 1
            if is_target:
                target_counts = Target_table[chrom]
                target_counts[signed_position] = \
                    target_counts.get(signed_position, 0) + 1
    return Query_table, Target_table
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
156
7
07771982ef9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents: 0
diff changeset
def compute_signature_h(self):
    '''
    Compute the overlap probability table for every chromosome and for
    all chromosomes merged, and return it formatted by
    self.stringify_table.

    For each query coordinate of a chromosome, the target reads found at
    the signed coordinate (-coord - overlap + 1) are collected for every
    overlap in self.scope. The query count times the target count at one
    overlap, divided by the number of targets found over the whole scope
    and by the total number of query reads of the chromosome, is added
    to frequency_table[chrom][overlap]. Query coordinates without any
    target in the scope contribute nothing.

    The 'all_chromosomes' entry is the sum of the per-chromosome values
    weighted by the read count returned by self.bam_object.count(chrom).
    Each chromosome is counted only once; the count is reused for both
    the denominator and the weights.
    '''
    scope = self.scope
    Query_table, Target_table = self.signature_tables()
    frequency_table = defaultdict(dict)
    for chrom in self.chromosomes:
        for overlap in scope:
            frequency_table[chrom][overlap] = 0
    for chrom, queries in Query_table.items():
        Total_Query_Numb = sum(queries.values())
        if not Total_Query_Numb:
            # no query read: dividing by the total would be undefined
            continue
        targets = Target_table[chrom]
        for coord, query_count in queries.items():
            # number of target reads facing this query at each overlap
            hits = [targets.get(-coord - overlap + 1, 0)
                    for overlap in scope]
            number_of_targets = sum(hits)
            if not number_of_targets:
                # explicit guard instead of catching ZeroDivisionError
                continue
            for overlap, hit in zip(scope, hits):
                frequency_table[chrom][overlap] += \
                    query_count * hit / number_of_targets \
                    / float(Total_Query_Numb)
    # compute overlap probabilities for all chromosomes merged
    general_frequency_table = dict([(overlap, 0) for overlap in scope])
    # count the reads of each chromosome once instead of once per overlap
    reads_per_chrom = dict(
        [(chrom, self.bam_object.count(chrom)) for chrom in frequency_table])
    # NOTE(review): each chromosome's reads are counted once per overlap
    # in the denominator, so the merged weights sum to 1/len(scope)
    # rather than 1. Kept unchanged to preserve the existing output -
    # confirm whether this scaling is intended.
    total_aligned_reads = sum(
        len(frequency_table[chrom]) * reads_per_chrom[chrom]
        for chrom in frequency_table)
    if total_aligned_reads:
        for chrom, frequencies in frequency_table.items():
            weight = reads_per_chrom[chrom]
            for overlap, frequency in frequencies.items():
                general_frequency_table[overlap] += \
                    frequency / total_aligned_reads * weight
    for overlap in general_frequency_table:
        frequency_table['all_chromosomes'][overlap] = \
            general_frequency_table[overlap]
    return self.stringify_table(frequency_table)
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
201
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
def stringify_table(self, frequency_table):
    '''
    Turn a {chromosome: {overlap: value}} table into writable text and
    compute the z-score of every value along the way.

    Chromosomes and overlaps are emitted in sorted order, one line per
    overlap with four tab-separated fields: chromosome, overlap, value
    and z-score. The z-score is computed against the mean and standard
    deviation of the values of the same chromosome, and is reported as
    0 when that standard deviation is 0.

    Returns the concatenated lines as a single string.
    '''
    rows = []
    for chrom in sorted(frequency_table):
        per_overlap = frequency_table[chrom]
        values = list(per_overlap.values())
        mean = numpy.mean(values)
        deviation = numpy.std(values)
        flat = deviation == 0
        for overlap in sorted(per_overlap):
            value = per_overlap[overlap]
            # a flat profile has no meaningful z-score: report 0
            zscore = 0 if flat else (value - mean)/deviation
            rows.append('%s\t%s\t%s\t%s\n' % (
                chrom, str(overlap), str(value), str(zscore)))
    return ''.join(rows)
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
225
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
226
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
227 if __name__ == "__main__":
a35e6f9c1d34 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff changeset
228 args = Parser()
7
07771982ef9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents: 0
diff changeset
229 mapobj = Map(args.input, args.minquery, args.maxquery, args.mintarget,
07771982ef9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents: 0
diff changeset
230 args.maxtarget, args.minscope, args.maxscope, args.output_h,
07771982ef9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents: 0
diff changeset
231 args.output_z)