Mercurial > repos > artbio > small_rna_signatures
annotate signature.py @ 12:aa5e2c64dff8 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6806c0677e53d52164707faeb36947987f5c500a
author | artbio |
---|---|
date | Sat, 22 Oct 2022 23:49:52 +0000 |
parents | 8d3ca9652a5b |
children |
rev | line source |
---|---|
0
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
1 import argparse |
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
2 from collections import defaultdict |
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
3 |
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
4 import numpy |
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
5 |
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
6 import pysam |
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
7 |
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
8 |
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
def Parser():
    '''
    Declare the command-line options of the tool and return the parsed
    arguments (an argparse.Namespace built from the process arguments).

    Options: --input (string), six integer bounds (--minquery, --maxquery,
    --mintarget, --maxtarget, --minscope, --maxscope) and two string
    options (--output_h, --output_z). None of them declares a default, so
    an option that is omitted is returned as None.
    '''
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--input', action="store", type=str, help="bam alignment file")
    # integer bounds, declared in the order they appear in the usage text
    integer_options = (
        ('--minquery',
         "Minimum readsize of query reads (nt) - must be an integer"),
        ('--maxquery',
         "Maximum readsize of query reads (nt) - must be an integer"),
        ('--mintarget',
         "Minimum readsize of target reads (nt) - must be an integer"),
        ('--maxtarget',
         "Maximum readsize of target reads (nt) - must be an integer"),
        ('--minscope',
         "Minimum overlap analyzed (nt) - must be an integer"),
        ('--maxscope',
         "Maximum overlap analyzed (nt) - must be an integer"),
    )
    for flag, description in integer_options:
        cli.add_argument(flag, type=int, help=description)
    output_options = (
        ('--output_h', "h-signature dataframe"),
        ('--output_z', "z-signature dataframe"),
    )
    for flag, description in output_options:
        cli.add_argument(flag, action="store", type=str, help=description)
    return cli.parse_args()
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
39 |
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
40 |
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
41 class Map: |
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
42 |
7
07771982ef9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents:
0
diff
changeset
|
def __init__(self, bam_file, minquery=23, maxquery=29, mintarget=23,
             maxtarget=29, minscope=1, maxscope=19, output_h='',
             output_z=''):
    '''
    Open the alignment file, index its reads and write both signature
    tables.

    bam_file: path handed to pysam.AlignmentFile (opened in 'rb' mode).
    minquery/maxquery, mintarget/maxtarget, minscope/maxscope: inclusive
    integer bounds, stored as the ranges self.query_range,
    self.target_range and self.scope.
    output_h: path that receives the string returned by
    compute_signature_h().
    output_z: path that receives the string returned by
    compute_signature_pairs().

    Any exception raised while opening the files or computing the
    signatures propagates to the caller.
    '''
    self.bam_object = pysam.AlignmentFile(bam_file, 'rb')
    self.query_range = range(minquery, maxquery + 1)
    self.target_range = range(mintarget, maxtarget + 1)
    self.scope = range(minscope, maxscope + 1)
    # The context manager closes both output handles even when a
    # computation below raises; self.H and self.Z stay available as
    # attributes, exactly as before.
    with open(output_h, 'w') as self.H, open(output_z, 'w') as self.Z:
        self.chromosomes = dict(zip(self.bam_object.references,
                                    self.bam_object.lengths))
        self.map_dict = self.create_map(self.bam_object)
        self.query_positions = self.compute_query_positions()
        self.Z.write(self.compute_signature_pairs())
        self.H.write(self.compute_signature_h())
0
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
60 |
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
def create_map(self, bam_object):
    '''
    Returns a map_dictionary {(chromosome,read_position,polarity):
    [read_length, ...]}

    polarity is 'F' for forward reads, keyed by reference_start + 1, and
    'R' for reverse reads, keyed by reference_end. Lengths are taken from
    query_alignment_length. Every chromosome of self.chromosomes also
    gets an empty 'F' entry at position 1 and at its length.
    '''
    reads_by_site = defaultdict(list)
    # seed an empty entry at the first and last position of each chromosome
    for name, length in self.chromosomes.items():
        reads_by_site[(name, 1, 'F')] = []
        reads_by_site[(name, length, 'F')] = []
    for name in self.chromosomes:
        for aligned in bam_object.fetch(name):
            if aligned.is_reverse:
                site = (name, aligned.reference_end, 'R')
            else:
                site = (name, aligned.reference_start + 1, 'F')
            reads_by_site[site].append(aligned.query_alignment_length)
    return reads_by_site
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
80 |
7
07771982ef9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents:
0
diff
changeset
|
def compute_query_positions(self):
    ''' this method does not filter on read size, just forward reads
    that overlap reverse reads in the overlap range

    Returns a defaultdict(list) {chromosome: sorted forward coordinates}
    holding every forward key of self.map_dict for which a non-empty
    reverse entry exists at coord + i - 1 for at least one i in
    self.scope.
    '''
    map_dict = self.map_dict
    all_query_positions = defaultdict(list)
    for chrom, coord, pol in map_dict:
        if pol != 'F':
            continue
        # .get() rather than indexing: indexing the defaultdict would
        # insert an empty list for every probed reverse key, growing
        # map_dict (and changing its size while it is being iterated).
        if any(map_dict.get((chrom, coord + i - 1, 'R'))
               for i in self.scope):
            all_query_positions[chrom].append(coord)
    for chrom in all_query_positions:
        all_query_positions[chrom] = sorted(set(all_query_positions[chrom]))
    return all_query_positions
07771982ef9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents:
0
diff
changeset
|
97 |
07771982ef9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents:
0
diff
changeset
|
def countpairs(self, uppers, lowers):
    '''
    Count the pairs that can be formed between the read sizes in uppers
    and the read sizes in lowers.

    Sizes outside both self.query_range and self.target_range are
    ignored. Each upper size is matched with the first remaining lower
    size such that one of the two is a query size and the other a target
    size; a lower size is used at most once. The input lists are not
    modified.
    '''
    queries, targets = self.query_range, self.target_range

    def eligible(size):
        return size in queries or size in targets

    def compatible(up, down):
        return (up in queries and down in targets) or (
            up in targets and down in queries)

    # private copy, consumed as lower sizes get paired
    candidates = [size for size in lowers if eligible(size)]
    pair_count = 0
    for up in filter(eligible, uppers):
        for index, down in enumerate(candidates):
            if compatible(up, down):
                del candidates[index]
                pair_count += 1
                break
    return pair_count
07771982ef9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents:
0
diff
changeset
|
114 |
07771982ef9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents:
0
diff
changeset
|
def compute_signature_pairs(self):
    '''
    Count read pairs per chromosome and per overlap, and return the
    table formatted by self.stringify_table.

    For every coordinate in self.query_positions and every overlap in
    self.scope, the forward sizes at the coordinate are paired (see
    countpairs) with the reverse sizes at coord + overlap - 1. An extra
    'all_chromosomes' row holds, for each overlap, the sum over all
    chromosomes.
    '''
    scope = self.scope
    map_dict = self.map_dict
    frequency_table = defaultdict(dict)
    for chrom in self.chromosomes:
        for overlap in scope:
            frequency_table[chrom][overlap] = 0
    for chrom in self.query_positions:
        for coord in self.query_positions[chrom]:
            # forward reads do not depend on the overlap: look them up once.
            # .get() avoids inserting empty lists into the defaultdict for
            # reverse positions that carry no read.
            uppers = map_dict.get((chrom, coord, 'F'), [])
            for overlap in scope:
                lowers = map_dict.get((chrom, coord + overlap - 1, 'R'), [])
                frequency_table[chrom][overlap] += self.countpairs(uppers,
                                                                   lowers)
    # compute overlaps for all chromosomes merged
    for overlap in scope:
        merged = sum(frequency_table[chrom][overlap]
                     for chrom in frequency_table
                     if chrom != 'all_chromosomes')
        frequency_table['all_chromosomes'][overlap] = merged
    return self.stringify_table(frequency_table)
07771982ef9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents:
0
diff
changeset
|
136 |
07771982ef9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents:
0
diff
changeset
|
def signature_tables(self):
    """Build per-chromosome read-count tables for query and target sizes.

    Walks ``self.map_dict``, whose keys are indexed as
    ``(chromosome, coordinate, strand)`` and whose values are iterables of
    read sizes. Every size found in ``self.query_range`` adds one to the
    query table, every size found in ``self.target_range`` adds one to the
    target table (a size present in both ranges is counted in both).

    Coordinates are stored signed: unchanged when the strand is ``'F'``,
    negated otherwise.

    Returns:
        A ``(Query_table, Target_table)`` tuple of
        ``defaultdict(dict)`` objects mapping
        ``chromosome -> {signed coordinate: number of reads}``.
    """
    Query_table = defaultdict(dict)
    Target_table = defaultdict(dict)
    # Each table is paired with the size range that feeds it so that both
    # are filled by the same piece of code below.
    selections = (
        (self.query_range, Query_table),
        (self.target_range, Target_table),
    )
    for key, read_sizes in self.map_dict.items():
        chrom, position, strand = key[0], key[1], key[2]
        signed_position = position if strand == 'F' else -position
        for read_size in read_sizes:
            for wanted_sizes, table in selections:
                if read_size not in wanted_sizes:
                    continue
                # The chromosome entry is only created once a read
                # actually qualifies, so unused chromosomes stay absent.
                counts = table[chrom]
                counts[signed_position] = counts.get(signed_position, 0) + 1
    return Query_table, Target_table
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
156 |
7
07771982ef9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents:
0
diff
changeset
|
def compute_signature_h(self):
    """Compute the overlap-probability signature and return it as text.

    For every chromosome and every query coordinate, the target table is
    probed at ``-coord - overlap + 1`` for each ``overlap`` in
    ``self.scope``. The product "query reads x target reads" at a given
    overlap is divided by the number of target reads found over the whole
    scope for that coordinate, then by the total number of query reads on
    the chromosome, and accumulated per ``(chromosome, overlap)``.

    An ``'all_chromosomes'`` entry is then added: the per-chromosome
    values weighted by ``self.bam_object.count(chrom)``.

    Returns:
        Whatever ``self.stringify_table`` returns for the resulting
        ``{chromosome: {overlap: value}}`` table.
    """
    scope = self.scope
    Query_table, Target_table = self.signature_tables()
    frequency_table = defaultdict(dict)
    for chrom in self.chromosomes:
        for overlap in scope:
            frequency_table[chrom][overlap] = 0
    for chrom in Query_table:
        queries = Query_table[chrom]
        targets = Target_table[chrom]
        Total_Query_Numb = sum(queries.values())
        for coord, query_count in queries.items():
            # Look each candidate target position up once; the same values
            # feed both the numerator and the normalisation below.
            hits = [targets.get(-coord - overlap + 1, 0)
                    for overlap in scope]
            number_of_targets = sum(hits)
            if not number_of_targets or not Total_Query_Numb:
                # Nothing to pair with: the division would be by zero, so
                # this coordinate contributes nothing.
                continue
            for overlap, hit in zip(scope, hits):
                frequency_table[chrom][overlap] += \
                    query_count * hit / number_of_targets \
                    / float(Total_Query_Numb)
    # compute overlap probabilities for all chromosomes merged
    # Counting reads is a query against the alignment file, so do it once
    # per chromosome instead of once per (chromosome, overlap) pair.
    reads_per_chrom = {chrom: self.bam_object.count(chrom)
                       for chrom in frequency_table}
    # NOTE(review): the denominator adds each chromosome's read count once
    # per overlap, i.e. it is len(scope) times the number of aligned
    # reads. This is kept as-is to leave the reported values unchanged;
    # confirm whether the extra factor is intended.
    total_aligned_reads = sum(
        reads_per_chrom[chrom] * len(frequency_table[chrom])
        for chrom in frequency_table)
    general_frequency_table = {overlap: 0 for overlap in scope}
    if total_aligned_reads:
        for chrom, per_overlap in frequency_table.items():
            for overlap, frequency in per_overlap.items():
                general_frequency_table[overlap] += \
                    frequency / total_aligned_reads \
                    * reads_per_chrom[chrom]
    for overlap, frequency in general_frequency_table.items():
        frequency_table['all_chromosomes'][overlap] = frequency
    return self.stringify_table(frequency_table)
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
201 |
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
def stringify_table(self, frequency_table):
    '''
    Compute a z-score for every value of the table and render the table
    as a writable string.

    frequency_table maps chromosome -> {overlap: value}. For each
    chromosome the mean and standard deviation (numpy.mean / numpy.std)
    of its values are used to derive the z-score of each value; when the
    standard deviation is 0 the z-score is reported as 0.

    Returns one tab-separated line per (chromosome, overlap), in sorted
    order: chromosome, overlap, value, z-score.
    '''
    row_template = '%s\t%s\t%s\t%s\n'
    rows = []
    for chrom in sorted(frequency_table):
        per_overlap = frequency_table[chrom]
        values = list(per_overlap.values())
        z_mean = numpy.mean(values)
        z_std = numpy.std(values)
        no_spread = z_std == 0
        for overlap in sorted(per_overlap):
            value = per_overlap[overlap]
            # Without spread a z-score is undefined; report a plain 0.
            z_score = 0 if no_spread else (value - z_mean)/z_std
            rows.append(row_template % (
                chrom, str(overlap), str(value), str(z_score)))
    return ''.join(rows)
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
225 |
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
226 |
a35e6f9c1d34
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6719543c5017d581ae012b864d7c9088f0767fc8
artbio
parents:
diff
changeset
|
if __name__ == "__main__":
    # Script entry point: collect the options returned by Parser() and
    # pass them on to Map.
    # NOTE(review): Parser and Map are defined outside this excerpt;
    # presumably Map does the computing and writes output_h / output_z
    # itself, since its result is not used further here - confirm.
    args = Parser()
    mapobj = Map(args.input, args.minquery, args.maxquery, args.mintarget,
                 args.maxtarget, args.minscope, args.maxscope, args.output_h,
                 args.output_z)