Mercurial > repos > artbio > small_rna_signatures
annotate overlapping_reads.py @ 9:59ee49bfb7bb draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 1298e352c2b9b1c40c6be95fb2625fc803f94d64
author | artbio |
---|---|
date | Fri, 26 Apr 2019 09:01:17 -0400 |
parents | 07771982ef9b |
children | 8d3ca9652a5b |
rev | line source |
---|---|
1
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
1 import argparse |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
2 from collections import defaultdict |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
3 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
4 import pysam |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
5 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
6 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
def Parser():
    """Declare the command-line options and parse ``sys.argv``.

    Options, in declaration order: ``--input`` (string, bam alignment
    file), ``--minquery``, ``--maxquery``, ``--mintarget``, ``--maxtarget``
    and ``--overlap`` (integers, expressed in nt) and ``--output`` (string,
    help text "Pairable sequences").

    Returns the ``argparse.Namespace`` built by ``parse_args()``; an option
    that is absent from the command line is ``None`` in the namespace.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--input', action="store", type=str, help="bam alignment file")
    # The five integer options differ only by flag and help text, so they
    # are registered from a table (same order as before).
    integer_options = (
        ('--minquery',
         "Minimum readsize of query reads (nt) - must be an integer"),
        ('--maxquery',
         "Maximum readsize of query reads (nt) - must be an integer"),
        ('--mintarget',
         "Minimum readsize of target reads (nt) - must be an integer"),
        ('--maxtarget',
         "Maximum readsize of target reads (nt) - must be an integer"),
        ('--overlap',
         "Overlap analyzed (nt) - must be an integer"),
    )
    for flag, description in integer_options:
        cli.add_argument(flag, type=int, help=description)
    cli.add_argument(
        '--output', action="store", type=str, help="Pairable sequences")
    return cli.parse_args()
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
31 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
32 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
33 class Map: |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
34 |
5
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
def __init__(self, bam_file, output, minquery=23, maxquery=29,
             mintarget=23, maxtarget=29, overlap=10):
    """Open the BAM file, build the lookup tables and run the pairing.

    bam_file -- path handed to ``pysam.AlignmentFile`` (opened as 'rb')
    output -- stored as ``self.output``
    minquery, maxquery -- inclusive size bounds stored as a ``range`` in
        ``self.query_range``
    mintarget, maxtarget -- inclusive size bounds stored as a ``range`` in
        ``self.target_range``
    overlap -- stored as ``self.overlap`` and passed to
        ``query_positions``

    The construction ends with a call to ``self.pairing()``.
    """
    bam = pysam.AlignmentFile(bam_file, 'rb')
    self.bam_object = bam
    self.output = output
    # "+ 1" makes the upper size bound inclusive
    self.query_range = range(minquery, maxquery + 1)
    self.target_range = range(mintarget, maxtarget + 1)
    self.overlap = overlap
    # reference name -> reference length
    self.chromosomes = {
        name: length for name, length in zip(bam.references, bam.lengths)}
    # Order matters below: query_positions() reads self.alignement_dic and
    # pairing() is called last, once every table has been assigned.
    self.alignement_dic = self.index_alignments(bam)
    self.all_query_positions = self.query_positions(
        bam, overlap=self.overlap)
    self.readdic = self.make_readdic(bam)
    self.pairing()
1
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
49 |
3
4d9682bd3a6b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents:
1
diff
changeset
|
def make_readdic(self, bam_object):
    """Count how many fetched alignments carry each read sequence.

    bam_object -- object whose ``fetch()`` yields alignments exposing a
        ``query_sequence`` attribute

    Returns a ``defaultdict(int)`` mapping sequence -> number of
    alignments having that sequence.
    """
    occurrences = defaultdict(int)
    for alignment in bam_object.fetch():
        sequence = alignment.query_sequence
        occurrences[sequence] = occurrences[sequence] + 1
    return occurrences
4d9682bd3a6b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents:
1
diff
changeset
|
55 |
5
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
def index_alignments(self, bam_object):
    '''
    Group read sequences by genomic anchor:
    dic[(chrom, pos, polarity)]: {readseq1, readseq2, ...}

    pos is reference_start with polarity 'F' for forward reads, and
    reference_end - 1 with polarity 'R' for reverse reads.
    The returned mapping is a defaultdict(list) whose stored values are
    sets of sequences.
    '''
    grouped = {}
    for chrom in self.chromosomes:
        for alignment in bam_object.fetch(chrom):
            if alignment.is_reverse:
                key = (chrom, alignment.reference_end - 1, 'R')
            else:
                key = (chrom, alignment.reference_start, 'F')
            grouped.setdefault(key, set()).add(alignment.query_sequence)
    # Keep the list default factory: a lookup of an unknown key still
    # yields (and stores) an empty list.
    return defaultdict(list, grouped)
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
74 |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
def query_positions(self, bam_object, overlap):
    """List, per chromosome, the forward positions having a reverse partner.

    A key ``(chrom, coord, 'F')`` of ``self.alignement_dic`` is retained
    when the key ``(chrom, coord + overlap - 1, 'R')`` holds at least one
    sequence.

    bam_object -- not used by this method; kept in the signature so that
        existing callers keep working
    overlap -- overlap (nt) used to compute the partner coordinate

    Returns a ``defaultdict(list)`` mapping chromosome -> sorted list of
    unique retained coordinates. Chromosomes without any retained
    position are absent (a lookup on them yields an empty list).
    """
    alignments = self.alignement_dic
    all_query_positions = defaultdict(list)
    for chrom, coord, pol in alignments:
        if pol != 'F':
            continue
        # Use .get() rather than indexing: alignement_dic is a defaultdict,
        # so indexing a missing partner key would insert it and change the
        # dict size while it is being iterated (RuntimeError on Python 3).
        if alignments.get((chrom, coord + overlap - 1, 'R')):
            all_query_positions[chrom].append(coord)
    for chrom in all_query_positions:
        all_query_positions[chrom] = sorted(set(all_query_positions[chrom]))
    return all_query_positions
1
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
87 |
6
4da23f009c9e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
artbio
parents:
5
diff
changeset
|
def countpairs(self, uppers, lowers):
    """Count the pairs formed between forward and reverse read sequences.

    uppers -- iterable of sequences from the forward position
    lowers -- iterable of sequences from the reverse position

    Sequences whose length is in neither ``self.query_range`` nor
    ``self.target_range`` are dropped; every remaining sequence is
    repeated ``self.readdic[sequence]`` times. Each forward read is then
    matched with the first still-unused reverse read such that one of the
    two has a query size and the other a target size; a reverse read is
    consumed by at most one forward read.

    Returns the number of pairs (int). Nothing is written to stdout.
    """
    query_range = self.query_range
    target_range = self.target_range
    read_counts = self.readdic

    def _expand(sequences):
        # size-filter the sequences, then repeat each by its read count
        expanded = []
        for seq in sequences:
            if len(seq) in query_range or len(seq) in target_range:
                expanded.extend([seq] * read_counts[seq])
        return expanded

    def _pairable(upread, downread):
        # one read must have a query size, the other a target size
        up_size, down_size = len(upread), len(downread)
        return ((up_size in query_range and down_size in target_range)
                or (up_size in target_range and down_size in query_range))

    uppers = _expand(uppers)
    lowers = _expand(lowers)
    paired = 0
    for upread in uppers:
        for index, downread in enumerate(lowers):
            if _pairable(upread, downread):
                paired += 1
                # consume this reverse read; safe because the inner loop
                # stops right after the deletion
                del lowers[index]
                break
    return paired
4da23f009c9e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
artbio
parents:
5
diff
changeset
|
117 |
5
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
118 def pairing(self): |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
119 F = open(self.output, 'w') |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
120 query_range = self.query_range |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
121 target_range = self.target_range |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
122 overlap = self.overlap |
3
4d9682bd3a6b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents:
1
diff
changeset
|
123 stringresult = [] |
5
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
124 header_template = '>%s|coord=%s|strand %s|size=%s|nreads=%s\n%s\n' |
6
4da23f009c9e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
artbio
parents:
5
diff
changeset
|
125 total_pairs = 0 |
7
07771982ef9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents:
6
diff
changeset
|
126 print('Chromosome\tNbre of pairs') |
3
4d9682bd3a6b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents:
1
diff
changeset
|
127 for chrom in sorted(self.chromosomes): |
6
4da23f009c9e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
artbio
parents:
5
diff
changeset
|
128 number_pairs = 0 |
5
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
129 for pos in self.all_query_positions[chrom]: |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
130 stringbuffer = [] |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
131 uppers = self.alignement_dic[chrom, pos, 'F'] |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
132 lowers = self.alignement_dic[chrom, pos+overlap-1, 'R'] |
6
4da23f009c9e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
artbio
parents:
5
diff
changeset
|
133 number_pairs += self.countpairs(uppers, lowers) |
4da23f009c9e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
artbio
parents:
5
diff
changeset
|
134 total_pairs += number_pairs |
5
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
135 if uppers and lowers: |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
136 for upread in uppers: |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
137 for downread in lowers: |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
138 if (len(upread) in query_range and len(downread) in |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
139 target_range) or (len(upread) in target_range |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
140 and len(downread) in |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
141 query_range): |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
142 stringbuffer.append( |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
143 header_template % |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
144 (chrom, pos+1, '+', len(upread), |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
145 self.readdic[upread], upread)) |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
146 stringbuffer.append( |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
147 header_template % |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
148 (chrom, pos+overlap-len(downread)+1, '-', |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
149 len(downread), self.readdic[downread], |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
150 self.revcomp(downread))) |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
151 stringresult.extend(sorted(set(stringbuffer))) |
6
4da23f009c9e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
artbio
parents:
5
diff
changeset
|
152 print('%s\t%s' % (chrom, number_pairs)) |
7
07771982ef9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents:
6
diff
changeset
|
153 print('Total nbre of pairs that can be simultaneously formed\t%s' |
07771982ef9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents:
6
diff
changeset
|
154 % total_pairs) |
3
4d9682bd3a6b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents:
1
diff
changeset
|
155 F.write(''.join(stringresult)) |
1
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
156 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
def revcomp(self, sequence):
    """Return the reverse complement of a DNA sequence.

    The sequence is walked from its last symbol to its first and each
    symbol is replaced by its complement. Only the uppercase symbols
    A, T, G, C and N are known to the lookup table; any other character
    raises KeyError.
    """
    complement = {"A": "T", "T": "A", "G": "C", "C": "G", "N": "N"}
    return "".join(complement[base] for base in reversed(sequence))
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
161 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
162 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
163 if __name__ == "__main__": |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
164 args = Parser() |
5
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
165 mapobj = Map(args.input, args.output, args.minquery, args.maxquery, |
a7fd04208764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents:
4
diff
changeset
|
166 args.mintarget, args.maxtarget, args.overlap) |