annotate overlapping_reads.py @ 11:8d3ca9652a5b draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 8691f2575cacd71c971338658198a1324e5f9370"
author artbio
date Sat, 23 Oct 2021 22:55:19 +0000
parents 07771982ef9b
children 124f404b0fe7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
1 import argparse
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
2 from collections import defaultdict
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
3
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
4 import pysam
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
5
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
6
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
def Parser():
    '''
    Parse the command line of the script and return the resulting namespace.

    Options: --input (BAM alignment file), --minquery / --maxquery and
    --mintarget / --maxtarget (integer read-size bounds), --overlap
    (integer overlap, in nt) and --output (file receiving the pairable
    sequences). No option is declared as required and none has a default,
    so any option missing from the command line is None in the result.
    '''
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--input', action="store", type=str, help="bam alignment file")
    # The five integer options only differ by flag and help text.
    integer_options = (
        ('--minquery',
         "Minimum readsize of query reads (nt) - must be an integer"),
        ('--maxquery',
         "Maximum readsize of query reads (nt) - must be an integer"),
        ('--mintarget',
         "Minimum readsize of target reads (nt) - must be an integer"),
        ('--maxtarget',
         "Maximum readsize of target reads (nt) - must be an integer"),
        ('--overlap',
         "Overlap analyzed (nt) - must be an integer"),
    )
    for flag, description in integer_options:
        cli.add_argument(flag, type=int, help=description)
    cli.add_argument(
        '--output', action="store", type=str, help="Pairable sequences")
    return cli.parse_args()
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
31
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
32
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
33 class Map:
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
34
5
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
def __init__(self, bam_file, output, minquery=23, maxquery=29,
             mintarget=23, maxtarget=29, overlap=10):
    '''
    Open the BAM file, build the lookup tables and run the pairing.

    bam_file: path of the BAM file, opened with pysam in 'rb' mode
    output: path stored in self.output and used by pairing()
    minquery, maxquery: inclusive size bounds of the query reads
    mintarget, maxtarget: inclusive size bounds of the target reads
    overlap: overlap (nt) between a forward read and its reverse partner

    The order of the statements matters: query_positions() reads
    self.alignement_dic, and pairing() reads every table built here.
    '''
    bam = pysam.AlignmentFile(bam_file, 'rb')
    self.bam_object = bam
    self.output = output
    # maxquery / maxtarget are inclusive, hence the + 1
    self.query_range = range(minquery, maxquery + 1)
    self.target_range = range(mintarget, maxtarget + 1)
    self.overlap = overlap
    # reference name -> reference length
    self.chromosomes = {
        name: length for name, length in zip(bam.references, bam.lengths)}
    self.alignement_dic = self.index_alignments(bam)
    self.all_query_positions = self.query_positions(bam, overlap=overlap)
    self.readdic = self.make_readdic(bam)
    # The whole analysis is run from the constructor.
    self.pairing()
1
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
49
3
4d9682bd3a6b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents: 1
diff changeset
def make_readdic(self, bam_object):
    '''
    dic[readseq]: number of records returned by bam_object.fetch()
    that carry this query sequence
    Returned as a defaultdict(int), so an unknown sequence counts 0.
    '''
    counts = defaultdict(int)
    for sequence in (aln.query_sequence for aln in bam_object.fetch()):
        counts[sequence] = counts[sequence] + 1
    return counts
4d9682bd3a6b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents: 1
diff changeset
55
5
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
def index_alignments(self, bam_object):
    '''
    dic[(chrom, pos, polarity)]: {readseq1, readseq2, ...}
    polarity is 'F' (pos = reference_start) or 'R' (pos = reference_end - 1)
    Sequences are first collected in lists, then each list is turned into
    a set. The returned object stays a defaultdict(list), so looking up a
    missing key yields (and stores) an empty list.
    '''
    index = defaultdict(list)
    for chrom in self.chromosomes:
        for aln in bam_object.fetch(chrom):
            # NOTE(review): reference_end - 1 presumably designates the last
            # aligned base (pysam documents reference_end as exclusive)
            key = ((chrom, aln.reference_end - 1, 'R') if aln.is_reverse
                   else (chrom, aln.reference_start, 'F'))
            index[key].append(aln.query_sequence)
    # Only existing keys are re-assigned, the dict size does not change.
    for key, sequences in index.items():
        index[key] = set(sequences)
    return index
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
74
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
def query_positions(self, bam_object, overlap):
    '''
    dic[chrom]: sorted list of the forward ('F') coordinates for which
    self.alignement_dic holds at least one reverse ('R') sequence at
    coordinate + overlap - 1 on the same chromosome

    bam_object: not used, kept so that existing callers keep working
    overlap: overlap (nt) between a forward read and its reverse partner

    The index is probed with .get() instead of [] so that a defaultdict
    index is not filled with one empty entry per unpaired forward
    position, and so that a plain dict index works as well.
    '''
    index = self.alignement_dic
    positions = defaultdict(list)
    for chrom, coord, pol in index:
        if pol != 'F':
            continue
        # A missing key gives None and an empty collection is falsy:
        # both mean "no reverse partner here".
        if index.get((chrom, coord + overlap - 1, 'R')):
            positions[chrom].append(coord)
    # Only existing keys are re-assigned, the dict size does not change.
    for chrom in positions:
        positions[chrom] = sorted(set(positions[chrom]))
    return positions
1
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
87
6
4da23f009c9e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
artbio
parents: 5
diff changeset
def countpairs(self, uppers, lowers):
    '''
    Return the number of read pairs formed between uppers and lowers.

    uppers, lowers: iterables of read sequences; they are not modified.
    Sequences whose length is neither in self.query_range nor in
    self.target_range are dropped, then each remaining sequence is
    repeated self.readdic[sequence] times.
    Pairing is greedy: each upper read takes the first remaining lower
    read such that one is of query size and the other of target size;
    a lower read is consumed by at most one upper read.

    Nothing is printed: the former debugging dumps of the sequence lists
    went to stdout, where pairing() prints its report.
    '''
    query_range = self.query_range
    target_range = self.target_range
    readdic = self.readdic

    def expand(sequences):
        # keep sequences of relevant size, one copy per counted read
        expanded = []
        for seq in sequences:
            if len(seq) in query_range or len(seq) in target_range:
                expanded.extend([seq] * readdic[seq])
        return expanded

    uppers = expand(uppers)
    lowers = expand(lowers)
    paired = 0
    for upread in uppers:
        up_is_query = len(upread) in query_range
        up_is_target = len(upread) in target_range
        for rank, downread in enumerate(lowers):
            if ((up_is_query and len(downread) in target_range)
                    or (up_is_target and len(downread) in query_range)):
                paired += 1
                # Deleting while iterating is safe: the loop stops here.
                del lowers[rank]
                break
    return paired
4da23f009c9e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
artbio
parents: 5
diff changeset
117
5
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
118 def pairing(self):
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
119 F = open(self.output, 'w')
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
120 query_range = self.query_range
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
121 target_range = self.target_range
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
122 overlap = self.overlap
3
4d9682bd3a6b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents: 1
diff changeset
123 stringresult = []
5
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
124 header_template = '>%s|coord=%s|strand %s|size=%s|nreads=%s\n%s\n'
6
4da23f009c9e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
artbio
parents: 5
diff changeset
125 total_pairs = 0
7
07771982ef9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents: 6
diff changeset
126 print('Chromosome\tNbre of pairs')
3
4d9682bd3a6b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents: 1
diff changeset
127 for chrom in sorted(self.chromosomes):
6
4da23f009c9e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
artbio
parents: 5
diff changeset
128 number_pairs = 0
5
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
129 for pos in self.all_query_positions[chrom]:
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
130 stringbuffer = []
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
131 uppers = self.alignement_dic[chrom, pos, 'F']
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
132 lowers = self.alignement_dic[chrom, pos+overlap-1, 'R']
6
4da23f009c9e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
artbio
parents: 5
diff changeset
133 number_pairs += self.countpairs(uppers, lowers)
4da23f009c9e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
artbio
parents: 5
diff changeset
134 total_pairs += number_pairs
5
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
135 if uppers and lowers:
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
136 for upread in uppers:
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
137 for downread in lowers:
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
138 if (len(upread) in query_range and len(downread) in
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
139 target_range) or (len(upread) in target_range
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
140 and len(downread) in
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
141 query_range):
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
142 stringbuffer.append(
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
143 header_template %
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
144 (chrom, pos+1, '+', len(upread),
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
145 self.readdic[upread], upread))
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
146 stringbuffer.append(
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
147 header_template %
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
148 (chrom, pos+overlap-len(downread)+1, '-',
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
149 len(downread), self.readdic[downread],
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
150 self.revcomp(downread)))
a7fd04208764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
artbio
parents: 4
diff changeset
151 stringresult.extend(sorted(set(stringbuffer)))
6
4da23f009c9e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
artbio
parents: 5
diff changeset
152 print('%s\t%s' % (chrom, number_pairs))
7
07771982ef9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents: 6
diff changeset
153 print('Total nbre of pairs that can be simultaneously formed\t%s'
07771982ef9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 7276b6b73aef7af4058ad2c1e34c4557e9cccbe0
artbio
parents: 6
diff changeset
154 % total_pairs)
3
4d9682bd3a6b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents: 1
diff changeset
155 F.write(''.join(stringresult))
1
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
156
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
def revcomp(self, sequence):
    """Return the reverse complement of a nucleotide sequence.

    Args:
        sequence: DNA sequence as a ``str``. Upper- and lowercase
            A/C/G/T/N and the IUPAC ambiguity codes (R, Y, S, W, K, M,
            B, D, H, V) are complemented, with case preserved.

    Returns:
        The complemented sequence, read in the opposite direction.
        Characters without a defined complement are kept unchanged
        rather than raising ``KeyError``.
    """
    # One translation table covers both cases; str.translate leaves any
    # character that is not in the table untouched.
    bases = "ACGTRYSWKMBDHVN"
    partners = "TGCAYRSWMKVHDBN"
    table = str.maketrans(bases + bases.lower(),
                          partners + partners.lower())
    return sequence.translate(table)[::-1]
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
161
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
162
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: read the command-line options via Parser(),
    # then pass them positionally to the Map constructor.
    args = Parser()
    mapobj = Map(
        args.input,
        args.output,
        args.minquery,
        args.maxquery,
        args.mintarget,
        args.maxtarget,
        args.overlap,
    )