Mercurial > repos > artbio > small_rna_signatures
annotate overlapping_reads.py @ 3:4d9682bd3a6b draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
author | artbio |
---|---|
date | Sat, 02 Sep 2017 06:35:15 -0400 |
parents | 6f1378738798 |
children | 20d28cfdeefe |
rev | line source |
---|---|
1
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
1 import argparse |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
2 from collections import defaultdict |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
3 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
4 import pysam |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
5 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
6 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
def Parser(argv=None):
    """Parse the command-line options of the script.

    argv -- optional list of argument strings. When None (the default),
            argparse reads the arguments from sys.argv[1:], which is the
            behaviour of the original zero-argument call.

    Returns the argparse.Namespace holding input, minquery, maxquery,
    mintarget, maxtarget, overlap and output. Options that are not given
    on the command line are left at None.
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument('--input', help="bam alignment file")
    the_parser.add_argument(
        '--minquery', type=int,
        help="Minimum readsize of query reads (nt) - must be an integer")
    the_parser.add_argument(
        '--maxquery', type=int,
        help="Maximum readsize of query reads (nt) - must be an integer")
    the_parser.add_argument(
        '--mintarget', type=int,
        help="Minimum readsize of target reads (nt) - must be an integer")
    the_parser.add_argument(
        '--maxtarget', type=int,
        help="Maximum readsize of target reads (nt) - must be an integer")
    the_parser.add_argument(
        '--overlap', type=int,
        help="Overlap analyzed (nt) - must be an integer")
    the_parser.add_argument('--output', help="Pairable sequences")
    return the_parser.parse_args(argv)
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
31 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
32 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
33 class Map: |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
34 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
def __init__(self, bam_file):
    """Open `bam_file` with pysam and precompute the lookup tables.

    Sets bam_object (the opened alignment file), chromosomes (reference
    name -> reference length), all_query_positions (see query_positions)
    and readdic (see make_readdic).
    """
    alignments = pysam.AlignmentFile(bam_file, 'rb')
    self.bam_object = alignments
    # Must be set before query_positions(), which iterates over it.
    self.chromosomes = {
        name: length
        for name, length in zip(alignments.references, alignments.lengths)
    }
    self.all_query_positions = self.query_positions(alignments)
    self.readdic = self.make_readdic(alignments)
1
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
41 |
3
4d9682bd3a6b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents:
1
diff
changeset
|
def make_readdic(self, bam_object):
    """Count the alignments returned by bam_object.fetch() per sequence.

    Returns a defaultdict(int) mapping each read's query_sequence to the
    number of fetched alignments carrying that sequence.
    """
    counts = defaultdict(int)
    for alignment in bam_object.fetch():
        sequence = alignment.query_sequence
        counts[sequence] = counts[sequence] + 1
    return counts
4d9682bd3a6b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents:
1
diff
changeset
|
47 |
4d9682bd3a6b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents:
1
diff
changeset
|
def query_positions(self, bam_object):
    """Collect, per chromosome, the sorted distinct read anchor positions.

    For every read fetched on a chromosome of self.chromosomes the anchor
    is the first entry of get_reference_positions(full_length=True) for a
    forward read and the last entry for a reverse read.

    Returns a defaultdict(list) with one key per chromosome; chromosomes
    without reads map to an empty list.
    """
    positions_by_chrom = defaultdict(list)
    for chrom in self.chromosomes:
        anchors = set()
        for read in bam_object.fetch(chrom):
            ref_positions = read.get_reference_positions(full_length=True)
            anchor_index = -1 if read.is_reverse else 0
            anchors.add(ref_positions[anchor_index])
        positions_by_chrom[chrom] = sorted(anchors)
    return positions_by_chrom
1
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
61 |
3
4d9682bd3a6b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents:
1
diff
changeset
|
def direct_pairing(self, minquery, maxquery, mintarget, maxtarget,
                   file, overlap=10):
    """Write the reads that pair with `overlap` nt of overlap as FASTA.

    minquery, maxquery   -- inclusive bounds on query_alignment_length of
                            query reads
    mintarget, maxtarget -- inclusive bounds on query_alignment_length of
                            target reads
    file                 -- path of the output file (overwritten)
    overlap              -- overlap in nt between query and target

    For every chromosome (sorted by name) and every position `pos` in
    self.all_query_positions[chrom], two searches are run on the reads
    fetched for the window pos .. pos+overlap-1:
      1. forward query reads whose first position is `pos`, paired with
         reverse target reads whose last position equals the query's
         position number `overlap`;
      2. reverse query reads whose last position is pos+overlap-1, paired
         with forward target reads whose first position is `pos`.
    Both members of each pair are emitted as
    '>chrom|first position + 1|F or R|aligned length|n=count' followed by
    the sequence; reverse reads are printed through self.revcomp(), and
    count comes from self.readdic. Duplicate records are written once, in
    order of first appearance.
    """
    query_range = range(minquery, maxquery + 1)
    target_range = range(mintarget, maxtarget + 1)
    records = []
    seen = set()

    def add_record(chrom, read, strand):
        # Build one FASTA record and keep it only the first time it occurs.
        sequence = read.query_sequence
        if strand == 'R':
            shown = self.revcomp(sequence)
        else:
            shown = sequence
        record = '>%s|%s|%s|%s|n=%s\n%s\n' % (
            chrom,
            read.get_reference_positions(full_length=True)[0] + 1,
            strand, read.query_alignment_length,
            self.readdic[sequence], shown)
        if record not in seen:
            seen.add(record)
            records.append(record)

    for chrom in sorted(self.chromosomes):
        for pos in self.all_query_positions[chrom]:
            window_end = pos + overlap - 1
            # fetch() hands back a one-shot iterator: materialise it once
            # so that every query read can scan all candidate targets
            # (the nested loops previously exhausted the target iterator
            # after the first matching query read).
            window_reads = list(self.bam_object.fetch(
                chrom, start=pos, end=window_end))
            forward_reads = [r for r in window_reads if not r.is_reverse]
            reverse_reads = [r for r in window_reads if r.is_reverse]
            # 1
            for queryread in forward_reads:
                query_positions = queryread.get_reference_positions(
                    full_length=True)
                if query_positions[0] != pos:
                    continue
                if queryread.query_alignment_length not in query_range:
                    continue
                if len(query_positions) < overlap:
                    # Read shorter than the overlap: it cannot pair, and
                    # indexing [overlap - 1] would raise IndexError.
                    continue
                paired_end = query_positions[overlap - 1]
                for targetread in reverse_reads:
                    target_positions = targetread.get_reference_positions(
                        full_length=True)
                    if (target_positions[-1] == paired_end and
                            targetread.query_alignment_length
                            in target_range):
                        add_record(chrom, queryread, 'F')
                        add_record(chrom, targetread, 'R')
            # 2
            for queryread in reverse_reads:
                query_positions = queryread.get_reference_positions(
                    full_length=True)
                if query_positions[-1] != window_end:
                    continue
                if queryread.query_alignment_length not in query_range:
                    continue
                for targetread in forward_reads:
                    target_positions = targetread.get_reference_positions(
                        full_length=True)
                    if (target_positions[0] == pos and
                            targetread.query_alignment_length
                            in target_range):
                        add_record(chrom, queryread, 'R')
                        add_record(chrom, targetread, 'F')
    # The context manager guarantees the output is flushed and closed.
    with open(file, 'w') as handle:
        handle.write(''.join(records))
1
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
139 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
def revcomp(self, sequence):
    """Return the reverse complement of a nucleotide sequence.

    The complement table covers A/C/G/T/N plus the IUPAC ambiguity
    codes, in both upper and lower case; the case of each base is
    preserved so soft-masked input stays soft-masked.

    Args:
        sequence: the nucleotide string to reverse-complement.

    Returns:
        A new string holding the complemented bases in reverse order
        (an empty string for empty input).

    Raises:
        KeyError: if ``sequence`` contains a character that has no
            entry in the complement table.
    """
    upper_pairs = {
        "A": "T", "T": "A", "G": "C", "C": "G", "N": "N",
        # IUPAC ambiguity codes and their complements.
        "R": "Y", "Y": "R", "S": "S", "W": "W", "K": "M", "M": "K",
        "B": "V", "V": "B", "D": "H", "H": "D",
    }
    antidict = dict(upper_pairs)
    # Mirror every pair in lower case rather than upper-casing the
    # input, so the caller gets back the same case it passed in.
    for base, partner in upper_pairs.items():
        antidict[base.lower()] = partner.lower()
    return "".join([antidict[base] for base in sequence[::-1]])
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
144 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
145 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
def main(input, minquery, maxquery, mintarget, maxtarget, output, overlap=10):
    """Build a Map from ``input`` and run its direct pairing step.

    All remaining arguments are forwarded unchanged, in order, to
    ``Map.direct_pairing``.
    NOTE(review): the min/max values look like query and target read
    size bounds and ``output`` like the destination file - confirm
    against ``Map.direct_pairing``.
    """
    pairing_args = (minquery, maxquery, mintarget, maxtarget, output, overlap)
    Map(input).direct_pairing(*pairing_args)
1
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
150 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
151 |
6f1378738798
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff
changeset
|
if __name__ == "__main__":
    # Script entry point: read the command-line options via Parser(),
    # then hand each one to main() by name.
    args = Parser()
    main(
        input=args.input,
        minquery=args.minquery,
        maxquery=args.maxquery,
        mintarget=args.mintarget,
        maxtarget=args.maxtarget,
        output=args.output,
        overlap=args.overlap,
    )