Mercurial > repos > cpt > cpt_prophage_relatedness
annotate prophage_relatedness.py @ 0:7a23dda2e932 draft
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
| author | cpt | 
|---|---|
| date | Thu, 08 Aug 2024 03:09:32 +0000 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 
0
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
2 import argparse | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
3 from math import floor | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
4 from Bio.Blast import NCBIXML | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
5 import logging | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
6 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
7 logging.basicConfig(level=logging.DEBUG) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
8 log = logging.getLogger() | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
9 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
10 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
def parseXML(blastxml, outFile):  # Modified from intron_detection
    """Collect the HSPs of a BLAST XML report, grouped by alignment.

    For every record yielded by ``NCBIXML.parse(blastxml)`` a two-line
    summary (header row, then query ID / query length / number of
    alignments, followed by a blank line) is written to ``outFile``.

    Returns a list with one entry per alignment, in the order encountered
    across all records; each entry is a list holding one dictionary per HSP
    of that alignment.
    """
    collected = []
    for iter_num, record in enumerate(NCBIXML.parse(blastxml), 1):
        outFile.write("Query ID\tQuery Length\tTotal Number of Hits\n")
        summary_fields = (
            record.query_id,
            record.query_length,
            len(record.alignments),
        )
        outFile.write("%s\t%d\t%d\n\n" % summary_fields)

        # align_num is the 1-based position of the alignment within its record.
        for align_num, alignment in enumerate(record.alignments, 1):
            accession = str(alignment.accession)
            hsp_entries = []
            for hsp in alignment.hsps:
                query_span = hsp.query_end - hsp.query_start
                identity_fraction = float(hsp.identities - 1) / query_span
                # Keep only the text before the first space of the query name.
                short_name = record.query.partition(" ")[0]
                entry = {
                    "gi_nos": accession,
                    "sbjct_length": alignment.length,
                    "query_length": record.query_length,
                    "sbjct_range": (hsp.sbjct_start, hsp.sbjct_end),
                    "query_range": (hsp.query_start, hsp.query_end),
                    "name": short_name,
                    "evalue": hsp.expect,
                    "identity": hsp.identities,
                    "identity_percent": identity_fraction,
                    "hit_num": align_num,
                    "iter_num": iter_num,
                    # Text of the alignment title before the first ">".
                    "match_id": alignment.title.partition(">")[0],
                    "align_len": hsp.align_length,
                }
                hsp_entries.append(entry)
            collected.append(hsp_entries)

    return collected
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
56 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
57 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
def openTSV(blasttsv, outFile):  # Modified from intron_detection
    """Collect the HSP rows of a tab-separated BLAST report, grouped by subject.

    ``blasttsv`` is iterated line by line; each non-blank line is split on
    tabs and must provide at least 25 columns.  The columns read are
    0 (query ID), 1 (subject ID), 3 (alignment length), 6/7 (query range),
    8/9 (subject range), 10 (e-value), 12 (accession), 14 (identity count),
    22 (query length), 23 (subject length) and 24 (title).  Consecutive rows
    sharing the same subject ID (column 1) are grouped into one list.

    After the last row, a header line and a summary line (query ID and query
    length taken from the last row, plus the number of groups) are written
    to ``outFile``.

    Returns a list of groups, each a list of per-row dictionaries.  Blank
    lines are skipped; when the input holds no data rows an empty list is
    returned and nothing is written, instead of failing on unbound names.
    """
    blast = []
    blast_gene = []
    activeAlign = None  # subject ID of the group being built; None before the first row
    numAlignments = 0
    hsp_num = 0
    data = None  # fields of the most recent data row
    for line in blasttsv:
        if not line.strip():
            # A blank (often trailing) line has no columns to index.
            continue
        data = [field.strip() for field in line.strip("\n").split("\t")]
        if activeAlign is None or activeAlign != data[1]:
            # A new subject starts: close the previous group, if any.
            if blast_gene:
                blast.append(blast_gene)
            numAlignments += 1
            blast_gene = []
            hsp_num = 1
        else:
            hsp_num += 1
        activeAlign = data[1]
        # NOTE(review): this raises ZeroDivisionError when columns 6 and 7 are
        # equal; behavior is left unchanged here.
        identity_fraction = float(float(data[14]) - 1) / (
            float(data[7]) - float(data[6])
        )
        blast_gene.append(
            {
                "gi_nos": data[12],
                "sbjct_length": int(data[23]),
                "query_length": int(data[22]),
                "sbjct_range": (int(data[8]), int(data[9])),
                "query_range": (int(data[6]), int(data[7])),
                # Keep only the text before the first space of the subject ID.
                "name": data[1].partition(" ")[0],
                "evalue": float(data[10]),
                "identity": int(data[14]),
                "identity_percent": identity_fraction,
                "hit_num": numAlignments,
                "iter_num": hsp_num,
                # Text of the title column before the first ">".
                "match_id": data[24].partition(">")[0],
                "align_len": int(data[3]),
            }
        )

    if data is None:
        # No data rows at all: nothing to summarise.
        return blast

    blast.append(blast_gene)
    outFile.write("Query ID\tQuery Length\tTotal Number of Hits\n")
    outFile.write("%s\t%d\t%d\n\n" % (data[0], int(data[22]), numAlignments))

    return blast
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
109 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
110 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
111 def test_true(feature, **kwargs): | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
112 return True | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
113 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
114 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
def superSets(inSets):
    """Return the members of ``inSets`` not wholly contained in a kept member.

    ``inSets`` is sorted in place by length, longest first.  Members are then
    visited in that order and kept unless every one of their elements already
    appears in some previously kept member.
    """
    inSets.sort(key=len, reverse=True)
    kept = []
    for candidate in inSets:
        # Covered means: at least one kept member contains all of its elements.
        covered = any(
            all(item in parent for item in candidate) for parent in kept
        )
        if not covered:
            kept.append(candidate)
    return kept
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
133 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
134 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
def disjointSets(inSets):
    """Return members of ``inSets`` that share no element with a kept member.

    ``inSets`` is sorted in place by ``member[0]["sbjct_range"][0]``, so each
    member must be non-empty and its first element must carry a
    ``"sbjct_range"`` entry.  The first member after sorting is always kept;
    each later member is kept only if none of its elements appears in any
    member kept so far.

    Returns an empty list for empty input (previously an IndexError).
    """
    if not inSets:
        return []
    inSets.sort(key=lambda member: member[0]["sbjct_range"][0])
    kept = [inSets[0]]
    for candidate in inSets[1:]:
        # Rejected members are not added to ``kept``, so later members are
        # only compared against the ones actually returned.
        overlaps = any(elem in prior for elem in candidate for prior in kept)
        if not overlaps:
            kept.append(candidate)
    return kept
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
150 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
151 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
152 def compPhage(inRec, outFile, padding=1.2, cutoff=0.3, numReturn=20, isTSV=False): | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
153 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
154 if isTSV: | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
155 inRec = openTSV(inRec, outFile) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
156 else: | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
157 inRec = parseXML(inRec, outFile) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
158 res = [] | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
159 for group in inRec: | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
160 window = floor(padding * float(group[0]["query_length"])) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
161 group = sorted(group, key=lambda x: x["sbjct_range"][0]) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
162 hspGroups = [] | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
163 lastInd = len(res) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
164 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
165 for x in range(0, len(group)): | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
166 hspGroups.append([group[x]]) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
167 startBound = group[x]["sbjct_range"][0] | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
168 endBound = startBound + window | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
169 for hsp in group[x + 1 :]: | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
170 if ( | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
171 hsp["sbjct_range"][0] >= startBound | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
172 and hsp["sbjct_range"][1] <= endBound | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
173 ): | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
174 hspGroups[-1].append(hsp) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
175 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
176 for x in disjointSets(superSets(hspGroups)): | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
177 res.append(x) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
178 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
179 maxID = 0.0 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
180 for x in res[lastInd:]: | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
181 sumID = 0.0 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
182 totAlign = 0 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
183 for y in x: | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
184 totAlign += y["align_len"] | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
185 sumID += float(y["identity"]) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
186 x.append(totAlign) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
187 x.append(sumID / float(x[0]["query_length"])) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
188 maxID = max(maxID, x[-1]) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
189 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
190 res = sorted(res, key=lambda x: x[-1], reverse=True) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
191 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
192 outList = [] | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
193 outNum = 0 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
194 for x in res: | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
195 if outNum == numReturn or x[-1] < cutoff: | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
196 break | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
197 outNum += 1 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
198 outList.append(x) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
199 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
200 # Original request was that low scoring clusters would make it to the final results IF | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
201 # they were part of an Accession cluster that did have at least one high scoring member. | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
202 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
203 outFile.write( | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
204 "Accession Number\tCluster Start Location\tEnd Location\tSubject Cluster Length\t# HSPs in Cluster\tTotal Aligned Length\t% of Query Aligned\tOverall % Query Identity\tOverall % Subject Identity\tComplete Accession Info\n" | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
205 ) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
206 for x in outList: | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
207 minStart = min(x[0]["sbjct_range"][0], x[0]["sbjct_range"][1]) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
208 maxEnd = max(x[0]["sbjct_range"][0], x[0]["sbjct_range"][1]) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
209 if "|gb|" in x[0]["match_id"]: | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
210 startSlice = x[0]["match_id"].index("gb|") + 3 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
211 endSlice = (x[0]["match_id"][startSlice:]).index("|") | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
212 accOut = x[0]["match_id"][startSlice : startSlice + endSlice] | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
213 else: | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
214 accOut = x[0]["gi_nos"] | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
215 for y in x[0:-2]: | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
216 # ("\t%.3f\t" % (x[-1])) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
217 minStart = min(minStart, y["sbjct_range"][0]) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
218 maxEnd = max(maxEnd, y["sbjct_range"][1]) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
219 outFile.write( | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
220 accOut | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
221 + "\t" | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
222 + str(minStart) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
223 + "\t" | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
224 + str(maxEnd) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
225 + "\t" | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
226 + str(maxEnd - minStart + 1) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
227 + "\t" | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
228 + str(len(x) - 1) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
229 + "\t" | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
230 + str(x[-2]) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
231 + ("\t%.3f" % (float(x[-2]) / float(x[0]["query_length"]) * 100.00)) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
232 + ("\t%.3f" % (x[-1] * 100.00)) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
233 + ("\t%.3f" % (float(x[-2]) / float(maxEnd - minStart + 1) * 100.00)) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
234 + "\t" | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
235 + x[0]["match_id"] | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
236 + "\n" | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
237 ) | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
238 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
239 # accession start end number | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
240 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
241 | 
| 
 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 
cpt 
parents:  
diff
changeset
 | 
if __name__ == "__main__":
    # Command-line entry point. Every parsed option is forwarded to
    # compPhage() as a keyword argument (see the call at the bottom), so the
    # argument names below must stay identical to compPhage's parameter names.
    #
    # The previous description/help strings were left over from the
    # intron-detection script this file was adapted from and advertised
    # defaults (-1, 10000) that did not match the real ones. Defaults are now
    # rendered by argparse through %(default)s so they cannot drift again.
    parser = argparse.ArgumentParser(
        description="Prophage relatedness: summarize BLAST HSP clusters as a TSV table"
    )
    parser.add_argument(
        "inRec",
        type=argparse.FileType("r"),
        help="BLAST results to analyze (XML unless --isTSV is given)",
    )
    parser.add_argument(
        "--outFile",
        type=argparse.FileType("w"),
        help="Output TSV table (default: %(default)s)",
        default="./compPro.tsv",
    )
    # NOTE(review): the exact meaning of padding/cutoff/numReturn is defined
    # inside compPhage, which is not visible from this block; the help text
    # therefore only states what is certain. Extend it once confirmed.
    parser.add_argument(
        "--padding",
        help="Padding value passed to compPhage (default: %(default)s)",
        default=1.2,
        type=float,
    )
    parser.add_argument(
        "--cutoff",
        help="Cutoff value passed to compPhage (default: %(default)s)",
        default=0.3,
        type=float,
    )
    parser.add_argument(
        "--numReturn",
        help="numReturn value passed to compPhage (default: %(default)s)",
        default=20,
        type=int,
    )
    parser.add_argument(
        "--isTSV",
        help="Input is a BLAST TSV result rather than XML",
        action="store_true",
    )
    args = parser.parse_args()

    # vars(args) maps each argument name to its parsed value.
    compPhage(**vars(args))
