Mercurial > repos > cpt > cpt_prophage_relatedness
annotate prophage_relatedness.py @ 0:7a23dda2e932 draft
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
| author | cpt | 
|---|---|
| date | Thu, 08 Aug 2024 03:09:32 +0000 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 0 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 2 import argparse | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 3 from math import floor | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 4 from Bio.Blast import NCBIXML | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 5 import logging | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 6 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
# Configure logging once at import time with the threshold set to DEBUG.
logging.basicConfig(level=logging.DEBUG)
# getLogger() without a name returns the root logger; shared module-wide.
log = logging.getLogger()
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 9 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 10 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
def parseXML(blastxml, outFile):  # Modified from intron_detection
    """Parse BLAST XML into per-alignment lists of HSP dictionaries.

    For every record yielded by ``NCBIXML.parse`` a summary is written to
    ``outFile``: a header row, then the query ID, query length and number of
    alignments, followed by a blank line.

    Args:
        blastxml: Handle on BLAST XML output; passed unchanged to
            ``NCBIXML.parse``.
        outFile: Writable text handle that receives the query summaries.

    Returns:
        A list with one entry per alignment (over all records). Each entry is
        the list of HSP dictionaries of that alignment with the keys
        ``gi_nos``, ``sbjct_length``, ``query_length``, ``sbjct_range``,
        ``query_range``, ``name``, ``evalue``, ``identity``,
        ``identity_percent``, ``hit_num``, ``iter_num``, ``match_id`` and
        ``align_len``.

    Raises:
        ZeroDivisionError: If an HSP has ``query_start == query_end``.
    """
    blast = []
    for iter_num, blast_record in enumerate(NCBIXML.parse(blastxml), 1):
        outFile.write("Query ID\tQuery Length\tTotal Number of Hits\n")
        outFile.write(
            "%s\t%d\t%d\n\n"
            % (
                blast_record.query_id,
                blast_record.query_length,
                len(blast_record.alignments),
            )
        )

        # The short name is the query definition up to the first space. It is
        # identical for every HSP of the record, so compute it only once.
        nice_name = blast_record.query.partition(" ")[0]

        for align_num, alignment in enumerate(blast_record.alignments, 1):
            gi_nos = str(alignment.accession)
            blast_gene = []
            for hsp in alignment.hsps:
                # NOTE(review): (identities - 1) / (query_end - query_start)
                # is kept exactly as written; it is undefined for an HSP
                # whose query start and end coincide.
                x = float(hsp.identities - 1) / (hsp.query_end - hsp.query_start)
                blast_gene.append(
                    {
                        "gi_nos": gi_nos,
                        "sbjct_length": alignment.length,
                        "query_length": blast_record.query_length,
                        "sbjct_range": (hsp.sbjct_start, hsp.sbjct_end),
                        "query_range": (hsp.query_start, hsp.query_end),
                        "name": nice_name,
                        "evalue": hsp.expect,
                        "identity": hsp.identities,
                        "identity_percent": x,
                        "hit_num": align_num,
                        "iter_num": iter_num,
                        # Keep only the first title when several are joined
                        # with ">".
                        "match_id": alignment.title.partition(">")[0],
                        "align_len": hsp.align_length,
                    }
                )
            blast.append(blast_gene)

    return blast
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 56 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 57 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
def openTSV(blasttsv, outFile):  # Modified from intron_detection
    """Parse tab-separated BLAST output into per-subject lists of HSP dicts.

    Consecutive rows that share the same value in column 1 (0-based) are
    collected into one group. Columns are read by position: 0 query ID,
    1 subject ID (also used as ``name``), 3 alignment length, 6/7 query
    start/end, 8/9 subject start/end, 10 e-value, 12 ``gi_nos``, 14 identity
    count, 22 query length, 23 subject length, 24 subject title.
    NOTE(review): this presumably corresponds to the 25-column extended
    BLAST tabular layout -- confirm against the tool that produces the file.

    After the last row a summary (header row, then query ID, query length and
    number of groups, followed by a blank line) is written to ``outFile``,
    using the query ID and length of the last data row.

    Blank lines are skipped. If the input holds no data rows, nothing is
    written and an empty list is returned.

    Args:
        blasttsv: Iterable of text lines (e.g. an open file handle).
        outFile: Writable text handle that receives the query summary.

    Returns:
        A list of groups in input order; each group is a list of HSP
        dictionaries with the keys ``gi_nos``, ``sbjct_length``,
        ``query_length``, ``sbjct_range``, ``query_range``, ``name``,
        ``evalue``, ``identity``, ``identity_percent``, ``hit_num``,
        ``iter_num``, ``match_id`` and ``align_len``.

    Raises:
        IndexError: If a non-blank row has fewer than 25 columns.
        ValueError: If a numeric column cannot be converted.
        ZeroDivisionError: If a row has identical query start and end.
    """
    blast = []
    blast_gene = []
    activeAlign = None  # subject ID of the previous data row, None before any
    numAlignments = 0
    hsp_num = 0
    data = None
    for line in blasttsv:
        if not line.strip():
            # Tolerate blank lines (typically a trailing one) instead of
            # failing on the column lookups below.
            continue
        data = [field.strip() for field in line.strip("\n").split("\t")]

        if activeAlign is None or activeAlign != data[1]:
            # A new subject starts: close the previous group, if any.
            if activeAlign is not None:
                blast.append(blast_gene)
            numAlignments += 1
            blast_gene = []
            hsp_num = 1
        else:
            hsp_num += 1
        activeAlign = data[1]

        # NOTE(review): (identity - 1) / (query_end - query_start) is kept
        # exactly as written; it is undefined when start and end coincide.
        x = float(float(data[14]) - 1) / (float(data[7]) - float(data[6]))
        nice_name = data[1].partition(" ")[0]
        blast_gene.append(
            {
                "gi_nos": data[12],
                "sbjct_length": int(data[23]),
                "query_length": int(data[22]),
                "sbjct_range": (int(data[8]), int(data[9])),
                "query_range": (int(data[6]), int(data[7])),
                "name": nice_name,
                "evalue": float(data[10]),
                "identity": int(data[14]),
                "identity_percent": x,
                "hit_num": numAlignments,
                # Counts HSPs within the current group, despite the key name.
                "iter_num": hsp_num,
                "match_id": data[24].partition(">")[0],
                "align_len": int(data[3]),
            }
        )

    if activeAlign is None:
        # No data rows at all: there is no query to summarise.
        return blast

    blast.append(blast_gene)
    outFile.write("Query ID\tQuery Length\tTotal Number of Hits\n")
    outFile.write("%s\t%d\t%d\n\n" % (data[0], int(data[22]), numAlignments))

    return blast
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 109 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 110 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
def test_true(feature, **kwargs):
    """Return ``True`` for any input.

    Args:
        feature: Ignored.
        **kwargs: Ignored.

    Returns:
        ``True`` unconditionally.
    """
    return True
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 113 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 114 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
def superSets(inSets):
    """Keep only the lists that are not contained in an already kept list.

    ``inSets`` is sorted in place by length, longest first (ties keep their
    input order). Lists are then visited in that order, and a list is dropped
    when every one of its members occurs in some list kept before it.
    Membership is tested with ``in``, i.e. by equality.

    Args:
        inSets: List of lists; reordered in place.

    Returns:
        A new list holding the kept lists (the same list objects as in
        ``inSets``), longest first. Empty input gives an empty list.
    """
    inSets.sort(key=len, reverse=True)
    res = []
    for candidate in inSets:
        # any/all stop at the first containing list and first missing member.
        contained = any(all(x in par for x in candidate) for par in res)
        if not contained:
            res.append(candidate)
    return res
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 133 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 134 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
def disjointSets(inSets):
    """Greedily pick lists that share no member with the lists picked so far.

    ``inSets`` is sorted in place by the first ``sbjct_range`` value of each
    list's first element. Lists are then visited in that order; a list is
    kept only if none of its members occurs (by equality) in a list kept
    earlier. The first list is always kept.

    Args:
        inSets: List of non-empty lists whose first element is a mapping with
            a ``"sbjct_range"`` sequence; reordered in place.

    Returns:
        A new list of the kept lists (the same list objects as in
        ``inSets``). Empty input gives an empty list.
    """
    if not inSets:
        # Nothing to select; the previous code failed on inSets[0] here.
        return []
    inSets.sort(key=lambda x: x[0]["sbjct_range"][0])
    res = []
    for candidate in inSets:
        overlaps = any(elem in kept for elem in candidate for kept in res)
        if not overlaps:
            res.append(candidate)
    return res
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 150 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 151 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
def compPhage(inRec, outFile, padding=1.2, cutoff=0.3, numReturn=20, isTSV=False):
    """Cluster BLAST HSPs per subject and write the best clusters as a table.

    The input is parsed with ``openTSV`` (``isTSV`` true) or ``parseXML``,
    both of which also write their query summary to ``outFile``. Within each
    parsed group, HSPs are sorted by subject start; every HSP seeds a
    candidate cluster holding the later HSPs that start at or after the seed
    and end no further than ``floor(padding * query_length)`` positions past
    the seed's start. Candidates are reduced with ``superSets`` and
    ``disjointSets``. Each remaining cluster is scored as the sum of its HSP
    identity counts divided by the query length.

    Clusters are written best score first, one tab-separated row each, after
    a column header row.

    Args:
        inRec: Input handle passed to the selected parser.
        outFile: Writable text handle for the summary and the cluster table.
        padding: Factor applied to the query length to obtain the window.
        cutoff: Minimum score; output stops at the first cluster below it.
        numReturn: Maximum number of clusters written.
        isTSV: Parse ``inRec`` as tabular text instead of BLAST XML.

    Returns:
        None.
    """
    if isTSV:
        inRec = openTSV(inRec, outFile)
    else:
        inRec = parseXML(inRec, outFile)

    res = []
    for group in inRec:
        if not group:
            # A group without HSPs has nothing to cluster.
            continue
        window = floor(padding * float(group[0]["query_length"]))
        group = sorted(group, key=lambda x: x["sbjct_range"][0])

        # One candidate cluster per HSP: the seed plus every later HSP that
        # lies completely inside the window opened at the seed's start.
        # NOTE(review): this comparison assumes sbjct_range is (low, high).
        hspGroups = []
        for ind, seed in enumerate(group):
            startBound = seed["sbjct_range"][0]
            endBound = startBound + window
            members = [seed]
            for hsp in group[ind + 1 :]:
                if (
                    hsp["sbjct_range"][0] >= startBound
                    and hsp["sbjct_range"][1] <= endBound
                ):
                    members.append(hsp)
            hspGroups.append(members)

        for cluster in disjointSets(superSets(hspGroups)):
            totAlign = sum(hsp["align_len"] for hsp in cluster)
            sumID = sum(float(hsp["identity"]) for hsp in cluster)
            # The two totals ride along as the last two items of the list:
            # [-2] total aligned length, [-1] identity sum / query length.
            cluster.append(totAlign)
            cluster.append(sumID / float(cluster[0]["query_length"]))
            res.append(cluster)

    res = sorted(res, key=lambda x: x[-1], reverse=True)

    outList = []
    for cluster in res:
        if len(outList) == numReturn or cluster[-1] < cutoff:
            break
        outList.append(cluster)

    # Original request was that low scoring clusters would make it to the final results IF
    # they were part of an Accession cluster that did have at least one high scoring member.

    outFile.write(
        "Accession Number\tCluster Start Location\tEnd Location\tSubject Cluster Length\t# HSPs in Cluster\tTotal Aligned Length\t% of Query Aligned\tOverall % Query Identity\tOverall % Subject Identity\tComplete Accession Info\n"
    )
    for cluster in outList:
        hsps = cluster[:-2]  # strip the two appended totals
        totAlign = cluster[-2]
        queryIdent = cluster[-1]
        first = hsps[0]

        # Use both ends of every HSP so that ranges stored as (high, low)
        # are covered as well.
        ends = [pos for hsp in hsps for pos in hsp["sbjct_range"]]
        minStart = min(ends)
        maxEnd = max(ends)
        span = maxEnd - minStart + 1

        match_id = first["match_id"]
        if "|gb|" in match_id:
            # Text between "gb|" and the next "|"; partition also copes with
            # an identifier that has no closing "|".
            tail = match_id[match_id.index("gb|") + 3 :]
            accOut = tail.partition("|")[0]
        else:
            accOut = first["gi_nos"]

        row = [
            accOut,
            str(minStart),
            str(maxEnd),
            str(span),
            str(len(hsps)),
            str(totAlign),
            "%.3f" % (float(totAlign) / float(first["query_length"]) * 100.00),
            "%.3f" % (queryIdent * 100.00),
            "%.3f" % (float(totAlign) / float(span) * 100.00),
            match_id,
        ]
        outFile.write("\t".join(row) + "\n")
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 238 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 239 # accession start end number | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 240 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
changeset | 241 | 
| 
7a23dda2e932
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
 cpt parents: diff
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and forward every one of
    # them, by name, to compPhage. The argument names below therefore have to
    # stay in sync with compPhage's parameter names.
    #
    # The help strings were previously copied from the intron-detection tool
    # and advertised defaults (-1, 10000) that did not match the real ones.
    # Defaults are now rendered by argparse via %(default)s so the help text
    # cannot drift from the code again.
    parser = argparse.ArgumentParser(
        description="Prophage relatedness analysis of BLAST results"
    )
    parser.add_argument(
        "inRec",
        type=argparse.FileType("r"),
        help="BLAST results to analyse (XML, or TSV when --isTSV is given)",
    )
    parser.add_argument(
        "--outFile",
        type=argparse.FileType("w"),
        help="Output file for the tab-separated results (default: %(default)s)",
        default="./compPro.tsv",
    )
    # NOTE(review): the exact meaning of padding/cutoff is defined inside
    # compPhage, which is outside this block; the wording below is kept
    # deliberately neutral -- refine it against that function.
    parser.add_argument(
        "--padding",
        help="Padding factor passed to the analysis (default: %(default)s)",
        default=1.2,
        type=float,
    )
    parser.add_argument(
        "--cutoff",
        help="Cutoff threshold passed to the analysis (default: %(default)s)",
        default=0.3,
        type=float,
    )
    # NOTE(review): "number of results" is inferred from the option name;
    # confirm against compPhage.
    parser.add_argument(
        "--numReturn",
        help="Maximum number of results to report (default: %(default)s)",
        default=20,
        type=int,
    )
    parser.add_argument(
        "--isTSV",
        help="Treat the input as BLAST tabular (TSV) output instead of XML",
        action="store_true",
    )
    args = parser.parse_args()

    compPhage(**vars(args))
