comparison PSM_Recombine.py @ 1:97ef96676b48 draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:51:26 +0000
parents
children
comparison
equal deleted inserted replaced
0:b18e8268bf4e 1:97ef96676b48
1 #!/usr/bin/env python
2 import argparse
3 import logging
4 from Bio import SeqIO
5
6 logging.basicConfig(level=logging.INFO)
7 log = logging.getLogger(__name__)
8
9
10 if __name__ == "__main__":
11 parser = argparse.ArgumentParser(description="Identify shine-dalgarno sequences")
12 parser.add_argument("psmTable", type=argparse.FileType("r"))
13 parser.add_argument("gbkList", type=argparse.FileType("r"), nargs="+")
14 args = parser.parse_args()
15
16 gbkRecs = []
17 recIDs = []
18 recFlatten = [] # Can only seek argparse file once
19
20 for f in args.gbkList:
21 tempRecs = SeqIO.parse(f, "genbank")
22 for rec in tempRecs:
23 recFlatten.append(rec)
24
25 for line in args.psmTable:
26 lineElems = line.split("\t")
27 numGenes = 0
28 accession = ""
29 lineOut = ""
30 if recIDs == []:
31 for i in lineElems:
32 recIDs.append(i.strip())
33 lineOut += i.strip() + "\t"
34 for rec in recFlatten:
35 if i.strip() in rec.id or rec.id in i.strip():
36 gbkRecs.append(rec)
37 lineOut += "No. of phages in which gene is present\tBest Database Match"
38 print(lineOut)
39 continue
40
41 for i in range(0, len(lineElems)):
42 checkFeat = lineElems[i].strip()
43 if checkFeat == "-":
44 lineOut += "(-)\t"
45 continue
46 else:
47 lineOut += checkFeat + "\t"
48 numGenes += 1
49 if accession == "":
50 for feat in gbkRecs[i].features:
51 if (
52 "locus_tag" in feat.qualifiers.keys()
53 and feat.qualifiers["locus_tag"][0] == checkFeat
54 ):
55 if "protein_id" in feat.qualifiers.keys():
56 accession = feat.qualifiers["protein_id"][0]
57 break # Comment out if we need to get more info
58 lineOut += str(numGenes) + "\t" + accession
59 print(lineOut)