Mercurial > repos > cpt > cpt_psm_recombine
comparison PSM_Recombine.py @ 1:97ef96676b48 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:51:26 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:b18e8268bf4e | 1:97ef96676b48 |
---|---|
1 #!/usr/bin/env python | |
2 import argparse | |
3 import logging | |
4 from Bio import SeqIO | |
5 | |
6 logging.basicConfig(level=logging.INFO) | |
7 log = logging.getLogger(__name__) | |
8 | |
9 | |
10 if __name__ == "__main__": | |
11 parser = argparse.ArgumentParser(description="Identify shine-dalgarno sequences") | |
12 parser.add_argument("psmTable", type=argparse.FileType("r")) | |
13 parser.add_argument("gbkList", type=argparse.FileType("r"), nargs="+") | |
14 args = parser.parse_args() | |
15 | |
16 gbkRecs = [] | |
17 recIDs = [] | |
18 recFlatten = [] # Can only seek argparse file once | |
19 | |
20 for f in args.gbkList: | |
21 tempRecs = SeqIO.parse(f, "genbank") | |
22 for rec in tempRecs: | |
23 recFlatten.append(rec) | |
24 | |
25 for line in args.psmTable: | |
26 lineElems = line.split("\t") | |
27 numGenes = 0 | |
28 accession = "" | |
29 lineOut = "" | |
30 if recIDs == []: | |
31 for i in lineElems: | |
32 recIDs.append(i.strip()) | |
33 lineOut += i.strip() + "\t" | |
34 for rec in recFlatten: | |
35 if i.strip() in rec.id or rec.id in i.strip(): | |
36 gbkRecs.append(rec) | |
37 lineOut += "No. of phages in which gene is present\tBest Database Match" | |
38 print(lineOut) | |
39 continue | |
40 | |
41 for i in range(0, len(lineElems)): | |
42 checkFeat = lineElems[i].strip() | |
43 if checkFeat == "-": | |
44 lineOut += "(-)\t" | |
45 continue | |
46 else: | |
47 lineOut += checkFeat + "\t" | |
48 numGenes += 1 | |
49 if accession == "": | |
50 for feat in gbkRecs[i].features: | |
51 if ( | |
52 "locus_tag" in feat.qualifiers.keys() | |
53 and feat.qualifiers["locus_tag"][0] == checkFeat | |
54 ): | |
55 if "protein_id" in feat.qualifiers.keys(): | |
56 accession = feat.qualifiers["protein_id"][0] | |
57 break # Comment out if we need to get more info | |
58 lineOut += str(numGenes) + "\t" + accession | |
59 print(lineOut) |