diff PSM_Recombine.py @ 1:97ef96676b48 draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:51:26 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/PSM_Recombine.py	Mon Jun 05 02:51:26 2023 +0000
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+import argparse
+import logging
+from Bio import SeqIO
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger(__name__)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Identify shine-dalgarno sequences")
+    parser.add_argument("psmTable", type=argparse.FileType("r"))
+    parser.add_argument("gbkList", type=argparse.FileType("r"), nargs="+")
+    args = parser.parse_args()
+
+    gbkRecs = []
+    recIDs = []
+    recFlatten = []  # Can only seek argparse file once
+
+    for f in args.gbkList:
+        tempRecs = SeqIO.parse(f, "genbank")
+        for rec in tempRecs:
+            recFlatten.append(rec)
+
+    for line in args.psmTable:
+        lineElems = line.split("\t")
+        numGenes = 0
+        accession = ""
+        lineOut = ""
+        if recIDs == []:
+            for i in lineElems:
+                recIDs.append(i.strip())
+                lineOut += i.strip() + "\t"
+                for rec in recFlatten:
+                    if i.strip() in rec.id or rec.id in i.strip():
+                        gbkRecs.append(rec)
+            lineOut += "No. of phages in which gene is present\tBest Database Match"
+            print(lineOut)
+            continue
+
+        for i in range(0, len(lineElems)):
+            checkFeat = lineElems[i].strip()
+            if checkFeat == "-":
+                lineOut += "(-)\t"
+                continue
+            else:
+                lineOut += checkFeat + "\t"
+            numGenes += 1
+            if accession == "":
+                for feat in gbkRecs[i].features:
+                    if (
+                        "locus_tag" in feat.qualifiers.keys()
+                        and feat.qualifiers["locus_tag"][0] == checkFeat
+                    ):
+                        if "protein_id" in feat.qualifiers.keys():
+                            accession = feat.qualifiers["protein_id"][0]
+                            break  # Comment out if we need to get more info
+        lineOut += str(numGenes) + "\t" + accession
+        print(lineOut)