Mercurial > repos > cpt > cpt_psm_recombine
view PSM_Recombine.py @ 1:97ef96676b48 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:51:26 +0000 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python import argparse import logging from Bio import SeqIO logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Identify shine-dalgarno sequences") parser.add_argument("psmTable", type=argparse.FileType("r")) parser.add_argument("gbkList", type=argparse.FileType("r"), nargs="+") args = parser.parse_args() gbkRecs = [] recIDs = [] recFlatten = [] # Can only seek argparse file once for f in args.gbkList: tempRecs = SeqIO.parse(f, "genbank") for rec in tempRecs: recFlatten.append(rec) for line in args.psmTable: lineElems = line.split("\t") numGenes = 0 accession = "" lineOut = "" if recIDs == []: for i in lineElems: recIDs.append(i.strip()) lineOut += i.strip() + "\t" for rec in recFlatten: if i.strip() in rec.id or rec.id in i.strip(): gbkRecs.append(rec) lineOut += "No. of phages in which gene is present\tBest Database Match" print(lineOut) continue for i in range(0, len(lineElems)): checkFeat = lineElems[i].strip() if checkFeat == "-": lineOut += "(-)\t" continue else: lineOut += checkFeat + "\t" numGenes += 1 if accession == "": for feat in gbkRecs[i].features: if ( "locus_tag" in feat.qualifiers.keys() and feat.qualifiers["locus_tag"][0] == checkFeat ): if "protein_id" in feat.qualifiers.keys(): accession = feat.qualifiers["protein_id"][0] break # Comment out if we need to get more info lineOut += str(numGenes) + "\t" + accession print(lineOut)