# HG changeset patch
# User cpt
# Date 1685932955 0
# Node ID e29c36ee61e0e784d0870bfc6e423b11ce924bc3
# Parent 1311f97dccfa97382ff607cf22a518a0bfaeeba4
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
diff -r 1311f97dccfa -r e29c36ee61e0 adjacent_features.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/adjacent_features.py Mon Jun 05 02:42:35 2023 +0000
@@ -0,0 +1,444 @@
+#!/usr/bin/env python
+from Bio import SeqIO
+from Bio.Data import CodonTable
+import argparse
+import logging
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger()
+
+
+def extract_features(
+ genbankFiles=None,
+ fastaFiles=None,
+ upOut=None,
+ downOut=None,
+ genesOnly=False,
+ cdsOnly=True,
+ forceSeqID=False,
+ forward=1,
+ behind=1,
+ outProt=True,
+ tTable=11,
+ fTable=11,
+):
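+    """Locate each FASTA query inside the GenBank records and export its neighbours.
+
+    For every FASTA record whose (optionally translated) sequence matches a CDS or
+    gene feature in a GenBank record, the `behind` features on the hit's 5' side are
+    written to upOut and the `forward` features on its 3' side are written to
+    downOut, as protein or nucleotide FASTA depending on outProt and tTable.
+    """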
+
+ genList = []
+ fastaList = []
+
+ for fileX in genbankFiles:
+ opener = SeqIO.parse(fileX, "genbank")
+        # Materialize the generator so we get a list of records, not a list of generators
+        for openRec in opener:
+            genList.append(openRec)
+
+ for fileX in fastaFiles:
+ opener = SeqIO.parse(fileX, "fasta")
+ for openRec in opener: # Technically flattens multifastas too
+ fastaList.append(openRec)
+
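+    # Compare every FASTA query against every feature of every GenBank record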
+ for seqMatch in fastaList:
+ longOut = seqMatch.description
+ protID = seqMatch.id
+ if fTable != 0:
+ fSeq = seqMatch.seq.translate(table=fTable, cds=False)
+ else:
+ fSeq = seqMatch.seq
+
+ for gbk in genList:
+ sourceOut = gbk.id
+ num = -1
+ for feat in gbk.features:
+ num += 1
+
+ if (genesOnly and feat.type != "gene") or (
+ cdsOnly and feat.type != "CDS"
+ ):
+ continue
+
+                if "codon_start" in feat.qualifiers:
+                    # codon_start is 1-based; skip (codon_start - 1) leading bases before translating
+                    offset = int(feat.qualifiers["codon_start"][0]) - 1
+                else:
+                    offset = 0
+
+                if feat.location.strand == -1:
+                    temp = gbk.seq[feat.location.start : feat.location.end - offset]
+                    temp = temp.reverse_complement()
+                else:
+                    temp = gbk.seq[feat.location.start + offset : feat.location.end]
+
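+                # Try a strict CDS translation first; fall back to a plain translation if it fails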
+ if tTable != 0:
+ try:
+ gSeq = temp.translate(table=tTable, cds=True)
+                    except CodonTable.TranslationError as cte:
+                        log.debug("Translation issue at %s", cte)
+                        gSeq = temp.translate(table=tTable, cds=False)
+ else:
+ gSeq = temp
+
+                if "protein_id" not in feat.qualifiers:
+                    # Junk value for the genesOnly flag
+                    feat.qualifiers["protein_id"] = ["++++++++"]
+
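+                # A hit: the (translated) feature sequence matches the FASTA query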
+                if (gSeq == fSeq) and (
+                    protID == feat.qualifiers["protein_id"][0] or not forceSeqID
+                ):
+ goBack = num - 1
+ goAhead = num + 1
+ numBack = behind
+ numAhead = forward
+ backList = []
+ aheadList = []
+
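+                    # Walk outwards from the hit, collecting qualifying features on each side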
+ while numBack != 0 and goBack >= 0:
+ if (genesOnly and gbk.features[goBack].type != "gene") or (
+ cdsOnly and gbk.features[goBack].type != "CDS"
+ ):
+ goBack -= 1
+ continue
+ backList.append(gbk.features[goBack])
+ numBack -= 1
+ goBack -= 1
+
+ while numAhead != 0 and goAhead < len(gbk.features):
+ if (genesOnly and gbk.features[goAhead].type != "gene") or (
+ cdsOnly and gbk.features[goAhead].type != "CDS"
+ ):
+ goAhead += 1
+ continue
+ aheadList.append(gbk.features[goAhead])
+ numAhead -= 1
+ goAhead += 1
+
+ backList.reverse()
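+                    # On the reverse strand, features later in the file are biologically 5' of the hit, so swap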
+                    if feat.location.strand == -1:
+                        aheadList, backList = backList, aheadList
+
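+                    # Write the 5' (upstream) neighbours to upOut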
+ for item in backList:
+ addition = ""
+ header = ""
+ if "product" in item.qualifiers:
+ addition = " -" + str(item.qualifiers["product"][0]) + "-"
+ if "protein_id" in item.qualifiers:
+ header = (
+ ">"
+ + (item.qualifiers["protein_id"][0])
+ + addition
+ + " (5' of "
+ + longOut
+ + " found within "
+ + sourceOut
+ + ")\n"
+ )
+ else:
+ header = (
+ ">"
+ + (item.qualifiers["locus_tag"][0])
+ + addition
+ + " (5' of "
+ + longOut
+ + " found within "
+ + sourceOut
+ + ")\n"
+ )
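+                        # For protein output, use the annotated /translation when present; otherwise derive it from the genome slice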
+                        if outProt:
+ if "translation" in item.qualifiers:
+ upOut.write(header)
+ upOut.write(
+ str(item.qualifiers["translation"][0]) + "\n\n"
+ )
+ else:
+ modS = 0
+ modE = 0
+ if "codon_start" in item.qualifiers:
+ if item.location.strand > 0:
+ modS = (
+ int(item.qualifiers["codon_start"][0]) - 1
+ )
+ else:
+ modE = (
+ int(item.qualifiers["codon_start"][0]) - 1
+ )
+
+ seqHold = gbk.seq[
+ item.location.start
+ + modS : item.location.end
+ - modE
+ ]
+ if item.location.strand == -1:
+ seqHold = seqHold.reverse_complement()
+ if cdsOnly:
+ try:
+ finalSeq = ""
+ if tTable != 0:
+ finalSeq = (
+ str(
+ seqHold.translate(
+ table=tTable, cds=True
+ )
+ )
+ + "\n\n"
+ )
+ else:
+ finalSeq = str(seqHold) + "\n\n"
+ # upOut.write(header)
+ # upOut.write(finalSeq)
+ except Exception as bdct:
+                                        log.warning(
+ "ERROR %s %s",
+ item.qualifiers["locus_tag"][0],
+ bdct,
+ )
+ finalSeq = ""
+ if tTable != 0:
+ finalSeq = (
+ str(
+ seqHold.translate(
+ table=tTable, cds=False
+ )
+ )
+ + "\n\n"
+ )
+ else:
+ finalSeq = str(seqHold) + "\n\n"
+ header = (
+ ">"
+ + (item.qualifiers["locus_tag"][0])
+ + addition
+ + " [INCOMPLETE] (5' of "
+ + longOut
+ + " found within "
+ + sourceOut
+ + ")\n"
+ )
+ upOut.write(header)
+ upOut.write(finalSeq)
+ else:
+
+ if tTable != 0:
+ upOut.write(header)
+ upOut.write(
+ str(
+ seqHold.translate(
+ table=tTable, cds=False
+ )
+ )
+ + "\n\n"
+ )
+ else:
+ upOut.write(header)
+ upOut.write(str(seqHold) + "\n\n")
+ else:
+ upOut.write(header)
+ upOut.write(
+ str(gbk.seq[item.location.start : item.location.end])
+ + "\n\n"
+ )
+
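+                    # Write the 3' (downstream) neighbours to downOut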
+ for item in aheadList:
+ addition = ""
+ header = ""
+ if "product" in item.qualifiers:
+ addition = " -" + str(item.qualifiers["product"][0]) + "-"
+ if "protein_id" in item.qualifiers:
+ header = (
+ ">"
+ + (item.qualifiers["protein_id"][0])
+ + addition
+ + " (3' of "
+ + longOut
+ + " found within "
+ + sourceOut
+ + ")\n"
+ )
+ else:
+ header = (
+ ">"
+ + (item.qualifiers["locus_tag"][0])
+ + addition
+ + " (3' of "
+ + longOut
+ + " found within "
+ + sourceOut
+ + ")\n"
+ )
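+                        # As above: use the annotated /translation when present; otherwise derive it from the genome slice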
+                        if outProt:
+ if "translation" in item.qualifiers:
+ downOut.write(header)
+ downOut.write(
+ str(item.qualifiers["translation"][0]) + "\n\n"
+ )
+ else:
+ modS = 0
+ modE = 0
+ if "codon_start" in item.qualifiers:
+ if item.location.strand > 0:
+ modS = (
+ int(item.qualifiers["codon_start"][0]) - 1
+ )
+ else:
+ modE = (
+ int(item.qualifiers["codon_start"][0]) - 1
+ )
+
+ seqHold = gbk.seq[
+ item.location.start
+ + modS : item.location.end
+ - modE
+ ]
+ if item.location.strand == -1:
+ seqHold = seqHold.reverse_complement()
+ if cdsOnly:
+ try:
+ finalSeq = ""
+ if tTable != 0:
+ finalSeq = (
+ str(
+ seqHold.translate(
+ table=tTable, cds=True
+ )
+ )
+ + "\n\n"
+ )
+ else:
+ finalSeq = str(seqHold) + "\n\n"
+ # downOut.write(header)
+ # downOut.write(finalSeq)
+ except Exception as bdct:
+                                        log.warning(
+ "ERROR %s %s",
+ item.qualifiers["locus_tag"][0],
+ bdct,
+ )
+ finalSeq = ""
+ if tTable != 0:
+ finalSeq = (
+ str(
+ seqHold.translate(
+ table=tTable, cds=False
+ )
+ )
+ + "\n\n"
+ )
+ else:
+ finalSeq = str(seqHold) + "\n\n"
+ header = (
+ ">"
+ + (item.qualifiers["locus_tag"][0])
+ + addition
+ + " [INCOMPLETE] (3' of "
+ + longOut
+ + " found within "
+ + sourceOut
+ + ")\n"
+ )
+ downOut.write(header)
+ downOut.write(finalSeq)
+ else:
+
+ if tTable != 0:
+ downOut.write(header)
+ downOut.write(
+ str(
+ seqHold.translate(
+ table=tTable, cds=False
+ )
+ )
+ + "\n\n"
+ )
+ else:
+ downOut.write(header)
+ downOut.write(str(seqHold) + "\n\n")
+ else:
+ downOut.write(header)
+ downOut.write(
+ str(gbk.seq[item.location.start : item.location.end])
+ + "\n\n"
+ )
+ # print(longOut)
+
+ return
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+        description="Search GenBank records for a FASTA sequence and export the adjacent (upstream/downstream) features", epilog=""
+ )
+ parser.add_argument(
+        "-genbankFiles", nargs="+", type=argparse.FileType("r"), help="GenBank file(s) to search"
+ )
+ parser.add_argument(
+ "-fastaFiles",
+ nargs="+",
+ type=argparse.FileType("r"),
+ help="Fasta file to match against",
+ )
+ parser.add_argument(
+ "-tTable",
+ type=int,
+ default=11,
+        help="Translation table for translating GenBank features (0 = compare nucleotide sequence)",
+ choices=range(0, 23),
+ )
+ parser.add_argument(
+ "-fTable",
+ type=int,
+ default=11,
+        help="Translation table for translating the FASTA query (0 = query is already protein)",
+ choices=range(0, 23),
+ )
+ parser.add_argument(
+ "-upOut",
+ type=argparse.FileType("w"),
+ help="Upstream Fasta output",
+ default="test-data/upOut.fa",
+ )
+ parser.add_argument(
+ "-downOut",
+ type=argparse.FileType("w"),
+ help="Downstream Fasta output",
+ default="test-data/downOut.fa",
+ )
+ parser.add_argument(
+ "--genesOnly",
+ action="store_true",
+ help="Search and return only Gene type features",
+ )
+ parser.add_argument(
+ "--cdsOnly",
+ action="store_true",
+ help="Search and return only CDS type features",
+ )
+ parser.add_argument(
+ "--forceSeqID",
+ action="store_true",
+        help="Require the FASTA record ID to match the feature's protein_id",
+ )
+ parser.add_argument(
+ "--outProt", action="store_true", help="Output the translated sequence"
+ )
+ parser.add_argument(
+ "--forward",
+ type=int,
+ default=1,
+        help="Number of features 3' of (downstream from) the hit to return",
+ )
+ parser.add_argument(
+ "--behind",
+ type=int,
+ default=1,
+        help="Number of features 5' of (upstream from) the hit to return",
+ )
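+    # Example invocation (hypothetical file names):
+    #   python adjacent_features.py -genbankFiles phage.gbk -fastaFiles query.fa \
+    #       -upOut upstream.fa -downOut downstream.fa --cdsOnly --outProt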
+ args = parser.parse_args()
+ extract_features(**vars(args))
diff -r 1311f97dccfa -r e29c36ee61e0 adjacent_features.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/adjacent_features.xml Mon Jun 05 02:42:35 2023 +0000
@@ -0,0 +1,110 @@
+    <description>Searches a Genbank file for a given FASTA sequence, then outputs a file with adjacent upstream features, and another with adjacent downstream features.</description>
+    <macros>
+        <import>macros.xml</import>
+        <import>cpt-macros.xml</import>
+    </macros>
+    <!-- The remaining tool markup (root element, command, inputs, outputs, tests, help) is not recoverable from this patch text. -->
+    <citations>
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {A. Criscione},
+                title = {CPT Galaxy Tools},
+                year = {2019-2021},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+        </citation>
+    </citations>
diff -r 1311f97dccfa -r e29c36ee61e0 cpt-macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml Mon Jun 05 02:42:35 2023 +0000
@@ -0,0 +1,115 @@
+<macros>
+    <!-- Macro element names were stripped from this patch text; only the recoverable requirements and the distinct citation entries (shared across several per-author citation macros) are listed below. -->
+    <requirements>
+        <requirement type="package">python</requirement>
+        <requirement type="package">biopython</requirement>
+        <requirement type="package">requests</requirement>
+        <requirement type="package">cpt_gffparser</requirement>
+    </requirements>
+    <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+    <citation type="bibtex">
+        @unpublished{galaxyTools,
+            author = {E. Mijalis, H. Rasche},
+            title = {CPT Galaxy Tools},
+            year = {2013-2017},
+            note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+    </citation>
+    <citation type="bibtex">
+        @unpublished{galaxyTools,
+            author = {C. Ross},
+            title = {CPT Galaxy Tools},
+            year = {2020-},
+            note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+    </citation>
+    <citation type="bibtex">
+        @unpublished{galaxyTools,
+            author = {A. Criscione},
+            title = {CPT Galaxy Tools},
+            year = {2019-2021},
+            note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+    </citation>
+    <citation type="bibtex">
+        @unpublished{galaxyTools,
+            author = {C. Maughmer},
+            title = {CPT Galaxy Tools},
+            year = {2017-2020},
+            note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+    </citation>
+</macros>
diff -r 1311f97dccfa -r e29c36ee61e0 cpt_gbk_adjacent/adjacent_features.py
--- a/cpt_gbk_adjacent/adjacent_features.py Fri Jun 17 12:43:45 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,444 +0,0 @@
-#!/usr/bin/env python
-from Bio import SeqIO
-from Bio.Seq import Seq
-from Bio.Data import CodonTable
-from Bio.SeqRecord import SeqRecord
-from Bio.SeqFeature import SeqFeature, FeatureLocation
-from Bio.Alphabet import generic_dna, generic_protein
-import argparse
-import logging
-
-logging.basicConfig(level=logging.INFO)
-log = logging.getLogger()
-
-
-def extract_features(
- genbankFiles=None,
- fastaFiles=None,
- upOut=None,
- downOut=None,
- genesOnly=False,
- cdsOnly=True,
- forceSeqID=False,
- forward=1,
- behind=1,
- outProt=True,
- tTable=11,
- fTable=11,
-):
-
- genList = []
- fastaList = []
-
- for fileX in genbankFiles:
- opener = SeqIO.parse(fileX, "genbank")
- for (
- openRec
- ) in (
- opener
- ): # To turn the generator into objects (or else we end up with a list of generators)
- genList.append(openRec)
-
- for fileX in fastaFiles:
- opener = SeqIO.parse(fileX, "fasta")
- for openRec in opener: # Technically flattens multifastas too
- fastaList.append(openRec)
-
- for seqMatch in fastaList:
- longOut = seqMatch.description
- protID = seqMatch.id
- if fTable != 0:
- fSeq = seqMatch.seq.translate(table=fTable, cds=False)
- else:
- fSeq = seqMatch.seq
-
- for gbk in genList:
- sourceOut = gbk.id
- num = -1
- for feat in gbk.features:
- num += 1
-
- if (genesOnly and feat.type != "gene") or (
- cdsOnly and feat.type != "CDS"
- ):
- continue
-
- if "codon_start" in feat.qualifiers:
- offset = 1 - int(feat.qualifiers["codon_start"][0])
- else:
- offset = 0
-
-
- temp = gbk.seq[feat.location.start : feat.location.end]
- if feat.location.strand == -1:
- temp = gbk.seq[feat.location.start : feat.location.end - offset]
- temp = temp.reverse_complement()
- else:
- temp = gbk.seq[feat.location.start + offset : feat.location.end]
-
- if tTable != 0:
- try:
- gSeq = temp.translate(table=tTable, cds=True)
- except CodonTable.TranslationError as cte:
- # log.info("Translation issue at %s", cte)
- gSeq = temp.translate(table=tTable, cds=False)
- else:
- gSeq = temp
-
- if not ("protein_id" in feat.qualifiers):
- feat.qualifiers["protein_id"] = [
- "++++++++"
- ] # Junk value for genesOnly flag
-
- if (gSeq == fSeq) and (protID == feat.qualifiers["protein_id"][0] or forceSeqID == False):
- goBack = num - 1
- goAhead = num + 1
- numBack = behind
- numAhead = forward
- backList = []
- aheadList = []
-
- while numBack != 0 and goBack >= 0:
- if (genesOnly and gbk.features[goBack].type != "gene") or (
- cdsOnly and gbk.features[goBack].type != "CDS"
- ):
- goBack -= 1
- continue
- backList.append(gbk.features[goBack])
- numBack -= 1
- goBack -= 1
-
- while numAhead != 0 and goAhead < len(gbk.features):
- if (genesOnly and gbk.features[goAhead].type != "gene") or (
- cdsOnly and gbk.features[goAhead].type != "CDS"
- ):
- goAhead += 1
- continue
- aheadList.append(gbk.features[goAhead])
- numAhead -= 1
- goAhead += 1
-
- backList.reverse()
- if feat.location.strand == -1:
- tmpList = aheadList
- aheadList = backList
- backList = tmpList
-
-
- for item in backList:
- addition = ""
- header = ""
- if "product" in item.qualifiers:
- addition = " -" + str(item.qualifiers["product"][0]) + "-"
- if "protein_id" in item.qualifiers:
- header = (
- ">"
- + (item.qualifiers["protein_id"][0])
- + addition
- + " (5' of "
- + longOut
- + " found within "
- + sourceOut
- + ")\n"
- )
- else:
- header = (
- ">"
- + (item.qualifiers["locus_tag"][0])
- + addition
- + " (5' of "
- + longOut
- + " found within "
- + sourceOut
- + ")\n"
- )
- if outProt == True:
- if "translation" in item.qualifiers:
- upOut.write(header)
- upOut.write(
- str(item.qualifiers["translation"][0]) + "\n\n"
- )
- else:
- modS = 0
- modE = 0
- if "codon_start" in item.qualifiers:
- if item.location.strand > 0:
- modS = (
- int(item.qualifiers["codon_start"][0]) - 1
- )
- else:
- modE = (
- int(item.qualifiers["codon_start"][0]) - 1
- )
-
- seqHold = gbk.seq[
- item.location.start
- + modS : item.location.end
- - modE
- ]
- if item.location.strand == -1:
- seqHold = seqHold.reverse_complement()
- if cdsOnly:
- try:
- finalSeq = ""
- if tTable != 0:
- finalSeq = (
- str(
- seqHold.translate(
- table=tTable, cds=True
- )
- )
- + "\n\n"
- )
- else:
- finalSeq = str(seqHold) + "\n\n"
- # upOut.write(header)
- # upOut.write(finalSeq)
- except Exception as bdct:
- log.warn(
- "ERROR %s %s",
- item.qualifiers["locus_tag"][0],
- bdct,
- )
- finalSeq = ""
- if tTable != 0:
- finalSeq = (
- str(
- seqHold.translate(
- table=tTable, cds=False
- )
- )
- + "\n\n"
- )
- else:
- finalSeq = str(seqHold) + "\n\n"
- header = (
- ">"
- + (item.qualifiers["locus_tag"][0])
- + addition
- + " [INCOMPLETE] (5' of "
- + longOut
- + " found within "
- + sourceOut
- + ")\n"
- )
- upOut.write(header)
- upOut.write(finalSeq)
- else:
-
- if tTable != 0:
- upOut.write(header)
- upOut.write(
- str(
- seqHold.translate(
- table=tTable, cds=False
- )
- )
- + "\n\n"
- )
- else:
- upOut.write(header)
- upOut.write(str(seqHold) + "\n\n")
- else:
- upOut.write(header)
- upOut.write(
- str(gbk.seq[item.location.start : item.location.end])
- + "\n\n"
- )
-
- for item in aheadList:
- addition = ""
- header = ""
- if "product" in item.qualifiers:
- addition = " -" + str(item.qualifiers["product"][0]) + "-"
- if "protein_id" in item.qualifiers:
- header = (
- ">"
- + (item.qualifiers["protein_id"][0])
- + addition
- + " (3' of "
- + longOut
- + " found within "
- + sourceOut
- + ")\n"
- )
- else:
- header = (
- ">"
- + (item.qualifiers["locus_tag"][0])
- + addition
- + " (3' of "
- + longOut
- + " found within "
- + sourceOut
- + ")\n"
- )
- if outProt == True:
- if "translation" in item.qualifiers:
- downOut.write(header)
- downOut.write(
- str(item.qualifiers["translation"][0]) + "\n\n"
- )
- else:
- modS = 0
- modE = 0
- if "codon_start" in item.qualifiers:
- if item.location.strand > 0:
- modS = (
- int(item.qualifiers["codon_start"][0]) - 1
- )
- else:
- modE = (
- int(item.qualifiers["codon_start"][0]) - 1
- )
-
- seqHold = gbk.seq[
- item.location.start
- + modS : item.location.end
- - modE
- ]
- if item.location.strand == -1:
- seqHold = seqHold.reverse_complement()
- if cdsOnly:
- try:
- finalSeq = ""
- if tTable != 0:
- finalSeq = (
- str(
- seqHold.translate(
- table=tTable, cds=True
- )
- )
- + "\n\n"
- )
- else:
- finalSeq = str(seqHold) + "\n\n"
- # downOut.write(header)
- # downOut.write(finalSeq)
- except Exception as bdct:
- log.warn(
- "ERROR %s %s",
- item.qualifiers["locus_tag"][0],
- bdct,
- )
- finalSeq = ""
- if tTable != 0:
- finalSeq = (
- str(
- seqHold.translate(
- table=tTable, cds=False
- )
- )
- + "\n\n"
- )
- else:
- finalSeq = str(seqHold) + "\n\n"
- header = (
- ">"
- + (item.qualifiers["locus_tag"][0])
- + addition
- + " [INCOMPLETE] (3' of "
- + longOut
- + " found within "
- + sourceOut
- + ")\n"
- )
- downOut.write(header)
- downOut.write(finalSeq)
- else:
-
- if tTable != 0:
- downOut.write(header)
- downOut.write(
- str(
- seqHold.translate(
- table=tTable, cds=False
- )
- )
- + "\n\n"
- )
- else:
- downOut.write(header)
- downOut.write(str(seqHold) + "\n\n")
- else:
- downOut.write(header)
- downOut.write(
- str(gbk.seq[item.location.start : item.location.end])
- + "\n\n"
- )
- # print(longOut)
-
- return
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(
- description="Export a subset of features from a Genbank file", epilog=""
- )
- parser.add_argument(
- "-genbankFiles", nargs="+", type=argparse.FileType("r"), help="Genbank file"
- )
- parser.add_argument(
- "-fastaFiles",
- nargs="+",
- type=argparse.FileType("r"),
- help="Fasta file to match against",
- )
- parser.add_argument(
- "-tTable",
- type=int,
- default=11,
- help="Translation table to use",
- choices=range(0, 23),
- )
- parser.add_argument(
- "-fTable",
- type=int,
- default=11,
- help="Translation table to use",
- choices=range(0, 23),
- )
- parser.add_argument(
- "-upOut",
- type=argparse.FileType("w"),
- help="Upstream Fasta output",
- default="test-data/upOut.fa",
- )
- parser.add_argument(
- "-downOut",
- type=argparse.FileType("w"),
- help="Downstream Fasta output",
- default="test-data/downOut.fa",
- )
- parser.add_argument(
- "--genesOnly",
- action="store_true",
- help="Search and return only Gene type features",
- )
- parser.add_argument(
- "--cdsOnly",
- action="store_true",
- help="Search and return only CDS type features",
- )
- parser.add_argument(
- "--forceSeqID",
- action="store_true",
- help="Search and return only CDS type features",
- )
- parser.add_argument(
- "--outProt", action="store_true", help="Output the translated sequence"
- )
- parser.add_argument(
- "--forward",
- type=int,
- default=1,
- help="Number of features upstream from the hit to return",
- )
- parser.add_argument(
- "--behind",
- type=int,
- default=1,
- help="Number of features downstream from the hit to return",
- )
- args = parser.parse_args()
- extract_features(**vars(args))
diff -r 1311f97dccfa -r e29c36ee61e0 cpt_gbk_adjacent/adjacent_features.xml
--- a/cpt_gbk_adjacent/adjacent_features.xml Fri Jun 17 12:43:45 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,111 +0,0 @@
-    <description>Searches a Genbank file for a given FASTA sequence, then outputs a file with adjacent upstream features, and another with adjacent downstream features.</description>
-    <macros>
-        <import>macros.xml</import>
-        <import>cpt-macros.xml</import>
-    </macros>
-    <!-- The remaining tool markup (root element, command, inputs, outputs, tests, help) is not recoverable from this patch text. -->
-    <citations>
-        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-        <citation type="bibtex">
-            @unpublished{galaxyTools,
-                author = {A. Criscione},
-                title = {CPT Galaxy Tools},
-                year = {2019-2021},
-                note = {https://github.com/tamu-cpt/galaxy-tools/}
-            }
-        </citation>
-    </citations>
diff -r 1311f97dccfa -r e29c36ee61e0 cpt_gbk_adjacent/cpt-macros.xml
--- a/cpt_gbk_adjacent/cpt-macros.xml Fri Jun 17 12:43:45 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-<macros>
-    <!-- Macro element names were stripped from this patch text; only the recoverable requirements and the distinct citation entries (shared across several per-author citation macros) are listed below. -->
-    <requirements>
-        <requirement type="package">python</requirement>
-        <requirement type="package">biopython</requirement>
-        <requirement type="package">requests</requirement>
-    </requirements>
-    <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-    <citation type="bibtex">
-        @unpublished{galaxyTools,
-            author = {E. Mijalis, H. Rasche},
-            title = {CPT Galaxy Tools},
-            year = {2013-2017},
-            note = {https://github.com/tamu-cpt/galaxy-tools/}
-        }
-    </citation>
-    <citation type="bibtex">
-        @unpublished{galaxyTools,
-            author = {C. Ross},
-            title = {CPT Galaxy Tools},
-            year = {2020-},
-            note = {https://github.com/tamu-cpt/galaxy-tools/}
-        }
-    </citation>
-    <citation type="bibtex">
-        @unpublished{galaxyTools,
-            author = {A. Criscione},
-            title = {CPT Galaxy Tools},
-            year = {2019-2021},
-            note = {https://github.com/tamu-cpt/galaxy-tools/}
-        }
-    </citation>
-    <citation type="bibtex">
-        @unpublished{galaxyTools,
-            author = {C. Maughmer},
-            title = {CPT Galaxy Tools},
-            year = {2017-2020},
-            note = {https://github.com/tamu-cpt/galaxy-tools/}
-        }
-    </citation>
-</macros>
diff -r 1311f97dccfa -r e29c36ee61e0 cpt_gbk_adjacent/macros.xml
--- a/cpt_gbk_adjacent/macros.xml Fri Jun 17 12:43:45 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,105 +0,0 @@
-<macros>
-    <!-- Macro element names were stripped from this patch text; only the recoverable requirements content survives. -->
-    <requirements>
-        <requirement type="package">python</requirement>
-        <requirement type="package">biopython</requirement>
-        <requirement type="package">cpt_gffparser</requirement>
-    </requirements>
-</macros>
diff -r 1311f97dccfa -r e29c36ee61e0 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Jun 05 02:42:35 2023 +0000
@@ -0,0 +1,74 @@
+<macros>
+    <!-- Macro and token element names were stripped from this patch text; only the recoverable content of this macros file is listed below. -->
+    <requirements>
+        <requirement type="package">progressivemauve</requirement>
+        <requirement type="package">bcbiogff</requirement>
+    </requirements>
+    2.4.0
+    <citation type="doi">10.1371/journal.pone.0011147</citation>
+    <citation type="doi">10.1093/bioinformatics/btm039</citation>
+    '$xmfa'
+    '$sequences'
+    '$gff3_data'
+    #if str($reference_genome.reference_genome_source) == 'cached':
+        '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+        genomeref.fa
+    #end if
+    #if $reference_genome.reference_genome_source == 'history':
+        ln -s '$reference_genome.genome_fasta' genomeref.fa;
+    #end if
+    #if str($reference_genome.reference_genome_source) == 'cached':
+        '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+        genomeref.fa
+    #end if
+</macros>