changeset 1:bb6332a85aa6 draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:43:04 +0000
parents a68f32350196
children a921d6148d88
files cpt-macros.xml cpt_gbkToGff3.xml cpt_gbk_to_gff/cpt-macros.xml cpt_gbk_to_gff/cpt_gbkToGff3.xml cpt_gbk_to_gff/gbk_to_gff3.py cpt_gbk_to_gff/macros.xml gbk_to_gff3.py macros.xml
diffstat 8 files changed, 712 insertions(+), 543 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml	Mon Jun 05 02:43:04 2023 +0000
@@ -0,0 +1,115 @@
+<macros>
+    <xml name="gff_requirements">
+        <requirements>
+            <requirement type="package" version="2.7">python</requirement>
+            <requirement type="package" version="1.65">biopython</requirement>
+            <requirement type="package" version="2.12.1">requests</requirement>
+			<requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+            <yield/>
+        </requirements>
+        <version_command>
+		<![CDATA[
+			cd '$__tool_directory__' && git rev-parse HEAD
+		]]>
+		</version_command>
+    </xml>
+    <xml name="citation/mijalisrasche">
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">@unpublished{galaxyTools,
+		author = {E. Mijalis, H. Rasche},
+		title = {CPT Galaxy Tools},
+		year = {2013-2017},
+		note = {https://github.com/tamu-cpt/galaxy-tools/}
+		}
+		</citation>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {E. Mijalis, H. Rasche},
+				title = {CPT Galaxy Tools},
+				year = {2013-2017},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-crr">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Ross},
+				title = {CPT Galaxy Tools},
+				year = {2020-},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {E. Mijalis, H. Rasche},
+				title = {CPT Galaxy Tools},
+				year = {2013-2017},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {A. Criscione},
+				title = {CPT Galaxy Tools},
+				year = {2019-2021},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020-AJC-solo">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {A. Criscione},
+				title = {CPT Galaxy Tools},
+				year = {2019-2021},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-clm">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Maughmer},
+				title = {CPT Galaxy Tools},
+				year = {2017-2020},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="sl-citations-clm">
+        <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Maughmer},
+				title = {CPT Galaxy Tools},
+				year = {2017-2020},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+        <yield/>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gbkToGff3.xml	Mon Jun 05 02:43:04 2023 +0000
@@ -0,0 +1,46 @@
+<tool id="edu.tamu.cpt.gff3.customGbkToGff" name="(CPT) Genbank to GFF3: " version="20.1.0.0">
+  <description> CPT made Biobython-based solution</description>
+  <macros>
+    <import>macros.xml</import>
+    <import>cpt-macros.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+  <command detect_errors="aggressive"><![CDATA[
+'$__tool_directory__/gbk_to_gff3.py'
+'$gbkIn'
+'$makeMRNA'
+'$makeGene'
+--identifier "$qualID"
+--fastaFile '$fastaOut'
+> '$default']]></command>
+  <inputs>
+    <param label="GenBank file" name="gbkIn" type="data" format="genbank"/>
+    <param checked="true" label="Automatically generate any missing Gene features if CDS/RBS has none" name="makeGene" type="boolean" truevalue="--makeGene" falsevalue=""/>
+    <param checked="true" label="Automatically generate missing mRNA features for genes" name="makeMRNA" type="boolean" truevalue="--makeMRNA" falsevalue=""/>
+    <param label="Qualifier to derive GFF ID from" name="qualID" type="text" value="locus_tag"/>
+  </inputs>
+  <outputs>
+    <data format="gff3" hidden="false" name="default"/>
+    <data format="fasta" hidden="false" name="fastaOut"/>
+  </outputs>
+  <tests>
+  </tests>
+  <help><![CDATA[
+**What it does**
+
+A Biopython-based script to convert Genbank files to GFF3. Should resolve frame shift errors and other problems caused by the old Bioperl  solution. 
+
+Will also attempt to automatically parent RBS, CDS, and Exon features without a locus tag to an appropriate gene feature.
+]]></help>
+  <citations>
+    <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+    <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {A. Criscione},
+				title = {CPT Galaxy Tools},
+				year = {2019-2021},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+                        </citation>
+  </citations>
+</tool>
--- a/cpt_gbk_to_gff/cpt-macros.xml	Fri Jun 17 12:46:43 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-<?xml version="1.0"?>
-<macros>
-	<xml name="gff_requirements">
-		<requirements>
-			<requirement type="package" version="2.7">python</requirement>
-			<requirement type="package" version="1.65">biopython</requirement>
-			<requirement type="package" version="2.12.1">requests</requirement>
-			<yield/>
-		</requirements>
-		<version_command>
-		<![CDATA[
-			cd $__tool_directory__ && git rev-parse HEAD
-		]]>
-		</version_command>
-	</xml>
-	<xml name="citation/mijalisrasche">
-		<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-		<citation type="bibtex">@unpublished{galaxyTools,
-		author = {E. Mijalis, H. Rasche},
-		title = {CPT Galaxy Tools},
-		year = {2013-2017},
-		note = {https://github.com/tamu-cpt/galaxy-tools/}
-		}
-		</citation>
-	</xml>
-	<xml name="citations">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {E. Mijalis, H. Rasche},
-				title = {CPT Galaxy Tools},
-				year = {2013-2017},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation> 
-		<yield/>
-		</citations>
-	</xml>
-    	<xml name="citations-crr">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {C. Ross},
-				title = {CPT Galaxy Tools},
-				year = {2020-},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-		<yield/>
-		</citations>
-	</xml>
-        <xml name="citations-2020">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {E. Mijalis, H. Rasche},
-				title = {CPT Galaxy Tools},
-				year = {2013-2017},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-                        <citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {A. Criscione},
-				title = {CPT Galaxy Tools},
-				year = {2019-2021},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-                        </citation>
-                        <yield/>
-		</citations>
-	</xml>
-        <xml name="citations-2020-AJC-solo">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-                        <citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {A. Criscione},
-				title = {CPT Galaxy Tools},
-				year = {2019-2021},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-                        </citation>
-                        <yield/>
-		</citations>
-	</xml>
-        <xml name="citations-clm">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {C. Maughmer},
-				title = {CPT Galaxy Tools},
-				year = {2017-2020},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-                        <yield/>
-		</citations>
-	</xml>
-        <xml name="sl-citations-clm">
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {C. Maughmer},
-				title = {CPT Galaxy Tools},
-				year = {2017-2020},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-                        <yield/>
-	</xml>
-</macros>
--- a/cpt_gbk_to_gff/cpt_gbkToGff3.xml	Fri Jun 17 12:46:43 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-<?xml version="1.0"?>
-<tool id="edu.tamu.cpt.gff3.customGbkToGff" name="(CPT) Genbank to GFF3: " version="20.1.0.0">
-  <description> CPT made Biobython-based solution</description>
-  <macros>
-    <import>macros.xml</import>
-		<import>cpt-macros.xml</import>
-  </macros>
-  <expand macro="requirements"/>
-  <command detect_errors="aggressive"><![CDATA[
-$__tool_directory__/gbk_to_gff3.py
-$gbkIn
-$makeMRNA
-$makeGene
---identifier "$qualID"
---fastaFile $fastaOut
-> $default]]></command>
-  <inputs>
-    <param label="GenBank file" name="gbkIn" type="data" format="genbank"/>
-    <param checked="true" label="Automatically generate any missing Gene features if CDS/RBS has none" name="makeGene"
-        type="boolean" truevalue="--makeGene" falsevalue=""/>
-    <param checked="true" label="Automatically generate missing mRNA features for genes" name="makeMRNA"
-        type="boolean" truevalue="--makeMRNA" falsevalue=""/>
-    <param label="Qualifier to derive GFF ID from" name="qualID" type="text" value="locus_tag"/>
-  </inputs>
-  <outputs>
-    <data format="gff3" hidden="false" name="default"/>
-    <data format="fasta" hidden="false" name="fastaOut"/>
-  </outputs>
-  <tests>
-  </tests>
-  <help><![CDATA[
-**What it does**
-
-A Biopython-based script to convert Genbank files to GFF3. Should resolve frame shift errors and other problems caused by the old Bioperl  solution. 
-
-Will also attempt to automatically parent RBS, CDS, and Exon features without a locus tag to an appropriate gene feature.
-]]></help>
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-                        <citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {A. Criscione},
-				title = {CPT Galaxy Tools},
-				year = {2019-2021},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-                        </citation>
-		</citations>
-</tool>
--- a/cpt_gbk_to_gff/gbk_to_gff3.py	Fri Jun 17 12:46:43 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,274 +0,0 @@
-#!/usr/bin/env python
-
-import argparse
-import sys
-
-from Bio import SeqIO
-from Bio.SeqRecord import SeqRecord
-from Bio.SeqFeature import FeatureLocation
-from CPT_GFFParser import gffSeqFeature, gffWrite
-
-bottomFeatTypes = ["exon", "RBS", "CDS"]
-
-def makeGffFeat(inFeat, num, recName, identifier):
-    if inFeat.type == "RBS" or (inFeat.type == "regulatory" and "regulatory_class" in inFeat.qualifiers.keys() and inFeat.qualifiers["regulatory_class"][0] == "ribosome_binding_site"):
-      inFeat.type = "Shine_Dalgarno_sequence"
-    if "codon_start" in inFeat.qualifiers.keys():
-      shift = int(inFeat.qualifiers["codon_start"][0]) - 1
-    else:
-      shift = "."
-    if identifier in inFeat.qualifiers.keys():
-      name = inFeat.qualifiers[identifier][0] + "." + inFeat.type 
-      if num > 0:
-        name += "." + str(num)
-    else:
-      name = recName + "." + inFeat.type + "." + str(num)
-    
-    outFeat = gffSeqFeature(inFeat.location, inFeat.type, '', inFeat.strand, name, inFeat.qualifiers, None, None, None, shift, 0, "GbkToGff")
-    outFeat.qualifiers["ID"] = [name]  
-    return outFeat
-
-def main(inFile, makeMRNA, makeGene, identifier, fastaFile, outFile):
-
-    ofh = sys.stdout
-    if outFile:
-        ofh = outFile
-
-    outRec = []
-    failed = 0
-    for rec in SeqIO.parse(inFile, "genbank"):
-        recID = rec.name
-
-        if len(str(rec.seq)) > 0:
-            seqs_pending_writes = True
-            outSeq = str(rec.seq)
-            seqLen = len(outSeq)
-
-        locBucket = {}
-        outFeats = []
-        topTypeDict = {}
-        seekingParent = []
-        geneNum = 0
-        autoGeneNum = 0
-        for feat in rec.features:
-            if identifier not in feat.qualifiers.keys(): #Allow metadata features and other features with no ID (Output warning?) - AJC
-              if feat.type in bottomFeatTypes:
-                seekingParent.append([feat, [], []]) # [Feature, all parent candidates, strongest parent candidates]
-                continue
-              elif feat.type not in topTypeDict.keys():
-                topTypeDict[feat.type] = 1
-              else:
-                topTypeDict[feat.type] += 1
-              outFeats.append(makeGffFeat(feat, topTypeDict[feat.type], recID, identifier))
-              continue
-            elif feat.qualifiers[identifier][0] not in locBucket.keys():
-              locBucket[feat.qualifiers[identifier][0]] = []
-            locBucket[feat.qualifiers[identifier][0]].append(feat)
-
-        for locus in locBucket.keys():
-          minLoc = locBucket[locus][0].location.start
-          maxLoc = locBucket[locus][0].location.end
-          for feat in locBucket[locus]:
-            minLoc = min(minLoc, feat.location.start)
-            maxLoc = max(maxLoc, feat.location.end)
-          for x in seekingParent:
-            if x[0].location.start >= minLoc and x[0].location.end <= maxLoc:
-              x[1].append(locus)
-            if x[0].location.start == minLoc or x[0].location.end == maxLoc:
-              x[2].append(locus)
-
-        for x in seekingParent: #Reformat to [Feature, Locus, Unused/Free]
-          if len(x[2]) == 1:
-            finList = ""
-            if len(x[1]) > 1:
-              for loc in x[1]:
-                if loc != x[2][0]:
-                  finList += loc + ", "
-              finList = str(x[0].type) + " had no locus tag set in .gbk file, automatically derived. Other, weaker candidate(s) were " + finList[0:-2] + "."
-            else:
-              finList = str(x[0].type) + " had no locus tag set in .gbk file, automatically derived."
-            if "Notes" not in x[0].qualifiers.keys():
-              x[0].qualifiers["Notes"] = []
-            x[0].qualifiers["Notes"].append(finList)
-            x[1] = x[2][0]
-          elif len(x[2]) > 1:
-            candidate = x[2][0] #Arbitrarily choose first one
-            finList = ""
-            strongList = ""
-            for loc in x[2]:
-              if loc != candidate:
-                finList += loc + ", "
-                strongList += loc + ", "
-            for loc in x[1]:
-              if loc not in x[2]:
-                finList += loc + ", "
-            finList = str(x[0].type) + " had no locus tag set in .gbk file, automatically derived. Other candidate(s) were " + finList[0:-2] + " (Equally strong candidate(s): " + strongList[0:-2] + ")."
-            if "Notes" not in x[0].qualifiers.keys():
-              x[0].qualifiers["Notes"] = []
-            x[0].qualifiers["Notes"].append(finList)
-            x[1] = candidate
-          elif len(x[1]) == 1:
-            x[1] = x[1][0]
-            if "Notes" not in x[0].qualifiers.keys():
-              x[0].qualifiers["Notes"] = []
-            finList = str(x[0].type) + " had no locus tag set in .gbk file, automatically derived."
-            x[0].qualifiers["Notes"].append(finList)
-          elif len(x[1]) > 1:
-            candidate = x[1][0] #Arbitrarily choose first one
-            finList = ""
-            for loc in x[1]:
-              if loc != candidate:
-                finList += loc + ", "
-            finList = str(x[0].type) + " had no locus tag set in .gbk file, automatically derived. Other candidates were " + finList[0:-2] + "."
-            if "Notes" not in x[0].qualifiers.keys():
-              x[0].qualifiers["Notes"] = []
-            x[0].qualifiers["Notes"].append(finList)
-            x[1] = candidate
-          else:
-            if makeGene:
-              sys.stderr.write("Warning: Unable to find potential parent for feature with no " + identifier + " of type " + str(x[0].type) + " at location [" + str(x[0].location.start + 1) + ", " + str(x[0].location.end) + "], creating standalone gene.\n")
-              autoGeneNum += 1
-              x[0].source = "GbkToGff"
-              x[0].score = 0
-              x[0].shift = 0
-              if "ID" not in x[0].qualifiers.keys():
-                x[0].qualifiers["ID"] = [recID + ".standalone_" + x[0].type + "." + str(autoGeneNum)]
-              tempName = recID + ".derived_Gene." + str(autoGeneNum)
-              tempQuals = {"ID" : [tempName], "Notes" : ["Gene feature automatically generated by Gbk to GFF conversion"]}
-              tempGene = gffSeqFeature(FeatureLocation(x[0].location.start, x[0].location.end, x[0].location.strand), 'gene', '', x[0].strand, tempName, tempQuals, None, None, None, ".", 0, "GbkToGff")
-              if makeMRNA:
-                tempName = recID + ".derived_mRNA." + str(autoGeneNum)
-                tempQuals = {"ID" : [tempName], "Notes" : ["mRNA feature automatically generated by Gbk to GFF conversion"]}
-                tempGene.sub_features.append(gffSeqFeature(FeatureLocation(x[0].location.start, x[0].location.end, x[0].location.strand), 'mRNA', '', x[0].strand, tempName, tempQuals, None, None, None, ".", 0, "GbkToGff"))
-                tempGene.sub_features[-1].sub_features.append(x[0])
-              else:
-                tempGene.sub_features.append(x[0])
-                
-              
-              outFeats.append(tempGene)
-            else:
-              sys.stderr.write("Warning: Unable to find potential parent for feature with no " + identifier + " of type " + str(x[0].type) + " at location [" + str(x[0].location.start + 1) + ", " + str(x[0].location.end) + "].\n")
-              if x[0].type not in topTypeDict.keys():
-                topTypeDict[x[0].type] = 1
-              else:
-                topTypeDict[x[0].type] += 1
-              outFeats.append(makeGffFeat(x[0], topTypeDict[x[0].type], recID, identifier))
-            
-        for locus in locBucket.keys():
-          if len(locBucket[locus]) == 1: # No heirarchy to be made
-            outFeats.append(makeGffFeat(locBucket[locus][0], 0, recID, identifier))
-            continue
-          topFeat = None
-          midFeat = None
-          bottomFeats = []
-          typeDict = {}
-          minLoc = locBucket[locus][0].location.start
-          maxLoc = locBucket[locus][0].location.end
-          geneNum += 1
-          for feat in locBucket[locus]:
-            # If we want to make our own top-level feat?
-            minLoc = min(minLoc, feat.location.start)
-            maxLoc = max(maxLoc, feat.location.end)
-            
-            # Gene->mRNA->CDS included as example, to add other feature-heirarchys in the appropriate slot
-            if feat.type in ['gene']:
-              if not topFeat:
-                topFeat = feat
-              # Else handle multiple top features
-            elif feat.type in ['mRNA', 'tRNA', 'rRNA']:
-              if not midFeat:
-                midFeat = feat
-              # Else handle multiple mid feats (May need another elif type-in-list statement if we actually expect a list of mid feats)
-            else:
-              if feat.type not in typeDict.keys():
-                typeDict[feat.type] = 1
-              else:
-                typeDict[feat.type] += 1 
-              bottomFeats.append(feat)
-          
-          for x in seekingParent:
-            if type(x[1]) != "list" and locus == x[1]:
-              x[0].qualifiers[identifier] = [locus]
-              bottomFeats.append(x[0])
-              if x[0].type not in typeDict.keys():
-                typeDict[x[0].type] = 1
-              else:
-                typeDict[x[0].type] += 1 
-              
-          
-            
-              
-            
-          #if not topFeat: # Make our own top-level feature based off minLoc, maxLoc bounds
-            
-          for x in typeDict.keys(): # If only 1, set it to 0 so we don't append a number to the name
-            if typeDict[x] == 1:    # Else, set to 1 so that we count up as we encounter the features
-              typeDict[x] = 0
-            else:
-              typeDict[x] = 1
-          
-          if not topFeat:
-            if makeGene:
-              if midFeat:
-                possibleStrand = midFeat.strand
-              else:
-                possibleStrand = bottomFeats[0].strand
-              tempName = recID + ".gene." + str(geneNum)
-              tempQuals = {identifier : [locus], "ID" : [tempName], "Notes" : ["Gene feature automatically generated by Gbk to GFF conversion"]}
-              topFeat = gffSeqFeature(FeatureLocation(minLoc, maxLoc, possibleStrand), 'gene', '', possibleStrand, tempName, tempQuals, None, None, None, ".", 0, "GbkToGff")
-            else:
-              sys.stderr.write("Unable to create a feature heirarchy at location [%d, %d] with features: \n" % (minLoc, maxLoc))
-              for x in locBucket[locus]:
-                sys.stderr.write(str(x))
-                sys.stderr.write('\n')
-                failed = 1
-              continue
-
-          outFeats.append(makeGffFeat(topFeat, 0, recID, identifier))
-          if not midFeat and topFeat.type == "gene" and makeMRNA:
-              if identifier in topFeat.qualifiers.keys():
-                tempName = topFeat.qualifiers[identifier][0] + ".mRNA"
-                tempQuals = {identifier : topFeat.qualifiers[identifier], "ID" : [tempName], "Notes" : ["mRNA feature automatically generated by Gbk to GFF conversion"]}
-              else:
-                tempName = outFeats[-1].ID + ".mRNA"
-                tempQuals = {identifier : topFeat.qualifiers[identifier], "ID" : [tempName], "Notes" : ["mRNA feature automatically generated by Gbk to GFF conversion"]}
-              midFeat = gffSeqFeature(FeatureLocation(minLoc, maxLoc, topFeat.strand), 'mRNA', '', topFeat.strand, tempName, tempQuals, None, None, None, ".", 0, "GbkToGff")
-              
-          if midFeat: # Again, need a new if statement if we want to handle multiple mid-tier features
-              outFeats[-1].sub_features.append(makeGffFeat(midFeat, 0, recID, identifier))
-              outFeats[-1].sub_features[-1].qualifiers["Parent"] = [outFeats[-1].id]
-              for x in bottomFeats:
-                typeDict[x.type] += 1
-                outFeats[-1].sub_features[-1].sub_features.append(makeGffFeat(x, typeDict[x.type], recID, identifier))
-                outFeats[-1].sub_features[-1].sub_features[-1].qualifiers["Parent"] = [outFeats[-1].sub_features[-1].id]
-          else: # No midFeat, append bottom feats directly to top feats 
-              for x in bottomFeats:
-                typeDict[x.type] += 1
-                outFeats[-1].sub_features.append(makeGffFeat(x, typeDict[x.type], recID, identifier))
-                outFeats[-1].sub_features[-1].qualifiers["Parent"] = [outFeats[-1].id]
-                
-        outRec.append(SeqRecord(rec.seq, recID, rec.name, rec.description, rec.dbxrefs, sorted(outFeats, key=lambda x: x.location.start), rec.annotations, rec.letter_annotations))
-        SeqIO.write([outRec[-1]], fastaFile, "fasta")
-    gffWrite(outRec, ofh)    
-    exit(failed) # 0 if all features handled, 1 if unable to handle some
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser( description='Biopython solution to Gbk to GFF conversion')
-
-    parser.add_argument('inFile', type=argparse.FileType("r"), help='Path to an input GBK file' )
-    parser.add_argument('--makeMRNA', action="store_true", required=False, help="Automatically create mRNA features")
-    parser.add_argument('--makeGene', action="store_true", required=False, help="Automatically create missing Gene features")
-    parser.add_argument('--identifier', type=str, default="locus_tag", required=False, help="Qualifier to derive ID property from")
-    parser.add_argument('--fastaFile', type=argparse.FileType("w"),  help='Fasta output for sequences' )
-    parser.add_argument('--outFile', type=argparse.FileType("w"),  help='GFF feature output' )
-    args = parser.parse_args()
-    main(**vars(args))
-
-
-
-
-
-
-
-
--- a/cpt_gbk_to_gff/macros.xml	Fri Jun 17 12:46:43 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,105 +0,0 @@
-<?xml version="1.0"?>
-<macros>
-  <xml name="requirements">
-    <requirements>
-	<requirement type="package" version="3.8.13">python</requirement>
-	<requirement type="package" version="1.79">biopython</requirement>
-	<requirement type="package" version="1.2.2">cpt_gffparser</requirement>  
-	<yield/>
-    </requirements>
-  </xml>
-  <xml name="ldap_ref"
-    token_name="dn_ref"
-    token_label="Pick a DN"
-    token_fromfile="ldap_people.loc">
-        <repeat name="repeat_@NAME@" title="@LABEL@">
-          <param name="@NAME@" label="Select a @LABEL@" type="select">
-            <options from_file="@FROMFILE@">
-                <column name="name" index="0"/>
-                <column name="value" index="1"/>
-            </options>
-          </param>
-        </repeat>
-    </xml>
-  <xml name="ldap_ref_single"
-    token_name="dn_ref"
-    token_label="Pick a DN"
-    token_fromfile="ldap_people.loc">
-          <param name="@NAME@" label="Select a @LABEL@" type="select">
-            <options from_file="@FROMFILE@">
-                <column name="name" index="0"/>
-                <column name="value" index="1"/>
-            </options>
-          </param>
-    </xml>
-	<xml name="gbk_feature_type"
-		token_label="Feature type to remove"
-		token_multiple="True"
-		token_optional="False"
-		token_name="positional_2">
-    <param label="@LABEL@" optional="@TOKEN_OPTIONAL" multiple="@MULTIPLE@" name="feature_type" type="select">
-      <option value="-10_signal">-10_signal</option>
-      <option value="-35_signal">-35_signal</option>
-      <option value="3'UTR">3'UTR</option>
-      <option value="5'UTR">5'UTR</option>
-      <option value="CAAT_signal">CAAT_signal</option>
-      <option selected="true" value="CDS">CDS</option>
-      <option value="C_region">C_region</option>
-      <option value="D-loop">D-loop</option>
-      <option value="D_segment">D_segment</option>
-      <option value="GC_signal">GC_signal</option>
-      <option value="J_segment">J_segment</option>
-      <option value="LTR">LTR</option>
-      <option value="N_region">N_region</option>
-      <option value="RBS">RBS</option>
-      <option value="STS">STS</option>
-      <option value="S_region">S_region</option>
-      <option value="TATA_signal">TATA_signal</option>
-      <option value="V_region">V_region</option>
-      <option value="V_segment">V_segment</option>
-      <option value="all">all</option>
-      <option value="assembly_gap">assembly_gap</option>
-      <option value="attenuator">attenuator</option>
-      <option value="enhancer">enhancer</option>
-      <option value="exon">exon</option>
-      <option value="gap">gap</option>
-      <option value="gene">gene</option>
-      <option value="iDNA">iDNA</option>
-      <option value="intron">intron</option>
-      <option value="mRNA">mRNA</option>
-      <option value="mat_peptide">mat_peptide</option>
-      <option value="misc_RNA">misc_RNA</option>
-      <option value="misc_binding">misc_binding</option>
-      <option value="misc_difference">misc_difference</option>
-      <option value="misc_feature">misc_feature</option>
-      <option value="misc_recomb">misc_recomb</option>
-      <option value="misc_signal">misc_signal</option>
-      <option value="misc_structure">misc_structure</option>
-      <option value="mobile_element">mobile_element</option>
-      <option value="modified_base">modified_base</option>
-      <option value="ncRNA">ncRNA</option>
-      <option value="old_sequence">old_sequence</option>
-      <option value="operon">operon</option>
-      <option value="oriT">oriT</option>
-      <option value="polyA_signal">polyA_signal</option>
-      <option value="polyA_site">polyA_site</option>
-      <option value="precursor_RNA">precursor_RNA</option>
-      <option value="prim_transcript">prim_transcript</option>
-      <option value="primer_bind">primer_bind</option>
-      <option value="promoter">promoter</option>
-      <option value="protein_bind">protein_bind</option>
-      <option value="rRNA">rRNA</option>
-      <option value="rep_origin">rep_origin</option>
-      <option value="repeat_region">repeat_region</option>
-      <option value="sig_peptide">sig_peptide</option>
-      <option value="source">source</option>
-      <option value="stem_loop">stem_loop</option>
-      <option value="tRNA">tRNA</option>
-      <option value="terminator">terminator</option>
-      <option value="tmRNA">tmRNA</option>
-      <option value="transit_peptide">transit_peptide</option>
-      <option value="unsure">unsure</option>
-      <option value="variation">variation</option>
-    </param>
-	</xml>
-</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gbk_to_gff3.py	Mon Jun 05 02:43:04 2023 +0000
@@ -0,0 +1,477 @@
+#!/usr/bin/env python
+
+import argparse
+import sys
+
+from Bio import SeqIO
+from Bio.SeqRecord import SeqRecord
+from Bio.SeqFeature import FeatureLocation
+from CPT_GFFParser import gffSeqFeature, gffWrite
+
+bottomFeatTypes = ["exon", "RBS", "CDS"]
+
+
+def makeGffFeat(inFeat, num, recName, identifier):
+    if inFeat.type == "RBS" or (
+        inFeat.type == "regulatory"
+        and "regulatory_class" in inFeat.qualifiers.keys()
+        and inFeat.qualifiers["regulatory_class"][0] == "ribosome_binding_site"
+    ):
+        inFeat.type = "Shine_Dalgarno_sequence"
+    if "codon_start" in inFeat.qualifiers.keys():
+        shift = int(inFeat.qualifiers["codon_start"][0]) - 1
+    else:
+        shift = "."
+    if identifier in inFeat.qualifiers.keys():
+        name = inFeat.qualifiers[identifier][0] + "." + inFeat.type
+        if num > 0:
+            name += "." + str(num)
+    else:
+        name = recName + "." + inFeat.type + "." + str(num)
+
+    outFeat = gffSeqFeature(
+        inFeat.location,
+        inFeat.type,
+        "",
+        inFeat.strand,
+        name,
+        inFeat.qualifiers,
+        None,
+        None,
+        None,
+        shift,
+        0,
+        "GbkToGff",
+    )
+    outFeat.qualifiers["ID"] = [name]
+    return outFeat
+
+
+def main(inFile, makeMRNA, makeGene, identifier, fastaFile, outFile):
+
+    ofh = sys.stdout
+    if outFile:
+        ofh = outFile
+
+    outRec = []
+    failed = 0
+    for rec in SeqIO.parse(inFile, "genbank"):
+        recID = rec.name
+
+        if len(str(rec.seq)) > 0:
+            seqs_pending_writes = True
+            outSeq = str(rec.seq)
+            seqLen = len(outSeq)
+
+        locBucket = {}
+        outFeats = []
+        topTypeDict = {}
+        seekingParent = []
+        geneNum = 0
+        autoGeneNum = 0
+        for feat in rec.features:
+            if (
+                identifier not in feat.qualifiers.keys()
+            ):  # Allow metadata features and other features with no ID (Output warning?) - AJC
+                if feat.type in bottomFeatTypes:
+                    seekingParent.append(
+                        [feat, [], []]
+                    )  # [Feature, all parent candidates, strongest parent candidates]
+                    continue
+                elif feat.type not in topTypeDict.keys():
+                    topTypeDict[feat.type] = 1
+                else:
+                    topTypeDict[feat.type] += 1
+                outFeats.append(
+                    makeGffFeat(feat, topTypeDict[feat.type], recID, identifier)
+                )
+                continue
+            elif feat.qualifiers[identifier][0] not in locBucket.keys():
+                locBucket[feat.qualifiers[identifier][0]] = []
+            locBucket[feat.qualifiers[identifier][0]].append(feat)
+
+        for locus in locBucket.keys():
+            minLoc = locBucket[locus][0].location.start
+            maxLoc = locBucket[locus][0].location.end
+            for feat in locBucket[locus]:
+                minLoc = min(minLoc, feat.location.start)
+                maxLoc = max(maxLoc, feat.location.end)
+            for x in seekingParent:
+                if x[0].location.start >= minLoc and x[0].location.end <= maxLoc:
+                    x[1].append(locus)
+                if x[0].location.start == minLoc or x[0].location.end == maxLoc:
+                    x[2].append(locus)
+
+        for x in seekingParent:  # Reformat to [Feature, Locus, Unused/Free]
+            if len(x[2]) == 1:
+                finList = ""
+                if len(x[1]) > 1:
+                    for loc in x[1]:
+                        if loc != x[2][0]:
+                            finList += loc + ", "
+                    finList = (
+                        str(x[0].type)
+                        + " had no locus tag set in .gbk file, automatically derived. Other, weaker candidate(s) were "
+                        + finList[0:-2]
+                        + "."
+                    )
+                else:
+                    finList = (
+                        str(x[0].type)
+                        + " had no locus tag set in .gbk file, automatically derived."
+                    )
+                if "Notes" not in x[0].qualifiers.keys():
+                    x[0].qualifiers["Notes"] = []
+                x[0].qualifiers["Notes"].append(finList)
+                x[1] = x[2][0]
+            elif len(x[2]) > 1:
+                candidate = x[2][0]  # Arbitrarily choose first one
+                finList = ""
+                strongList = ""
+                for loc in x[2]:
+                    if loc != candidate:
+                        finList += loc + ", "
+                        strongList += loc + ", "
+                for loc in x[1]:
+                    if loc not in x[2]:
+                        finList += loc + ", "
+                finList = (
+                    str(x[0].type)
+                    + " had no locus tag set in .gbk file, automatically derived. Other candidate(s) were "
+                    + finList[0:-2]
+                    + " (Equally strong candidate(s): "
+                    + strongList[0:-2]
+                    + ")."
+                )
+                if "Notes" not in x[0].qualifiers.keys():
+                    x[0].qualifiers["Notes"] = []
+                x[0].qualifiers["Notes"].append(finList)
+                x[1] = candidate
+            elif len(x[1]) == 1:
+                x[1] = x[1][0]
+                if "Notes" not in x[0].qualifiers.keys():
+                    x[0].qualifiers["Notes"] = []
+                finList = (
+                    str(x[0].type)
+                    + " had no locus tag set in .gbk file, automatically derived."
+                )
+                x[0].qualifiers["Notes"].append(finList)
+            elif len(x[1]) > 1:
+                candidate = x[1][0]  # Arbitrarily choose first one
+                finList = ""
+                for loc in x[1]:
+                    if loc != candidate:
+                        finList += loc + ", "
+                finList = (
+                    str(x[0].type)
+                    + " had no locus tag set in .gbk file, automatically derived. Other candidates were "
+                    + finList[0:-2]
+                    + "."
+                )
+                if "Notes" not in x[0].qualifiers.keys():
+                    x[0].qualifiers["Notes"] = []
+                x[0].qualifiers["Notes"].append(finList)
+                x[1] = candidate
+            else:
+                if makeGene:
+                    sys.stderr.write(
+                        "Warning: Unable to find potential parent for feature with no "
+                        + identifier
+                        + " of type "
+                        + str(x[0].type)
+                        + " at location ["
+                        + str(x[0].location.start + 1)
+                        + ", "
+                        + str(x[0].location.end)
+                        + "], creating standalone gene.\n"
+                    )
+                    autoGeneNum += 1
+                    x[0].source = "GbkToGff"
+                    x[0].score = 0
+                    x[0].shift = 0
+                    if "ID" not in x[0].qualifiers.keys():
+                        x[0].qualifiers["ID"] = [
+                            recID + ".standalone_" + x[0].type + "." + str(autoGeneNum)
+                        ]
+                    tempName = recID + ".derived_Gene." + str(autoGeneNum)
+                    tempQuals = {
+                        "ID": [tempName],
+                        "Notes": [
+                            "Gene feature automatically generated by Gbk to GFF conversion"
+                        ],
+                    }
+                    tempGene = gffSeqFeature(
+                        FeatureLocation(
+                            x[0].location.start, x[0].location.end, x[0].location.strand
+                        ),
+                        "gene",
+                        "",
+                        x[0].strand,
+                        tempName,
+                        tempQuals,
+                        None,
+                        None,
+                        None,
+                        ".",
+                        0,
+                        "GbkToGff",
+                    )
+                    if makeMRNA:
+                        tempName = recID + ".derived_mRNA." + str(autoGeneNum)
+                        tempQuals = {
+                            "ID": [tempName],
+                            "Notes": [
+                                "mRNA feature automatically generated by Gbk to GFF conversion"
+                            ],
+                        }
+                        tempGene.sub_features.append(
+                            gffSeqFeature(
+                                FeatureLocation(
+                                    x[0].location.start,
+                                    x[0].location.end,
+                                    x[0].location.strand,
+                                ),
+                                "mRNA",
+                                "",
+                                x[0].strand,
+                                tempName,
+                                tempQuals,
+                                None,
+                                None,
+                                None,
+                                ".",
+                                0,
+                                "GbkToGff",
+                            )
+                        )
+                        tempGene.sub_features[-1].sub_features.append(x[0])
+                    else:
+                        tempGene.sub_features.append(x[0])
+
+                    outFeats.append(tempGene)
+                else:
+                    sys.stderr.write(
+                        "Warning: Unable to find potential parent for feature with no "
+                        + identifier
+                        + " of type "
+                        + str(x[0].type)
+                        + " at location ["
+                        + str(x[0].location.start + 1)
+                        + ", "
+                        + str(x[0].location.end)
+                        + "].\n"
+                    )
+                    if x[0].type not in topTypeDict.keys():
+                        topTypeDict[x[0].type] = 1
+                    else:
+                        topTypeDict[x[0].type] += 1
+                    outFeats.append(
+                        makeGffFeat(x[0], topTypeDict[x[0].type], recID, identifier)
+                    )
+
+        for locus in locBucket.keys():
+            if len(locBucket[locus]) == 1:  # No heirarchy to be made
+                outFeats.append(makeGffFeat(locBucket[locus][0], 0, recID, identifier))
+                continue
+            topFeat = None
+            midFeat = None
+            bottomFeats = []
+            typeDict = {}
+            minLoc = locBucket[locus][0].location.start
+            maxLoc = locBucket[locus][0].location.end
+            geneNum += 1
+            for feat in locBucket[locus]:
+                # If we want to make our own top-level feat?
+                minLoc = min(minLoc, feat.location.start)
+                maxLoc = max(maxLoc, feat.location.end)
+
+                # Gene->mRNA->CDS included as example, to add other feature-heirarchys in the appropriate slot
+                if feat.type in ["gene"]:
+                    if not topFeat:
+                        topFeat = feat
+                    # Else handle multiple top features
+                elif feat.type in ["mRNA", "tRNA", "rRNA"]:
+                    if not midFeat:
+                        midFeat = feat
+                    # Else handle multiple mid feats (May need another elif type-in-list statement if we actually expect a list of mid feats)
+                else:
+                    if feat.type not in typeDict.keys():
+                        typeDict[feat.type] = 1
+                    else:
+                        typeDict[feat.type] += 1
+                    bottomFeats.append(feat)
+
+            for x in seekingParent:
+                if type(x[1]) != "list" and locus == x[1]:
+                    x[0].qualifiers[identifier] = [locus]
+                    bottomFeats.append(x[0])
+                    if x[0].type not in typeDict.keys():
+                        typeDict[x[0].type] = 1
+                    else:
+                        typeDict[x[0].type] += 1
+
+            # if not topFeat: # Make our own top-level feature based off minLoc, maxLoc bounds
+
+            for (
+                x
+            ) in (
+                typeDict.keys()
+            ):  # If only 1, set it to 0 so we don't append a number to the name
+                if (
+                    typeDict[x] == 1
+                ):  # Else, set to 1 so that we count up as we encounter the features
+                    typeDict[x] = 0
+                else:
+                    typeDict[x] = 1
+
+            if not topFeat:
+                if makeGene:
+                    if midFeat:
+                        possibleStrand = midFeat.strand
+                    else:
+                        possibleStrand = bottomFeats[0].strand
+                    tempName = recID + ".gene." + str(geneNum)
+                    tempQuals = {
+                        identifier: [locus],
+                        "ID": [tempName],
+                        "Notes": [
+                            "Gene feature automatically generated by Gbk to GFF conversion"
+                        ],
+                    }
+                    topFeat = gffSeqFeature(
+                        FeatureLocation(minLoc, maxLoc, possibleStrand),
+                        "gene",
+                        "",
+                        possibleStrand,
+                        tempName,
+                        tempQuals,
+                        None,
+                        None,
+                        None,
+                        ".",
+                        0,
+                        "GbkToGff",
+                    )
+                else:
+                    sys.stderr.write(
+                        "Unable to create a feature heirarchy at location [%d, %d] with features: \n"
+                        % (minLoc, maxLoc)
+                    )
+                    for x in locBucket[locus]:
+                        sys.stderr.write(str(x))
+                        sys.stderr.write("\n")
+                        failed = 1
+                    continue
+
+            outFeats.append(makeGffFeat(topFeat, 0, recID, identifier))
+            if not midFeat and topFeat.type == "gene" and makeMRNA:
+                if identifier in topFeat.qualifiers.keys():
+                    tempName = topFeat.qualifiers[identifier][0] + ".mRNA"
+                    tempQuals = {
+                        identifier: topFeat.qualifiers[identifier],
+                        "ID": [tempName],
+                        "Notes": [
+                            "mRNA feature automatically generated by Gbk to GFF conversion"
+                        ],
+                    }
+                else:
+                    tempName = outFeats[-1].ID + ".mRNA"
+                    tempQuals = {
+                        identifier: topFeat.qualifiers[identifier],
+                        "ID": [tempName],
+                        "Notes": [
+                            "mRNA feature automatically generated by Gbk to GFF conversion"
+                        ],
+                    }
+                midFeat = gffSeqFeature(
+                    FeatureLocation(minLoc, maxLoc, topFeat.strand),
+                    "mRNA",
+                    "",
+                    topFeat.strand,
+                    tempName,
+                    tempQuals,
+                    None,
+                    None,
+                    None,
+                    ".",
+                    0,
+                    "GbkToGff",
+                )
+
+            if (
+                midFeat
+            ):  # Again, need a new if statement if we want to handle multiple mid-tier features
+                outFeats[-1].sub_features.append(
+                    makeGffFeat(midFeat, 0, recID, identifier)
+                )
+                outFeats[-1].sub_features[-1].qualifiers["Parent"] = [outFeats[-1].id]
+                for x in bottomFeats:
+                    typeDict[x.type] += 1
+                    outFeats[-1].sub_features[-1].sub_features.append(
+                        makeGffFeat(x, typeDict[x.type], recID, identifier)
+                    )
+                    outFeats[-1].sub_features[-1].sub_features[-1].qualifiers[
+                        "Parent"
+                    ] = [outFeats[-1].sub_features[-1].id]
+            else:  # No midFeat, append bottom feats directly to top feats
+                for x in bottomFeats:
+                    typeDict[x.type] += 1
+                    outFeats[-1].sub_features.append(
+                        makeGffFeat(x, typeDict[x.type], recID, identifier)
+                    )
+                    outFeats[-1].sub_features[-1].qualifiers["Parent"] = [
+                        outFeats[-1].id
+                    ]
+
+        outRec.append(
+            SeqRecord(
+                rec.seq,
+                recID,
+                rec.name,
+                rec.description,
+                rec.dbxrefs,
+                sorted(outFeats, key=lambda x: x.location.start),
+                rec.annotations,
+                rec.letter_annotations,
+            )
+        )
+        SeqIO.write([outRec[-1]], fastaFile, "fasta")
+    gffWrite(outRec, ofh)
+    exit(failed)  # 0 if all features handled, 1 if unable to handle some
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Biopython solution to Gbk to GFF conversion"
+    )
+
+    parser.add_argument(
+        "inFile", type=argparse.FileType("r"), help="Path to an input GBK file"
+    )
+    parser.add_argument(
+        "--makeMRNA",
+        action="store_true",
+        required=False,
+        help="Automatically create mRNA features",
+    )
+    parser.add_argument(
+        "--makeGene",
+        action="store_true",
+        required=False,
+        help="Automatically create missing Gene features",
+    )
+    parser.add_argument(
+        "--identifier",
+        type=str,
+        default="locus_tag",
+        required=False,
+        help="Qualifier to derive ID property from",
+    )
+    parser.add_argument(
+        "--fastaFile", type=argparse.FileType("w"), help="Fasta output for sequences"
+    )
+    parser.add_argument(
+        "--outFile", type=argparse.FileType("w"), help="GFF feature output"
+    )
+    args = parser.parse_args()
+    main(**vars(args))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Mon Jun 05 02:43:04 2023 +0000
@@ -0,0 +1,74 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package">progressivemauve</requirement>
+            <!--<requirement type="package" version="2.7">python</requirement>-->
+            <requirement type="package" version="0.6.4">bcbiogff</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@WRAPPER_VERSION@">2.4.0</token>
+    <xml name="citation/progressive_mauve">
+        <citation type="doi">10.1371/journal.pone.0011147</citation>
+    </xml>
+    <xml name="citation/gepard">
+        <citation type="doi">10.1093/bioinformatics/btm039</citation>
+    </xml>
+    <token name="@XMFA_INPUT@">
+		'$xmfa'
+	</token>
+    <xml name="xmfa_input" token_formats="xmfa">
+        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/>
+    </xml>
+    <token name="@XMFA_FA_INPUT@">
+		'$sequences'
+	</token>
+    <xml name="xmfa_fa_input">
+        <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
+    </xml>
+    <xml name="genome_selector">
+        <conditional name="reference_genome">
+            <param name="reference_genome_source" type="select" label="Reference Genome">
+                <option value="history" selected="True">From History</option>
+                <option value="cached">Locally Cached</option>
+            </param>
+            <when value="cached">
+                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="gff3_input">
+        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+    </xml>
+    <xml name="input/gff3+fasta">
+        <expand macro="gff3_input"/>
+        <expand macro="genome_selector"/>
+    </xml>
+    <token name="@INPUT_GFF@">
+	    '$gff3_data'
+	</token>
+    <token name="@INPUT_FASTA@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+	</token>
+    <token name="@GENOME_SELECTOR_PRE@">
+    #if $reference_genome.reference_genome_source == 'history':
+            ln -s '$reference_genome.genome_fasta' genomeref.fa;
+    #end if
+	</token>
+    <token name="@GENOME_SELECTOR@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+	</token>
+</macros>