Mercurial > repos > cpt > cpt_gbk_adjacent
changeset 0:1311f97dccfa draft
Uploaded
author | cpt |
---|---|
date | Fri, 17 Jun 2022 12:43:45 +0000 |
parents | |
children | e29c36ee61e0 |
files | cpt_gbk_adjacent/adjacent_features.py cpt_gbk_adjacent/adjacent_features.xml cpt_gbk_adjacent/cpt-macros.xml cpt_gbk_adjacent/macros.xml |
diffstat | 4 files changed, 775 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# File: cpt_gbk_adjacent/adjacent_features.py
"""Report the features adjacent to a Fasta-matched feature in Genbank records.

Every sequence read from the Fasta inputs is compared against the (optionally
translated) sequence of each feature of every Genbank record.  For each hit,
the neighbouring features are written as Fasta entries: those on the 5' side
of the hit go to one handle and those on the 3' side to another.
"""
import argparse
import logging

from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Data import CodonTable
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation

try:
    from Bio.Alphabet import generic_dna, generic_protein
except ImportError:
    # Bio.Alphabet was removed in Biopython 1.78.  Neither name is used below,
    # so the script must not refuse to start on newer releases.
    generic_dna = generic_protein = None

logging.basicConfig(level=logging.INFO)
log = logging.getLogger()


def _passes_filter(feature, genesOnly, cdsOnly):
    """Return True when *feature* has a type the selected mode searches."""
    if genesOnly and feature.type != "gene":
        return False
    if cdsOnly and feature.type != "CDS":
        return False
    return True


def _feature_name(feature, keys):
    """Return the first value of the first qualifier in *keys* that is set.

    Falls back to ``feature.id`` so a feature lacking every requested
    qualifier still yields a header instead of raising ``KeyError``.
    """
    for key in keys:
        values = feature.qualifiers.get(key)
        if values:
            return str(values[0])
    return str(feature.id)


def _coding_sequence(record, feature):
    """Return the feature's bases in reading direction from its first codon.

    ``codon_start`` is 1-based, so ``codon_start - 1`` bases are skipped at
    the 5' end of the feature: the low-coordinate end on the plus strand and
    the high-coordinate end on the minus strand.

    NOTE: the slice runs from the outermost start to the outermost end of the
    location, so a compound (joined) location is not spliced.
    """
    skip = 0
    if "codon_start" in feature.qualifiers:
        skip = int(feature.qualifiers["codon_start"][0]) - 1

    start = feature.location.start
    end = feature.location.end
    if feature.location.strand == -1:
        return record.seq[start : end - skip].reverse_complement()
    return record.seq[start + skip : end]


def _neighbor_features(features, index, count, step, genesOnly, cdsOnly):
    """Collect up to *count* qualifying features, walking away from *index*.

    *step* is -1 to walk towards the start of the feature list and +1 to walk
    towards its end.  Features rejected by the mode filter are skipped and do
    not count.  The result is ordered nearest-first.
    """
    found = []
    pos = index + step
    while count != 0 and 0 <= pos < len(features):
        candidate = features[pos]
        if _passes_filter(candidate, genesOnly, cdsOnly):
            found.append(candidate)
            count -= 1
        pos += step
    return found


def _write_neighbor(
    item, gbk, handle, side, longOut, sourceOut, outProt, cdsOnly, tTable
):
    """Write one neighbouring feature to *handle* as a Fasta entry.

    *side* is ``"5'"`` or ``"3'"`` and only affects the header text.  With
    *outProt* the entry holds the feature's ``translation`` qualifier when
    present, otherwise a translation made with *tTable* (or the nucleotides in
    reading direction when *tTable* is 0).  Without *outProt* the raw
    plus-strand slice of the record is written.
    """
    addition = ""
    if "product" in item.qualifiers:
        addition = " -" + str(item.qualifiers["product"][0]) + "-"
    suffix = " (" + side + " of " + longOut + " found within " + sourceOut + ")\n"
    header = ">" + _feature_name(item, ("protein_id", "locus_tag")) + addition + suffix

    if not outProt:
        body = str(gbk.seq[item.location.start : item.location.end])
    elif "translation" in item.qualifiers:
        body = str(item.qualifiers["translation"][0])
    else:
        seqHold = _coding_sequence(gbk, item)
        if tTable == 0:
            body = str(seqHold)
        elif cdsOnly:
            try:
                body = str(seqHold.translate(table=tTable, cds=True))
            except CodonTable.TranslationError as err:
                # Not a complete CDS: translate it as-is and flag the entry.
                locus = _feature_name(item, ("locus_tag", "protein_id"))
                log.warning("ERROR %s %s", locus, err)
                body = str(seqHold.translate(table=tTable, cds=False))
                header = ">" + locus + addition + " [INCOMPLETE]" + suffix
        else:
            body = str(seqHold.translate(table=tTable, cds=False))

    handle.write(header)
    handle.write(body + "\n\n")


def extract_features(
    genbankFiles=None,
    fastaFiles=None,
    upOut=None,
    downOut=None,
    genesOnly=False,
    cdsOnly=True,
    forceSeqID=False,
    forward=1,
    behind=1,
    outProt=True,
    tTable=11,
    fTable=11,
):
    """Find Fasta sequences among Genbank features and write their neighbours.

    Args:
        genbankFiles: iterable of open Genbank handles (``None`` means none).
        fastaFiles: iterable of open Fasta handles (``None`` means none).
        upOut: writable handle that receives the 5' neighbours of each hit.
        downOut: writable handle that receives the 3' neighbours of each hit.
        genesOnly: consider only ``gene`` features.
        cdsOnly: consider only ``CDS`` features.
        forceSeqID: additionally require the feature's ``protein_id`` to equal
            the Fasta record ID.
        forward: number of features to take after the hit in record order.
        behind: number of features to take before the hit in record order.
        outProt: write protein sequences rather than raw nucleotides.
        tTable: translation table for Genbank features; 0 disables translation.
        fTable: translation table for the Fasta input; 0 disables translation.

    For a minus-strand hit the two neighbour lists are exchanged, so the
    features after the hit in record order are reported as its 5' side.
    """
    genList = []
    for fileX in genbankFiles or []:
        # Materialise the parser so the records can be rescanned per query.
        genList.extend(SeqIO.parse(fileX, "genbank"))

    fastaList = []
    for fileX in fastaFiles or []:
        fastaList.extend(SeqIO.parse(fileX, "fasta"))  # flattens multi-Fasta

    for seqMatch in fastaList:
        longOut = seqMatch.description
        protID = seqMatch.id
        if fTable != 0:
            fSeq = seqMatch.seq.translate(table=fTable, cds=False)
        else:
            fSeq = seqMatch.seq

        for gbk in genList:
            sourceOut = gbk.id
            for num, feat in enumerate(gbk.features):
                if not _passes_filter(feat, genesOnly, cdsOnly):
                    continue

                temp = _coding_sequence(gbk, feat)
                if tTable != 0:
                    try:
                        gSeq = temp.translate(table=tTable, cds=True)
                    except CodonTable.TranslationError:
                        gSeq = temp.translate(table=tTable, cds=False)
                else:
                    gSeq = temp

                if not (gSeq == fSeq):
                    continue
                # Read the ID without writing a placeholder into the feature:
                # a stored placeholder would later be printed as a header.
                featID = feat.qualifiers.get("protein_id", [None])[0]
                if forceSeqID and protID != featID:
                    continue

                backList = _neighbor_features(
                    gbk.features, num, behind, -1, genesOnly, cdsOnly
                )
                backList.reverse()  # report in record order
                aheadList = _neighbor_features(
                    gbk.features, num, forward, 1, genesOnly, cdsOnly
                )
                if feat.location.strand == -1:
                    backList, aheadList = aheadList, backList

                for item in backList:
                    _write_neighbor(
                        item, gbk, upOut, "5'", longOut, sourceOut,
                        outProt, cdsOnly, tTable,
                    )
                for item in aheadList:
                    _write_neighbor(
                        item, gbk, downOut, "3'", longOut, sourceOut,
                        outProt, cdsOnly, tTable,
                    )

    return


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Export a subset of features from a Genbank file", epilog=""
    )
    parser.add_argument(
        "-genbankFiles", nargs="+", type=argparse.FileType("r"), help="Genbank file"
    )
    parser.add_argument(
        "-fastaFiles",
        nargs="+",
        type=argparse.FileType("r"),
        help="Fasta file to match against",
    )
    # The Galaxy wrapper offers table numbers up to 25, so the accepted range
    # has to reach that far.
    parser.add_argument(
        "-tTable",
        type=int,
        default=11,
        help="Translation table to use on Genbank features (0 = do not translate)",
        choices=range(0, 26),
    )
    parser.add_argument(
        "-fTable",
        type=int,
        default=11,
        help="Translation table to use on the Fasta input (0 = do not translate)",
        choices=range(0, 26),
    )
    parser.add_argument(
        "-upOut",
        type=argparse.FileType("w"),
        help="Upstream Fasta output",
        default="test-data/upOut.fa",
    )
    parser.add_argument(
        "-downOut",
        type=argparse.FileType("w"),
        help="Downstream Fasta output",
        default="test-data/downOut.fa",
    )
    parser.add_argument(
        "--genesOnly",
        action="store_true",
        help="Search and return only Gene type features",
    )
    parser.add_argument(
        "--cdsOnly",
        action="store_true",
        help="Search and return only CDS type features",
    )
    parser.add_argument(
        "--forceSeqID",
        action="store_true",
        help="Require the Genbank feature's protein_id to match the Fasta sequence ID",
    )
    parser.add_argument(
        "--outProt", action="store_true", help="Output the translated sequence"
    )
    parser.add_argument(
        "--forward",
        type=int,
        default=1,
        help="Number of features after the hit (in record order) to return",
    )
    parser.add_argument(
        "--behind",
        type=int,
        default=1,
        help="Number of features before the hit (in record order) to return",
    )
    args = parser.parse_args()
    try:
        extract_features(**vars(args))
    finally:
        # Flush and release the output files even when extraction fails.
        args.upOut.close()
        args.downOut.close()
<?xml version="1.0"?>
<!-- File: cpt_gbk_adjacent/adjacent_features.xml -->
<!-- Galaxy wrapper for adjacent_features.py: matches Fasta sequences against
     Genbank features and writes the neighbouring features to two Fasta outputs. -->
<tool id="edu.tamu.cpt2.gbk.adjacent_features" name="Find adjacent Genbank features" version="RC4">
  <description>Searches a Genbank file for a given FASTA sequence, then outputs a file with adjacent upstream features, and another with adjacent downstream features.</description>
  <macros>
    <import>macros.xml</import>
    <import>cpt-macros.xml</import>
  </macros>
  <expand macro="requirements"/>
  <!-- Paths are quoted so that directories containing spaces do not split the command line.
       $translate, $forceID and $mode expand to a flag or to nothing and must stay unquoted. -->
  <command detect_errors="aggressive"><![CDATA[
python '$__tool_directory__/adjacent_features.py'
#set repeat_var_1 = '" "'.join([ str($var) for $var in $gbkIn ])
#set repeat_var_2 = '" "'.join([ str($var) for $var in $fastaIn ])
-genbankFiles "$repeat_var_1"
-fastaFiles "$repeat_var_2"
-tTable $tTable
-fTable $fTable
-upOut '$upstreamOut'
-downOut '$downstreamOut'
$translate
$forceID
$mode
--forward $ahead
--behind $back
]]></command>
  <inputs>
    <param label="Genbank file" name="gbkIn" type="data" format="genbank" multiple="True"/>
    <param label="Fasta file" name="fastaIn" type="data" format="fasta" multiple="True"/>
    <param label="Translation table to use on Fasta input:" name="fTable" type="select">
      <option value="0" selected="true">[0] Do not translate/ Fasta already translated</option>
      <option value="1">[1] The Standard Code</option>
      <option value="2">[2] The Vertebrate Mitochondrial Code</option>
      <option value="3">[3] The Yeast Mitochondrial Code</option>
      <option value="4">[4] The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
      <option value="5">[5] The Invertebrate Mitochondrial Code </option>
      <option value="6">[6] The Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
      <option value="9">[9] The Echinoderm and Flatworm Mitochondrial Code</option>
      <option value="10">[10] The Euplotid Nuclear Code</option>
      <option value="11">[11] The Bacterial, Archaeal and Plant Plastid Code</option>
      <option value="12">[12] The Alternative Yeast Nuclear Code</option>
      <option value="13">[13] The Ascidian Mitochondrial Code</option>
      <option value="14">[14] The Alternative Flatworm Mitochondrial Code</option>
      <option value="15">[15] Blepharisma Nuclear Code</option>
      <option value="16">[16] Chlorophycean Mitochondrial Code</option>
      <option value="21">[21] Trematode Mitochondrial Code</option>
      <option value="22">[22] Scenedesmus Obliquus Mitochondrial Code</option>
      <option value="23">[23] Thraustochytrium Mitochondrial Code</option>
      <option value="24">[24] Pterobranchia Mitochondrial Code</option>
      <option value="25">[25] Candidate Division SR1 and Gracilibacteria Code</option>
    </param>
    <param label="Translation table to use on Genbank features:" name="tTable" type="select">
      <option value="0">[0] Do not translate (Use nucleotide sequence of features)</option>
      <option value="1">[1] The Standard Code</option>
      <option value="2">[2] The Vertebrate Mitochondrial Code</option>
      <option value="3">[3] The Yeast Mitochondrial Code</option>
      <option value="4">[4] The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
      <option value="5">[5] The Invertebrate Mitochondrial Code </option>
      <option value="6">[6] The Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
      <option value="9">[9] The Echinoderm and Flatworm Mitochondrial Code</option>
      <option value="10">[10] The Euplotid Nuclear Code</option>
      <option value="11" selected="true">[11] The Bacterial, Archaeal and Plant Plastid Code</option>
      <option value="12">[12] The Alternative Yeast Nuclear Code</option>
      <option value="13">[13] The Ascidian Mitochondrial Code</option>
      <option value="14">[14] The Alternative Flatworm Mitochondrial Code</option>
      <option value="15">[15] Blepharisma Nuclear Code</option>
      <option value="16">[16] Chlorophycean Mitochondrial Code</option>
      <option value="21">[21] Trematode Mitochondrial Code</option>
      <option value="22">[22] Scenedesmus Obliquus Mitochondrial Code</option>
      <option value="23">[23] Thraustochytrium Mitochondrial Code</option>
      <option value="24">[24] Pterobranchia Mitochondrial Code</option>
      <option value="25">[25] Candidate Division SR1 and Gracilibacteria Code</option>
    </param>
    <!-- The script counts neighbours by position in the record's feature list and exchanges the
         two sides for a minus-strand hit, so a fixed upstream/downstream label would be wrong
         for one of the two strands. -->
    <param label="Number of features after the hit (in record order) to return" name="ahead" type="integer" value="1" help="These are the downstream (3') neighbours of a plus-strand hit and the upstream (5') neighbours of a minus-strand hit."/>
    <param label="Number of features before the hit (in record order) to return" name="back" type="integer" value="1" help="These are the upstream (5') neighbours of a plus-strand hit and the downstream (3') neighbours of a minus-strand hit."/>
    <param label="Translate output to protein sequence" name="translate" type="boolean" truevalue="--outProt" falsevalue=""/>
    <param label="Genbank Protein's ID must also match Fasta Sequence's ID " name="forceID" type="boolean" truevalue="--forceSeqID" falsevalue="" checked="true"/>
    <param name="mode" type="select" label="Mode">
      <option value="--genesOnly">Search only Gene-type features</option>
      <option value="--cdsOnly">Search only CDS-type features</option>
      <option value="">Search all features (Will most likely cause duplicate results, as this will include sub-features)</option>
    </param>
  </inputs>
  <outputs>
    <data name="upstreamOut" format="fasta" label="upOut"/>
    <data name="downstreamOut" format="fasta" label="downOut"/>
  </outputs>
  <help><![CDATA[
Currently Experimental: Uploaded for review purposes

**What it does**

For a given Fasta file, this tool searches through the features of a Genbank file for one that matches the sequence in the Fasta. If found, it will then output a specified number of features upstream from the hit and a specified number of features downstream as a multifasta file.

The drop down menus provide a selection of translation tables for the Fasta and Genbank inputs. If "translate output to protein sequence"
is selected, the output will be translated using the table selected for the Genbank translation.

It is currently recommended to select either the Gene only or CDS only options for mode, as searching all features will include sub-features of neighbors (ie, selecting 2 for upstream will give you the neighboring gene and then its CDS sub feature, rather than 2 proper neighbors) as well as the sub-features of the search hit itself.


]]></help>
  <citations>
    <citation type="doi">10.1371/journal.pcbi.1008214</citation>
    <citation type="bibtex">
      @unpublished{galaxyTools,
        author = {A. Criscione},
        title = {CPT Galaxy Tools},
        year = {2019-2021},
        note = {https://github.com/tamu-cpt/galaxy-tools/}
      }
    </citation>
  </citations>
</tool>
<?xml version="1.0"?>
<!-- File: cpt_gbk_adjacent/cpt-macros.xml -->
<!-- Shared Galaxy macros: one requirements set and several citation sets. -->
<macros>
  <!-- Package requirements plus a version command that prints the git commit of the tool directory. -->
  <xml name="gff_requirements">
    <requirements>
      <requirement type="package" version="2.7">python</requirement>
      <requirement type="package" version="1.65">biopython</requirement>
      <requirement type="package" version="2.12.1">requests</requirement>
      <yield/>
    </requirements>
    <version_command>
      <![CDATA[
        cd $__tool_directory__ && git rev-parse HEAD
      ]]>
    </version_command>
  </xml>

  <!-- Bare citation elements (no enclosing <citations>) for Mijalis and Rasche. -->
  <xml name="citation/mijalisrasche">
    <citation type="doi">10.1371/journal.pcbi.1008214</citation>
    <citation type="bibtex">@unpublished{galaxyTools,
      author = {E. Mijalis, H. Rasche},
      title = {CPT Galaxy Tools},
      year = {2013-2017},
      note = {https://github.com/tamu-cpt/galaxy-tools/}
      }
    </citation>
  </xml>

  <!-- Complete <citations> block: DOI plus Mijalis and Rasche; callers may append more via <yield/>. -->
  <xml name="citations">
    <citations>
      <citation type="doi">10.1371/journal.pcbi.1008214</citation>
      <citation type="bibtex">
        @unpublished{galaxyTools,
          author = {E. Mijalis, H. Rasche},
          title = {CPT Galaxy Tools},
          year = {2013-2017},
          note = {https://github.com/tamu-cpt/galaxy-tools/}
        }
      </citation>
      <yield/>
    </citations>
  </xml>

  <!-- Complete <citations> block: DOI plus C. Ross. -->
  <xml name="citations-crr">
    <citations>
      <citation type="doi">10.1371/journal.pcbi.1008214</citation>
      <citation type="bibtex">
        @unpublished{galaxyTools,
          author = {C. Ross},
          title = {CPT Galaxy Tools},
          year = {2020-},
          note = {https://github.com/tamu-cpt/galaxy-tools/}
        }
      </citation>
      <yield/>
    </citations>
  </xml>

  <!-- Complete <citations> block: DOI, Mijalis and Rasche, and A. Criscione. -->
  <xml name="citations-2020">
    <citations>
      <citation type="doi">10.1371/journal.pcbi.1008214</citation>
      <citation type="bibtex">
        @unpublished{galaxyTools,
          author = {E. Mijalis, H. Rasche},
          title = {CPT Galaxy Tools},
          year = {2013-2017},
          note = {https://github.com/tamu-cpt/galaxy-tools/}
        }
      </citation>
      <citation type="bibtex">
        @unpublished{galaxyTools,
          author = {A. Criscione},
          title = {CPT Galaxy Tools},
          year = {2019-2021},
          note = {https://github.com/tamu-cpt/galaxy-tools/}
        }
      </citation>
      <yield/>
    </citations>
  </xml>

  <!-- Complete <citations> block: DOI plus A. Criscione only. -->
  <xml name="citations-2020-AJC-solo">
    <citations>
      <citation type="doi">10.1371/journal.pcbi.1008214</citation>
      <citation type="bibtex">
        @unpublished{galaxyTools,
          author = {A. Criscione},
          title = {CPT Galaxy Tools},
          year = {2019-2021},
          note = {https://github.com/tamu-cpt/galaxy-tools/}
        }
      </citation>
      <yield/>
    </citations>
  </xml>

  <!-- Complete <citations> block: DOI plus C. Maughmer. -->
  <xml name="citations-clm">
    <citations>
      <citation type="doi">10.1371/journal.pcbi.1008214</citation>
      <citation type="bibtex">
        @unpublished{galaxyTools,
          author = {C. Maughmer},
          title = {CPT Galaxy Tools},
          year = {2017-2020},
          note = {https://github.com/tamu-cpt/galaxy-tools/}
        }
      </citation>
      <yield/>
    </citations>
  </xml>

  <!-- Single bibtex citation for C. Maughmer without an enclosing <citations> element. -->
  <xml name="sl-citations-clm">
    <citation type="bibtex">
      @unpublished{galaxyTools,
        author = {C. Maughmer},
        title = {CPT Galaxy Tools},
        year = {2017-2020},
        note = {https://github.com/tamu-cpt/galaxy-tools/}
      }
    </citation>
    <yield/>
  </xml>
</macros>
<?xml version="1.0"?>
<!-- File: cpt_gbk_adjacent/macros.xml -->
<!-- Tool-level Galaxy macros: package requirements and reusable select parameters. -->
<macros>
  <!-- Packages the tool runs with; callers may add further requirements via <yield/>. -->
  <xml name="requirements">
    <requirements>
      <requirement type="package" version="3.8.13">python</requirement>
      <requirement type="package" version="1.79">biopython</requirement>
      <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
      <yield/>
    </requirements>
  </xml>

  <!-- Repeatable select whose options are read from a .loc file (columns: name, value). -->
  <xml name="ldap_ref"
    token_name="dn_ref"
    token_label="Pick a DN"
    token_fromfile="ldap_people.loc">
    <repeat name="repeat_@NAME@" title="@LABEL@">
      <param name="@NAME@" label="Select a @LABEL@" type="select">
        <options from_file="@FROMFILE@">
          <column name="name" index="0"/>
          <column name="value" index="1"/>
        </options>
      </param>
    </repeat>
  </xml>

  <!-- Same select as ldap_ref, without the surrounding repeat. -->
  <xml name="ldap_ref_single"
    token_name="dn_ref"
    token_label="Pick a DN"
    token_fromfile="ldap_people.loc">
    <param name="@NAME@" label="Select a @LABEL@" type="select">
      <options from_file="@FROMFILE@">
        <column name="name" index="0"/>
        <column name="value" index="1"/>
      </options>
    </param>
  </xml>

  <!-- Select listing Genbank feature types, with CDS preselected.
       The optional attribute references @OPTIONAL@, the token that token_optional defines;
       the former text "@TOKEN_OPTIONAL" named no token and lacked the closing "@", so it was
       never substituted.
       NOTE(review): token_name is declared but the parameter name is fixed to "feature_type";
       left unchanged because tools may already reference that name. -->
  <xml name="gbk_feature_type"
    token_label="Feature type to remove"
    token_multiple="True"
    token_optional="False"
    token_name="positional_2">
    <param label="@LABEL@" optional="@OPTIONAL@" multiple="@MULTIPLE@" name="feature_type" type="select">
      <option value="-10_signal">-10_signal</option>
      <option value="-35_signal">-35_signal</option>
      <option value="3'UTR">3'UTR</option>
      <option value="5'UTR">5'UTR</option>
      <option value="CAAT_signal">CAAT_signal</option>
      <option selected="true" value="CDS">CDS</option>
      <option value="C_region">C_region</option>
      <option value="D-loop">D-loop</option>
      <option value="D_segment">D_segment</option>
      <option value="GC_signal">GC_signal</option>
      <option value="J_segment">J_segment</option>
      <option value="LTR">LTR</option>
      <option value="N_region">N_region</option>
      <option value="RBS">RBS</option>
      <option value="STS">STS</option>
      <option value="S_region">S_region</option>
      <option value="TATA_signal">TATA_signal</option>
      <option value="V_region">V_region</option>
      <option value="V_segment">V_segment</option>
      <option value="all">all</option>
      <option value="assembly_gap">assembly_gap</option>
      <option value="attenuator">attenuator</option>
      <option value="enhancer">enhancer</option>
      <option value="exon">exon</option>
      <option value="gap">gap</option>
      <option value="gene">gene</option>
      <option value="iDNA">iDNA</option>
      <option value="intron">intron</option>
      <option value="mRNA">mRNA</option>
      <option value="mat_peptide">mat_peptide</option>
      <option value="misc_RNA">misc_RNA</option>
      <option value="misc_binding">misc_binding</option>
      <option value="misc_difference">misc_difference</option>
      <option value="misc_feature">misc_feature</option>
      <option value="misc_recomb">misc_recomb</option>
      <option value="misc_signal">misc_signal</option>
      <option value="misc_structure">misc_structure</option>
      <option value="mobile_element">mobile_element</option>
      <option value="modified_base">modified_base</option>
      <option value="ncRNA">ncRNA</option>
      <option value="old_sequence">old_sequence</option>
      <option value="operon">operon</option>
      <option value="oriT">oriT</option>
      <option value="polyA_signal">polyA_signal</option>
      <option value="polyA_site">polyA_site</option>
      <option value="precursor_RNA">precursor_RNA</option>
      <option value="prim_transcript">prim_transcript</option>
      <option value="primer_bind">primer_bind</option>
      <option value="promoter">promoter</option>
      <option value="protein_bind">protein_bind</option>
      <option value="rRNA">rRNA</option>
      <option value="rep_origin">rep_origin</option>
      <option value="repeat_region">repeat_region</option>
      <option value="sig_peptide">sig_peptide</option>
      <option value="source">source</option>
      <option value="stem_loop">stem_loop</option>
      <option value="tRNA">tRNA</option>
      <option value="terminator">terminator</option>
      <option value="tmRNA">tmRNA</option>
      <option value="transit_peptide">transit_peptide</option>
      <option value="unsure">unsure</option>
      <option value="variation">variation</option>
    </param>
  </xml>
</macros>