diff glimmer_w_icm.xml @ 0:9b2e283dc3b5 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
author bgruening
date Tue, 28 Nov 2017 10:10:55 -0500
parents
children ce89c473666d
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer_w_icm.xml	Tue Nov 28 10:10:55 2017 -0500
@@ -0,0 +1,142 @@
+<tool id="glimmer_knowledge_based" name="Glimmer3" version="@WRAPPER_VERSION@">
+    <description>Predict ORFs in prokaryotic genomes (knowlegde-based)</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="aggressive"><![CDATA[
+    glimmer3
+        --max_olap $max_olap
+        --gene_len $gene_len
+        --threshold $threshold
+        #if float( str($gc_percent) ) > 0.0:
+            --gc_percent $gc_percent
+        #end if
+
+        #if $stop_codon_opts.stop_codon_opts_selector == "gb":
+            --trans_table '${stop_codon_opts.genbank_gencode}'
+        #else:
+            --stop_codons '${stop_codon_opts.stop_codons}'
+        #end if
+
+        --start_codons '$start_codons'
+
+        $linear
+        $no_indep
+        $extend
+        '$seq_input'
+        '$icm_input'
+        glimmer_output
+
+    &&
+
+    #if $report:
+        cp glimmer_output.predict '$report_output' &&
+    #end if
+    #if $detailed_report:
+        cp glimmer_output.detail '$detailed_output' &&
+    #end if
+
+    ## convert prediction to FASTA sequences
+    python '$__tool_directory__/glimmer2seq.py' glimmer_output.predict '$seq_input' '$genes_output'
+]]></command>
+    <inputs>
+        <param name="seq_input" type="data" format="fasta" label="Genome Sequence" />
+        <param name="icm_input" type="data" format="tar" label="Interpolated context model (ICM)" />
+
+        <param name="max_olap" type="integer" value="50" label="Set maximum overlap length" help="Overlaps this short or shorter are ignored." />
+        <param name="gene_len" type="integer" value="90" label="Set the minimum gene length to n nucleotides" help="This does not include the bases in the stop codon."/>
+        <param name="threshold" type="integer" value="30" label="Set threshold score for calling as gene" help="If the in-frame score >= N, then the region is given a number and considered a potential gene." />
+        <param name="gc_percent" type="float" value="0.0" label="Set the GC percentage of the independent model, i.e., the model of intergenic sequence" help="If 0.0 specified, the GC percentage will be counted from the input file." />
+
+        <param name="linear" type="boolean" truevalue="--linear" falsevalue="" checked="true" label="Assume linear rather than circular genome, i.e., no wraparound" />
+        <param name="no_indep" type="boolean" truevalue="--no_indep" falsevalue="" checked="false" label="Don’t use the independent probability score column at all" help="Using this option will produce more short gene predictions." />
+        <param name="extend" type="boolean" truevalue="--extend" falsevalue="" checked="false" label="Also score orfs that extend off the end of the sequence(s)" />
+        <param name="start_codons" type="text" value="atg,gtg,ttg" label="Specify start codons as a comma-separated list" />
+
+        <conditional name="stop_codon_opts">
+            <param name="stop_codon_opts_selector" type="select" label="Specify start codons as">
+              <option value="gb" selected="True">Genbank translation table entry</option>
+              <option value="free_form">Comma-separated list</option>
+            </param>
+            <when value="gb">
+                <param name="genbank_gencode" type="select" label="Use Genbank translation table to specify stop codons">
+                    <option value="1" selected="True">1. Standard</option>
+                    <option value="2">2. Vertebrate Mitochondrial</option>
+                    <option value="3">3. Yeast Mitochondrial</option>
+                    <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
+                    <option value="5">5. Invertebrate Mitochondrial</option>
+                    <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
+                    <option value="9">9. Echinoderm Mitochondrial</option>
+                    <option value="10">10. Euplotid Nuclear</option>
+                    <option value="11">11. Bacteria and Archaea</option>
+                    <option value="12">12. Alternative Yeast Nuclear</option>
+                    <option value="13">13. Ascidian Mitochondrial</option>
+                    <option value="14">14. Flatworm Mitochondrial</option>
+                    <option value="15">15. Blepharisma Macronuclear</option>
+                    <option value="16">16. Chlorophycean Mitochondrial</option>
+                    <option value="21">21. Trematode Mitochondrial</option>
+                    <option value="22">22. Scenedesmus obliquus mitochondrial</option>
+                    <option value="23">23. Thraustochytrium Mitochondrial</option>
+                    <option value="24">24. Pterobranchia mitochondrial</option>
+                </param>
+            </when>
+            <when value="free_form">
+                <param name="stop_codons" type="text" value="tag,tga,taa" label="Specify stop codons as a comma-separated list" />
+            </when>
+        </conditional>
+
+        <param name="report" type="boolean" truevalue="" falsevalue="" checked="false" label="Report the classic glimmer table output"/>
+        <param name="detailed_report" type="boolean" truevalue="" falsevalue="" checked="false" label="Output a detailed gene prediction report as separate file"/>
+    </inputs>
+    <outputs>
+        <data name="genes_output" format="fasta" label="Glimmer3 on ${on_string} (Gene Prediction FASTA)" />
+        <data name="report_output" format="txt" label="Glimmer3 on ${on_string} (Gene Prediction table)">
+            <filter>report == True</filter>
+        </data>
+        <data name="detailed_output" format="txt" label="Glimmer3 on ${on_string} (detailed report)">
+            <filter>detailed_report == True</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="seq_input" value='streptomyces_Tue6071_plasmid_genomic.fasta' />
+            <param name="icm_input" value='streptomyces_Tu6071_plasmid_genes.icm' ftype="tar" />
+            <param name="max_olap" value="50" />
+            <param name="gene_len" value="90" />
+            <param name="threshold" value="30" />
+            <param name="gc_percent" value="0.0" />
+            <param name="linear" value="--linear" />
+            <param name="no_indep" value="" />
+            <param name="extend" value="" />
+            <param name="start_codons" value="atg,gtg,ttg" />
+            <param name="genbank_gencode" value="11" />
+            <param name="detailed_report" value="true" />
+            <param name="report" value="true" />
+            <output name="genes_output" file='glimmer_w_icm_trans-table-11_genomic.fasta' ftype="fasta" />
+            <output name="report_output" file='glimmer_w_icm_trans-table-11_genomic.out' ftype="txt" />
+            <output name="detailed_output" file='glimmer_w_icm_trans-table-11_genomic.dout' ftype="txt" lines_diff="6" />
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+This is the main program that makes gene predictions based on an interpolated context model (ICM).
+
+The ICM can be generated with extracted CDS from related organisms (ICM builder). If you can't generate an ICM model you can use the non knowlegde-based Glimmer with a de novo prediction.
+
+
+**Input**
+
+- Interpolated context model (ICM): Use the 'Glimmer ICM builder' tool to create one
+- Genome Sequence in FASTA format
+
+
+
+**Output**
+
+- FASTA file with predicted proteins
+- Glimmer prediction file (optional)
+]]></help>
+    <expand macro="citation" />
+</tool>