diff glimmer_wo_icm.xml @ 0:841357e0acbf draft

Uploaded
author bgruening
date Sat, 06 Jul 2013 10:09:30 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer_wo_icm.xml	Sat Jul 06 10:09:30 2013 -0400
@@ -0,0 +1,119 @@
+<tool id="glimmer_not-knowlegde-based" name="Glimmer3" version="0.2">
+    <description>Predict ORFs in prokaryotic genomes (not knowlegde-based)</description>
+    <requirements>
+        <requirement type="package" version="3.02b">glimmer</requirement>
+        <requirement type="package" version="1.61">biopython</requirement>
+    </requirements>
+    <command interpreter="python">
+        glimmer_wo_icm.py 
+            $input
+            #if $report:
+                $prediction
+            #else:
+                "None"
+            #end if
+            #if $detailed_report:
+                $detailed
+            #else:
+                "None"
+            #end if
+            $overlap
+            $gene_length
+            $threshold
+            $linear
+            $genes_output
+    </command>
+    <inputs>
+        <param name="input" type="data" format="fasta" label="Genome sequence" />
+        <param name="overlap" type="integer" value="0" label="Set maximum overlap length. Overlaps this short or shorter are ignored." />
+        <param name="gene_length" type="integer" value="110" label="Set minimum gene length." />
+        <param name="threshold" type="integer" value="30" label="Set threshold score for calling as gene. If the in-frame score >= N, then the region is given a number and considered a potential gene." />
+        <param name="linear" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Assume linear rather than circular genome, i.e., no wraparound" />
+
+        <param name="detailed_report" type="boolean" truevalue="" falsevalue="" checked="false" label="Output a detailed gene prediction report as separate file" />
+        <param name="report" type="boolean" truevalue="" falsevalue="" checked="false" label="Report the classic glimmer table output" />
+    </inputs>
+    <outputs>
+        <data name="genes_output" format="fasta" label="Glimmer3 on ${on_string} (Gene Prediction FASTA)" />
+        <data name="prediction" format="txt" label="Glimmer3 on ${on_string} (Gene Prediction table)">
+            <filter>report == True</filter>
+        </data>
+        <data name="detailed" format="txt" label="Glimmer3 on ${on_string} (detailed report)">
+            <filter>detailed_report == True</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="streptomyces_Tue6071_plasmid_genomic.fasta" />
+            <param name="overlap" value="0" />
+            <param name="gene_length" value="110" />
+            <param name="threshold" value="30" />
+            <param name="linear" value="true" />
+            <param name="detailed_report" value="" />
+            <param name="report" value="" />
+            <output name="genes_output" file="glimmer_wo_icm_trans-table-11_plasmid_genomic.fasta" ftype="fasta" />
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+This tool predicts open reading frames (orfs) from a given DNA Sequence. That tool is not knowlegde-based.
+
+The recommended way is to use a trained Glimmer3 with ICM model. Use the knowlegde-based version for that and insert/generate a training set.
+
+-----
+
+**Glimmer Overview**
+
+::
+
+**************		**************		**************		**************		
+*            *		*	     *		*            *		*            *
+* long-orfs  *  ===>	*   Extract  *	===>	* build-icm  *  ===>	*  glimmer3  *	
+*            *		*	     *		*	     *  	*	     *	
+**************		**************		**************		**************
+
+-----
+
+**Example**
+
+Suppose you have the following DNA sequences::
+
+    >SQ   Sequence 8667507 BP; 1203558 A; 3121252 C; 3129638 G; 1213059 T; 0 other;
+    cccgcggagcgggtaccacatcgctgcgcgatgtgcgagcgaacacccgggctgcgcccg
+    ggtgttgcgctcccgctccgcgggagcgctggcgggacgctgcgcgtcccgctcaccaag
+    cccgcttcgcgggcttggtgacgctccgtccgctgcgcttccggagttgcggggcttcgc
+    cccgctaaccctgggcctcgcttcgctccgccttgggcctgcggcgggtccgctgcgctc
+    ccccgcctcaagggcccttccggctgcgcctccaggacccaaccgcttgcgcgggcctgg
+    .......
+
+Running this tool will produce a FASTA file with predicted genes and glimmer output files like the following::
+
+    >SQ   Sequence 8667507 BP; 1203558 A; 3121252 C; 3129638 G; 1213059 T; 0 other;
+    orf00001      577      699  +1     5.24
+    orf00003      800     1123  +2     5.18
+    orf00004     1144     3813  +1    10.62
+    orf00006     3857     6220  +2     6.07
+    orf00007     6226     7173  +1     1.69
+    orf00008     7187     9307  +2     8.95
+    orf00009     9424    10410  +1     8.29
+    orf00010    10515    11363  +3     7.00
+    orf00011    11812    11964  +1     2.80
+    orf00012    12360    13457  +3     4.80
+    orf00013    14379    14044  -1     7.41
+    orf00015    15029    14739  -3    12.43
+    orf00016    15066    15227  +3     1.91
+    orf00020    16061    15351  -3     2.83
+    orf00021    17513    17391  -3     2.20
+    orf00023    17529    17675  +3     0.11
+
+
+-------
+
+**References**
+
+A.L. Delcher, K.A. Bratke, E.C. Powers, and S.L. Salzberg. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics (Advance online version) (2007).
+
+    </help>
+</tool>