Mercurial > repos > bgruening > glimmer3
diff glimmer_build-icm.xml @ 0:841357e0acbf draft
Uploaded
author | bgruening |
---|---|
date | Sat, 06 Jul 2013 10:09:30 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/glimmer_build-icm.xml Sat Jul 06 10:09:30 2013 -0400 @@ -0,0 +1,125 @@ +<tool id="glimmer_build-icm" name="Glimmer ICM builder" version="0.2"> + <description></description> + <requirements> + <requirement type="package" version="3.02b">glimmer</requirement> + </requirements> + <command> + build-icm + --depth $depth + #if $no_stops: + --no_stops + #end if + --period $period + --width $width + + #if $stop_codon_opts.stop_codon_opts_selector == "gb": + --trans_table "${stop_codon_opts.genbank_gencode}" + #else: + --stop_codons "${stop_codon_opts.stop_codons}" + #end if + + $outfile < $infile 2>&1; + </command> + <inputs> + <param name="infile" type="data" format="fasta" label="Trainings Dataset" help="A set of known genes in FASTA format." /> + <param name="depth" type="integer" value="7" label="Set the depth of the ICM" help="The depth is the maximum number of positions in the context window that will be used to determine the probability of the predicted position." /> + <param name="period" type="integer" value="3" label="Set the period of the ICM" help="The period is the number of different submodels for different positions in the text in a cyclic pattern. E.g., if the period is 3, the first submodel will determine positions 1, 4, 7, ..." /> + <param name="width" type="integer" value="12" label="Set the width of the ICM" help="The width includes the predicted position." /> + <param name="no_stops" type="boolean" truevalue="--no_stops" falsevalue="" checked="false" label="Do not use any input strings with in-frame stop codons" /> + + <conditional name="stop_codon_opts"> + <param name="stop_codon_opts_selector" type="select" label="Specify start codons as"> + <option value="gb" selected="True">Genbank translation table entry</option> + <option value="free_form">Comma-separated list</option> + </param> + <when value="gb"> + <param name="genbank_gencode" type="select" label="Use Genbank translation table to specify stop codons"> + <option value="1" select="True">1. Standard</option> + <option value="2">2. Vertebrate Mitochondrial</option> + <option value="3">3. Yeast Mitochondrial</option> + <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> + <option value="5">5. Invertebrate Mitochondrial</option> + <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option> + <option value="9">9. Echinoderm Mitochondrial</option> + <option value="10">10. Euplotid Nuclear</option> + <option value="11">11. Bacteria and Archaea</option> + <option value="12">12. Alternative Yeast Nuclear</option> + <option value="13">13. Ascidian Mitochondrial</option> + <option value="14">14. Flatworm Mitochondrial</option> + <option value="15">15. Blepharisma Macronuclear</option> + <option value="16">16. Chlorophycean Mitochondrial</option> + <option value="21">21. Trematode Mitochondrial</option> + <option value="22">22. Scenedesmus obliquus mitochondrial</option> + <option value="23">23. Thraustochytrium Mitochondrial</option> + <option value="24">24. Pterobranchia mitochondrial</option> + </param> + </when> + <when value="free_form"> + <param name="stop_codons" type="text" value="tag,tga,taa" label="Specify stop codons as a comma-separated list" /> + </when> + </conditional> + </inputs> + <outputs> + <data format="data" name="outfile" /> + </outputs> + <tests> + <test> + <param name="infile" value='streptomyces_Tu6071_plasmid_genes.fasta' /> + <param name="depth" value="7" /> + <param name="period" value="3" /> + <param name="width" value="12" /> + <param name="no_stops" value="" /> + <param name="genbank_gencode" value="11" /> + <!-- compare files sizes, because the output is a binary --> + <output name="outfile" file='streptomyces_Tu6071_plasmid_genes.icm' compare="sim_size" delta="1000" ftype="data" /> + </test> + </tests> + + <help> + +**What it does** + +This program constructs an interpolated context model (ICM) from an input set of sequences. + +This model can be used by Glimmer3 to predict genes. + +**TIP** To extract CDS from a GenBank file use the tool *Extract ORF from a GenBank file*. + +----- + +**Example** + +*Input*:: + + - Genome Sequence + + >CELF22B7 C.aenorhabditis elegans (Bristol N2) cosmid F22B7 + GATCCTTGTAGATTTTGAATTTGAAGTTTTTTCTCATTCCAAAACTCTGT + GATCTGAAATAAAATGTCTCAAAAAAATAGAAGAAAACATTGCTTTATAT + TTATCAGTTATGGTTTTCAAAATTTTCTGACATACCGTTTTGCTTCTTTT + TTTCTCATCTTCTTCAAATATCAATTGTGATAATCTGACTCCTAACAATC + GAATTTCTTTTCCTTTTTCTTTTTCCAACAACTCCAGTGAGAACTTTTGA + ATATCTTCAAGTGACTTCACCACATCAGAAGGTGTCAACGATCTTGTGAG + AACATCGAATGAAGATAATTTTAATTTTAGAGTTACAGTTTTTCCTCCGA + CAATTCCTGATTTACGAACATCTTCTTCAAGCATTCTACAGATTTCTTGA + TGCTCTTCTAGGAGGATGTTGAAATCCGAAGTTGGAGAAAAAGTTCTCTC + AACTGAAATGCTTTTTCTTCGTGGATCCGATTCAGATGGACGACCTGGCA + GTCCGAGAGCCGTTCGAAGGAAAGATTCTTGTGAGAGAGGCGTGAAACAC + AAAGGGTATAGGTTCTTCTTCAGATTCATATCACCAACAGTTTGAATATC + CATTGCTTTCAGTTGAGCTTCGCATACACGACCAATTCCTCCAACCTAAA + AAATTATCTAGGTAAAACTAGAAGGTTATGCTTTAATAGTCTCACCTTAC + GAATCGGTAAATCCTTCAAAAACTCCATAATCGCGTTTTTATCATTTTCT + ..... + +*Output*:: + interpolated context model (ICM) + +------- + +**References** + +A.L. Delcher, K.A. Bratke, E.C. Powers, and S.L. Salzberg. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics (Advance online version) (2007). + + + </help> +</tool>