Mercurial > repos > iuc > hmmer3
diff hmmemit.xml @ 0:62479bdcc059 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hmmer3 commit 4164b44c651bcbdac6637eccce61b2a802c9b569
author | iuc |
---|---|
date | Tue, 12 May 2015 15:04:26 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hmmemit.xml Tue May 12 15:04:26 2015 -0400 @@ -0,0 +1,175 @@ +<?xml version="1.0"?> +<tool id="hmmer_hmmemit" name="hmmemit" version="@WRAPPER_VERSION@.0"> + <description>sample sequence(s) from a profile HMM</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <expand macro="stdio"/> + <command><![CDATA[ +hmmemit + +#if $oformat.oformat_select == "fasta": + -N $oformat.N_fa +#elif $oformat.oformat_select == "aln": + -N $oformat.N_aln + -a +#elif $oformat.oformat_select == "mrcs": + -c +#elif $oformat.oformat_select == "mrcsf": + --minl $oformat.minl + --minu $oformat.minu + -C +#else: + -N $oformat.N_smp + -p + #if $oformat.L: + -L $oformat.L + #end if + $oformat.emission_profiles +#end if + + +@SEED@ + +$hmmfile +> $output + ]]></command> + <inputs> + <expand macro="input_hmm" /> + + <conditional name="oformat"> + <param name="oformat_select" type="select" label="Output Format"> + <option value="fasta" selected="true">Fasta</option> + <option value="aln">Alignment</option> + <option value="mrcs">Majority-Rule Concensus Sequence</option> + <option value="mrcsf">Fancier Concensus Sequence</option> + <option value="sample">Sample sequences from profile, not core model</option> + </param> + <when value="fasta"> + <param name="N_fa" type="integer" value="1" min="1" help="(-N)" + label="Number of sequences to generate"/> + </when> + <when value="aln"> + <param name="N_aln" type="integer" value="1" min="1" help="(-N)" + label="Number of sequences to generate"/> + </when> + <when value="mrcs"> + </when> + <when value="mrcsf"> + <param name="minl" type="float" value="0.7" help="(--minl)" + label="show consensus as 'any' (X/N) unless >= this fraction"/> + <param name="minu" type="float" value="0.2" help="(--minu)" + label="show consensus as upper case if >= this fraction"/> + </when> + <when value="sample"> + <param name="N_smp" type="integer" value="1" min="1" help="(-N)" + label="Number of sequences to generate"/> + <param name="L" type="integer" optional="True" help="(-L)" + label="Expected length of profile"/> + + <param name="emission_profiles" type="select" label="Emission profile options"> + <option value="--local" selected="true">configure profile in multihit local mode</option> + <option value="--unilocal">configure profile in unilocal mode</option> + <option value="--glocal">configure profile in multihit glocal mode</option> + <option value="--uniglocal">configure profile in unihit glocal mode</option> + </param> + </when> + </conditional> + + + <expand macro="seed"/> + </inputs> + <outputs> + <data format="fasta" name="output" label="Sequences generated from $hmmfile.name"> + <change_format> + <when input="oformat_select" value="aln" format="stockholm"/> + <!-- the rest are fasta --> + </change_format> + </data> + </outputs> + <tests> + <test> + <param name="hmmfile" value="globins4.hmm"/> + <param name="oformat_select" value="aln"/> + <param name="N_aln" value="10"/> + <expand macro="seed_test" /> + <output name="output" file="globins4-emit.sto" lines_diff="4"/> + </test> + <test> + <param name="hmmfile" value="globins4.hmm"/> + <param name="oformat_select" value="fasta"/> + <param name="N_aln" value="10"/> + <expand macro="seed_test" /> + <output name="output" file="globins4-emit-1.sto" lines_diff="4"/> + </test> + </tests> + <help><![CDATA[ +@HELP_PRE@ + +The hmmemit program samples (emits) sequences from the profile HMM(s) in +hmmfile, and writes them to output. Sampling sequences may be useful for a +variety of purposes, including creating synthetic true positives for benchmarks +or tests. + +The default is to sample one unaligned sequence from the core probability +model, which means that each sequence consists of one full-length domain. +Alternatively, with the -c option, you can emit a simple majority-rule +consensus sequence; or with the -a option, you can emit an alignment (in which +case, you probably also want to set -N to something other than its default of 1 +sequence per model). + +As another option, with the -p option you can sample a sequence from a fully +configured HMMER search profile. This means sampling a ‘homologous sequence’ by +HMMER’s definition, including nonhomologous flanking sequences, local +alignments, and multiple domains per sequence, depending on the length model +and alignment mode chosen for the profile. + +The hmmfile may contain a library of HMMs, in which case each HMM will be used +in turn. + +@HELP_PRE_OTH@ + +Output Formats +-------------- + +Several output formats are available, each with different options. + +**Fasta** + +Fasta option is the easiest to understand, given an input model, it will produce N sequences in fasta format from that model. + +**Alignment** + +Produces a stockholm alignment, of what the Fasta output would have produced. + +**Majority-Rule Concensus Sequence** + +Emit a plurality-rule consensus sequence, instead of sampling a sequence from +the profile HMM’s probability distribution. The consensus sequence is formed by +selecting the maximum probability residue at each match state. + +**Fancier Concensus Sequence** + +Emit a fancier plurality-rule consensus sequence than the -c option. If the +maximum probability residue has p < minl show it as a lower case ’any’ residue +(n or x); if p >= minl and < minu show it as a lower case residue; and if p >= +minu show it as an upper case residue. The default settings of minu and minl +are both 0.0, which means -C gives the same output as -c unless you also set +minu and minl to what you want. + +**Sample** + +Sample unaligned sequences from the implicit search profile, not from the core +model. The core model consists only of the homologous states (between the begin +and end states of a HMMER Plan7 model). The profile includes the nonhomologous +N, C, and J states, local/glocal and uni/multihit algorithm configuration, and +the target length model. Therefore sequences sampled from a profile may in- +clude nonhomologous as well as homologous sequences, and may contain more than +one homologous sequence segment. By default, the profile is in multihit local +mode, and the target sequence length is configured for L=400. + +@ATTRIBUTION@ +]]></help> + <expand macro="citation"/> +</tool>