Mercurial > repos > iuc > hyphy_bgm
view hyphy_bgm.xml @ 14:a54340ea0866 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit adf0f20535bd585663c291f8f15137a7ba947949"
author | iuc |
---|---|
date | Fri, 21 Aug 2020 15:23:47 -0400 |
parents | 823a5afee916 |
children | 26ad9a6b1293 |
line wrap: on
line source
<?xml version="1.0"?>
<!--
    Galaxy wrapper for HyPhy BGM (Bayesian Graphical Models).

    Inputs : a FASTA alignment and a Newick tree, symlinked into the job
             working directory under fixed names.
    Outputs: the captured standard output of `hyphy bgm` (bgm_log) and the JSON
             results file collected from the working directory (bgm_output).

    The "requirements", "substitution", "gencode", "branches" and "citations"
    macros, as well as the @VERSION@ token, are defined in macros.xml.
-->
<tool id="hyphy_bgm" name="HyPhy-BGM" version="@VERSION@+galaxy0">
    <description>- Detecting coevolving sites via Bayesian graphical models</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!--
        Command notes:
        * The baseline model branch is keyed on "amino-acid", which is the value
          actually offered by the "datatype" select below (there is no "protein"
          option, so a test against "protein" can never be true).
          Assumes the "substitution" macro defines a param named "model" - confirm
          against macros.xml.
        * The MCMC / filtering flags use the spellings given by the params'
          "argument" attributes and by the "Tool options" help section
          (steps, burn-in, samples, max-parents, min-subs).
        * NOTE(review): run_type is kept unchanged; confirm with `hyphy bgm` that
          this is the keyword the data type prompt is bound to.
    -->
    <command detect_errors="exit_code"><![CDATA[
        ln -s '$input_file' bgm_input.fa &&
        ln -s '$input_nhx' bgm_input.nhx &&
        hyphy bgm
            --alignment ./bgm_input.fa
            --tree ./bgm_input.nhx
            --run_type '$datatype.value'
            #if $datatype.value == "codon":
                --code '$datatype.gencodeid'
            #end if
            #if $datatype.value == "amino-acid":
                --baseline_model '$datatype.model'
            #end if
            --branches '$branches'
            --steps '$chain_length'
            --burn-in '$burn_in'
            --samples '$samples'
            --max-parents '$parents'
            --min-subs '$min_subs'
            > '$bgm_log'
    ]]></command>
    <inputs>
        <param name="input_file" type="data" format="fasta" label="Input FASTA file"/>
        <param name="input_nhx" type="data" format="nhx" label="Input newick file"/>
        <!-- Data type selector; each branch pulls in the extra option that type needs. -->
        <conditional name="datatype">
            <param name="value" type="select" label="Type of data">
                <option value="nucleotide">Nucleotide</option>
                <option value="amino-acid">Amino acid</option>
                <option value="codon">Codon</option>
            </param>
            <when value="nucleotide"/>
            <when value="amino-acid">
                <expand macro="substitution"/>
            </when>
            <when value="codon">
                <expand macro="gencode"/>
            </when>
        </conditional>
        <expand macro="branches"/>
        <param name="chain_length" type="integer" value="100000" min="0" max="1000000000"
               label="Length of MCMC chain"/>
        <param name="burn_in" type="integer" value="10000" min="0" max="1000000000"
               label="Number of samples to discard for burn-in"/>
        <param name="samples" type="integer" value="100" min="0" max="100"
               label="Number of steps to extract from chain sample"/>
        <param argument="--max-parents" name="parents" type="integer" value="1" min="1" max="3"
               label="Maximum number of parents allowed per node"/>
        <param argument="--min-subs" name="min_subs" type="integer" value="1" min="1" max="100000"
               label="Minimum number of substitutions per site to be included in the analysis"/>
    </inputs>
    <outputs>
        <!-- Standard output of the hyphy run (redirected in the command). -->
        <data name="bgm_log" format="txt"/>
        <!-- JSON results, picked up from the job working directory. -->
        <data name="bgm_output" format="hyphy_results.json" from_work_dir="bgm_input.fa.BGM.json"/>
    </outputs>
    <tests>
        <!-- Default settings; sim_size compares output size rather than exact content. -->
        <test>
            <param name="input_file" ftype="fasta" value="bgm-in1.fa"/>
            <param name="input_nhx" ftype="nhx" value="bgm-in1.nhx"/>
            <output name="bgm_output" file="bgm-out1.json" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[
BGM : Bayesian Graphical Models
===============================

What does this do?
------------------

This tool identifies groups of sites in the alignments that experience substitutions along the same branches, i.e. *co-evolve*.

Brief description
-----------------

BGM (Bayesian Graphical Model) uses a maximum likelihood ancestral state
reconstruction to map substitution (non-synonymous only for coding data)
events to branches in the phylogeny and then analyzes the joint distribution of
the substitution map using a Bayesian graphical model (network). Next, a Markov
chain Monte Carlo analysis is used to generate a random sample of network
structures from the posterior distribution given the data. Each node in the
network represents a site in the alignment, and links (edges) between nodes
indicate high posterior support for correlated substitutions at the two sites
over time, which implies coevolution.

Input
-----

1. A *FASTA* sequence alignment.
2. A phylogenetic tree in the *Newick* format

Note: the names of sequences in the alignment must match the names of the sequences in the tree.

Output
------

A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf).

A custom visualization module for viewing these results is available (see http://vision.hyphy.org/BGM for an example)

Further reading
---------------

http://hyphy.org/methods/selection-methods/#BGM

Tool options
------------

::

    --branches      Which branches should be tested for selection?
                    All [default] : test all branches
                    Internal : test only internal branches (suitable for
                    intra-host pathogen evolution for example, where terminal
                    branches may contain polymorphism data)
                    Leaves: test only terminal (leaf) branches
                    Unlabeled: if the Newick string is labeled using the {}
                    notation, test only branches without explicit labels
                    (see http://hyphy.org/tutorials/phylotree/)

    --max-parents   The maximum number of parents allowed per node, i.e. how
                    many sites can directly influence substitution patterns at
                    another site
                    Increasing this number scales complexity nonlinearly
                    default value: 1

    --min-subs      The minimum number of substitutions per site to include it
                    in the analysis
                    Filter low complexity (too few substitutions) sites
                    default value: 1

    --chains        How many MCMC chains to run (does not apply to
                    Variational-Bayes)
                    default value: 5

    --steps         MCMC chain length (does not apply to Variational-Bayes)
                    default value: 100,000

    --burn-in       MCMC chain burn in (does not apply to Variational-Bayes)
                    default value: 10,000

    --samples       MCMC samples to draw (does not apply to Variational-Bayes)
                    default value: 100
    ]]></help>
    <expand macro="citations">
        <citation type="doi">10.1371/journal.pcbi.0030231</citation>
    </expand>
</tool>