Mercurial > repos > malex > garli

<tool id="Garli" name="Garli" version="2.0" force_history_refresh="True">
  <!-- One-line summary shown next to the tool name. -->
  <description>phylogenetic inference using the maximum-likelihood criterion</description>
<!--
  The command is a Cheetah template which allows some Python based syntax.
  Inside the command, lines starting with hash hash are Cheetah comments, and
  Galaxy will turn newlines into spaces.
  Arguments to the wrapper beyond the config file are just for Galaxy's
  benefit; all filenames are hardcoded.
-->
<command interpreter="python">garli_wrapper.py $garli_conf $best_all_tre $best_tre $log00_log $screen_log </command>
  <inputs>
    <!-- Input alignment; its path is written to the datafname setting of the config file. -->
    <param name="datafname" format="nexus" type="data" label="Nexus formatted sequence file" force_select="true"/>
    <conditional name="choose_search_type">
        <param name="search_type" type="select" label="Analysis Type">
            <option value="mlsearch" selected="true">ML Search</option>
            <option value="bootstrap">Bootstrap</option>
        </param>
        <!-- Every case must define searchreps, bootstrapreps and
             resampleproportion: the config template reads all three from this
             conditional whichever analysis type is selected. -->
        <when value="mlsearch">
            <param name="searchreps" type="integer" size="4" value="1" label="Number of replicates">
                <validator type="in_range" message="(1-infinity)" min="1" max="inf"/>
            </param>
            <param name="bootstrapreps" type="hidden" value="0" />
            <param name="resampleproportion" type="float" value="1.0" label="Relative size of resample data matrix (0.1-10.0)">
                <validator type="in_range" message="(0.1-10.0)" min="0.1" max="10.0"/>
            </param>
        </when>
        <when value="bootstrap">
            <param name="searchreps" type="hidden" value="0" />
            <param name="bootstrapreps" type="integer" size="4" value="1" label="Number of replicates">
                <validator type="in_range" message="(1-infinity)" min="1" max="inf"/>
            </param>
            <!-- Previously missing here, which left the config template without
                 a value for bootstrap runs. -->
            <param name="resampleproportion" type="float" value="1.0" label="Relative size of resample data matrix (0.1-10.0)">
                <validator type="in_range" message="(0.1-10.0)" min="0.1" max="10.0"/>
            </param>
        </when>
    </conditional>
    <!-- Optional constraint specification; uses the same "txt" datatype name as the text outputs of this tool. -->
    <param name="constraintfile" type="data" format="txt" label="Constraint file" optional="true"/>
    <conditional name="choose_streefname">
        <!-- Chooses the origin of the starting tree. Each case provides the
             streefname value that the config template reads. -->
        <param label="Source of starting tree and/or model" name="streefname_menu" type="select">
            <option selected="true" value="stepwise">Stepwise</option>
            <option value="random">Random</option>
            <option value="file">User Tree</option>
        </param>
        <when value="stepwise">
            <param value="stepwise" type="hidden" name="streefname" />
        </when>
        <when value="random">
            <param value="random" type="hidden" name="streefname" />
        </when>
        <when value="file">
            <param label="Starting Tree File" type="data" format="nexus" name="streefname" />
        </when>
    </conditional>
    <!-- Number of attachment branches tried per taxon; must be at least 1. -->
    <param label="Attachment branches evaluated per taxon (min=1)"
           name="attachmentspertaxon"
           type="integer"
           value="50"
           size="4">
        <validator min="1" max="inf" type="in_range" message="(1-infinity)" />
    </param>
    <!-- The seed is -1 or an integer, so it is declared as an integer
         parameter; the range validator rejects values below -1. -->
    <param name="randseed" type="integer" size="4" value="-1" label="Random Seed (-1 or int)">
        <validator type="in_range" message="(-1 to infinity)" min="-1" max="inf"/>
    </param>
    <!-- Written to the availablememory setting of the config file. -->
    <param name="availablememory" size="4" value="512" type="integer" label="Available Memory"/>
    <!-- Yes/No switch written as 1/0 to the refinestart setting. The label is
         kept on one line so no newline or indent whitespace ends up in it. -->
    <param name="refinestart" type="select" label="Perform initial rough optimization">
        <option value="1" selected="true">Yes</option>
        <option value="0">No</option>
    </param>
    <!-- The outgroup is a space-separated list of taxon numbers that may
         contain ranges (for example "1 3 5" or "1-3 5"), so it has to be a
         text parameter; the regex keeps the value to that form. -->
    <param name="outgroup" type="text" size="20" value="1" label="Outgroup taxa numbers">
        <validator type="regex" message="Use taxon numbers separated by spaces; ranges use a dash, e.g. 1-3 5">^\s*\d+(-\d+)?(\s+\d+(-\d+)?)*\s*$</validator>
    </param>
    <!-- Yes/No switch written as 1/0 to the collapsebranches setting. The
         label is kept on one line so no newline or indent whitespace ends up
         in it. -->
    <param name="collapsebranches" type="select" label="Collapse Branches">
        <option value="1" selected="true">Yes</option>
        <option value="0">No</option>
    </param>

        <conditional name="choose_datatype">
        <!-- Test parameter of choose_datatype: the chosen value decides which
             of the model cases below is shown. -->
        <param label="Model Type" type="select" name="datatype">
            <option selected="true" value="nucleotide">Nucleotide</option>
            <option value="aminoacid">Amino Acid</option>
            <option value="codon-aminoacid">Codon-Amino Acid</option>
            <option value="codon">Codon</option>
        </param>
        <!-- Model settings offered for nucleotide data. -->
        <when value="nucleotide">
            <conditional name="choose_ratematrix">
                <param name="ratematrix" type="select" label="Rate Matrix">
                    <option value="1rate">1rate</option>
                    <option value="2rate">2rate</option>
                    <option value="6rate" selected="true">6rate</option>
                    <option value="fixed">fixed</option>
                    <option value="custom">custom</option>
                </param>
                <when value="1rate" />
                <when value="2rate" />
                <when value="6rate" />
                <when value="fixed" />
                <when value="custom">
                    <!-- NOTE(review): this text parameter reuses the name of the
                         select above, presumably so the config template can read
                         a single name; confirm Galaxy resolves it as intended. -->
                    <param name="ratematrix" type="text" size="20" value="(a b a a b a)" label="Custom Rate Matrix"/>
                </when>
            </conditional>
            <param name="statefrequencies" type="select" label="State Frequencies">
                <option value="equal">Equal</option>
                <option value="empirical">Empirical</option>
                <option value="estimate" selected="true">Estimate</option>
                <option value="fixed">Fixed</option>
            </param>
            <param name="ratehetmodel" type="select" label="Rate Heterogeneity Type">
                <option value="none">None</option>
                <option value="gamma" selected="true">Gamma</option>
                <option value="gammafixed">Gamma Fixed</option>
            </param>
            <!-- NOTE(review): the label mentions dN/dS although this is the
                 nucleotide case; confirm the intended wording. -->
            <param name="numratecats" type="integer" size="2" value="4"
                label="Number of discrete dN/dS categories">
                <validator type="in_range" message="(1-20)" min="1" max="20"/>
            </param>
            <param name="invariantsites" type="select" label="Treatment of proportion of invariable sites parameter">
                <option value="none">None</option>
                <!-- "selected" (not "select") is the attribute that marks the default option. -->
                <option value="estimate" selected="true">Estimate</option>
                <option value="fixed">Fixed</option>
            </param>
        </when>
    <!-- Model settings offered for amino-acid data.
         NOTE(review): this case defines no ratehetmodel parameter although the
         help text lists rate heterogeneity for amino-acid data; confirm whether
         a selector should be added. -->
    <when value="aminoacid">
        <param name="ratematrix" type="select" label="Rate Matrix">
            <option value="poisson">Poisson</option>
            <option value="jones" selected="true">Jones</option>
            <option value="dayhoff">Dayhoff</option>
            <option value="wag">WAG</option>
            <option value="mtmam">mtmam</option>
            <option value="mtrev">mtREV</option>
        </param>
        <!-- Named statefrequencies so that it matches the name the config
             template reads from this conditional. -->
        <param name="statefrequencies" type="select" label="Equilibrium Base Frequencies">
            <option value="equal">Equal</option>
            <option value="empirical" selected="true">Empirical</option>
            <option value="estimate">Estimate</option>
            <option value="fixed">Fixed</option>
            <option value="jones">Jones</option>
            <option value="dayhoff">Dayhoff</option>
            <option value="wag">WAG</option>
            <option value="mtmam">mtmam</option>
            <option value="mtrev">mtREV</option>
        </param>
        <param name="numratecats" type="integer" size="2" value="4"
            label="Number of discrete dN/dS categories">
            <validator type="in_range" message="(1-20)" min="1" max="20"/>
        </param>
        <param name="invariantsites" type="select" label="Treatment of proportion of invariable sites parameter">
            <option value="none">None</option>
            <!-- "selected" (not "select") is the attribute that marks the default option. -->
            <option value="estimate" selected="true">Estimate</option>
            <option value="fixed">Fixed</option>
        </param>
    </when>
    <!-- Model settings offered for codon data analysed with an amino-acid model.
         NOTE(review): this case defines no ratehetmodel, numratecats or
         invariantsites parameters; confirm whether that is intended. -->
    <when value="codon-aminoacid">
        <param name="ratematrix" type="select" label="Rate Matrix">
            <option value="poisson">Poisson</option>
            <option value="jones" selected="true">Jones</option>
            <option value="dayhoff">Dayhoff</option>
            <option value="wag">WAG</option>
            <option value="mtmam">mtmam</option>
            <option value="mtrev">mtREV</option>
        </param>
        <!-- Named statefrequencies so that it matches the name the config
             template reads from this conditional. -->
        <param name="statefrequencies" type="select" label="Equilibrium Base Frequencies">
            <option value="equal">Equal</option>
            <option value="empirical" selected="true">Empirical</option>
            <option value="estimate">Estimate</option>
            <option value="fixed">Fixed</option>
            <option value="jones">Jones</option>
            <option value="dayhoff">Dayhoff</option>
            <option value="wag">WAG</option>
            <option value="mtmam">mtmam</option>
            <option value="mtrev">mtREV</option>
        </param>
        <param name="geneticcode" type="select" label="Genetic Code">
            <option value="standard" selected="true">Standard</option>
            <option value="vertmito">Vertmito</option>
            <option value="invertmito">Invertmito</option>
        </param>
    </when>
    <!-- Model settings offered for codon data. Labels are kept on one line so
         that no newline or indent whitespace ends up in the label text. -->
    <when value="codon">
        <conditional name="choose_ratematrix">
            <param name="ratematrix" type="select" label="Rate Matrix">
                <option value="1rate">1rate</option>
                <option value="2rate">2rate</option>
                <option value="6rate" selected="true">6rate</option>
                <option value="fixed">fixed</option>
                <option value="custom">custom</option>
            </param>
            <when value="1rate" />
            <when value="2rate" />
            <when value="6rate" />
            <when value="fixed" />
            <when value="custom">
                <!-- NOTE(review): this text parameter reuses the name of the
                     select above, presumably so the config template can read a
                     single name; confirm Galaxy resolves it as intended. -->
                <param name="ratematrix" type="text" size="20" value="(a b a a b a)" label="Custom Rate Matrix"/>
            </when>
        </conditional>
        <param name="statefrequencies" type="select" label="State Frequencies">
            <option value="equal">Equal</option>
            <option value="empirical" selected="true">Empirical</option>
            <option value="f1x4">F1x4</option>
            <option value="f3x4">F3x4</option>
        </param>
        <param name="ratehetmodel" type="select" label="Rate Heterogeneity Type">
            <option value="none" selected="true">None</option>
            <option value="nonsynonymous">Non-synonymous</option>
        </param>
        <param name="numratecats" type="integer" size="2" value="1"
            label="Number of discrete dN/dS categories">
            <validator type="in_range" message="(1-20)" min="1" max="20"/>
        </param>
        <!-- Fixed to "none" for this case; not shown to the user. -->
        <param name="invariantsites" type="hidden" value="none"/>
        <param name="geneticcode" type="select" label="Genetic Code">
            <option value="standard" selected="true">Standard</option>
            <option value="vertmito">Vertmito</option>
            <option value="invertmito">Invertmito</option>
        </param>
    </when>
    </conditional>
    <!-- Population, termination and optimization-precision settings; each is
         written to the [master] section of the config file under its own name.
         Labels are kept on one line so that no newline or indent whitespace
         ends up in the label text. -->
    <param name="nindivs" type="integer" size="3" value="4" label="Number of individuals in population">
        <validator type="in_range" message="(2-100)" min="2" max="100"/>
    </param>
    <param name="holdover" type="integer" size="2" value="1" label="Unmutated copies of best individual">
        <validator type="in_range" message="(1-99)" min="1" max="99"/>
    </param>
    <param name="selectionintensity" type="float" size="3" value="0.5" label="Strength of selection">
        <validator type="in_range" message="(0.1-5.0)" min="0.1" max="5.0"/>
    </param>
    <param name="holdoverpenalty" type="integer" size="3" value="0" label="Fitness handicap for best individual">
        <validator type="in_range" message="(0-100)" min="0" max="100"/>
    </param>
    <param name="stopgen" type="integer" size="10" value="5000000" label="Maximum number of generations to run">
        <validator type="in_range" message="(0-50000000)" min="0" max="50000000"/>
    </param>
    <param name="stoptime" type="integer" size="10" value="5000000" label="Maximum time to run">
        <validator type="in_range" message="(0-50000000)" min="0" max="50000000"/>
    </param>
    <param name="startoptprec" type="float" size="5" value="0.5" label="Starting optimization precision">
        <validator type="in_range" message="(0.005-5.0)" min="0.005" max="5.0"/>
    </param>
    <param name="minoptprec" type="float" size="5" value="0.01" label="Minimal optimization precision">
        <validator type="in_range" message="(0.001-5.0)" min="0.001" max="5.0"/>
    </param>
    <param name="numberofprecreductions" type="integer" size="3" value="10"
        label="Number of steps down from Start Precision to Minimum Precision">
        <validator type="in_range" message="(0-100)" min="0" max="100"/>
    </param>
    <!-- Tree rejection threshold followed by the mutation-type weights; every
         weight accepts any value from 0 upwards. -->
    <param name="treerejectionthreshold" label="Tree Rejection Threshold" type="float" value="50.0" size="5">
        <validator min="0" max="500.0" type="in_range" message="(0-500.0)" />
    </param>
    <param name="topoweight" label="Weight on topology mutations" type="float" value="1.0" size="10">
        <validator min="0" max="inf" type="in_range" message="(0-infinity)" />
    </param>
    <param name="modweight" label="Weight on model parameter mutations" type="float" value="0.05" size="10">
        <validator min="0" max="inf" type="in_range" message="(0-infinity)" />
    </param>
    <param name="brlenweight" label="Weight on branch-length parameter mutations" type="float" value="0.2" size="10">
        <validator min="0" max="inf" type="in_range" message="(0-infinity)" />
    </param>
    <param name="randnniweight" label="Weight on NNI topology changes" type="float" value="0.1" size="10">
        <validator min="0" max="inf" type="in_range" message="(0-infinity)" />
    </param>
    <param name="randsprweight" label="Weight on SPR topology changes" type="float" value="0.3" size="10">
        <validator min="0" max="inf" type="in_range" message="(0-infinity)" />
    </param>
    <param name="limsprweight" label="Weight on localized SPR topology changes" type="float" value="0.6" size="10">
        <validator min="0" max="inf" type="in_range" message="(0-infinity)" />
    </param>
    <!-- Validator messages state the same bounds as the min/max attributes
         that are actually enforced. -->
    <param name="intervallength" type="integer" size="4" value="100"
        label="Interval Length">
        <validator type="in_range" message="(0-1000)" min="0" max="1000"/>
    </param>
    <param name="intervalstostore" type="integer" size="2" value="5"
        label="Number of intervals to store">
        <validator type="in_range" message="(0-10)" min="0" max="10"/>
    </param>
    <param name="limsprrange" type="integer" size="10" value="6"
        label="Max range for localized SPR topology changes">
        <validator type="in_range" message="(0-infinity)" min="0" max="inf"/>
    </param>
    <!-- Remaining mutation settings: branch-length and model mutation
         magnitudes plus the two swap-bias weights. -->
    <param name="meanbrlenmuts" label="Mean number of branch lengths to change per mutation" type="integer" value="5" size="7">
        <validator min="0" max="10000000000" type="in_range" message="(0-# of taxa)" />
    </param>
    <param name="gammashapebrlen" label="Magnitude of branch-length mutations" type="integer" value="1000" size="4">
        <validator min="50" max="2000" type="in_range" message="(50-2000)" />
    </param>
    <param name="gammashapemodel" label="Magnitude of model parameter mutations" type="integer" value="1000" size="4">
        <validator min="50" max="2000" type="in_range" message="(50-2000)" />
    </param>
    <param name="uniqueswapbias" label="Relative weight assigned to already attempted branch swaps" type="float" value="0.1" size="4">
        <validator min="0.01" max="1.0" type="in_range" message="(0.01-1.0)" />
    </param>
    <param name="distanceswapbias" label="Relative weight assigned to branch swaps based on locality" type="float" value="1.0" size="3">
        <validator min="0.1" max="10.0" type="in_range" message="(0.1-10.0)" />
    </param>
</inputs>
<outputs>
    <!-- Each dataset is collected from the file named in from_work_dir; the
         history label ends with that same file name. -->
    <data format="nexus" name="best_tre" metadata_source="datafname" from_work_dir="garli.best.tre" label="${tool.name} on ${on_string}: garli.best.tre"/>
    <data format="nexus" name="best_all_tre" metadata_source="datafname" from_work_dir="garli.best.all.tre" label="${tool.name} on ${on_string}: garli.best.all.tre"/>
    <!-- NOTE(review): this output shares the name garli_conf with the
         configfile; confirm which one $garli_conf resolves to in the command. -->
    <data format="txt" name="garli_conf" from_work_dir="garli.conf" label="${tool.name} on ${on_string}: garli.conf"/>
    <data format="txt" name="log00_log" from_work_dir="garli.log00.log" label="${tool.name} on ${on_string}: garli.log00.log"/>
    <data format="txt" name="screen_log" from_work_dir="garli.screen.log" label="${tool.name} on ${on_string}: garli.screen.log"/>
</outputs>
<configfiles>
<!--
  GARLI configuration file, rendered as a Cheetah template from the tool
  parameters. Settings that exist only for some analysis or model types are
  looked up with getVar so that a case which does not define them no longer
  aborts rendering; model settings missing from the selected case are left out
  of the file. No XML comments are placed inside the configfile element because
  its text is the template itself.
-->
<configfile name="garli_conf">
[general]
datafname = ${datafname}
searchreps = ${choose_search_type.searchreps}
bootstrapreps = ${choose_search_type.bootstrapreps}
constraintfile = ${constraintfile}
streefname = ${choose_streefname.streefname}
attachmentspertaxon = ${attachmentspertaxon}
ofprefix = garli
randseed = ${randseed}
availablememory = ${availablememory}
logevery = 10
writecheckpoints = 0
saveevery = 100
refinestart = ${refinestart}
outputeachbettertopology = 0
outputcurrentbesttopology = 0
enforcetermconditions = 1
genthreshfortopoterm = 20000
scorethreshforterm = 0.05
significanttopochange = 0.01
outputphyliptree = 0
outputmostlyuselessfiles = 0
restart = 0
outgroup = ${outgroup}
resampleproportion = $getVar('choose_search_type.resampleproportion', '1.0')
inferinternalstateprobs = 0
outputsitelikelihoods = 0
optimizeinputonly = 0
collapsebranches = ${collapsebranches}

[model1]
datatype = ${choose_datatype.datatype}
## Only the nucleotide and codon cases nest ratematrix in choose_ratematrix.
#if str($choose_datatype.datatype) in ("nucleotide", "codon")
ratematrix = ${choose_datatype.choose_ratematrix.ratematrix}
#else
ratematrix = ${choose_datatype.ratematrix}
#end if
statefrequencies = ${choose_datatype.statefrequencies}
#set $ratehetmodel = $getVar('choose_datatype.ratehetmodel', None)
#if $ratehetmodel is not None
ratehetmodel = ${ratehetmodel}
#end if
#set $numratecats = $getVar('choose_datatype.numratecats', None)
#if $numratecats is not None and int(str($numratecats)) > 0
numratecats = ${numratecats}
#end if
#set $invariantsites = $getVar('choose_datatype.invariantsites', None)
#if $invariantsites is not None and str($invariantsites) != ""
invariantsites = ${invariantsites}
#end if
#set $geneticcode = $getVar('choose_datatype.geneticcode', None)
#if $geneticcode is not None
geneticcode = ${geneticcode}
#end if

[master]
nindivs = $nindivs
holdover = $holdover
selectionintensity = $selectionintensity
holdoverpenalty = $holdoverpenalty
stopgen = $stopgen
stoptime = $stoptime

startoptprec = $startoptprec
minoptprec = $minoptprec
numberofprecreductions = $numberofprecreductions
treerejectionthreshold = $treerejectionthreshold
topoweight = $topoweight
modweight = $modweight
brlenweight = $brlenweight
randnniweight = $randnniweight
randsprweight = $randsprweight
limsprweight =  $limsprweight
intervallength = $intervallength
intervalstostore = $intervalstostore
limsprrange = $limsprrange
meanbrlenmuts = $meanbrlenmuts
gammashapebrlen = $gammashapebrlen
gammashapemodel = $gammashapemodel
uniqueswapbias = $uniqueswapbias
distanceswapbias = $distanceswapbias
</configfile>
</configfiles>

<help>

**What it does**

GARLI is a program that performs phylogenetic inference using the
maximum-likelihood criterion. Several sequence types are supported,
including nucleotide, amino acid and codon. Version 2.0 adds support
for partitioned models and morphology-like datatypes.

Garli is written and maintained by Derrick Zwickl

Configuration options are adapted from
https://www.nescent.org/wg_garli/GARLI_Configuration_Settings

-----

**Detailed description of the configuration options**


**Analysis Type**

    Specify whether to perform a maximum likelihood search for the best tree, or
    a bootstrap analysis.


**Number of replicates**

    Number of independent search replicates to run.


**Relative size of resample data**

    This setting allows for bootstrap-like resampling, but with the
    pseudoreplicate datasets having the number of alignment columns different
    from the real data. Setting values below 1.0 is somewhat similar to
    jackknifing, but not identical.


**Attachment branches evaluated per taxon (min=1)**

    The number of attachment branches evaluated for each taxon to be added to
    the tree during the creation of an ML stepwise-addition starting tree.
    Briefly, stepwise addition is an algorithm used to make a tree, and involves
    adding taxa in a random order to a growing tree. For each taxon to be added,
    a number of randomly chosen attachment branches are tried and scored, and
    then the best scoring one is chosen as the location of that taxon. This
    setting controls how many attachment points are evaluated for each taxon to
    be added. A value of one is equivalent to a completely random tree (only one
    randomly chosen location is evaluated). A value of greater than 2 times the
    number of taxa in the dataset means that all attachment points will be
    evaluated for each taxon, and will result in very good starting trees (but
    may take a while on large datasets). Even fairly small values (less than 10)
    can result in starting trees that are much, much better than random, but
    still fairly different from one another.


**Constraint file**

     Select a file containing constraint specifications.


**Random seed**

    The seed used by the random number generator; it can be -1 or a positive
    integer. Specify “-1” to have a seed chosen for you. Specifying the same
    seed number in multiple runs will give exactly identical results, if all
    other parameters and settings are also identical.


**Available memory**

    This lets GARLI determine how much system memory it may be able to use to
    store computations for reuse.


**Perform initial rough optimization**

    Specifies whether some initial rough optimization is performed on the
    starting branch lengths and rate heterogeneity parameters. This is always
    recommended.


**Outgroup taxa numbers**

    The outgroup option allows for orienting tree topologies in a consistent way
    when they are written to a file. Note that this has NO effect whatsoever on
    the actual inference and the specified outgroup is NOT constrained to be
    present in the inferred trees. If multiple outgroup taxa are specified and
    they do not form a monophyletic group, this setting will be ignored. If you
    specify a single outgroup taxon it will always be present, and the tree will
    always be consistently oriented. To specify an outgroup consisting of taxa
    1, 3 and 5 the format is this: outgroup = 1 3 5. Dashes are used for ranges
    e.g. 1-3 5.


**Collapse branches**

    Before version 1.0, all trees that were returned were fully resolved. This
    was true even if the maximum-likelihood estimates of some internal branch
    lengths were effectively zero (or GARLI's minimum, which is 1e-8). In such cases,
    collapsing the branch into a polytomy would be a better representation. Note
    that GARLI will never return a tree with an actual branch length of zero,
    but rather with its minimum value of 1.0e-8. The drawback of always
    returning fully resolved trees is that what is effectively a polytomy can be
    resolved in three ways, and different independent searches may randomly
    return one of those resolutions. Thus, if you compare the trees by topology
    only, they will look different. If you pay attention to the branch lengths
    and likelihood scores of the trees it will be apparent that they are
    effectively the same. I think that collapsing of branches is particularly
    important when bootstrapping, since no support should be given to a branch
    that doesn't really exist, i.e., that is a random resolution of a polytomy.
    Collapsing is also good when calculating tree to tree distances such as the
    symmetric tree distance, for example when calculating phylogenetic error to
    a known target tree. Zero-length branches would add to the distances
    (~error) although they really should not.


**Model type**

    The codon-aminoacid datatype means that the data will be supplied as a
    nucleotide alignment, but will be internally translated and analyzed using
    an amino acid model. The codon and codon-aminoacid datatypes require
    nucleotide sequence that is aligned in the correct reading frame. In other
    words, all gaps in the alignment should be a multiple of 3 in length, and
    the alignment should start at the first position of a codon. If the
    alignment has extra columns at the start, middle or end, they should be
    removed or excluded with a Nexus exset (see the FAQ for an example of exset
    usage). The correct Genetic Code must also be set.


**Datatype - nucleotide**

**Rate matrix**

    The number of relative substitution rate parameters (note that the number of
    free parameters is this value minus one). Equivalent to the “nst” setting in
    PAUP* and MrBayes. 1rate assumes that substitutions between all pairs of
    nucleotides occur at the same rate (JC model), 2rate allows different rates
    for transitions and transversions (K2P or HKY models), and 6rate allows a
    different rate between each nucleotide pair (GTR). These rates are estimated
    unless the fixed option is chosen. Since version 0.96, parameters for any
    submodel of the GTR model may be estimated. The format for specifying this
    is very similar to that used in the “rclass” setting of PAUP*. Within
    parentheses, six letters are specified, with spaces between them. The six
    letters represent the rates of substitution between the six pairs of
    nucleotides, with the order being A-C, A-G, A-T, C-G, C-T and G-T. Letters
    within the parentheses that are the same mean that a single parameter is
    shared by multiple nucleotide pairs.


**State frequencies**

    Specifies how the equilibrium state frequencies (A, C, G and T) are treated.
    The empirical setting fixes the frequencies at their observed proportions,
    and the other options should be self-explanatory.


**Datatype - nucleotide or amino-acid**


**Treatment of proportion of invariable sites parameter**

    Specifies whether a parameter representing the proportion of sites that are
    unable to change (i.e. have a substitution rate of zero) will be included.
    This is typically referred to as 'invariant sites', but would better be
    termed 'invariable sites'.


**Rate heterogeneity type**

    (none, gamma, gammafixed) – The model of rate heterogeneity assumed.
    “gammafixed” requires that the alpha shape parameter is provided, and a
    setting of “gamma” estimates it.


**Number of discrete dN/dS categories**

    The number of categories of variable rates (not including the invariant site
    class if it is being used). Must be set to 1 if ratehetmodel is set to none.
    Note that runtimes and memory usage scale linearly with this setting.


**Datatype - amino-acid or codon-aminoacid**

**Rate matrix**

    (poisson, jones, dayhoff, wag, mtmam, mtrev) – The fixed amino acid rate
    matrix to use. You should use the matrix that gives the best likelihood, and
    could use a program like PROTTEST (very much like MODELTEST, but for amino
    acid models) to determine which fits best for your data. Poisson assumes a
    single rate of substitution between all amino acid pairs, and is a very poor
    model.


**Equilibrium Base Frequencies**

    (equal, empirical, estimate, fixed, jones, dayhoff, wag, mtmam, mtrev) –
    Specifies how the equilibrium state frequencies of the 20 amino acids are
    treated. The “empirical” option fixes the frequencies at their observed
    proportions (when describing a model this is often termed '+F').


**Number of discrete dN/dS categories**

    The number of categories of variable rates (not including the invariant site
    class if it is being used). Must be set to 1 if ratehetmodel is set to none.
    Note that runtimes and memory usage scale linearly with this setting.


**Treatment of proportion of invariable sites parameter**

    Specifies whether a parameter representing the proportion of sites that are
    unable to change (i.e. have a substitution rate of zero) will be included.
    This is typically referred to as 'invariant sites', but would better be
    termed 'invariable sites'.


**Datatype - codon**


**Rate matrix**

    (1rate, 2rate, 6rate, fixed, custom string) – This determines the relative
    rates of nucleotide substitution assumed by the codon model. The options are
    exactly the same as those allowed under a normal nucleotide model. A codon
    model with ratematrix = 2rate specifies the standard Goldman and Yang (1994)
    model, with different substitution rates for transitions and transversions.


**State frequencies**

    The options are to use equal codon frequencies (not a good option), the
    frequencies observed in your dataset (termed “empirical” in GARLI), or the
    codon frequencies implied by the “F1x4” or “F3x4” methods (using PAML
    terminology). These last two options calculate the codon frequencies as the
    product of the frequencies of the three nucleotides that make up each codon.
    In the “F1x4” case the nucleotide frequencies are those observed in the
    dataset across all codon positions, while the “F3x4” option uses the
    nucleotide frequencies observed in the data at each codon position
    separately.


**Rate Heterogeneity Type**

    For codon models, the default is to infer a single dN/dS parameter.
    Alternatively, a model can be specified that infers a given number of dN/dS
    categories, with the dN/dS values and proportions falling in each category
    estimated (ratehetmodel = nonsynonymous). This is the 'discrete' or 'M3'
    model of Yang et al., 2000.


**Number of discrete dN/dS categories**

    When ratehetmodel = nonsynonymous, this is the number of dN/dS parameter
    categories.


**Datatype - codon or codon-aminoacid**


**Genetic code**

    The genetic code to be used in translating codons into amino acids.


**Population Settings**


**Number of individuals in population**

    The number of individuals in the population. This may be increased, but
    doing so is generally not beneficial. Note that typical genetic algorithms
    tend to have much, much larger population sizes than GARLI defaults.


**Unmutated copies of best individual**

    The number of times the best individual is copied to the next generation
    with no chance of mutation. It is best not to mess with this setting.


**Strength of selection**

    Controls the strength of selection, with larger numbers denoting stronger
    selection. The relative probability of reproduction of two individuals
    depends on the difference in their log likelihoods (ΔlnL) and is formulated
    very similarly to the procedure of calculating Akaike weights.


**Fitness handicap for the best individual**

    This can be used to bias the probability of reproduction of the best
    individual downward. Because the best individual is automatically copied
    into the next generation, it has a bit of an unfair advantage and can cause
    all population variation to be lost due to genetic drift, especially with
    small population sizes. The value specified here is subtracted from the
    best individual’s lnL score before calculating the probabilities of
    reproduction. It seems plausible that this might help maintain variation,
    but I have not seen it cause a measurable effect.


**Maximum number of generations to run**

    Use if automatic termination is desired to prevent a runaway process.


**Maximum time to run**

    The maximum number of seconds for the run to continue. Use if automatic
    termination is desired to prevent a runaway process.


**Branch-length optimization settings**


**Minimal optimization precision**

    The minimum allowed value of the optimization precision - must not be larger
    than the starting optimization precision.


**Number of steps down from Start Precision to Minimum Precision**

    Specify the number of steps that it will take for the optimization precision
    to decrease (linearly) from startoptprec to minoptprec.


**Tree rejection threshold**

    This setting controls which trees have more extensive branch-length
    optimization applied to them. All trees created by a branch swap receive
    optimization on a few branches that directly took part in the rearrangement.
    If the difference in score between the partially optimized tree and the best
    known tree is greater than treerejectionthreshold, no further optimization
    is applied to the branches of that tree. Reducing this value can
    significantly reduce runtimes, often with little or no effect on results.
    However, it is possible that a better tree could be missed if this is set
    too low. In cases in which obtaining the very best tree per search is not
    critical (e.g., bootstrapping), setting this lower (~20) is probably safe.


**Settings controlling the proportions of the mutation types**


**Weight on topology mutations**

    The prior weight assigned to the class of topology mutations (NNI, SPR and
    limSPR). Note that setting this to 0.0 turns off topology mutations, meaning
    that the tree topology is fixed for the run. This used to be a way to have
    the program estimate only model parameters and branch-lengths, but the
    optimizeinputonly setting is now a better way to go.


**Weight on model parameter mutations**

    The prior weight assigned to the class of model mutations. Note that setting
    this at 0.0 fixes the model during the run.


**Weight on branch-length parameter mutations**

    The prior weight assigned to branch-length mutations. The same procedure
    used above to determine the proportion of Topology:Model:Branch-Length
    mutations is also used to determine the relative proportions of the three
    types of topological mutations (NNI:SPR:limSPR), controlled by the following
    three weights. Note that the proportion of mutations applied to each of the
    model parameters is not user controlled.


**Weight on NNI topology changes**

    The prior weight assigned to NNI mutations.


**Weight on SPR topology changes**

    The prior weight assigned to random SPR mutations. For very large datasets
    it is often best to set this to 0.0, as random SPR mutations essentially
    never result in score increases.


**Weight on localized SPR topology changes**

    The prior weight assigned to SPR mutations with the reconnection branch
    limited to being a maximum of limsprrange branches away from where the
    branch was detached.


**Interval Length**

    The number of generations in each interval during which the number and
    benefit of each mutation type are stored.


**Number of intervals to store**

    The number of intervals to be stored. Thus, records of mutations are kept
    for the last (intervallength x intervalstostore) generations. Every
    intervallength generations the probabilities of the mutation types are
    updated by the scheme described above.


**Settings controlling mutation details**


**Max range for localized SPR topology changes**

    The maximum number of branches away from its original location that a branch
    may be reattached during a limited SPR move. Setting this too high (&gt; 10)
    can seriously degrade performance, but if you do so in conjunction with a
    large increase in genthreshfortopoterm you might end up with better scores.


**Mean number of branch lengths mutated**

    The mean of the binomial distribution from which the number of branch
    lengths mutated is drawn during a branch length mutation.


**Magnitude of branch-length mutations**

    The shape parameter of the gamma distribution (with a mean of 1.0) from
    which the branch-length multipliers are drawn for branch-length mutations.
    Larger numbers cause smaller changes in branch lengths. (Note that this has
    nothing to do with gamma rate heterogeneity.)


**Magnitude of model parameter mutations**

    The shape parameter of the gamma distribution (with a mean of 1.0) from
    which the model mutation multipliers are drawn for model parameters
    mutations. Larger numbers cause smaller changes in model parameters. (Note
    that this has nothing to do with gamma rate heterogeneity.)


**Relative weight assigned to already attempted branch swaps**

    With version 0.95 and later, GARLI keeps track of which branch swaps it has
    attempted on the current best tree. Because swaps are applied randomly, it
    is possible that some swaps are tried twice before others are tried at all.
    This option allows the program to bias the swaps applied toward those that
    have not yet been attempted. Each swap is assigned a relative weight
    depending on the number of times that it has been attempted on the current
    best tree. This weight is equal to (uniqueswapbias) raised to the (# times
    swap attempted) power. In other words, a value of 0.5 means that swaps that
    have already been tried once will be half as likely as those not yet
    attempted, swaps attempted twice will be ¼ as likely, etc. A value of 1.0
    means no biasing. Use of this option may allow the use of somewhat larger
    values of limsprrange.


**Relative weight assigned to branch swaps based on locality**

    This option is similar to uniqueswapbias, except that it biases toward
    certain swaps based on the topological distance between the initial and
    rearranged trees. The distance is measured as in the limsprrange, and is
    half the Robinson-Foulds distance between the trees. As with
    uniqueswapbias, distanceswapbias assigns a relative weight to each potential
    swap. In this case the weight is (distanceswapbias) raised to the
    (reconnection distance - 1) power. Thus, given a value of 0.5, the weight of
    an NNI is 1.0, the weight of an SPR with distance 2 is 0.5, with distance 3
    is 0.25, etc. Note that values less than 1.0 bias toward more localized
    swaps, while values greater than 1.0 bias toward more extreme swaps. Also
    note that this bias is only applied to limSPR rearrangements. Be careful in
    setting this, as extreme values can have a very large effect.

</help>
</tool>
author	malex
date	Fri, 02 Dec 2011 17:06:40 -0500
parents
children	681e9bb51cc4