view phylogenetic_tree.xml @ 7:e29f4d801bb0

change wsf -> snp; wpf -> sap
author Richard Burhans <burhans@bx.psu.edu>
date Wed, 18 Apr 2012 11:12:21 -0400
parents 7a94f11fe71f
children 9b92372de9f6
line wrap: on
line source

<tool id="gd_phylogenetic_tree" name="Phylogenetic" version="1.0.0">
  <description>tree</description>

  <command interpreter="python">
    ## Positional arguments passed to phylogenetic_tree.py, in order:
    ##   SNP table, individuals selection, output HTML, output extra-files path,
    ##   minimum coverage, minimum quality, reference dbkey (or "none"),
    ##   data source, draw-tree flags, then one "column:name" per individual.
    phylogenetic_tree.py "$input"
    #if $individuals.choice == '0'
      "all_individuals"
    #else if $individuals.choice == '1'
      ## p1_input is declared inside the "individuals" conditional, so it is
      ## referenced by its fully qualified name.
      "$individuals.p1_input"
    #end if
    "$output" "$output.files_path" "$minimum_coverage" "$minimum_quality"
    ## Pass "none" instead of the dbkey when the scaffold/ref and pos/rPos
    ## metadata values coincide, or when the user chose not to include the reference.
    #if ((str($input.metadata.scaffold) == str($input.metadata.ref)) and (str($input.metadata.pos) == str($input.metadata.rPos))) or (str($include_reference) == '0')
        "none"
    #else
        "$input.metadata.dbkey"
    #end if
    "$data_source"
    ## Each style parameter contributes either an empty string or a single flag
    ## letter; the letters are concatenated behind one leading dash.
    #set $draw_tree_options = ''.join(str(x) for x in [$branch_style, $scale_style, $length_style, $layout_style])
    #if $draw_tree_options == ''
        ""
    #else
        "-$draw_tree_options"
    #end if
    ## One "column:name" argument for every individual recorded in the table's metadata.
    #for $individual_name, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
        #set $arg = '%s:%s' % ($individual_col, $individual_name)
        "$arg"
    #end for
  </command>

  <inputs>
    <!-- SNP table to analyze; the command template also reads its metadata. -->
    <param name="input" type="data" format="snp" label="SNP table" />

    <!-- Use every individual, or only those listed in a population dataset. -->
    <conditional name="individuals">
      <param name="choice" type="select" label="Individuals">
        <option value="0" selected="true">All</option>
        <option value="1">Individuals in a population</option>
      </param>
      <when value="0" />
      <when value="1">
        <param name="p1_input" type="data" format="ind" label="Population individuals" />
      </when>
    </conditional>

    <param name="minimum_coverage" type="integer" min="0" value="0" label="Minimum coverage" help="Note: Minimum coverage and Minimum quality cannot both be 0" />

    <param name="minimum_quality" type="integer" min="0" value="0" label="Minimum quality" help="Note: Minimum coverage and Minimum quality cannot both be 0" />

    <!-- Select parameters take no "format" attribute; it only applies to data parameters. -->
    <param name="include_reference" type="select" label="Include reference sequence">
      <option value="1" selected="true">Yes</option>
      <option value="0">No</option>
    </param>

    <param name="data_source" type="select" label="Data source">
      <option value="0" selected="true">sequence coverage</option>
      <option value="1">estimated genotype</option>
    </param>

    <!-- Tree drawing options: each value is either empty (the default) or a single
         flag letter that the command template concatenates into one option string. -->
    <param name="branch_style" type="select" display="radio" label="Branch type">
      <option value="" selected="true">square</option>
      <option value="d">diagonal</option>
    </param>

    <param name="scale_style" type="select" display="radio" label="Draw branches to scale">
      <option value="" selected="true">yes</option>
      <option value="s">no</option>
    </param>

    <param name="length_style" type="select" display="radio" label="Show branch lengths">
      <option value="" selected="true">yes</option>
      <option value="b">no</option>
    </param>

    <param name="layout_style" type="select" display="radio" label="Tree layout">
      <option value="" selected="true">horizontal</option>
      <option value="v">vertical</option>
    </param>
  </inputs>

  <outputs>
    <!-- Single HTML dataset; the command also receives this dataset's files_path. -->
    <data name="output" format="html" />
  </outputs>

  <tests>
    <!-- All individuals, minimum coverage 3 and minimum quality 30, sequence-coverage
         distances, default drawing options. -->
    <test>
      <param name="input" value="test_in/sample.snp" ftype="snp" />
      <param name="choice" value="0" />
      <param name="minimum_coverage" value="3" />
      <param name="minimum_quality" value="30" />
      <!-- Pinned explicitly so the expected output does not depend on the parameter's default. -->
      <param name="include_reference" value="1" />
      <param name="data_source" value="0" />
      <param name="branch_style" value="" />
      <param name="scale_style" value="" />
      <param name="length_style" value="" />
      <param name="layout_style" value="" />
      <output name="output" file="test_out/phylogenetic_tree/phylogenetic_tree.html" ftype="html" compare="diff" lines_diff="2">
        <extra_files type="file" name="distance_matrix.phylip" value="test_out/phylogenetic_tree/distance_matrix.phylip" />
        <extra_files type="file" name="informative_snps.txt" value="test_out/phylogenetic_tree/informative_snps.txt" />
        <extra_files type="file" name="mega_distance_matrix.txt" value="test_out/phylogenetic_tree/mega_distance_matrix.txt" />
        <extra_files type="file" name="phylogenetic_tree.newick" value="test_out/phylogenetic_tree/phylogenetic_tree.newick" />
        <!-- The PDF is compared by size only, within a tolerance of 1000. -->
        <extra_files type="file" name="tree.pdf" value="test_out/phylogenetic_tree/tree.pdf" compare="sim_size" delta="1000" />
      </output>
    </test>
  </tests>

  <help>
**What it does**

This tool uses a SNP table to determine a kind of "genetic distance" between
each pair of individuals.  Optionally, that information can be used to
produce a tree-shaped figure that depicts how the individuals are related,
either as a text file in a common format, called NEWICK, or as a picture.
The user specifies the following inputs to the tool.

SNP table

Individuals
  By default, all individuals are included in the analysis; an option
  is to analyze only a subset of individuals that has been specified
  using the tool to "Select individuals from a SNP table".

Minimum coverage
  For each pair of individuals, the tool looks for informative SNPs, i.e.,
  where the sequence data for both individuals is adequate according to
  some criterion.  Specifying, say, 7 for this option instructs the tool
  to consider only SNPs with coverage at least 7 in both individuals
  when estimating their "genetic distance".

Minimum quality
  Specifying, say, 37 for this option instructs the tool to consider
  only SNPs with SAMtools quality value at least 37 in both individuals
  when estimating their "genetic distance".

Minimum number of informative SNPs
  This option instructs the tool to terminate execution if at least one
  pair of individuals does not have a required number of informative SNPs.

Include reference sequence
  For SNP tables with a reference sequence, the user can ask that the
  reference be indicated in the tree, to help with rooting it.  If the
  SNP table has no reference sequence, this option has no effect.

Data source
  The genetic distance between two individuals at a given SNP can
  be estimated in two ways.  One method is to use the absolute value of
  the difference in the frequency of the first allele (equivalently: the
  second allele).  For instance, if the first individual has 5 reads of
  each allele and the second individual has respectively 3 and 6 reads,
  then the frequencies are 1/2 and 1/3, giving a distance of 1/6 at that
  SNP.  The other approach is to use the SAMtools genotypes to estimate
  the difference in the number of occurrences of the first allele.
  For instance, if the two genotypes are 2 and 1, i.e., the individuals
  are estimated to have respectively 2 and 1 occurrences of the first
  allele at this location, then the distance is 1 (the absolute value
  of the difference of the two numbers).

Output format
  There are three options, as described above.

**Acknowledgments**

To convert the distance matrix to a NEWICK-formatted tree, we use the QuickTree program, downloaded from: http://www.sanger.ac.uk/resources/software/quicktree/

To draw the tree, we use the program draw_tree, downloaded from: http://compgen.bscb.cornell.edu/phast/
  </help>
</tool>