comparison phylogenetic_tree.xml @ 0:2c498d40ecde

Uploaded
author miller-lab
date Mon, 09 Apr 2012 12:03:06 -0400
parents
children 7a94f11fe71f
comparison
equal deleted inserted replaced
-1:000000000000 0:2c498d40ecde
<tool id="gd_phylogenetic_tree" name="Phylogenetic" version="1.0.0">
  <description>tree</description>

  <!--
    Builds the command line for phylogenetic_tree.py. Positional arguments, in order:
      1. the SNP table
      2. "all_individuals", or the dataset listing the individuals of one population
      3. the HTML output dataset
      4. the directory holding that output's extra files
      5. minimum coverage
      6. minimum quality
      7. "none", or the dbkey of the SNP table. "none" is sent when "Include reference
         sequence" is No, or when the table's scaffold/ref and pos/rPos metadata values
         are equal (presumably meaning the table has no separate reference; TODO confirm).
      8. the data source code (0 or 1)
      9. "" when every drawing option is at its default, otherwise "-" followed by one
         letter per non-default drawing option (d, s, b, v)
      10 and up. one "column:name" argument per individual recorded in the table metadata

    NOTE(review): the final loop reads the individual lists through
    $input.dataset.metadata while the other lookups use $input.metadata. Left as is;
    presumably deliberate so the values can be iterated as lists. Confirm before unifying.
  -->
  <command interpreter="python">
    phylogenetic_tree.py "$input"
    #if $individuals.choice == '0'
      "all_individuals"
    #else if $individuals.choice == '1'
      "$p1_input"
    #end if
    "$output" "$output.extra_files_path" "$minimum_coverage" "$minimum_quality"
    #if ((str($input.metadata.scaffold) == str($input.metadata.ref)) and (str($input.metadata.pos) == str($input.metadata.rPos))) or (str($include_reference) == '0')
      "none"
    #else
      "$input.metadata.dbkey"
    #end if
    "$data_source"
    #set $draw_tree_options = ''.join(str(x) for x in [$branch_style, $scale_style, $length_style, $layout_style])
    #if $draw_tree_options == ''
      ""
    #else
      "-$draw_tree_options"
    #end if
    #for $individual_name, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
      #set $arg = '%s:%s' % ($individual_col, $individual_name)
      "$arg"
    #end for
  </command>

  <inputs>
    <param name="input" type="data" format="wsf" label="SNP table" />

    <conditional name="individuals">
      <param name="choice" type="select" label="Individuals">
        <option value="0" selected="true">All</option>
        <option value="1">Individuals in a population</option>
      </param>
      <when value="0" />
      <when value="1">
        <param name="p1_input" type="data" format="ind" label="Population individuals" />
      </when>
    </conditional>

    <!--
      NOTE(review): both thresholds default to 0 although their help text says they
      cannot both be 0. Defaults are left unchanged here; confirm how the script
      handles a run with the default values.
    -->
    <param name="minimum_coverage"
           type="integer"
           min="0"
           value="0"
           label="Minimum coverage"
           help="Note: Minimum coverage and Minimum quality cannot both be 0" />

    <param name="minimum_quality"
           type="integer"
           min="0"
           value="0"
           label="Minimum quality"
           help="Note: Minimum coverage and Minimum quality cannot both be 0" />

    <param name="include_reference" type="select" label="Include reference sequence">
      <option value="1" selected="true">Yes</option>
      <option value="0">No</option>
    </param>

    <param name="data_source" type="select" label="Data source">
      <option value="0" selected="true">sequence coverage</option>
      <option value="1">estimated genotype</option>
    </param>

    <!--
      Drawing options. Each default has an empty value; each alternative contributes
      one letter to the flag string assembled in the command template.
    -->
    <param name="branch_style" type="select" display="radio" label="Branch type">
      <option value="" selected="true">square</option>
      <option value="d">diagonal</option>
    </param>

    <param name="scale_style" type="select" display="radio" label="Draw branches to scale">
      <option value="" selected="true">yes</option>
      <option value="s">no</option>
    </param>

    <param name="length_style" type="select" display="radio" label="Show branch lengths">
      <option value="" selected="true">yes</option>
      <option value="b">no</option>
    </param>

    <param name="layout_style" type="select" display="radio" label="Tree layout">
      <option value="" selected="true">horizontal</option>
      <option value="v">vertical</option>
    </param>
  </inputs>

  <outputs>
    <data name="output" format="html" />
  </outputs>

  <tests>
    <test>
      <param name="input" value="test_in/sample.wsf" ftype="wsf" />
      <param name="choice" value="0" />
      <param name="minimum_coverage" value="3" />
      <param name="minimum_quality" value="30" />
      <param name="data_source" value="0" />
      <param name="branch_style" value="" />
      <param name="scale_style" value="" />
      <param name="length_style" value="" />
      <param name="layout_style" value="" />
      <output name="output"
              file="test_out/phylogenetic_tree/phylogenetic_tree.html"
              ftype="html"
              compare="diff"
              lines_diff="2">
        <extra_files type="file"
                     name="distance_matrix.phylip"
                     value="test_out/phylogenetic_tree/distance_matrix.phylip" />
        <extra_files type="file"
                     name="informative_snps.txt"
                     value="test_out/phylogenetic_tree/informative_snps.txt" />
        <extra_files type="file"
                     name="mega_distance_matrix.txt"
                     value="test_out/phylogenetic_tree/mega_distance_matrix.txt" />
        <extra_files type="file"
                     name="phylogenetic_tree.newick"
                     value="test_out/phylogenetic_tree/phylogenetic_tree.newick" />
        <extra_files type="file"
                     name="tree.pdf"
                     value="test_out/phylogenetic_tree/tree.pdf"
                     compare="sim_size"
                     delta="1000" />
      </output>
    </test>
  </tests>

  <!--
    The help body is reStructuredText and is kept at column 0 because its indentation
    is significant: each option is a definition-list term with an indented definition.

    NOTE(review): the entries "Minimum number of informative SNPs" and "Output format"
    describe options that have no matching input in this tool definition. They are kept
    verbatim; confirm whether they should be removed or the inputs restored.
  -->
  <help>
**What it does**

This tool uses a SNP table to determine a kind of "genetic distance" between
each pair of individuals. Optionally, that information can be used to
produce a tree-shaped figure that depicts how the individuals are related,
either as a text file in a common format, called NEWICK, or as a picture.
The user specifies the following inputs to the tool.

SNP table

Individuals
    By default, all individuals are included in the analysis; an option
    is to analyze only a subset of individuals that has been specified
    using the tool to "Select individuals from a SNP table".

Minimum coverage
    For each pair of individuals, the tool looks for informative SNPs, i.e.,
    where the sequence data for both individuals is adequate according to
    some criterion. Specifying, say, 7 for this option instructs the tool
    to consider only SNPs with coverage at least 7 in both individuals
    when estimating their "genetic distance".

Minimum quality
    Specifying, say, 37 for this option instructs the tool to consider
    only SNPs with SAMtools quality value at least 37 in both individuals
    when estimating their "genetic distance".

Minimum number of informative SNPs
    This option instructs the tool to terminate execution if at least one
    pair of individuals does not have a required number of informative SNPs.

Include reference sequence
    For SNP tables with a reference sequence, the user can ask that the
    reference be indicated in the tree, to help with rooting it. If the
    SNP table has no reference sequence, this option has no effect.

Data source
    The genetic distance between two individuals at a given SNP can
    be estimated two ways. One method is to use the absolute value of
    the difference in the frequency of the first allele (equivalently: the
    second allele). For instance, if the first individual has 5 reads of
    each allele and the second individual has respectively 3 and 6 reads,
    then the frequencies are 1/2 and 1/3, giving a distance 1/6 at that
    SNP. The other approach is to use the SAMtools genotypes to estimate
    the difference in the number of occurrences of the first allele.
    For instance, if the two genotypes are 2 and 1, i.e., the individuals
    are estimated to have respectively 2 and 1 occurrences of the first
    allele at this location, then the distance is 1 (the absolute value
    of the difference of the two numbers).

Branch type, Draw branches to scale, Show branch lengths, Tree layout
    These options control how the picture of the tree is drawn: with square
    or diagonal branches, with or without branches drawn to scale, with or
    without branch lengths shown, and in a horizontal or vertical layout.

Output format
    There are three options, as described above.

**Acknowledgments**

To convert the distance matrix to a NEWICK-formatted tree, we use the QuickTree program, downloaded from: http://www.sanger.ac.uk/resources/software/quicktree/

To draw the tree, we use the program draw_tree, downloaded from: http://compgen.bscb.cornell.edu/phast/
  </help>
</tool>