Mercurial > repos > miller-lab > genome_diversity
comparison average_fst.xml @ 24:248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Tue, 28 May 2013 16:24:19 -0400 |
parents | 95a05c1ef5d5 |
children | 8997f2ca8c7a |
comparison
equal
deleted
inserted
replaced
23:66a183c44dd5 | 24:248b06e86022 |
---|---|
1 <tool id="gd_average_fst" name="Overall FST" version="1.2.0"> | 1 <tool id="gd_average_fst" name="Overall FST" version="1.3.0"> |
2 <description>: Estimate the relative fixation index between two populations</description> | 2 <description>: Estimate the relative fixation index between two populations</description> |
3 | 3 |
4 <command interpreter="python"> | 4 <command interpreter="python"> |
5 average_fst.py "$input" "$p1_input" "$p2_input" "$data_source.ds_choice" "$data_source.min_value" "$discard_fixed" "$output" | 5 average_fst.py "$input" "$p1_input" "$p2_input" |
6 #if $use_randomization.ur_choice == '1' | 6 #if $input_type.choice == '0' |
7 "gd_snp" "$input_type.data_source.choice" | |
8 #if $input_type.data_source.choice == '0' | |
9 "$input_type.data_source.min_value" | |
10 #else if $input_type.data_source.choice == '1' | |
11 "1" | |
12 #end if | |
13 #else if $input_type.choice == '1' | |
14 "gd_genotype" "1" "1" | |
15 #end if | |
16 "$discard_fixed" "$output" | |
17 #if $use_randomization.choice == '0' | |
18 "0" "/dev/null" | |
19 #else if $use_randomization.choice == '1' | |
7 "$use_randomization.shuffles" "$use_randomization.p0_input" | 20 "$use_randomization.shuffles" "$use_randomization.p0_input" |
8 #else | |
9 "0" "/dev/null" | |
10 #end if | 21 #end if |
11 #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) | 22 #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) |
12 #set $arg = '%s:%s' % ($individual_col, $individual) | 23 #set $arg = '%s:%s' % ($individual_col, $individual) |
13 "$arg" | 24 "$arg" |
14 #end for | 25 #end for |
15 </command> | 26 </command> |
16 | 27 |
17 <inputs> | 28 <inputs> |
18 <param name="input" type="data" format="gd_snp" label="SNP dataset" /> | 29 <conditional name="input_type"> |
30 <param name="choice" type="select" format="integer" label="Input format"> | |
31 <option value="0" selected="true">gd_snp</option> | |
32 <option value="1">gd_genotype</option> | |
33 </param> | |
34 | |
35 <when value="0"> | |
36 <param name="input" type="data" format="gd_snp" label="SNP dataset" /> | |
37 | |
38 <conditional name="data_source"> | |
39 <param name="choice" type="select" format="integer" label="Frequency metric"> | |
40 <option value="0">sequence coverage</option> | |
41 <option value="1" selected="true">estimated genotype</option> | |
42 </param> | |
43 | |
44 <when value="0"> | |
45 <param name="min_value" type="integer" min="1" value="1" label="Minimum total read count for a population" /> | |
46 </when> | |
47 | |
48 <when value="1"/> | |
49 </conditional> | |
50 </when> | |
51 | |
52 <when value="1"> | |
53 <param name="input" type="data" format="gd_genotype" label="Genotype dataset" /> | |
54 </when> | |
55 </conditional> | |
56 | |
19 <param name="p1_input" type="data" format="gd_indivs" label="Population 1 individuals" /> | 57 <param name="p1_input" type="data" format="gd_indivs" label="Population 1 individuals" /> |
20 <param name="p2_input" type="data" format="gd_indivs" label="Population 2 individuals" /> | 58 <param name="p2_input" type="data" format="gd_indivs" label="Population 2 individuals" /> |
21 | |
22 <conditional name="data_source"> | |
23 <param name="ds_choice" type="select" format="integer" label="Frequency metric"> | |
24 <option value="0">sequence coverage</option> | |
25 <option value="1" selected="true">estimated genotype</option> | |
26 </param> | |
27 <when value="0"> | |
28 <param name="min_value" type="integer" min="1" value="1" label="Minimum total read count for a population" /> | |
29 </when> | |
30 <when value="1"> | |
31 <param name="min_value" type="integer" min="1" value="1" label="Minimum individual genotype quality" /> | |
32 </when> | |
33 </conditional> | |
34 | 59 |
35 <param name="discard_fixed" type="select" label="For SNPs that appear to be fixed across both populations"> | 60 <param name="discard_fixed" type="select" label="For SNPs that appear to be fixed across both populations"> |
36 <option value="0">retain</option> | 61 <option value="0">retain</option> |
37 <option value="1" selected="true">delete</option> | 62 <option value="1" selected="true">delete</option> |
38 </param> | 63 </param> |
39 | 64 |
40 <conditional name="use_randomization"> | 65 <conditional name="use_randomization"> |
41 <param name="ur_choice" type="select" format="integer" label="Use randomization"> | 66 <param name="choice" type="select" format="integer" label="Use randomization"> |
42 <option value="0" selected="true">no</option> | 67 <option value="0" selected="true">no</option> |
43 <option value="1">yes</option> | 68 <option value="1">yes</option> |
44 </param> | 69 </param> |
45 <when value="0" /> | 70 <when value="0" /> |
46 <when value="1"> | 71 <when value="1"> |
60 <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" /> | 85 <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" /> |
61 <param name="p2_input" value="test_in/b.gd_indivs" ftype="gd_indivs" /> | 86 <param name="p2_input" value="test_in/b.gd_indivs" ftype="gd_indivs" /> |
62 <param name="ds_choice" value="0" /> | 87 <param name="ds_choice" value="0" /> |
63 <param name="min_value" value="3" /> | 88 <param name="min_value" value="3" /> |
64 <param name="discard_fixed" value="1" /> | 89 <param name="discard_fixed" value="1" /> |
65 <param name="ur_choice" value="0" /> | 90 <param name="choice" value="0" /> |
66 <output name="output" file="test_out/average_fst/average_fst.txt" /> | 91 <output name="output" file="test_out/average_fst/average_fst.txt" /> |
67 </test> | 92 </test> |
68 </tests> | 93 </tests> |
69 | 94 |
70 <help> | 95 <help> |
71 | 96 |
72 **Dataset formats** | 97 **Dataset formats** |
73 | 98 |
74 The input datasets are in gd_snp_ and gd_indivs_ formats. | 99 The input datasets are in gd_snp_, gd_genotype_, and gd_indivs_ formats. |
75 The output dataset is in text_ format. (`Dataset missing?`_) | 100 The output dataset is in text_ format. (`Dataset missing?`_) |
76 | 101 |
77 .. _gd_snp: ./static/formatHelp.html#gd_snp | 102 .. _gd_snp: ./static/formatHelp.html#gd_snp |
103 .. _gd_genotype: ./static/formatHelp.html#gd_genotype | |
78 .. _gd_indivs: ./static/formatHelp.html#gd_indivs | 104 .. _gd_indivs: ./static/formatHelp.html#gd_indivs |
79 .. _text: ./static/formatHelp.html#text | 105 .. _text: ./static/formatHelp.html#text |
80 .. _Dataset missing?: ./static/formatHelp.html | 106 .. _Dataset missing?: ./static/formatHelp.html |
81 | 107 |
82 ----- | 108 ----- |
83 | 109 |
84 **What it does** | 110 **What it does** |
85 | 111 |
86 The user specifies a SNP table and two "populations" of individuals, both previously defined using the Galaxy tool to specify individuals from a SNP table. No individual can be in both populations. Other choices are as follows. | 112 The user specifies a SNP table and two "populations" of individuals, both previously defined using the Galaxy tool to specify individuals from a SNP table. No individual can be in both populations. Other choices are as follows. |
87 | 113 |
88 Frequency metric. The allele frequencies of a SNP in the two populations can be estimated either by the total number of reads of each allele, or by adding the frequencies inferred from genotypes of individuals in the populations. | 114 Frequency metric. The allele frequencies of a SNP in the two populations can be estimated either by the total number of reads of each allele (available only when the table is in gd_snp format, not gd_genotype), or by adding the frequencies inferred from genotypes of individuals in the populations. |
89 | 115 |
90 After specifying the frequency metric, the user sets lower bounds on the amount of data required at a SNP. For estimating the FST using read counts, the bound is the minimum count of reads of the two alleles in a population. For estimations based on genotype, the bound is the minimum reported genotype quality per individual. SNPs not meeting these lower bounds are ignored. | 116 After specifying the frequency metric, the user sets lower bounds on the amount of data required at a SNP. For estimating the FST using read counts, the bound is the minimum count of reads of the two alleles in a population. For estimations based on genotype, the bound is the minimum reported genotype quality per individual. SNPs not meeting these lower bounds are ignored. |
91 | 117 |
92 The user specifies whether SNPs where both populations appear to be fixed for the same allele should be retained or discarded. | 118 The user specifies whether SNPs where both populations appear to be fixed for the same allele should be retained or discarded. |
93 | 119 |