Mercurial > repos > miller-lab > genome_diversity
diff average_fst.xml @ 14:8ae67e9fb6ff
Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
author | miller-lab |
---|---|
date | Fri, 28 Sep 2012 11:35:56 -0400 |
parents | |
children | f04f40a36cc8 |
line wrap: on
line diff
<!-- Galaxy tool wrapper average_fst.xml (added as a new file of 113 lines, Fri Sep 28 11:35:56 2012 -0400). -->
<tool id="gd_average_fst" name="Overall FST" version="1.0.0">
  <description>: Estimate the relative fixation index between two populations</description>

  <!--
    Command line for average_fst.py. Positional arguments, in order:
    SNP table, population 1 file, population 2 file, data source choice,
    minimum value for that data source, discard_fixed, biased, output file,
    number of shuffles and the randomization individuals file
    ("0" and "/dev/null" when randomization is switched off), followed by
    one "column:name" argument per individual listed in the SNP table metadata.
  -->
  <command interpreter="python">
    average_fst.py "$input" "$p1_input" "$p2_input" "$data_source.ds_choice" "$data_source.min_value" "$discard_fixed" "$biased" "$output"
    #if $use_randomization.ur_choice == '1'
      "$use_randomization.shuffles" "$use_randomization.p0_input"
    #else
      "0" "/dev/null"
    #end if
    #for $indiv_name, $indiv_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
      #set $col_and_name = '%s:%s' % ($indiv_col, $indiv_name)
      "$col_and_name"
    #end for
  </command>

  <inputs>
    <!-- The SNP table and the two populations being compared. -->
    <param name="input" type="data" format="gd_snp" label="SNP table"/>
    <param name="p1_input" type="data" format="gd_indivs" label="Population 1 individuals"/>
    <param name="p2_input" type="data" format="gd_indivs" label="Population 2 individuals"/>

    <!-- Both branches expose a parameter named min_value; only its label differs. -->
    <conditional name="data_source">
      <param name="ds_choice" type="select" format="integer" label="Data source">
        <option value="0" selected="true">sequence coverage and ..</option>
        <option value="1">estimated genotype and ..</option>
      </param>
      <when value="0">
        <param name="min_value" type="integer" min="1" value="1" label="Minimum total read count for a population"/>
      </when>
      <when value="1">
        <param name="min_value" type="integer" min="1" value="1" label="Minimum individual genotype quality"/>
      </when>
    </conditional>

    <param name="discard_fixed" type="select" label="Apparently fixed SNPs">
      <option value="0">Retain SNPs that appear fixed in the two populations</option>
      <option value="1" selected="true">Delete SNPs that appear fixed in the two populations</option>
    </param>

    <param name="biased" type="select" label="FST estimator">
      <option value="0" selected="true">Wright's original definition</option>
      <option value="1">Weir's unbiased estimator</option>
    </param>

    <!-- The shuffle count and extra individuals are only requested when randomization is "Yes". -->
    <conditional name="use_randomization">
      <param name="ur_choice" type="select" format="integer" label="Use randomization">
        <option value="0" selected="true">No</option>
        <option value="1">Yes</option>
      </param>
      <when value="0"/>
      <when value="1">
        <param name="shuffles" type="integer" min="0" value="0" label="Shuffles"/>
        <param name="p0_input" type="data" format="gd_indivs" label="Individuals for randomization"/>
      </when>
    </conditional>
  </inputs>

  <outputs>
    <data name="output" format="txt"/>
  </outputs>

  <!-- Single functional test: read-count data source, minimum 3, fixed SNPs deleted, no randomization. -->
  <tests>
    <test>
      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp"/>
      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs"/>
      <param name="p2_input" value="test_in/b.gd_indivs" ftype="gd_indivs"/>
      <param name="ds_choice" value="0"/>
      <param name="min_value" value="3"/>
      <param name="discard_fixed" value="1"/>
      <param name="biased" value="0"/>
      <param name="ur_choice" value="0"/>
      <output name="output" file="test_out/average_fst/average_fst.txt"/>
    </test>
  </tests>

  <help>

**What it does**

The user specifies a SNP table and two "populations" of individuals,
both previously defined using the Specify Individuals tool.
No individual can be in both populations. Other choices are as follows.

Data source. The allele frequencies of a SNP in the two populations can be
estimated either by the total number of reads of each allele, or by adding
the frequencies inferred from genotypes of individuals in the populations.

After specifying the data source, the user sets lower bounds on amount
of data required at a SNP. For estimating the Fst using read counts,
the bound is the minimum count of reads of the two alleles in a population.
For estimations based on genotype, the bound is the minimum reported genotype
quality per individual. SNPs not meeting these lower bounds are ignored.

The user specifies whether SNPs where both populations appear to be fixed
for the same allele should be retained or discarded.

The user chooses which definition of Fst to use: Wright's original definition
or Weir's unbiased estimator.

Finally, the user decides whether to use randomizations. If so, then the
user specifies how many randomly generated population pairs (retaining
the numbers of individuals of the originals) to generate, as well as the
"population" of additional individuals (not in the first two populations)
that can be used in the randomization process.

The program prints the average Fst for the original populations and the
number of SNPs used to compute it. If randomizations were requested,
it prints the average Fst for each randomly generated population pair,
ending with a summary that includes the maximum and average value, and the
highest-scoring population pair.

  </help>
</tool>