diff average_fst.xml @ 12:4b6590dd7250

Uploaded
author miller-lab
date Wed, 12 Sep 2012 17:10:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/average_fst.xml	Wed Sep 12 17:10:26 2012 -0400
@@ -0,0 +1,112 @@
+<tool id="gd_average_fst" name="Average FST" version="1.0.0">
+  <description>of two populations</description>
+
+  <command interpreter="python">
+    average_fst.py "${input}" "${p1_input}" "${p2_input}" "${data_source.ds_choice}" "${data_source.min_value}" "${discard_fixed}" "${biased}" "${output}"
+    #if $use_randomization.ur_choice == '1'
+      "${use_randomization.shuffles}" "${use_randomization.p0_input}"
+    #else
+      "0" "/dev/null"
+    #end if
+    ## Append one "column:name" argument per individual listed in the SNP table's metadata.
+    #for $indiv_name, $indiv_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
+        "${indiv_col}:${indiv_name}"
+    #end for
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp" label="SNP table" />
+    <param name="p1_input" type="data" format="gd_indivs" label="Population 1 individuals" />
+    <param name="p2_input" type="data" format="gd_indivs" label="Population 2 individuals" />
+    <!-- Both branches define "min_value", so the command can always pass $data_source.min_value; only the label differs. -->
+    <conditional name="data_source">
+      <param name="ds_choice" type="select" label="Data source">
+        <option value="0" selected="true">sequence coverage and ..</option>
+        <option value="1">estimated genotype and ..</option>
+      </param>
+      <when value="0">
+        <param name="min_value" type="integer" min="1" value="1" label="Minimum total read count for a population" />
+      </when>
+      <when value="1">
+        <param name="min_value" type="integer" min="1" value="1" label="Minimum individual genotype quality" />
+      </when>
+    </conditional>
+    <!-- Passed to average_fst.py as "0" (retain) or "1" (delete, the default). -->
+    <param name="discard_fixed" type="select" label="Apparently fixed SNPs">
+      <option value="0">Retain SNPs that appear fixed in the two populations</option>
+      <option value="1" selected="true">Delete SNPs that appear fixed in the two populations</option>
+    </param>
+    <!-- Passed to average_fst.py as "0" (Wright, the default) or "1" (Weir). -->
+    <param name="biased" type="select" label="FST estimator">
+      <option value="0" selected="true">Wright's original definition</option>
+      <option value="1">Weir's unbiased estimator</option>
+    </param>
+    <!-- When this is "0" the command passes "0" and /dev/null in place of the shuffle count and the extra individuals file. -->
+    <conditional name="use_randomization">
+      <param name="ur_choice" type="select" label="Use randomization">
+        <option value="0" selected="true">No</option>
+        <option value="1">Yes</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="shuffles" type="integer" min="0" value="0" label="Shuffles" />
+        <param name="p0_input" type="data" format="gd_indivs" label="Individuals for randomization" />
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="txt" /> <!-- Plain-text result; its path is the "$output" argument on the average_fst.py command line. -->
+  </outputs>
+
+  <tests>
+    <test> <!-- Data source option 0 with min_value 3, fixed SNPs deleted, Wright's definition, randomization off. -->
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
+      <param name="p2_input" value="test_in/b.gd_indivs" ftype="gd_indivs" />
+      <param name="ds_choice" value="0" />
+      <param name="min_value" value="3" />
+      <param name="discard_fixed" value="1" />
+      <param name="biased" value="0" />
+      <param name="ur_choice" value="0" />
+      <output name="output" file="test_out/average_fst/average_fst.txt" />
+    </test>
+  </tests>
+
+  <help>
+**What it does**
+
+The user specifies a SNP table and two "populations" of individuals,
+both previously defined using the Galaxy tool to select individuals from
+a SNP table.  No individual can be in both populations.  Other choices are
+as follows.
+
+Data source.  The allele frequencies of a SNP in the two populations can be
+estimated either by the total number of reads of each allele, or by adding
+the frequencies inferred from genotypes of individuals in the populations.
+
+After specifying the data source, the user sets a lower bound on the amount
+of data required at a SNP.  For estimating the Fst using read counts,
+the bound is the minimum count of reads of the two alleles in a population.
+For estimations based on genotype, the bound is the minimum reported genotype
+quality per individual.  SNPs not meeting this lower bound are ignored.
+
+The user specifies whether SNPs where both populations appear to be fixed
+for the same allele should be retained or discarded.
+
+The user chooses which definition of Fst to use: Wright's original definition
+or Weir's unbiased estimator.
+
+Finally, the user decides whether to use randomizations.  If so, then the
+user specifies how many random population pairs (retaining
+the numbers of individuals of the originals) to generate, as well as the
+"population" of additional individuals (not in the first two populations)
+that can be used in the randomization process.
+
+The program prints the average Fst for the original populations and the
+number of SNPs used to compute it.  If randomizations were requested,
+it prints the average Fst for each randomly generated population pair,
+ending with a summary that includes the maximum and average value, and the
+highest-scoring population pair.
+  </help>
+</tool>