diff add_fst_column.xml @ 17:a3af29edcce2

Uploaded Miller Lab Devshed version a51c894f5bed
author miller-lab
date Fri, 28 Sep 2012 11:57:18 -0400
parents 8ae67e9fb6ff
children f04f40a36cc8
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/add_fst_column.xml	Fri Sep 28 11:57:18 2012 -0400
@@ -0,0 +1,91 @@
+<tool id="gd_add_fst_column" name="Per-SNP FSTs" version="1.0.0">
+  <description>: Compute a fixation index score for each SNP</description>
+  <!-- Invokes add_fst_column.py with the inputs, options and output path, then appends one "column:name" argument for each individual in the SNP table's metadata. -->
+  <command interpreter="python">
+    add_fst_column.py "$input" "$p1_input" "$p2_input" "$data_source" "$min_reads" "$min_qual" "$retain" "$discard_fixed" "$biased" "$output"
+    #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
+        #set $arg = '%s:%s' % ($individual_col, $individual)
+        "$arg"
+    #end for
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp" label="SNP table" />
+    <param name="p1_input" type="data" format="gd_indivs" label="Population 1 individuals" />
+    <param name="p2_input" type="data" format="gd_indivs" label="Population 2 individuals" />
+    <!-- How allele frequencies are estimated; the selected value ("0" or "1") is passed to the script as-is. -->
+    <param name="data_source" type="select" label="Data source">
+      <option value="0" selected="true">sequence coverage</option>
+      <option value="1">estimated genotype</option>
+    </param>
+    <!-- Lower bounds on the data required at a SNP; both default to 0. -->
+    <param name="min_reads" type="integer" min="0" value="0" label="Minimum total read count for a population" />
+    <param name="min_qual" type="integer" min="0" value="0" label="Minimum individual genotype quality" />
+    <!-- Per the help text: what happens to a SNP that violates the lower bound (row skipped, or kept with FST = -1). -->
+    <param name="retain" type="select" label="Special treatment">
+      <option value="0" selected="true">Skip row</option>
+      <option value="1">Set FST = -1</option>
+    </param>
+    <!-- Whether SNPs that appear fixed in both populations are kept (0) or deleted (1, the default). -->
+    <param name="discard_fixed" type="select" label="Apparently fixed SNPs">
+      <option value="0">Retain SNPs that appear fixed in the two populations</option>
+      <option value="1" selected="true">Delete SNPs that appear fixed in the two populations</option>
+    </param>
+    <!-- NOTE(review): value "1" selects the unbiased estimator although the parameter is named "biased"; confirm add_fst_column.py reads the flag this way. -->
+    <param name="biased" type="select" label="FST estimator">
+      <option value="0" selected="true">Wright's original definition</option>
+      <option value="1">Weir's unbiased estimator</option>
+    </param>
+
+  </inputs>
+  <!-- Single gd_snp output dataset; its metadata source is the "input" SNP table parameter. -->
+  <outputs>
+    <data name="output" format="gd_snp" metadata_source="input" />
+  </outputs>
+  <!-- One test case: sequence coverage (data_source=0), min_reads=3, skip failing rows (retain=0), delete fixed SNPs (discard_fixed=1), Wright's definition (biased=0). -->
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
+      <param name="p2_input" value="test_in/b.gd_indivs" ftype="gd_indivs" />
+      <param name="data_source" value="0" />
+      <param name="min_reads" value="3" />
+      <param name="min_qual" value="0" />
+      <param name="retain" value="0" />
+      <param name="discard_fixed" value="1" />
+      <param name="biased" value="0" />
+      <output name="output" file="test_out/add_fst_column/add_fst_column.gd_snp" />
+    </test>
+  </tests>
+
+  <help>
+
+**What it does**
+
+The user specifies a SNP table and two "populations" of individuals,
+both previously defined using the Specify Individuals tool.
+No individual can be in both populations.  Other choices are as follows.
+
+Data source.  The allele frequencies of a SNP in the two populations can be
+estimated either by the total number of reads of each allele, or by adding
+the frequencies inferred from genotypes of individuals in the populations.
+
+After specifying the data source, the user sets lower bounds on the amount
+of data required at a SNP.  For estimating the Fst using read counts,
+the bound is the minimum count of reads of the two alleles in a population.
+For estimations based on genotype, the bound is the minimum reported genotype
+quality per individual.
+
+The user specifies whether SNPs that violate the lower bound should be
+skipped or kept with the Fst set to -1.
+
+The user specifies whether SNPs where both populations appear to be fixed
+for the same allele should be retained or discarded.
+
+Finally, the user chooses which definition of Fst to use:  Wright's original
+definition or Weir's unbiased estimator.
+
+A column is appended to the SNP table giving the Fst for each retained SNP.
+
+  </help>
+</tool>