Mercurial > repos > miller-lab > genome_diversity
annotate add_fst_column.xml @ 9:22fe0154fa54
added support for heterochromatic regions
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Tue, 10 Jul 2012 11:41:22 -0400 |
parents | e29f4d801bb0 |
children | 9b92372de9f6 |
rev | line source |
---|---|
<tool id="gd_add_fst_column" name="Add an FST column" version="1.0.0">
  <!--
    Galaxy tool wrapper that appends an Fst column to a SNP table by running
    add_fst_column.py on the table and two sets of individuals.
  -->
  <description>to a table</description>

  <!--
    Positional arguments handed to add_fst_column.py, in order:
      input, p1_input, p2_input, data_source, min_reads, min_qual,
      retain, discard_fixed, biased, output
    followed by one "column:name" argument per individual, built from the
    individual_columns and individual_names metadata of the input SNP table.
  -->
  <command interpreter="python">
    add_fst_column.py "$input" "$p1_input" "$p2_input" "$data_source" "$min_reads" "$min_qual" "$retain" "$discard_fixed" "$biased" "$output"
    #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
        #set $arg = '%s:%s' % ($individual_col, $individual)
        "$arg"
    #end for
  </command>

  <inputs>
    <!-- The SNP table plus the two sets of individuals to compare. -->
    <param name="input" type="data" format="snp" label="SNP table" />
    <param name="p1_input" type="data" format="ind" label="Population 1 individuals" />
    <param name="p2_input" type="data" format="ind" label="Population 2 individuals" />

    <!--
      Select parameters pass their option value ("0" or "1") to the script.
      The "format" attribute describes dataset parameters, so it is not set on
      this select.
    -->
    <param name="data_source" type="select" label="Data source">
      <option value="0" selected="true">sequence coverage</option>
      <option value="1">estimated genotype</option>
    </param>

    <!-- Lower bounds on the data required at a SNP; both default to 0. -->
    <param name="min_reads" type="integer" min="0" value="0" label="Minimum total read count for a population" />
    <param name="min_qual" type="integer" min="0" value="0" label="Minimum individual genotype quality" />

    <!-- What to do with SNPs that fall below the lower bounds above. -->
    <param name="retain" type="select" label="Special treatment" help="Applies to SNPs that do not meet the minimum read count or genotype quality set above">
      <option value="0" selected="true">Skip row</option>
      <option value="1">Set FST = -1</option>
    </param>

    <param name="discard_fixed" type="select" label="Apparently fixed SNPs">
      <option value="0">Retain SNPs that appear fixed in the two populations</option>
      <option value="1" selected="true">Delete SNPs that appear fixed in the two populations</option>
    </param>

    <param name="biased" type="select" label="FST estimator">
      <option value="0" selected="true">Wright's original definition</option>
      <option value="1">Weir's unbiased estimator</option>
    </param>
  </inputs>

  <outputs>
    <!-- The result is another SNP table; its metadata is taken from the input table. -->
    <data name="output" format="snp" metadata_source="input" />
  </outputs>

  <tests>
    <!-- Sequence-coverage run with a minimum of 3 reads, skipping failing rows and deleting fixed SNPs. -->
    <test>
      <param name="input" value="test_in/sample.snp" ftype="snp" />
      <param name="p1_input" value="test_in/a.ind" ftype="ind" />
      <param name="p2_input" value="test_in/b.ind" ftype="ind" />
      <param name="data_source" value="0" />
      <param name="min_reads" value="3" />
      <param name="min_qual" value="0" />
      <param name="retain" value="0" />
      <param name="discard_fixed" value="1" />
      <param name="biased" value="0" />
      <output name="output" file="test_out/add_fst_column/add_fst_column.snp" />
    </test>
  </tests>

  <help>
**What it does**

The user specifies a SNP table and two "populations" of individuals,
both previously defined using the Galaxy tool to select individuals from
a SNP table. No individual can be in both populations. Other choices are
as follows.

Data source. The allele frequencies of a SNP in the two populations can be
estimated either by the total number of reads of each allele, or by adding
the frequencies inferred from genotypes of individuals in the populations.

After specifying the data source, the user sets lower bounds on amount
of data required at a SNP. For estimating the Fst using read counts,
the bound is the minimum count of reads of the two alleles in a population.
For estimations based on genotype, the bound is the minimum reported genotype
quality per individual.

The user specifies whether the SNPs that violate the lower bound should be
ignored or the Fst set to -1.

The user specifies whether SNPs where both populations appear to be fixed
for the same allele should be retained or discarded.

Finally, the user chooses which definition of Fst to use: Wright's original
definition or Weir's unbiased estimator.

A column is appended to the SNP table giving the Fst for each retained SNP.
  </help>
</tool>