comparison dpmix.xml @ 27:8997f2ca8c7a

Update to Miller Lab devshed revision bae0d3306d3b
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 15 Jul 2013 10:47:35 -0400
parents 91e835060ad2
children 4188853b940b
comparison
equal deleted inserted replaced
26:91e835060ad2 27:8997f2ca8c7a
1 <tool id="gd_dpmix" name="Admixture" version="1.1.0"> 1 <tool id="gd_dpmix" name="Admixture" version="1.1.0">
2 <description>: Map genomic intervals resembling specified ancestral populations</description> 2 <description>: Map genomic intervals resembling specified source populations</description>
3 3
4 <command interpreter="python"> 4 <command interpreter="python">
5 dpmix.py "$input" 5 #import json
6 #import base64
7 #import zlib
8 #set $ind_names = $input.dataset.metadata.individual_names
9 #set $ind_colms = $input.dataset.metadata.individual_columns
10 #set $ind_dict = dict(zip($ind_names, $ind_colms))
11 #set $ind_json = json.dumps($ind_dict, separators=(',',':'))
12 #set $ind_comp = zlib.compress($ind_json, 9)
13 #set $ind_arg = base64.b64encode($ind_comp)
14 dpmix.py '$input'
6 #if $input_type.choice == '0' 15 #if $input_type.choice == '0'
7 "gd_snp" "$input_type.data_source" 16 'gd_snp' '$input_type.data_source'
8 #else if $input_type.choice == '1' 17 #else if $input_type.choice == '1'
9 "gd_genotype" "1" 18 'gd_genotype' '1'
10 #end if 19 #end if
11 "$switch_penalty" "$ap1_input" "$ap2_input" "$p_input" "$output" "$output2" "$output2.files_path" "$input.dataset.metadata.dbkey" "$input.dataset.metadata.ref" "$GALAXY_DATA_INDEX_DIR" "gd.heterochromatic.loc" 20 #if $third_pop.choice == '0'
12 #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) 21 #set $ap3_arg = '/dev/null'
13 #set $arg = '%s:%s' % ($individual_col, $individual) 22 #set $ap3_name_arg = ''
14 "$arg" 23 #else if $third_pop.choice == '1'
15 #end for 24 #set $ap3_arg = $third_pop.ap3_input
25 #set $ap3_name_arg = $third_pop.ap3_input.name
26 #end if
27 #if $user_het.choice == '0'
28 #set $het_arg = 'use_installed'
29 #else if $user_het.choice == '1'
30 #set $het_arg = $user_het.het_file
31 #else if $user_het.choice == '2'
32 #set $het_arg = 'use_none'
33 #end if
34 '$switch_penalty' '$ap1_input' '$ap1_input.name' '$ap2_input' '$ap2_input.name' '$ap3_arg' '$ap3_name_arg' '$p_input' '$output' '$output2' '$output2.files_path' '$input.dataset.metadata.dbkey' '$input.dataset.metadata.ref' '$GALAXY_DATA_INDEX_DIR' 'gd.heterochromatic.loc' '$ind_arg' '$het_arg' '1'
16 </command> 35 </command>
17 36
18 <inputs> 37 <inputs>
19 <conditional name="input_type"> 38 <conditional name="input_type">
20 <param name="choice" type="select" format="integer" label="Input format"> 39 <param name="choice" type="select" format="integer" label="Input format">
36 <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" /> 55 <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" />
37 </param> 56 </param>
38 </when> 57 </when>
39 </conditional> 58 </conditional>
40 59
41 <param name="ap1_input" type="data" format="gd_indivs" label="Ancestral population 1 individuals" /> 60 <param name="ap1_input" type="data" format="gd_indivs" label="Source population 1 individuals" />
42 <param name="ap2_input" type="data" format="gd_indivs" label="Ancestral population 2 individuals" /> 61 <param name="ap2_input" type="data" format="gd_indivs" label="Source population 2 individuals" />
62
63 <conditional name="third_pop">
64 <param name="choice" type="select" format="integer" label="Include third source population">
65 <option value="0" selected="true">no</option>
66 <option value="1">yes</option>
67 </param>
68 <when value="0" />
69 <when value="1">
70 <param name="ap3_input" type="data" format="gd_indivs" label="Source population 3 individuals" />
71 </when>
72 </conditional>
73
43 <param name="p_input" type="data" format="gd_indivs" label="Potentially admixed individuals" /> 74 <param name="p_input" type="data" format="gd_indivs" label="Potentially admixed individuals" />
44 75
45 <param name="switch_penalty" type="float" min="0" value="10" label="Genotype switch penalty" help="Note: Depends on the density of SNPs. For instance, with 50,000 SNPs in a vertebrate genome, 1.0 might be appropriate, with millions of SNPs, a value between 10 and 100 might be reasonable."/> 76 <param name="switch_penalty" type="float" min="0" value="10" label="Genotype switch penalty" help="Note: Depends on the density of SNPs. For instance, with 50,000 SNPs in a vertebrate genome, 1.0 might be appropriate, with millions of SNPs, a value between 10 and 100 might be reasonable."/>
77
78 <conditional name="user_het">
79 <param name="choice" type="select" format="integer" label="Heterochromatin info">
80 <option value="0" selected="true">use installed</option>
81 <option value="1">use your own</option>
82 <option value="2">use none</option>
83 </param>
84 <when value="0" />
85 <when value="1">
86 <param name="het_file" type="data" format="txt" label="Heterochromatin dataset" />
87 </when>
88 </conditional>
89
90 <!--
91 <param name="add_logs" type="select" format="integer" label="Probabilities">
92 <option value="1" selected="true">add logs of probabilities</option>
93 <option value="0">add probabilities</option>
94 </param>
95 -->
96
46 </inputs> 97 </inputs>
47 98
48 <outputs> 99 <outputs>
49 <data name="output" format="tabular" /> 100 <data name="output" format="tabular" />
50 <data name="output2" format="html" /> 101 <data name="output2" format="html" />
86 137
87 ----- 138 -----
88 139
89 **What it does** 140 **What it does**
90 141
91 The user specifies two "ancestral" populations (i.e., sources for 142 The user specifies two or three source populations (i.e., sources
92 chromosomes) and a set of potentially admixed individuals, and chooses 143 for chromosomes) and a set of potentially admixed individuals, and
93 between the sequence coverage or the estimated genotypes to measure 144 chooses between the sequence coverage or the estimated genotypes to
94 the similarity of genomic intervals in admixed individuals to the two 145 measure the similarity of genomic intervals in admixed individuals to
95 classes of ancestral chromosomes. The user also picks a "genotype switch penalty", 146 the two or three classes of source chromosomes. The user also specifies a
96 typically between 10 and 100. For each potentially admixed individual, 147 "switch penalty", controlling the strength of evidence needed to switch
97 the program divides the genome into three "genotypes": (0) homozygous 148 between source populations as the program scans along a chromosome.
98 for the first ancestral population (i.e., both chromosomes from that 149 Choice of an appropriate value depends on the number of SNPs and, to
99 population), (1) heterozygous, or (2) homozygous for the second ancestral 150 a lesser extent, on the time since the admixture events. With several
100 population. Parts of a chromosome that are labeled as "heterochromatic" 151 million SNPs genome-wide, reasonable values might fall between 10
101 are given the non-genotype "3". Smaller values of the switch penalty 152 and 100. If there are 3 source populations, then for each potentially
102 (corresponding to more ancient admixture events) generally lead to the 153 admixed individual the program divides the genome into six "genotypes":
103 reconstruction of more frequent changes between genotypes. 154
155 1. homozygous for the first source population (i.e., both chromosomes from that population),
156 2. homozygous for the second source population,
157 3. homozygous for the third source population,
158 4. heterozygous for the first and second populations (i.e., one chromosome from each),
159 5. heterozygous for the first and third populations, or
160 6. heterozygous for the second and third populations.
161
162 Parts of a reference chromosome that are labeled as "heterochromatic"
163 are given the "non-genotype" 0. With two source populations, only
164 "genotypes" 1, 2 and 3 are possible, where 3 now means heterozygous in
165 the two source populations.
104 166
105 There are two output datasets generated. A tabular dataset with chromosome, 167 There are two output datasets generated. A tabular dataset with chromosome,
106 start, stop, and pairs of columns containing the "genotypes" from above 168 start, stop, and pairs of columns containing the "genotypes" from above
107 and label from the admixed individual. The second dataset is a composite 169 and label from the admixed individual. The second dataset is a composite
108 dataset with general information from the run and a link to a pdf which 170 dataset with general information from the run and a link to a pdf which
109 graphically shows the ancestral population along each of the chromosomes. 171 graphically shows the source population along each of the chromosomes.
110 The second link is to a text file with summary information of the 172 The second link is to a text file with summary information of the
111 "genotypes" over the whole genome. 173 "genotypes" over the whole genome.
112
113 </help> 174 </help>
114 </tool> 175 </tool>