Mercurial > repos > miller-lab > genome_diversity
diff dpmix.xml @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 15 Jul 2013 10:47:35 -0400 |
parents | 91e835060ad2 |
children | 4188853b940b |
line wrap: on
line diff
--- a/dpmix.xml Mon Jun 03 12:29:29 2013 -0400 +++ b/dpmix.xml Mon Jul 15 10:47:35 2013 -0400 @@ -1,18 +1,37 @@ <tool id="gd_dpmix" name="Admixture" version="1.1.0"> - <description>: Map genomic intervals resembling specified ancestral populations</description> + <description>: Map genomic intervals resembling specified source populations</description> <command interpreter="python"> - dpmix.py "$input" + #import json + #import base64 + #import zlib + #set $ind_names = $input.dataset.metadata.individual_names + #set $ind_colms = $input.dataset.metadata.individual_columns + #set $ind_dict = dict(zip($ind_names, $ind_colms)) + #set $ind_json = json.dumps($ind_dict, separators=(',',':')) + #set $ind_comp = zlib.compress($ind_json, 9) + #set $ind_arg = base64.b64encode($ind_comp) + dpmix.py '$input' #if $input_type.choice == '0' - "gd_snp" "$input_type.data_source" + 'gd_snp' '$input_type.data_source' #else if $input_type.choice == '1' - "gd_genotype" "1" + 'gd_genotype' '1' #end if - "$switch_penalty" "$ap1_input" "$ap2_input" "$p_input" "$output" "$output2" "$output2.files_path" "$input.dataset.metadata.dbkey" "$input.dataset.metadata.ref" "$GALAXY_DATA_INDEX_DIR" "gd.heterochromatic.loc" - #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) - #set $arg = '%s:%s' % ($individual_col, $individual) - "$arg" - #end for + #if $third_pop.choice == '0' + #set $ap3_arg = '/dev/null' + #set $ap3_name_arg = '' + #else if $third_pop.choice == '1' + #set $ap3_arg = $third_pop.ap3_input + #set $ap3_name_arg = $third_pop.ap3_input.name + #end if + #if $user_het.choice == '0' + #set $het_arg = 'use_installed' + #else if $user_het.choice == '1' + #set $het_arg = $user_het.het_file + #else if $user_het.choice == '2' + #set $het_arg = 'use_none' + #end if + '$switch_penalty' '$ap1_input' '$ap1_input.name' '$ap2_input' '$ap2_input.name' '$ap3_arg' '$ap3_name_arg' '$p_input' '$output' '$output2' 
'$output2.files_path' '$input.dataset.metadata.dbkey' '$input.dataset.metadata.ref' '$GALAXY_DATA_INDEX_DIR' 'gd.heterochromatic.loc' '$ind_arg' '$het_arg' '1' </command> <inputs> @@ -38,11 +57,43 @@ </when> </conditional> - <param name="ap1_input" type="data" format="gd_indivs" label="Ancestral population 1 individuals" /> - <param name="ap2_input" type="data" format="gd_indivs" label="Ancestral population 2 individuals" /> + <param name="ap1_input" type="data" format="gd_indivs" label="Source population 1 individuals" /> + <param name="ap2_input" type="data" format="gd_indivs" label="Source population 2 individuals" /> + + <conditional name="third_pop"> + <param name="choice" type="select" format="integer" label="Include third source population"> + <option value="0" selected="true">no</option> + <option value="1">yes</option> + </param> + <when value="0" /> + <when value="1"> + <param name="ap3_input" type="data" format="gd_indivs" label="Source population 3 individuals" /> + </when> + </conditional> + <param name="p_input" type="data" format="gd_indivs" label="Potentially admixed individuals" /> <param name="switch_penalty" type="float" min="0" value="10" label="Genotype switch penalty" help="Note: Depends on the density of SNPs. 
For instance, with 50,000 SNPs in a vertebrate genome, 1.0 might be appropriate, with millions of SNPs, a value between 10 and 100 might be reasonable."/> + + <conditional name="user_het"> + <param name="choice" type="select" format="integer" label="Heterochromatin info"> + <option value="0" selected="true">use installed</option> + <option value="1">use your own</option> + <option value="2">use none</option> + </param> + <when value="0" /> + <when value="1"> + <param name="het_file" type="data" format="txt" label="Heterochromatin dataset" /> + </when> + </conditional> + + <!-- + <param name="add_logs" type="select" format="integer" label="Probabilities"> + <option value="1" selected="true">add logs of probabilities</option> + <option value="0">add probabilities</option> + </param> + --> + </inputs> <outputs> @@ -88,27 +139,37 @@ **What it does** -The user specifies two "ancestral" populations (i.e., sources for -chromosomes) and a set of potentially admixed individuals, and chooses -between the sequence coverage or the estimated genotypes to measure -the similarity of genomic intervals in admixed individuals to the two -classes of ancestral chromosomes. The user also picks a "genotype switch penalty", -typically between 10 and 100. For each potentially admixed individual, -the program divides the genome into three "genotypes": (0) homozygous -for the first ancestral population (i.e., both chromosomes from that -population), (1) heterozygous, or (2) homozygous for the second ancestral -population. Parts of a chromosome that are labeled as "heterochromatic" -are given the non-genotype "3". Smaller values of the switch penalty -(corresponding to more ancient admixture events) generally lead to the -reconstruction of more frequent changes between genotypes. 
+The user specifies two or three source populations (i.e., sources +for chromosomes) and a set of potentially admixed individuals, and +chooses between the sequence coverage or the estimated genotypes to +measure the similarity of genomic intervals in admixed individuals to +the three classes of source chromosomes. The user also specifies a +"switch penalty", controlling the strength of evidence needed to switch +between source populations as the program scans along a chromosome. +Choice of an appropriate value depends on the number of SNPs and, to +a lesser extent, on the time since the admixture events. With several +million SNPs genome-wide, reasonable values might fall between 10 +and 100. If there are 3 source populations, then for each potentially +admixed individual the program divides the genome into six "genotypes": + +1. homozygous for the first source population (i.e., both chromosomes from that population), +2. homozygous for the second source population, +3. homozygous for the third source population, +4. heterozygous for the first and second populations (i.e., one chromosome from each), +5. heterozygous for the first and third populations, or +6. heterozygous for the second and third populations. + +Parts of a reference chromosome that are labeled as "heterochromatic" +are given the "non-genotype" 0. With two source populations, only +"genotypes" 1, 2 and 3 are possible, where 3 now means heterozygous in +the two source populations. There are two output datasets generated. A tabular dataset with chromosome, start, stop, and pairs of columns containing the "genotypes" from above and label from the admixed individual. The second dataset is a composite dataset with general information from the run and a link to a pdf which -graphically shows the ancestral population along each of the chromosomes. +graphically shows the source population along each of the chromosomes. 
The second link is to a text file with summary information of the "genotypes" over the whole genome. - </help> </tool>