Mercurial > repos > miller-lab > genome_diversity
diff dpmix.xml @ 14:8ae67e9fb6ff
Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
author | miller-lab |
---|---|
date | Fri, 28 Sep 2012 11:35:56 -0400 |
parents | |
children | d6b961721037 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dpmix.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,93 @@ +<tool id="gd_dpmix" name="Admixture" version="1.0.0"> + <description>: Map genomic intervals resembling specified ancestral populations</description> + + <command interpreter="python"> + dpmix.py "$input" "$data_source" "$switch_penalty" "$ap1_input" "$ap2_input" "$p_input" "$output" "$output2" "$output2.files_path" "$input.dataset.metadata.dbkey" "$input.dataset.metadata.ref" "$GALAXY_DATA_INDEX_DIR" "gd.heterochromatic.loc" + #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) + #set $arg = '%s:%s' % ($individual_col, $individual) + "$arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="gd_snp" label="Dataset"> + <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" /> + </param> + <param name="ap1_input" type="data" format="gd_indivs" label="Ancestral population 1 individuals" /> + <param name="ap2_input" type="data" format="gd_indivs" label="Ancestral population 2 individuals" /> + <param name="p_input" type="data" format="gd_indivs" label="Potentially admixed individuals" /> + + <param name="data_source" type="select" format="integer" label="Data source"> + <option value="0" selected="true">sequence coverage</option> + <option value="1">estimated genotype</option> + </param> + + <param name="switch_penalty" type="integer" min="0" value="10" label="Switch penalty" /> + </inputs> + + <outputs> + <data name="output" format="tabular" /> + <data name="output2" format="html" /> + </outputs> + + <tests> + <test> + <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" /> + <param name="ap1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" /> + <param name="ap2_input" value="test_in/b.gd_indivs" ftype="gd_indivs" /> + <param name="p_input" value="test_in/c.gd_indivs" ftype="gd_indivs" /> + <param name="data_source" value="0" /> + <param name="switch_penalty" value="10" /> + + <output name="output" file="test_out/dpmix/dpmix.tabular" /> + + <output name="output2" file="test_out/dpmix/dpmix.html" ftype="html" compare="diff" lines_diff="2"> + <extra_files type="file" name="dpmix.pdf" value="test_out/dpmix/dpmix.pdf" compare="sim_size" delta = "10000" /> + <extra_files type="file" name="misc.txt" value="test_out/dpmix/misc.txt" /> + </output> + </test> + </tests> + + <help> + +**Dataset formats** + +The input datasets are in gd_snp_ and gd_indivs_ formats. It is important for +the Individuals datasets to have unique names and for there to be no overlap +between the two populations. Rename these datasets if +needed to make them unique. +There are two output datasets, one tabular_ and one composite. (`Dataset missing?`_) + +.. _gd_snp: ./static/formatHelp.html#gd_snp +.. _gd_indivs: ./static/formatHelp.html#gd_indivs +.. _tabular: ./static/formatHelp.html#tab +.. _Dataset missing?: ./static/formatHelp.html + +----- + +**What it does** + +The user specifies two "ancestral" populations (i.e., sources for +chromosomes) and a set of potentially admixed individuals, and chooses +between the sequence coverage or the estimated genotypes to measure +the similarity of genomic intervals in admixed individuals to the two +classes of ancestral chromosomes. The user also picks a "switch penalty", +typically between 10 and 100. For each potentially admixed individual, +the program divides the genome into three "genotypes": (0) homozygous +for the first ancestral population (i.e., both chromosomes from that +population), (1) heterozygous, or (2) homozygous for the second ancestral +population. Parts of a chromosome that are labeled as "heterochromatic" +are given the non-genotype, 3. Smaller values of the switch penalty +(corresponding to more ancient admixture events) generally lead to the +reconstruction of more frequent changes between genotypes. + +There are two output datasets generated. A tabular dataset with chromosome, +start, stop, and pairs of columns containing the "genotypes" from above +and label from the admixed individual. The second dataset is a composite +dataset with general information from the run and a link to a pdf which +graphically shows the ancestral population along each of the chromosomes. +The second link is to a text file with summary information of the +"genotypes" over the whole genome. + + </help> +</tool>