Mercurial > repos > miller-lab > genome_diversity
diff prepare_population_structure.xml @ 14:8ae67e9fb6ff
Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
author | miller-lab |
---|---|
date | Fri, 28 Sep 2012 11:35:56 -0400 |
parents | |
children | f04f40a36cc8 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/prepare_population_structure.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,118 @@ +<tool id="gd_prepare_population_structure" name="Prepare Input" version="1.0.0"> + <description>: Filter and convert to the format needed for these tools</description> + + <command interpreter="python"> + prepare_population_structure.py "$input" "$min_reads" "$min_qual" "$min_spacing" "$output" "$output.files_path" + #if $individuals.choice == '0' + "all_individuals" + #else if $individuals.choice == '1' + #for $population in $individuals.populations + #set $pop_arg = 'population:%s:%s' % (str($population.p_input), str($population.p_input.name)) + "$pop_arg" + #end for + #end if + #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) + #set $arg = 'individual:%s:%s' % ($individual_col, $individual) + "$arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="gd_snp" label="SNP dataset" /> + <param name="min_reads" type="integer" min="0" value="0" label="Minimum reads covering a SNP, per individual" /> + <param name="min_qual" type="integer" min="0" value="0" label="Minimum quality value, per individual" /> + <param name="min_spacing" type="integer" min="0" value="0" label="Minimum spacing between SNPs on the same scaffold" /> + <conditional name="individuals"> + <param name="choice" type="select" label="Individuals"> + <option value="0" selected="true">All</option> + <option value="1">Choose</option> + </param> + <when value="0" /> + <when value="1"> + <repeat name="populations" title="Population" min="1"> + <param name="p_input" type="data" format="gd_indivs" label="Individuals" /> + </repeat> + </when> + </conditional> + </inputs> + + <outputs> + <data name="output" format="gd_ped"> + <actions> + <action type="metadata" name="base_name" default="admix" /> + </actions> + </data> + </outputs> + + <tests> + <test> + <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" /> + <param name="min_reads" value="3" /> + <param name="min_qual" value="30" /> + <param name="min_spacing" value="0" /> + <param name="choice" value="0" /> + <output name="output" file="test_out/prepare_population_structure/prepare_population_structure.html" ftype="html" compare="diff" lines_diff="2"> + <extra_files type="file" name="admix.map" value="test_out/prepare_population_structure/admix.map" /> + <extra_files type="file" name="admix.ped" value="test_out/prepare_population_structure/admix.ped" /> + </output> + </test> + </tests> + + <help> + +**Dataset formats** + +The input datasets are in gd_snp_ and gd_indivs_ formats. It is important +for the Individuals datasets to have unique names; rename them if +necessary to make them unique. These names are used by the later tools in +the graphical displays. +The output dataset is gd_ped_. (`Dataset missing?`_) + +.. _gd_snp: ./static/formatHelp.html#gd_snp +.. _gd_indivs: ./static/formatHelp.html#gd_indivs +.. _gd_ped: ./static/formatHelp.html#gd_ped +.. _Dataset missing?: ./static/formatHelp.html + +----- + +**What it does** + +The tool converts a gd_snp dataset into two tables, called "admix.map" and +"admix.ped", needed for estimating the population structure. The user +can read or download those files, or simply pass this tool's output on to +other programs. The user imposes conditions on which SNPs to consider, +such as the minimum coverage and/or quality value for every individual, +or the distance to the closest SNP in the same contig (as named in the +first column of the SNP table). A useful piece of information produced +by the tool is the number of SNPs meeting those conditions, which can +be found by clicking on the eye icon in the history panel after the program +runs. + +----- + +**Example** + +- input:: + + Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0 + Contig48_chr1_10150253_10151311 11 A G 94.3 chr1 10150264 A 1 0 2 30 1 0 2 30 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 Y 22 +99. 0 + Contig20_chr1_21313469_21313570 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0 + etc. + +- output map file:: + + 1 snp1 0 2 + 1 snp3 0 4 + 1 snp4 0 5 + 1 snp5 0 6 + 1 snp6 0 7 + 1 snp7 0 8 + 1 snp8 0 9 + 1 snp9 0 10 + +- output ped file:: + + PB1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 + + </help> +</tool>