Mercurial > repos > iuc > king
changeset 1:1932808c6fab draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/king/ commit 1c50106137c5b3260f18864ac9084056fa91ec80"
author | iuc |
---|---|
date | Mon, 18 Apr 2022 18:04:56 +0000 |
parents | ec2bc87ebd7b |
children | |
files | king.xml test-data/king_pcplot.png test-data/subset_bed.sh test-data/test.model |
diffstat | 4 files changed, 511 insertions(+), 111 deletions(-) [+] |
line wrap: on
line diff
--- a/king.xml Tue Nov 16 20:14:31 2021 +0000 +++ b/king.xml Mon Apr 18 18:04:56 2022 +0000 @@ -2,82 +2,266 @@ <tool id="king" name="KING" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> <description>Kinship-based INference for GWAS</description> <macros> - <token name="@TOOL_VERSION@">2.2.4</token> + <token name="@TOOL_VERSION@">2.2.7</token> <token name="@VERSION_SUFFIX@">0</token> </macros> <requirements> <requirement type="package" version="@TOOL_VERSION@">king</requirement> - <requirement type="package" version="1.6.4">r-kinship2</requirement> - <requirement type="package" version="1.2.2">r-igraph</requirement> + <requirement type="package" version="1.8.5">r-kinship2</requirement> + <requirement type="package" version="1.3.0">r-igraph</requirement> + <requirement type="package" version="1.7_9">r-e1071</requirement> </requirements> <version_command> king | head -1 | cut -d' ' -f 2 </version_command> <command detect_errors="exit_code"><![CDATA[ ln -s '$genotype' input.bed && - ln -s '$family' input.fam && - ln -s '$map' input.bim && - king -b input.bed --fam input.fam --bim input.bim - $related - $duplicate - $kinship - $ibdseg - $ibs - $homog - #if str($degree): - --degree $degree + #if $opt_int.family: + ln -s '$opt_int.family' input.fam && + #end if + #if $opt_int.map: + ln -s '$opt_int.map' input.bim && + #end if + #if $gen_rsk.model: + ln -s '$gen_rsk.model' input.model && + #end if + ## General Input + king -b input.bed + ## Close Relative Inference + $close_rel_i.related + $close_rel_i.duplicate + ## Pairwise Relatedness Inference + $pair_rel_i.kinship + $pair_rel_i.ibdseg + $pair_rel_i.ibs + $pair_rel_i.homog + ## Inference Parameter + #if str($inf_param.degree): + --degree $inf_param.degree + #end if + #if str($inf_param.seglength): + --seglength $inf_param.seglength + #end if + ## Relationship Application + $rel_app.unrelated + $rel_app.cluster + $rel_app.build + ## QC Report + #if str($qc_rep.callrateN): + --callrateN $qc_rep.callrateN + #end if + #if 
str($qc_rep.callrateM): + --callrateM $qc_rep.callrateM + #end if + ## Population Structure + $pop_str.pca + $pop_str.mds + ## Structure Parameter + #if str($str_par.pcs) + --pcs $str_par.pcs #end if - #if str($projection): - --projection $projection + #if str($str_par.projection) + --projection $str_par.projection + #end if + ## Disease Association + $dis_ass.tdt + ## Quantitative Trait Association + $qnt_trt.mtscore + ## Association Model + #if str($ass_mod.trait) + --trait '$ass_mod.trait' + #end if + #if str($ass_mod.covariate) + --covariate '$ass_mod.covariate' + #end if + ## Association parameter + $ass_par.invnorm + #if str($ass_par.maxP) + --maxP $ass_par.maxP #end if - $unrelated - $build - $cluster - $rplot - + ## Genetic Risk Score + $gen_rsk.risk + #if $gen_rsk.model: + --model input.model + #end if + #if str($gen_rsk.prevalence) + --prevalence $gen_rsk.prevalence + #end if + $gen_rsk.noflip + ## Computing parameter + --cpus "\${GALAXY_SLOTS:-4}" + ## Optional Input + #if $opt_int.family + --fam input.fam + #end if + #if $opt_int.map + --bim input.bim + #end if + #if str($opt_int.sexchr) + --sexchr $opt_int.sexchr + #end if + ## Output + $opt_par.rplot + $opt_par.pngplot + $opt_par.plink > '$kingoutlog' ]]></command> <inputs> - <param name="genotype" type="data" format="pbed,binary" label="Binary Genotype File" /> - <param name="family" type="data" format="lped,txt" label="Family File" /> - <param name="map" type="data" format="tabular" label="Map File" /> + <param name="genotype" type="data" format="pbed" label="Binary Genotype File" help="Generated by PLINK" /> <!-- Beginning of the optional paramters --> - <param argument="--related" type="boolean" truevalue="--related" falsevalue="" label="Relationship Inference" help="Implements the fastest and integrated relationship inference." 
/> - <param argument="--duplicate" type="boolean" truevalue="--duplicate" falsevalue="" label="Duplicate Analysis" help="Implements the fastest (and accurate) algorithm to identify duplicates or MZ twins" /> - <param argument="--kinship" type="boolean" truevalue="--kinship" falsevalue="" label="Kinship Inference" help="Estimates pair-wise kinship coefficients" /> - <param argument="--ibdseg" type="boolean" truevalue="--ibdseg" falsevalue="" label="IBD Segment Analysis" help="IBD segment analysis determines all IBD (IBD1 and IBD2) segments shared between relatives" /> - <param argument="--ibs" type="boolean" truevalue="--ibs" falsevalue="" label="IBS Summary Statistics" help="Counts and average of IBS" /> - <param argument="--homog" type="boolean" truevalue="--homog" falsevalue="" label="Homogeneous Population" help="Estimates pair-wise kinship coefficients assuming a homogeneous population." /> - <param argument="--degree" type="integer" min="0" optional="true" label="Degrees of relatedness" help="Filters relative pairs based on kinship coefficients." /> - <param argument="--projection" type="integer" min="0" optional="true" label="Projection N" help="Includes the first N samples of a subset." /> - <param argument="--unrelated" type="boolean" truevalue="--unrelated" falsevalue="" label="Unrelated Option" help="Extract a list of unrelated individuals." /> - <param argument="--build" type="boolean" truevalue="--build" falsevalue="" label="Reconstruct Pedigree" help="Reconstructs pedigrees using SNP data" /> - <param argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" label="Cluster Parameter" help="Clusters relatives into families by generating an updateid file." /> - <param argument="--rplot" type="boolean" truevalue="--rplot" falsevalue="" label="R Code and Plots" help="Generates R code first and then calls R program to make plots in a PDF file." /> - <param name="use_log" type="boolean" checked="false" label="Output a Log?" 
/> + <section name="close_rel_i" title="Close Relative Inference" > + <param argument="--related" type="boolean" truevalue="--related" falsevalue="" + label="Relationship Inference" + help="Implements the fastest and integrated relationship inference." /> + <param argument="--duplicate" type="boolean" truevalue="--duplicate" falsevalue="" + label="Duplicate Analysis" + help="Implements the fastest (and accurate) algorithm to identify duplicates or MZ twins" /> + </section> + <!-- Despite many of the below parameters being grouped together, they can all + surprisingly be used independently of one another, i.e. no mutual exclusivity that I can see --> + <section name="pair_rel_i" title="Pairwise Relative Inference" > + <param argument="--kinship" type="boolean" truevalue="--kinship" falsevalue="" + label="Kinship Inference" + help="Estimates pair-wise kinship coefficients" /> + <param argument="--ibdseg" type="boolean" truevalue="--ibdseg" falsevalue="" + label="IBD Segment Analysis" + help="IBD segment analysis determines all IBD (IBD1 and IBD2) segments shared between relatives" /> + <param argument="--ibs" type="boolean" truevalue="--ibs" falsevalue="" + label="IBS Summary Statistics" + help="Counts and average of IBS" /> + <param argument="--homog" type="boolean" truevalue="--homog" falsevalue="" + label="Homogeneous Population" + help="Estimates pair-wise kinship coefficients assuming a homogeneous population." /> + </section> + <section name="inf_param" title="Inference Parameter" > + <param argument="--degree" type="integer" min="0" optional="true" + label="Degrees of relatedness" + help="Filters relative pairs based on kinship coefficients." 
/> + <param argument="--seglength" type="integer" min="1" optional="true" + label="Minimum IBD segments" + help="specifies the minimum length of IDB segments that are considered towards the relationship inference" /> + </section> + <section name="rel_app" title="Relationship Application" > + <param argument="--unrelated" type="boolean" truevalue="--unrelated" falsevalue="" + label="Unrelated Option" + help="Extract a list of unrelated individuals." /> + <param argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" + label="Cluster Parameter" + help="Clusters relatives into families by generating an updateid file." /> + <param argument="--build" type="boolean" truevalue="--build" falsevalue="" + label="Reconstruct Pedigree" + help="Reconstructs pedigrees using SNP data" /> + </section> + <section name="qc_rep" title="QC Report" > + <param argument="--callrateN" type="integer" min="1" optional="true" + label="Set the N callrate" /> + <param argument="--callrateM" type="integer" min="1" optional="true" + label="Set the M callrate" /> + </section> + <section name="pop_str" title="Population Structure" > + <param argument="--pca" type="boolean" truevalue="--pca" falsevalue="" + label="Add PCA as dimension reduction algorithm" /> + <param argument="--mds" type="boolean" truevalue="--mds" falsevalue="" + label="Add MDS as dimension reduction algorithm" /> + </section> + <section name="str_par" title="Structure Parameter" > + <!-- Ye, all options can be selected here, apparently... --> + <param argument="--pcs" type="integer" min="1" optional="true" + label="Number of Principal Components to use" + help="allow the specification of the number of PCs with a default value of 10." /> + <param argument="--projection" type="integer" min="0" optional="true" + label="Projection N" + help="Includes the first N samples of a subset." 
/> + </section> + <section name="dis_ass" title="Disease Association" > + <param argument="--tdt" type="boolean" truevalue="--tdt" falsevalue="" + label="Transmission/Disequilibrium Test" + help="implements the well-known Transmission/Disequilibrium Test for family data that consist of parent-affected child trios." /> + </section> + <section name="qnt_trt" title="Quantitative Trait Association" > + <param argument="--mtscore" type="boolean" truevalue="--mtscore" falsevalue="" + label="Score Test between SNP and quantitative trait" + help="Only association results with P value less than 5E-8 are printed out, including both cis- and trans- effect associations. "/> + </section> + <section name="ass_mod" title="Association Model" > + <param argument="--trait" type="text" optional="true" + label="Trait names" + help="Specifies the trait names to be analyzed in the association analysis." /> + <param argument="--covariate" type="text" optional="true" + label="Covariate names" + help="Specifies the covariate names to be adjusted in the association analysis" /> + </section> + <section name="ass_par" title="Association Parameter" > + <param argument="--invnorm" type="boolean" truevalue="--invnorm" falsevalue="" + label="Normal Transformation" + help="Carries out inverse normal transformation for quantitative traits prior to association analysis."/> + <param argument="--maxP" type="float" min="0" optional="true" + label="maximum P values" + help="specifies the maximum P values to print out in the output files."/> + </section> + <section name="gen_rsk" title="Genetic Risk Score" > + <param argument="--risk" type="boolean" truevalue="--risk" falsevalue="" + label="Predict Disease Risk" + help="predicts disease risks for each individual according to the GRS risk model" /> + <param name="model" type="data" format="txt,tabular" label="GRS Risk Model file" optional="true" /> + <param argument="--prevalence" type="float" min="0" optional="true" + label="Disease prevalence" /> + 
<param argument="--noflip" type="boolean" truevalue="--noflip" falsevalue="" + label="No Flip" + help="If strands of genotype data are already consistent with model, enable this." /> + </section> + <section name="opt_int" title="Optional Input" > + <param name="family" type="data" format="lped,txt" label="Family File" optional="true" /> + <param name="map" type="data" format="tabular" label="Map File" optional="true" /> + <param argument="--sexchr" type="integer" min="0" optional="true" + label="Pair number of the Sex Chromosome." + help="specifies the pair number of the sex chromosome, which should be userful for non-human species. The default sex chromosome is 23." /> + </section> + <section name="opt_par" title="Optional Parameter" > + <param argument="--rplot" type="boolean" truevalue="--rplot" falsevalue="" + label="R Code and Plots" + help="Generates R code first and then calls R program to make plots in a PDF file." /> + <param argument="--pngplot" type="boolean" truevalue="--pngplot" falsevalue="" + label="R Code and Plots in PNG format" + help="Generates R code first and then calls R program to make plots in a PNG file for certain applications." /> + <param argument="--plink" type="boolean" truevalue="--plink" falsevalue="" + label="Output data in PLINK format" /> + </section> + <param name="use_log" type="boolean" checked="false" + label="Output a Log?" 
/> </inputs> <outputs> <data name="kingoutlog" format="txt" label="${tool.name} on ${on_string} : Log" > <filter>use_log == True</filter> </data> <collection name="kingoutput_txt" type="list" label="${tool.name} on ${on_string}: Metrics"> - <discover_datasets pattern="king(?P&lt;designation&gt;.+)\.txt" format="txt" /> + <discover_datasets pattern="king(?P&lt;name&gt;.+)\.txt" format="txt" /> + <discover_datasets pattern="king\.(?P&lt;name&gt;.+)" format="txt" /> + <discover_datasets pattern="king_(?P&lt;name&gt;.+)\.plink" format="plink" /> + <discover_datasets pattern="king_(?P&lt;name&gt;.+)\.R" format="txt" /> </collection> <collection name="kingoutput_log" type="list" label="${tool.name} on ${on_string}: Metrics Logs" > <filter>use_log == True</filter> - <discover_datasets pattern="king(?P&lt;designation&gt;.+)\.log" format="txt" /> + <discover_datasets pattern="king(?P&lt;name&gt;.+)\.log" format="txt" /> + <discover_datasets pattern="king_(?P&lt;name&gt;.+)\.Rout" format="txt" /> </collection> <collection name="kingoutput_pdf" type="list" label="${tool.name} on ${on_string}: Plots"> - <discover_datasets pattern="king_(?P&lt;designation&gt;.+)\.pdf" format="pdf" /> + <filter>opt_par["rplot"] or opt_par["pngplot"]</filter> + <discover_datasets pattern="king_(?P&lt;name&gt;.+)\.pdf" format="pdf" /> + <discover_datasets pattern="king_(?P&lt;name&gt;.+)\.png" format="png" /> </collection> </outputs> <tests> - <test> + <test expect_num_outputs="3"> <param name="genotype" value="new.6000.bed"/> - <param name="family" value="new.6000.fam"/> - <param name="map" value="new.6000.bim"/> - <param name="related" value="true" /> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="close_rel_i" > + <param name="related" value="true" /> + </section> <param name="use_log" value="true" /> <output name="kingoutlog"> <assert_contents> @@ -88,12 +272,18 @@ </assert_contents> </output> </test> - <test> + <test expect_num_outputs="3"> <param 
name="genotype" value="new.6000.bed"/> - <param name="family" value="new.6000.fam"/> - <param name="map" value="new.6000.bim"/> - <param name="related" value="true" /> - <param name="degree" value="2" /> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="close_rel_i" > + <param name="related" value="true" /> + </section> + <section name="inf_param" > + <param name="degree" value="2" /> + </section> <param name="use_log" value="true" /> <output name="kingoutlog"> <assert_contents> @@ -105,13 +295,21 @@ </assert_contents> </output> </test> - <test> + <test expect_num_outputs="4"> <param name="genotype" value="new.6000.bed"/> - <param name="family" value="new.6000.fam"/> - <param name="map" value="new.6000.bim"/> - <param name="related" value="true" /> - <param name="degree" value="2" /> - <param name="rplot" value="true" /> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="close_rel_i" > + <param name="related" value="true" /> + </section> + <section name="inf_param" > + <param name="degree" value="2" /> + </section> + <section name="opt_par" > + <param name="rplot" value="true" /> + </section> <param name="use_log" value="true" /> <output name="kingoutlog"> <assert_contents> @@ -125,11 +323,15 @@ </assert_contents> </output> </test> - <test> + <test expect_num_outputs="3"> <param name="genotype" value="new.6000.bed"/> - <param name="family" value="new.6000.fam"/> - <param name="map" value="new.6000.bim"/> - <param name="duplicate" value="true" /> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="close_rel_i" > + <param name="duplicate" value="true" /> + </section> <param name="use_log" value="true" /> <output name="kingoutlog"> <assert_contents> @@ -139,11 +341,15 @@ 
</assert_contents> </output> </test> - <test> + <test expect_num_outputs="3"> <param name="genotype" value="new.6000.bed"/> - <param name="family" value="new.6000.fam"/> - <param name="map" value="new.6000.bim"/> - <param name="kinship" value="true" /> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="pair_rel_i" > + <param name="kinship" value="true" /> + </section> <param name="use_log" value="true" /> <output name="kingoutlog"> <assert_contents> @@ -153,12 +359,18 @@ </assert_contents> </output> </test> - <test> + <test expect_num_outputs="3"> <param name="genotype" value="new.6000.bed"/> - <param name="family" value="new.6000.fam"/> - <param name="map" value="new.6000.bim"/> - <param name="related" value="true" /> - <param name="projection" value="100000" /> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="close_rel_i" > + <param name="related" value="true" /> + </section> + <section name="str_par" > + <param name="projection" value="100000" /> + </section> <param name="use_log" value="true" /> <output name="kingoutlog"> <assert_contents> @@ -171,11 +383,15 @@ </assert_contents> </output> </test> - <test> + <test expect_num_outputs="3"> <param name="genotype" value="new.6000.bed"/> - <param name="family" value="new.6000.fam"/> - <param name="map" value="new.6000.bim"/> - <param name="ibdseg" value="true" /> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="pair_rel_i" > + <param name="ibdseg" value="true" /> + </section> <param name="use_log" value="true" /> <output name="kingoutlog"> <assert_contents> @@ -186,13 +402,21 @@ </assert_contents> </output> </test> - <test> + <test expect_num_outputs="4"> <param name="genotype" value="new.6000.bed"/> - <param name="family" 
value="new.6000.fam"/> - <param name="map" value="new.6000.bim"/> - <param name="ibdseg" value="true" /> - <param name="degree" value="3" /> - <param name="rplot" value="true" /> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="pair_rel_i" > + <param name="ibdseg" value="true" /> + </section> + <section name="inf_param" > + <param name="degree" value="3" /> + </section> + <section name="opt_par" > + <param name="rplot" value="true" /> + </section> <param name="use_log" value="true" /> <output name="kingoutlog"> <assert_contents> @@ -204,11 +428,15 @@ </assert_contents> </output> </test> - <test> + <test expect_num_outputs="3"> <param name="genotype" value="new.6000.bed"/> - <param name="family" value="new.6000.fam"/> - <param name="map" value="new.6000.bim"/> - <param name="ibs" value="true" /> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="pair_rel_i" > + <param name="ibs" value="true" /> + </section> <param name="use_log" value="true" /> <output name="kingoutlog"> <assert_contents> @@ -222,11 +450,15 @@ </assert_contents> </output> </test> - <test> + <test expect_num_outputs="3"> <param name="genotype" value="new.6000.bed"/> - <param name="family" value="new.6000.fam"/> - <param name="map" value="new.6000.bim"/> - <param name="homog" value="true" /> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="pair_rel_i" > + <param name="homog" value="true" /> + </section> <param name="use_log" value="true" /> <output name="kingoutlog"> <assert_contents> @@ -237,11 +469,15 @@ </assert_contents> </output> </test> - <test> + <test expect_num_outputs="3"> <param name="genotype" value="new.6000.bed"/> - <param name="family" value="new.6000.fam"/> - <param name="map" value="new.6000.bim"/> 
- <param name="unrelated" value="true" /> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="rel_app" > + <param name="unrelated" value="true" /> + </section> <param name="use_log" value="true" /> <output name="kingoutlog"> <assert_contents> @@ -252,12 +488,18 @@ </assert_contents> </output> </test> - <test> + <test expect_num_outputs="3"> <param name="genotype" value="new.6000.bed"/> - <param name="family" value="new.6000.fam"/> - <param name="map" value="new.6000.bim"/> - <param name="unrelated" value="true" /> - <param name="degree" value="2" /> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="rel_app" > + <param name="unrelated" value="true" /> + </section> + <section name="inf_param" > + <param name="degree" value="2" /> + </section> <param name="use_log" value="true" /> <output name="kingoutlog" > <assert_contents> @@ -268,11 +510,15 @@ </assert_contents> </output> </test> - <test> + <test expect_num_outputs="3"> <param name="genotype" value="new.6000.bed"/> - <param name="family" value="new.6000.fam"/> - <param name="map" value="new.6000.bim"/> - <param name="build" value="true" /> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="rel_app" > + <param name="build" value="true" /> + </section> <param name="use_log" value="true" /> <output name="kingoutlog" > <assert_contents> @@ -283,12 +529,18 @@ </assert_contents> </output> </test> - <test> + <test expect_num_outputs="3"> <param name="genotype" value="new.6000.bed"/> - <param name="family" value="new.6000.fam"/> - <param name="map" value="new.6000.bim"/> - <param name="build" value="true" /> - <param name="degree" value="2" /> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" 
value="new.6000.bim"/> + </section> + <section name="rel_app" > + <param name="build" value="true" /> + </section> + <section name="inf_param" > + <param name="degree" value="2" /> + </section> <param name="use_log" value="true" /> <output name="kingoutlog"> <assert_contents> @@ -299,11 +551,15 @@ </assert_contents> </output> </test> - <test> + <test expect_num_outputs="3"> <param name="genotype" value="new.6000.bed"/> - <param name="family" value="new.6000.fam"/> - <param name="map" value="new.6000.bim"/> - <param name="cluster" value="true" /> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="rel_app" > + <param name="cluster" value="true" /> + </section> <param name="use_log" value="true" /> <output name="kingoutlog"> <assert_contents> @@ -313,12 +569,126 @@ </assert_contents> </output> </test> + <test> <!-- risk prediction --> + <param name="genotype" value="new.6000.bed"/> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="gen_rsk"> + <param name="model" ftype="txt" value="test.model" /> + <param name="risk" value="true"/> + <param name="prevalence" value="0.004"/> + <param name="noflip" value="true" /> + </section> + <param name="use_log" value="true" /> + <output name="kingoutlog" > + <assert_contents> + <has_text text="rs2602970 11 T A 0.265 0.403 C T 0.474 SWITCHED" /> + <has_text text="rs1161312 17 C T 0.459 0.295 G A 0.408 FLIPPED" /> + </assert_contents> + </output> + <output_collection name="kingoutput_txt" count="1"> + <element name="grs" > + <assert_contents> + <has_text text="13292 NA07014 1.000 1.000 4.917 3.243 0.0006 0.9927" /> + </assert_contents> + </element> + </output_collection> + </test> + <test expect_num_outputs="3" > <!-- association mapping 1 --> + <param name="genotype" value="new.6000.bed"/> + <section name="opt_int" > + <param name="family" 
value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="dis_ass"> + <param name="tdt" value="true" /> + </section> + <param name="use_log" value="true" /> + <output name="kingoutlog" > + <assert_contents> + <has_text text="There are no parent-affected-offspring trios in the data." /> + </assert_contents> + </output> + </test> + <test expect_num_outputs="3"> <!-- association mapping 2 --> + <param name="genotype" value="new.6000.bed"/> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="ass_mod"> + <param name="covariate" value="," /> + </section> + <section name="qnt_trt" > + <param name="mtscore" value="false"/> + </section> + <section name="ass_par"> + <param name="invnorm" value="true" /> + <param name="maxP" value="5e-8"/> + </section> + <param name="use_log" value="true" /> + <output name="kingoutlog" > + <assert_contents> + <has_text_matching expression="Inference\s+0\s+1\s+1\s+0" /> + </assert_contents> + </output> + <output_collection name="kingoutput_txt" count="3" > + <element name="allsegs" > + <assert_contents> + <has_text_matching expression="1\s+1\s+51\.\d+\s+95\.\d+\s+44\.\d+\s+294\s+rs7534689\s+rs1858111" /> + </assert_contents> + </element> + <element name="kin0" > + <assert_contents> + <has_text_matching expression="FID1\s+ID1\s+FID2\s+ID2\s+N_SNP\s+HetHet\s+IBS0\s+HetConc\s+HomIBS0\s+Kinship\s+IBD1Seg\s+IBD2Seg\s+PropIBD\s+InfType" /> + </assert_contents> + </element> + </output_collection> + </test> + <test expect_num_outputs="4" > <!-- ancestry inferrence --> + <param name="genotype" value="new.6000.bed"/> + <section name="opt_int" > + <param name="family" value="new.6000.fam"/> + <param name="map" value="new.6000.bim"/> + </section> + <section name="pop_str"> + <param name="pca" value="true" /> + </section> + <section name="str_par" > + <param name="projection" value="1"/> + </section> + <section 
name="opt_par"> + <param name="rplot" value="true"/> + <param name="pngplot" value="true"/> + </section> + <param name="use_log" value="true" /> + <output name="kingoutlog"> + <assert_contents> + <has_text_matching expression="10\s+eigenvalues:\s+581\.\d+\s+114\.\d+\s+112\.\d+\s+111\.\d+\s+109\.\d+\s+109\.\d+\s+108\.\d+\s+108\.\d+\s+107\.\d+\s+107\.\d+" /> + </assert_contents> + </output> + <output_collection name="kingoutput_txt" count="3" > + <element name="pc"> + <assert_contents> + <has_text_matching expression="Y117\s+NA19239\s+0\s+0\s+1\s+1\s+.*" /> + </assert_contents> + </element> + </output_collection> + <output_collection name="kingoutput_log" count="2" /> + <output_collection name="kingoutput_pdf" count="1" > + <element name="pcplot" value="king_pcplot.png" compare="sim_size" delta="2500" /> + </output_collection> + </test> </tests> <help><![CDATA[ - `KING <http://people.virginia.edu/~wc9c/KING/>`_ is a toolset that makes use of high-throughput SNP data typically seen in a genome-wide association study (GWAS) - or a sequencing project. Applications of KING include family relationship inference and pedigree error checking, - quality control, population substructure identification, forensics, gene mapping, etc. + `KING <http://people.virginia.edu/~wc9c/KING/>`_ is a toolset that makes use of + high-throughput SNP data typically seen in a genome-wide association study (GWAS) or a + sequencing project. Applications of KING include family relationship inference and + pedigree error checking, quality control, population substructure identification, + forensics, gene mapping, etc. ]]> </help> @@ -326,3 +696,4 @@ <citation type="doi">10.1093/bioinformatics/btq559</citation> </citations> </tool> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/subset_bed.sh Mon Apr 18 18:04:56 2022 +0000 @@ -0,0 +1,20 @@ +## Build the new.${target_markers} PLINK fileset by keeping only the first ${target_markers} markers of ${binary_file} (requires plink on PATH) +binary_file=ex.bed +target_markers=6000 +target_cols=$(( 6 + target_markers * 2 )) +dec_prefix=out +subs_prefix=sub +new_prefix="new.${target_markers}" + +##conda activate plink +## Extract binary file into new map, ped, and fam files +plink --bfile "$(basename "${binary_file}" .bed)" --recode --out "${dec_prefix}" + +## Subset ped and map: keep the first 6 + 2*N ped columns and the first N map rows +cut -d' ' -f "1-${target_cols}" "${dec_prefix}.ped" > "${subs_prefix}.ped" +head -n "${target_markers}" "${dec_prefix}.map" > "${subs_prefix}.map" + +## Recode into new +plink --file "${subs_prefix}" --out "${new_prefix}" +echo "${new_prefix}"* +##conda deactivate
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.model Mon Apr 18 18:04:56 2022 +0000 @@ -0,0 +1,9 @@ +SNP EA AF WT CHR POS OA +rs326468 A 0.131 1.702 6 32626272 C +rs6596679 G 0.095 1.801 6 32591213 A +rs1500238 T 0.076 1.367 6 32605884 C +rs664978 T 0.345 0.839 6 32583299 C +rs1161240 C 0.216 1.079 6 32450613 A +rs2602970 T 0.265 0.403 11 2182224 A +rs1161312 C 0.459 0.295 17 38066240 T +