changeset 1:e9758eee6697 draft

Update to KggSeq v0.7_20150118
author crs4
date Tue, 28 Apr 2015 04:42:14 -0400
parents d388273fb83f
children e1a21c2f4997
files COPYING kggseq_variant_selection.xml tool_dependencies.xml
diffstat 3 files changed, 99 insertions(+), 81 deletions(-) [+]
line wrap: on
line diff
--- a/COPYING	Fri Sep 12 21:40:16 2014 -0400
+++ b/COPYING	Tue Apr 28 04:42:14 2015 -0400
@@ -1,7 +1,7 @@
-Copyright © 2013-2014 CRS4 Srl. http://www.crs4.it/
+Copyright © 2013-2015 CRS4 Srl. http://www.crs4.it/
 Created by:
 Paolo Uva <paolo.uva@crs4.it>
-Nicola Soranzo <nicola.soranzo@crs4.it>
+Nicola Soranzo <nicola.soranzo@tgac.ac.uk>
 
 Permission is hereby granted, free of charge, to any person obtaining a
 copy of this software and associated documentation files (the
--- a/kggseq_variant_selection.xml	Fri Sep 12 21:40:16 2014 -0400
+++ b/kggseq_variant_selection.xml	Tue Apr 28 04:42:14 2015 -0400
@@ -1,7 +1,7 @@
-<tool id="kggseq_variant_selection" name="Variant selection with KGGSeq" version="1.1">
+<tool id="kggseq_variant_selection" name="Variant selection with KGGSeq" version="1.2">
   <description></description>
   <requirements>
-    <requirement type="package" version="0.4_20140910">kggseq</requirement>
+    <requirement type="package" version="0.7_20150118">kggseq</requirement>
   </requirements>
   <command>
 java -jar \$KGGSEQ_JAR_PATH/kggseq.jar
@@ -19,49 +19,37 @@
 --db-gene $db_gene
 $composite_subject_id
 
-## Variant filters
+## Variant and genotype filters
 $pass_variant_only
-#if str($variant_filters.variant_filters_select) == "yes"
-  --seq-qual $variant_filters.seq_qual
-  --seq-mq $variant_filters.seq_mq
-  --seq-sb $variant_filters.seq_sb
-  --seq-fs $variant_filters.seq_fs
-  --min-heta $variant_filters.min_heta
-  --min-homa $variant_filters.min_homa
-  --min-hetu $variant_filters.min_hetu
-  --min-homu $variant_filters.min_homu
-  --min-obsa $variant_filters.min_obsa
-  --min-obsu $variant_filters.min_obsu
-  --min-obs $variant_filters.min_obs
-  #if str($variant_filters.hwe_control)
-    --hwe-control $variant_filters.hwe_control
-  #end if
-  #if str($variant_filters.hwe_case)
-    --hwe-case $variant_filters.hwe_case
+#if str($variant_genotype_filters.variant_genotype_filters_select) == "yes"
+  --seq-qual $variant_genotype_filters.seq_qual
+  --seq-mq $variant_genotype_filters.seq_mq
+  --seq-sb $variant_genotype_filters.seq_sb
+  --seq-fs $variant_genotype_filters.seq_fs
+  --min-heta $variant_genotype_filters.min_heta
+  --min-homa $variant_genotype_filters.min_homa
+  --min-hetu $variant_genotype_filters.min_hetu
+  --min-homu $variant_genotype_filters.min_homu
+  --min-obsa $variant_genotype_filters.min_obsa
+  --min-obsu $variant_genotype_filters.min_obsu
+  --min-obs $variant_genotype_filters.min_obs
+  #if str($variant_genotype_filters.hwe_control)
+    --hwe-control $variant_genotype_filters.hwe_control
   #end if
-  #if str($variant_filters.hwe_all)
-    --hwe-all $variant_filters.hwe_all
+  #if str($variant_genotype_filters.hwe_case)
+    --hwe-case $variant_genotype_filters.hwe_case
+  #end if
+  #if str($variant_genotype_filters.hwe_all)
+    --hwe-all $variant_genotype_filters.hwe_all
   #end if
+  --gty-qual $variant_genotype_filters.gty_qual
+  --gty-dp $variant_genotype_filters.gty_dp
+  --gty-sec-pl $variant_genotype_filters.gty_sec_pl
+  --gty-af-ref $variant_genotype_filters.gty_af_ref
+  --gty-af-het $variant_genotype_filters.gty_af_het
+  --gty-af-alt $variant_genotype_filters.gty_af_alt
 #else
-  --seq-qual 0
-  --seq-mq 0
-#end if
-
-## Genotype filters
-#if str($genotype_filters.genotype_filters_select) == "yes"
-  --gty-qual $genotype_filters.gty_qual
-  --gty-dp $genotype_filters.gty_dp
-  --gty-sec-pl $genotype_filters.gty_sec_pl
-  --gty-af-ref $genotype_filters.gty_af_ref
-  --gty-af-het $genotype_filters.gty_af_het
-  --gty-af-alt $genotype_filters.gty_af_alt
-#else
-  --gty-qual 0
-  --gty-dp 0
-  --gty-sec-pl 0
-  --gty-af-ref 1
-  --gty-af-het 0
-  --gty-af-alt 0
+  --no-qc
 #end if
 
 ## Genetic inheritance
@@ -75,6 +63,19 @@
   #end if
 #end if
 
+## Homozygosity - IBS - IBD filters
+#if str($hom_ibs_ibd_filters.hom_ibs_ibd_filters_select) == "yes"
+  #if str($hom_ibs_ibd_filters.homozygosity_case_filter)
+    --homozygosity-case-filter $hom_ibs_ibd_filters.homozygosity_case_filter
+  #end if
+  #if str($hom_ibs_ibd_filters.ibs_case_filter)
+    --ibs-case-filter $hom_ibs_ibd_filters.ibs_case_filter
+  #end if
+  #if str($hom_ibs_ibd_filters.ibd_annot) != 'None'
+    --ibd-annot $hom_ibs_ibd_filters.ibd_annot
+  #end if
+#end if
+
 ## Gene feature filters
 #if str($gene_feature_filters.gene_feature_filters_select) == "yes" and $gene_feature_filters.gene_features
   --gene-feature-in $gene_feature_filters.gene_features
@@ -120,6 +121,9 @@
 
 ## Add annotations
 #if str($add_annotations.add_annotations_select) == "yes"
+  #if str($add_annotations.o_flanking_seq)
+    --o-flanking-seq $add_annotations.o_flanking_seq
+  #end if
   $add_annotations.genome_annotation
   $add_annotations.omim_annotation
   $add_annotations.cosmic_annotation
@@ -138,18 +142,19 @@
   <inputs>
     <param name="inputFile" type="data" format="vcf" label="VCF Variant file (--vcf-file)" help="Coordinates must refer to hg19" />
     <param name="pedFile" type="data" format="tabular" label="Pedigree (--ped-file)" />
+    <param name="composite_subject_id" type="boolean" truevalue="--composite-subject-id" falsevalue="" checked="false" label="Composite subject ID (--composite-subject-id)" />
     <param name="db_gene" type="select" display="checkboxes" multiple="true" label="Database(s) to annotate and filter variants (--db-gene)">
       <option value="refgene" selected="true">refgene: The RefGene database compiled by UCSC from hg19 refGene. Note: RefSeq has NO mitochondria gene definition</option>
-      <option value="gencode">gencode: The GENCODE gene sets. Note: GECODE contains similar number of coding genes but more transcripts than RefGene. It HAS the mitochondria gene definition</option>
+      <option value="gencode">gencode: The GENCODE gene sets. Note: GENCODE contains similar number of coding genes but more transcripts than RefGene. It HAS the mitochondria gene definition</option>
       <option value="knowngene">knowngene: The UCSC knonwGene datasase compiled by UCSC from hg19 knownGene</option>
+      <option value="ensembl">ensembl: The Ensembl gene database compiled by UCSC from hg19 ensGene</option>
       <validator type="no_options" message="Select at least one database" />
     </param>
-    <param name="composite_subject_id" type="boolean" truevalue="--composite-subject-id" falsevalue="" checked="false" label="Composite subject ID (--composite-subject-id)" />
     <param name="pass_variant_only" type="boolean" truevalue="--vcf-filter-in PASS" falsevalue="" checked="true" label="Don't include filtered loci (--vcf-filter-in PASS)" help="Keep only variants with FILTER equal to PASS" />
 
-    <!-- Variant quality control -->
-    <conditional name="variant_filters">
-      <param name="variant_filters_select" type="select" label="Specify variant quality filters?">
+    <!-- Variant and genotype quality control -->
+    <conditional name="variant_genotype_filters">
+      <param name="variant_genotype_filters_select" type="select" label="Specify variant and genotype quality filters?">
         <option value="yes">Yes</option>
         <option value="no" selected="true">No</option>
       </param>
@@ -158,27 +163,16 @@
         <param name="seq_mq" type="integer" value="20" label="Minimum overall Phred-scaled mapping quality score for the variant (--seq-mq)" />
         <param name="seq_sb" type="float" value="-10" label="Maximal overall strand bias score for the variant (--seq-sb)" />
         <param name="seq_fs" type="integer" value="60" label="Maximal overall strand bias Phred-scaled p-value (using Fisher's exact test) for the variant (--seq-fs)" />
-        <param name="min_heta" type="integer" value="1" label="Minimal observed number of heterozygote genotypes in cases (the affected) (--min-heta)" />
-        <param name="min_homa" type="integer" value="1" label="Minimal observed number of alternate homozygote genotypes in cases (the affected) (--min-homa)" />
-        <param name="min_hetu" type="integer" value="1" label="Minimal observed number of heterozygote genotypes in controls (the unaffected) (--min-hetu)" />
-        <param name="min_homu" type="integer" value="1" label="Minimal observed number of alternate homozygote genotypes in controls (the unaffected) (--min-homu)" />
-        <param name="min_obsa" type="integer" value="1" label="Minimal observed number of non-missing genotypes in cases (the affected) (--min-obsa)" />
-        <param name="min_obsu" type="integer" value="1" label="Minimal observed number of non-missing genotypes in controls (the unaffected) (--min-obsu)" />
-        <param name="min_obs" type="integer" value="2" label="Minimal observed number of non-missing genotypes in all samples (--min-obs)" />
+        <param name="min_heta" type="integer" value="0" label="Minimal observed number of heterozygote genotypes in cases (the affected) (--min-heta)" />
+        <param name="min_homa" type="integer" value="0" label="Minimal observed number of alternate homozygote genotypes in cases (the affected) (--min-homa)" />
+        <param name="min_hetu" type="integer" value="0" label="Minimal observed number of heterozygote genotypes in controls (the unaffected) (--min-hetu)" />
+        <param name="min_homu" type="integer" value="0" label="Minimal observed number of alternate homozygote genotypes in controls (the unaffected) (--min-homu)" />
+        <param name="min_obsa" type="integer" value="0" label="Minimal observed number of non-missing genotypes in cases (the affected) (--min-obsa)" />
+        <param name="min_obsu" type="integer" value="0" label="Minimal observed number of non-missing genotypes in controls (the unaffected) (--min-obsu)" />
+        <param name="min_obs" type="integer" value="1" label="Minimal observed number of non-missing genotypes in all samples (--min-obs)" />
         <param name="hwe_control" type="float" value="" optional="true" label="Exclude variants in controls with the Hardy-Weinberg test p-value &lt;= this value (--hwe-control)" />
         <param name="hwe_case" type="float" value="" optional="true" label="Exclude variants in cases with the Hardy-Weinberg test p-value &lt;= this value (--hwe-case)" />
         <param name="hwe_all" type="float" value="" optional="true" label="Exclude variants in all subjects with the Hardy-Weinberg test p-value &lt;= this value (--hwe-all)" />
-      </when>
-      <when value="no" />
-    </conditional>
-
-    <!-- Genotype quality control -->
-    <conditional name="genotype_filters">
-      <param name="genotype_filters_select" type="select" label="Specify genotype quality filters?">
-        <option value="yes">Yes</option>
-        <option value="no" selected="true">No</option>
-      </param>
-      <when value="yes">
         <param name="gty_qual" type="integer" value="10" label="Minimum Phred-scaled genotyping quality (--gty-qual)" />
         <param name="gty_dp" type="integer" value="4" label="Minimal read depth per genotype (--gty-dp)" />
         <param name="gty_sec_pl" type="integer" value="20" label="Minimal value for second smallest normalized Phred-scaled genotype quality (--gty-sec-pl)" />
@@ -229,6 +223,20 @@
       <when value="no" />
     </conditional>
 
+    <!-- Homozygosity, IBS and IBD filtering -->
+    <conditional name="hom_ibs_ibd_filters">
+      <param name="hom_ibs_ibd_filters_select" type="select" label="Specify homozygosity filters?">
+        <option value="yes">Yes</option>
+        <option value="no" selected="true">No</option>
+      </param>
+      <when value="yes">
+        <param name="homozygosity_case_filter" type="integer" value="" optional="true" label="Filter by Runs of Homozygosity (ROH) (--homozygosity-case-filter)" help="Minimal length (in kb) of consecutive homozygous genotype for each interesting variant" />
+        <param name="ibs_case_filter" type="integer" value="" optional="true" label="Filter by Identical by State (IBS) (--ibs-case-filter)" help="Minimal length (in kb) of the region in which there is at least one allele identical among all cases" />
+        <param name="ibd_annot" type="data" format="txt,tabular,bed" optional="true" label="Add Identical by Descent (IBD) annotation (--ibd-annot)" help="File with IBD or significant linkage regions. Variants within these regions will be highlighted. Note: title line CHR START END is needed" />
+      </when>
+      <when value="no" />
+    </conditional>
+
     <!-- Gene feature filtering -->
     <conditional name="gene_feature_filters">
       <param name="gene_feature_filters_select" type="select" label="Specify gene feature filters?">
@@ -252,29 +260,37 @@
           <option value="12">Downstream of transcription end site</option>
           <option value="13">ncRNA</option>
           <option value="14">Intergenic</option>
-          <option value="15">Unknown</option>
+          <option value="15">Monomorphic</option>
+          <option value="16">Unknown</option>
         </param>
         <param name="splicing" type="integer" value="2" label="Window size in base-pair around the splicing junction to flag the variant as splicing (--splicing)" />
-        <param name="neargene" type="integer" value="1000" label="Size of region upstream and downstream (--neargene)" />
+        <param name="neargene" type="integer" value="1000" label="Size in base-pair of region upstream and downstream (--neargene)" />
       </when>
       <when value="no" />
     </conditional>
 
     <!-- Allele frequency filtering -->
     <conditional name="allele_freq_filters">
-      <param name="allele_freq_filters_select" type="select" label="Specify common variants filters?">
+      <param name="allele_freq_filters_select" type="select" label="Specify filters by allele frequency?">
         <option value="yes">Yes</option>
         <option value="no" selected="true">No</option>
       </param>
       <when value="yes">
         <param name="allele_freq_db" type="select" display="checkboxes" multiple="true" label="Select databases for allelic frequency filtering (--db-filter)">
-          <option value="hg19_1kg201305" selected="true">hg19_1kg201305: 1000 Genomes Project 2013 May release</option>
-          <option value="hg19_1kg201204">hg19_1kg201204: 1000 Genomes Project 2012 April release</option>
-          <option value="hg19_ESP6500AA" selected="true">hg19_ESP6500AA: African American dataset from NHLBI GO Exome Sequencing Project (ESP6500)</option>
-          <option value="hg19_ESP6500EA" selected="true">hg19_ESP6500EA: European American dataset from NHLBI GO Exome Sequencing Project (ESP6500)</option>
-          <option value="hg19_dbsnp141" selected="true">hg19_dbsnp141: dbSNP version 141</option>
-          <option value="hg19_dbsnp138">hg19_dbsnp138: dbSNP version 138</option>
-          <option value="hg19_dbsnp137">hg19_dbsnp137: dbSNP version 137</option>
+          <option value="1kg201305" selected="true">1KG 201305: 1000 Genomes Project 2013 May release</option>
+          <option value="1kg201204">1KG 201204: 1000 Genomes Project 2012 April release</option>
+          <option value="1kgafr201204">1KG 201204 AFR: 1000 Genomes Project 2012 April release - African</option>
+          <option value="1kgeur201204">1KG 201204 EUR: 1000 Genomes Project 2012 April release - European</option>
+          <option value="1kgamr201204">1KG 201204 AMR: 1000 Genomes Project 2012 April release - Mixed American</option>
+          <option value="1kgasn201204">1KG 201204 ASN: 1000 Genomes Project 2012 April release - Asian</option>
+          <option value="ESP6500AA" selected="true">ESP6500AA: African American dataset from NHLBI GO Exome Sequencing Project (ESP6500)</option>
+          <option value="ESP6500EA" selected="true">ESP6500EA: European American dataset from NHLBI GO Exome Sequencing Project (ESP6500)</option>
+          <option value="dbsnp141" selected="true">dbSNP 141</option>
+          <option value="dbsnp138">dbSNP 138</option>
+          <option value="dbsnp138nf">dbSNP 138nf: dbSNP version 138 without the flagged SNPs by UCSC. Flagged SNPs include SNPs clinically associated by dbSNP, mapped to a single location in the reference genome assembly, and not known to have a minor allele frequency of at least 1%</option>
+          <option value="dbsnp137">dbSNP 137</option>
+          <option value="dbsnp135">dbSNP 135</option>
+          <option value="exac">Exome Aggregation Consortium (ExAC): Variants from 61,486 unrelated individuals sequenced as part of various disease-specific and population genetic studies</option>
         </param>
         <param name="rare_allele_freq" type="float" value="0.01" label="Minor allele frequency (MAF) for selecting rare variants (--rare-allele-freq)" help="Set to &gt;1 to only annotate MAF, without filtering" />
       </when>
@@ -314,7 +330,7 @@
         <option value="no" selected="true">No</option>
       </param>
       <when value="yes">
-        <param name="filter_nondisease_variant" type="boolean" truevalue="--filter-nondisease-variant" falsevalue="" checked="true" label="Filter out variants predicted to be non-disease causal (--filter-nondisease-variant)" />
+        <param name="filter_nondisease_variant" type="boolean" truevalue="--filter-nondisease-variant" falsevalue="" checked="false" label="Filter out variants predicted to be non-disease causal (--filter-nondisease-variant)" />
         <param name="mendel_causing" type="boolean" truevalue="--mendel-causing-predict all" falsevalue="" checked="true" label="Predict Mendelian disease-causing variants by logistic regression model (--mendel-causing-predict)" />
       </when>
       <when value="no" />
@@ -327,10 +343,11 @@
         <option value="no" selected="true">No</option>
       </param>
       <when value="yes">
+        <param name="o_flanking_seq" type="integer" value="" optional="true" label="Size in bp of flanking sequence to extract (--o-flanking-seq)" /> <!-- optional: the command template only emits the flag when a value is set -->
         <param name="genome_annotation" type="boolean" truevalue="--genome-annot" falsevalue="" checked="true" label="Add genomic functional annotations (presudogenes, TFBS, enhancer, UniProt) (--genome-annot)" />
         <param name="omim_annotation" type="boolean" truevalue="--omim-annot" falsevalue="" checked="true" label="Add OMIM annotation (--omim-annot)" />
         <param name="cosmic_annotation" type="boolean" truevalue="--cosmic-annot" falsevalue="" checked="true" label="Add COSMIC annotation (--cosmic-annot)" />
-        <param name="pubmed_type" type="select" label="Text mining in PubMed: find co-mentions of the search terms specified below with:">
+        <param name="pubmed_type" type="select" label="Text mining in PubMed: find co-mentions of the search terms specified below with">
           <option value="--pubmed-mining">The cytogenetic position of each variant (--pubmed-mining)</option>
           <option value="--pubmed-mining-gene">The gene in which each variant is located (--pubmed-mining-gene)</option>
         </param>
--- a/tool_dependencies.xml	Fri Sep 12 21:40:16 2014 -0400
+++ b/tool_dependencies.xml	Tue Apr 28 04:42:14 2015 -0400
@@ -1,11 +1,12 @@
 <?xml version="1.0"?>
 <tool_dependency>
-  <package name="kggseq" version="0.4_20140910">
+  <package name="kggseq" version="0.7_20150118">
     <install version="1.0">
       <actions>
-        <action type="download_by_url" target_filename="kggseq_archive-0.4_20140910.tar.gz">https://github.com/nsoranzo/kggseq_archive/archive/v0.4_20140910.tar.gz</action>
+        <action type="download_by_url" target_filename="kggseq_archive-0.7_20150118.tar.gz">https://github.com/crs4/kggseq_archive/archive/v0.7_20150118.tar.gz</action>
         <action type="shell_command">touch test.vcf</action>
-        <action type="shell_command">java -jar kggseq.jar --no-lib-check --resource resources --buildver hg19 --db-filter hg19_1kg201305,hg19_1kg201204,hg19_ESP6500AA,hg19_ESP6500EA,hg19_dbsnp141,hg19_dbsnp138,hg19_dbsnp137 --genome-annot --db-gene refgene,gencode,knowngene --db-score dbnsfp --superdup-annot --vcf-file test.vcf</action>
+        <action type="shell_command">java -jar kggseq.jar --no-lib-check --resource resources --buildver hg19 --db-filter 1kg201305,1kg201204,1kgafr201204,1kgeur201204,1kgamr201204,1kgasn201204,dbsnp135,dbsnp137,dbsnp138,dbsnp138nf,dbsnp141,ESP6500AA,ESP6500EA,exac --genome-annot --db-gene refgene,gencode,knowngene,ensembl --db-score dbnsfp --superdup-annot --cosmic-annot --vcf-file test.vcf</action>
+        <action type="shell_command">java -Xmx4g -jar kggseq.jar --no-lib-check --no-qc --resource resources --buildver hg19 --o-flanking-seq 10 --vcf-file examples/rare.disease.hg19.vcf</action>
         <action type="move_directory_files">
           <source_directory>.</source_directory>
           <destination_directory>$INSTALL_DIR</destination_directory>