diff snpEff.xml @ 8:13b6ad2ddace

SnpEffect v3.2
author Jim Johnson <jj@umn.edu>
date Mon, 13 May 2013 12:45:07 -0500
parents b26a1aff7f81
children 937367efb1da
line wrap: on
line diff
--- a/snpEff.xml	Thu Mar 28 12:29:45 2013 -0500
+++ b/snpEff.xml	Mon May 13 12:45:07 2013 -0500
@@ -1,12 +1,67 @@
-<tool id="snpEff" name="SnpEff" version="3.1">
+<tool id="snpEff" name="SnpEff" version="3.2">
 	<description>Variant effect and annotation</description>
 	<!-- 
 	    You will need to change the path to wherever your installation is.
 		You can change the amount of memory used by snpEff, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
 	<command>java -Xmx6G -jar /path/to/your/snpEff/snpEff.jar eff -c /path/to/your/snpEff/snpEff/snpEff.config $inputFormat $offset -upDownStreamLen $udLength $filterIn $filterHomHet -no $filterOut -stats $statsFile $genomeVersion $input > $output </command>
+Options:
+        -a , -around            : Show N codons and amino acids around change (only in coding regions). Default is 0 codons.
+        -i <format>             : Input format [ vcf, txt, pileup, bed ]. Default: VCF.
+        -o <format>             : Output format [ txt, vcf, gatk, bed, bedAnn ]. Default: VCF.
+        -interval               : Use a custom interval file (you may use this option many times)
+        -chr <string>           : Prepend 'string' to chromosome name (e.g. 'chr1' instead of '1'). Only on TXT output.
+        -s,  -stats             : Name of stats file (summary). Default is 'snpEff_summary.html'
+        -t                      : Use multiple threads (implies '-noStats'). Default 'off'
+
+Sequence change filter options:
+        -del                    : Analyze deletions only
+        -ins                    : Analyze insertions only
+        -hom                    : Analyze homozygous variants only
+        -het                    : Analyze heterozygous variants only
+        -minQ X, -minQuality X  : Filter out variants with quality lower than X
+        -maxQ X, -maxQuality X  : Filter out variants with quality higher than X
+        -minC X, -minCoverage X : Filter out variants with coverage lower than X
+        -maxC X, -maxCoverage X : Filter out variants with coverage higher than X
+        -mnp                    : Only MNPs (multiple nucleotide polymorphisms)
+        -snp                    : Only SNPs (single nucleotide polymorphisms)
+
+Results filter options:
+        -fi  <bedFile>                  : Only analyze changes that intersect with the intervals specified in this file (you may use this option many times)
+        -no-downstream                  : Do not show DOWNSTREAM changes
+        -no-intergenic                  : Do not show INTERGENIC changes
+        -no-intron                      : Do not show INTRON changes
+        -no-upstream                    : Do not show UPSTREAM changes
+        -no-utr                         : Do not show 5_PRIME_UTR or 3_PRIME_UTR changes
+
+Annotations options:
+        -cancer                         : Perform 'cancer' comparisons (Somatic vs Germline). Default: false
+        -canon                          : Only use canonical transcripts.
+        -geneId                         : Use gene ID instead of gene name (VCF output). Default: false
+        -hgvs                           : Use HGVS annotations for amino acid sub-field. Default: false
+        -lof                            : Add loss of function (LOF) and Nonsense mediated decay (NMD) tags.
+        -reg <name>                     : Regulation track to use (this option can be used several times).
+        -oicr                           : Add OICR tag in VCF file. Default: false
+        -onlyReg                        : Only use regulation tracks.
+        -onlyTr <file.txt>              : Only use the transcripts in this file. Format: One transcript ID per line.
+        -sequenceOntolgy                : Use Sequence Ontology terms. Default: false
+        -ss, -spliceSiteSize <int>      : Set size for splice sites (donor and acceptor) in bases. Default: 2
+        -ud, -upDownStreamLen <int>     : Set upstream downstream interval length (in bases)
+
+Generic options:
+        -0                      : File positions are zero-based (same as '-inOffset 0 -outOffset 0')
+        -1                      : File positions are one-based (same as '-inOffset 1 -outOffset 1')
+        -c , -config            : Specify config file
+        -h , -help              : Show this help and exit
+        -if, -inOffset          : Offset input by a number of bases. E.g. '-inOffset 1' for one-based input files
+        -of, -outOffset         : Offset output by a number of bases. E.g. '-outOffset 1' for one-based output files
+        -noLog                  : Do not report usage statistics to server
+        -noStats                : Do not create stats (summary) file
+        -q , -quiet             : Quiet mode (do not show any messages or errors)
+        -v , -verbose           : Verbose mode
+
 	-->
 	<requirements>
-		<requirement type="package" version="3.1">snpEff</requirement>
+		<requirement type="package" version="3.2">snpEff</requirement>
 	</requirements>
 	<command>
 SNPEFF_DATA_DIR=`grep '^data_dir' \$JAVA_JAR_PATH/snpEff.config | sed 's/.*data_dir.*[=:]//'`;
@@ -14,14 +69,28 @@
 then java -Xmx6G -jar \$JAVA_JAR_PATH/snpEff.jar download  -c \$JAVA_JAR_PATH/snpEff.config $genomeVersion ;
 fi";
 java -Xmx6G -jar \$JAVA_JAR_PATH/snpEff.jar eff -c \$JAVA_JAR_PATH/snpEff.config -i $inputFormat -o $outputFormat -upDownStreamLen $udLength 
+#if $spliceSiteSize and $spliceSiteSize.__str__ != '':
+  -spliceSiteSize $spliceSiteSize
+#end if
 #if $filterIn and $filterIn.__str__ != 'no_filter':
   -$filterIn 
 #end if
 #if $filterHomHet and $filterHomHet.__str__ != 'no_filter':
   -$filterHomHet 
 #end if
+#if $annotations and $annotations.__str__ != '':
+  -#slurp
+  #echo ' -'.join($annotations.__str__.split(','))
+#end if
 #if $filterOut and $filterOut.__str__ != '':
-  #echo ' '.join($filterOut.__str__.split(','))
+  -#slurp
+  #echo ' -'.join($filterOut.__str__.split(','))
+#end if
+#if str( $transcripts ) != 'None':
+  -onlyTr $transcripts
+#end if
+#if str( $intervals ) != 'None':     ### fix this for multiple dataset input
+  -interval $intervals
 #end if
 #if $statsFile:
   -stats $statsFile 
@@ -29,26 +98,30 @@
 #if $offset.__str__ != '':
   -${offset} 
 #end if
-  $genomeVersion $input > $snpeff_output 
+#if $chr.__str__.strip() != '':
+  -chr "$chr" 
+#end if
+  $noLog $genomeVersion $input > $snpeff_output 
 </command>
 	<inputs>
 		<param format="vcf,tabular,pileup,bed" name="input" type="data" label="Sequence changes (SNPs, MNPs, InDels)"/>
 
 		<param name="inputFormat" type="select" label="Input format">
-			<option value="vcf">VCF</option>
-			<option value="txt">Tabular</option>
-			<option value="pileup">Pileup</option>
-			<option value="bed">BED</option>
+			<option value="vcf" selected="true">VCF</option>
+			<option value="txt">Tabular (Deprecated)</option>
+			<option value="pileup">Pileup (Deprecated)</option>
+			<option value="bed">BED (Deprecated)</option>
 		</param>
 
 		<param name="outputFormat" type="select" label="Output format">
+			<option value="vcf" selected="true">VCF (only if input is VCF)</option>
 			<option value="txt">Tabular</option>
-			<option value="vcf">VCF (only if input is VCF)</option>
 			<option value="bed">BED</option>
 			<option value="bedAnn">BED Annotations</option>
 		</param>
 
 		<param name="genomeVersion" type="select" label="Genome">
+                        <!--GENOME	DESCRIPTION-->
 			<options from_file="snpeffect_genomedb.loc">
 				<column name="name" index="1"/>
 				<column name="value" index="0"/>
@@ -66,40 +139,82 @@
 			<option value="20000">20000 bases</option>
 		</param>
 
-        <param name="filterHomHet" type="select" display="radio" label="Filter homozygous / heterozygous changes">
+		<param name="spliceSiteSize" type="select" optional="true" label="Set size for splice sites (donor and acceptor) in bases. Default: 2">
+			<option value="1">1 base</option>
+			<option value="2">2 bases</option>
+			<option value="3">3 bases</option>
+			<option value="4">4 bases</option>
+			<option value="5">5 bases</option>
+			<option value="6">6 bases</option>
+			<option value="7">7 bases</option>
+			<option value="8">8 bases</option>
+			<option value="9">9 bases</option>
+		</param>
+
+        	<param name="filterHomHet" type="select" display="radio" label="Filter homozygous / heterozygous changes">
 			<option value="no_filter" selected="true">No filter (analyze everything)</option>
 			<option value="hom">Analyze homozygous sequence changes only </option>
 			<option value="het">Analyze heterozygous sequence changes only </option>
-        </param>
+        	</param>
 
-        <param name="filterIn" type="select" display="radio" label="Filter sequence changes">
+                <!-- The tool testing code can not handle select,radio,checkbox values that start with '-', so the '-' is added in the command generation -->
+        	<param name="filterIn" type="select" display="radio" label="Filter sequence changes">
 			<option value="no_filter" selected="true">No filter (analyze everything)</option>
 			<option value="del">Analyze deletions only </option>
 			<option value="ins">Analyze insertions only </option>
-			<option value="nmp">Only MNPs (multiple nucleotide polymorphisms) </option>
+			<option value="mnp">Only MNPs (multiple nucleotide polymorphisms) </option>
 			<option value="snp">Only SNPs (single nucleotide polymorphisms) </option>
-        </param>
+        	</param>
+
+        	<param name="annotations" type="select" display="checkboxes" multiple="true" optional="true" label="Annotation options"> <!-- Option values omit the leading '-'; the command template prepends it to each selected value. The misspelled value 'sequenceOntolgy' is the flag passed to snpEff and is kept as-is. -->
+			<option value="cancer">Perform 'cancer' comparisons (Somatic vs Germline). Default: false</option>
+			<option value="canon">Only use canonical transcripts.</option>
+			<option value="geneId">Use gene ID instead of gene name (VCF output). Default: false</option>
+			<option value="hgvs">Use HGVS annotations for amino acid sub-field. Default: false</option>
+			<option value="lof">Add loss of function (LOF) and Nonsense mediated decay (NMD) tags.</option>
+			<option value="oicr">Add OICR tag in VCF file. Default: false</option>
+			<option value="onlyReg">Only use regulation tracks.</option>
+			<option value="sequenceOntolgy">Use Sequence Ontology terms. Default: false</option>
+        	</param>
 
-        <param name="filterOut" type="select" display="checkboxes" multiple="true" optional="true" label="Filter output">
-			<option value="-no-downstream">Do not show DOWNSTREAM changes </option>
-			<option value="-no-intergenic">Do not show INTERGENIC changes </option>
-			<option value="-no-intron">Do not show INTRON changes </option>
-			<option value="-no-upstream">Do not show UPSTREAM changes </option>
-			<option value="-no-utr">Do not show 5_PRIME_UTR or 3_PRIME_UTR changes </option>
-        </param>
+        	<param name="regulation" type="select" display="checkboxes" multiple="true" optional="true" label="Non-coding and regulatory Annotation">
+                       <help>These are available for only a few genomes</help>
+                       <!--GENOME	REG_NAME   NOTE(review): the option value is read from column 0 (GENOME) while the genomeVersion filter matches column 1 (REG_NAME), which looks swapped; $regulation is also not referenced in the visible part of the command template. TODO confirm against snpeffect_regulationdb.loc. -->
+                       <options from_file="snpeffect_regulationdb.loc">
+                                <column name="name" index="1"/>
+                                <column name="value" index="0"/>
+				<filter type="param_value" ref="genomeVersion" key="name" column="1" />
+                        </options>
+        	</param>
 
-        <param name="offset" type="select" display="radio" optional="true" label="Chromosomal position">
+        	<param name="intervals" format="bed" type="data" optional="true" label="Use custom interval file for annotation"/>
+        	<param name="transcripts" format="tabular" type="data" optional="true" label="Only use the transcripts in this file. Format: One transcript ID per line."/>
+
+        	<param name="filterOut" type="select" display="checkboxes" multiple="true" optional="true" label="Filter output">
+			<option value="no-downstream">Do not show DOWNSTREAM changes </option>
+			<option value="no-intergenic">Do not show INTERGENIC changes </option>
+			<option value="no-intron">Do not show INTRON changes </option>
+			<option value="no-upstream">Do not show UPSTREAM changes </option>
+			<option value="no-utr">Do not show 5_PRIME_UTR or 3_PRIME_UTR changes </option>
+        	</param>
+
+        	<param name="offset" type="select" display="radio" optional="true" label="Chromosomal position">
 			<option value="" selected="true">Use default (based on input type)</option>
 			<option value="0">Force zero-based positions (both input and output)</option>
 			<option value="1">Force one-based positions (both input and output)</option>
 		</param>
-        <param name="generate_stats" type="boolean" truevalue="" falsevalue="-noStats" checked="true" label="Produce Summary Stats"/>
+        	<param name="chr" type="text" optional="true" label="Text to prepend to chromosome name" help="By default SnpEff simplifies all chromosome names. For instance 'chr1' is just '1'.  You can prepend any string you want to the chromosome name.">
+			<!-- Optional prefix for chromosome names. The command template passes it as the quoted -chr argument only when it is non-blank, so values containing whitespace are rejected here. -->
+			<validator type="regex" message="No whitespace allowed">^\S*$</validator>
+		</param>
+        	<param name="generate_stats" type="boolean" truevalue="" falsevalue="-noStats" checked="true" label="Produce Summary Stats"/>
+        	<param name="noLog" type="boolean" truevalue="-noLog" falsevalue="" checked="true" label="Do not report usage statistics to server"/>
 	</inputs>
 	<outputs>
-		<data format="tabular" name="snpeff_output" >
+		<data format="vcf" name="snpeff_output" >
 			<change_format>
+				<when input="outputFormat" value="vcf" format="vcf" />
 				<when input="outputFormat" value="txt" format="tabular" />
-				<when input="outputFormat" value="vcf" format="vcf" />
 				<when input="outputFormat" value="bed" format="bed" />
 				<when input="outputFormat" value="bedAnn" format="bed" />
 			</change_format>
@@ -110,8 +225,8 @@
                 </data>
 	</outputs>
         <stdio>
-          <exit_code range=":-1"  level="fatal"   description="Error: Cannot open file" />
           <exit_code range="1:"  level="fatal"   description="Error" />
+          <exit_code range="-1"  level="fatal"   description="Error: Cannot open file" />
         </stdio>
         <tests>
             <test>
@@ -122,20 +237,26 @@
                 <param name="udLength" value="0"/>
                 <param name="filterHomHet" value="no_filter"/>
                 <param name="filterIn" value="no_filter"/>
-                <param name="filterOut" value="-no-upstream"/>
+                <param name="generate_stats" value="False"/>
+                <!--
+                <param name="filterOut" value="no-upstream"/>
+                -->
                 <output name="snpeff_output">
                     <assert_contents>
                         <!-- Check that an effect was added -->
-                        <has_text text="EFF=NON_SYNONYMOUS_CODING" />
+                        <has_text text="EFF=" />
                     </assert_contents>
                 </output>
+                        <!-- Check for a HTML header indicating that this was successful -->
+                <!--
                 <output name="statsFile">
                     <assert_contents>
-                        <!-- Check for a HTML header indicating that this was successful -->
                         <has_text text="SnpEff: Variant analysis" />
                     </assert_contents>
                 </output>
+                --> 
             </test>
+
             <test>
                 <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
                 <param name="inputFormat" value="vcf"/>
@@ -184,7 +305,7 @@
                 <param name="udLength" value="0"/>
                 <param name="filterHomHet" value="no_filter"/>
                 <param name="filterIn" value="no_filter"/>
-                <param name="filterOut" value="-no-upstream"/>
+                <param name="filterOut" value="no-upstream"/>
                 <param name="generate_stats" value="False"/>
                 <output name="snpeff_output">
                     <assert_contents>