diff snpSift_filter.xml @ 0:9e8280e19338 draft

Uploaded
author iuc
date Thu, 22 Jan 2015 08:39:07 -0500
parents
children 98708b88af9f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_filter.xml	Thu Jan 22 08:39:07 2015 -0500
@@ -0,0 +1,189 @@
+<tool id="snpSift_filter" name="SnpSift Filter" version="4.0.0">
+    <options sanitize="False" />
+    <description>Filter variants using arbitrary expressions</description>
+    <expand macro="requirements" />
+    <macros>
+        <import>snpSift_macros.xml</import>
+    </macros>
+    <command>
+        java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar filter -f $input -e $exprFile $inverse 
+        #if $filtering.mode == 'field':
+            #if $filtering.replace.pass:
+                --pass
+                #if $filtering.replace.filterId and len($filtering.replace.filterId.__str__.strip()) > 0:
+                    --filterId "$filtering.replace.filterId"
+                #end if
+            #end if
+            #if $filtering.addFilter and len($filtering.addFilter.__str__.strip()) > 0:
+                --addFilter "$filtering.addFilter"
+            #end if
+            #if $filtering.rmFilter and len($filtering.rmFilter.__str__.strip()) > 0:
+                --rmFilter "$filtering.rmFilter"
+            #end if
+        #end if
+         > $output
+    </command>
+    <inputs>
+        <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
+        <param name="expr" type="text" label="Filter criteria" size="160" help="Need help? See below a few examples." />
+        <param name="inverse" type="boolean" truevalue="--inverse" falsevalue="" checked="false" label="Inverse filter" help="Show lines that do not match filter expression" />
+        <conditional name="filtering">
+            <param name="mode" type="select" label="Filter mode">
+                <option value="entries" selected="true">Retain entries that pass filter, remove other entries</option>
+                <option value="field">Change the FILTER field, but retain all entries</option>
+            </param> 
+            <when value="entries"/>
+            <when value="field">
+                <conditional name="replace">
+                    <param name="pass" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Set matching entry FILTER to 'PASS'" 
+                           help="appends an ID tag to non-matching entry FILTER "/>
+                    <when value="no"/>
+                    <when value="yes">
+                        <param name="filterId" type="text" value="" optional="true" label="ID appended to non-matching (##FILTER tag in header and FILTER VCF field)." size="10"
+                               help="Default ID is 'SnpSift'"/>
+                    </when>
+                </conditional>
+                <param name="addFilter" type="text" value="" optional="true" label="Add a string to FILTER VCF field if 'expression' is true." size="10"/>
+                <param name="rmFilter" type="text" value="" optional="true" label="Remove a string from FILTER VCF field if 'expression' is true (and 'str' is in the field)." size="10"/>
+            </when>
+        </conditional>
+    </inputs>
+    <configfiles>
+        <configfile name="exprFile">
+$expr#slurp
+        </configfile> 
+    </configfiles>
+
+    <outputs>
+        <data format="vcf" name="output" />
+    </outputs>
+    <expand macro="stdio" />
+    <tests>
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="QUAL >= 50"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="28837706" />
+            <not_has_text text="NT_166464" />
+            </assert_contents>
+        </output>
+        </test>
+
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="(CHROM = '19')"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="3205820" />
+            <not_has_text text="NT_16" />
+            </assert_contents>
+        </output>
+        </test>
+
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="(POS >= 20175) &amp; (POS &lt;= 35549)"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="20175" />
+            <has_text text="35549" />
+            <has_text text="22256" />
+            <not_has_text text="18933" />
+            <not_has_text text="37567" />
+            </assert_contents>
+        </output>
+        </test>
+
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="( DP >= 5 )"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="DP=5;" />
+            <has_text text="DP=6;" />
+            <not_has_text text="DP=1;" />
+            </assert_contents>
+        </output>
+        </test>
+    </tests>
+    <help>
+
+**SnpSift filter**
+
+You can filter a VCF file using arbitrary expressions, for instance "(QUAL > 30) | (exists INDEL) | ( countHet() > 2 )". The actual expressions can be quite complex, so it allows for a lot of flexibility.
+
+Some examples:
+
+  - *I want just the variants from the second million bases of chr1*:
+
+    ::
+
+    ( CHROM = 'chr1' ) &amp; ( POS &gt; 1000000 )  &amp; ( POS &lt; 2000000 )
+
+  - *Filter value is either 'PASS' or it is missing*:
+
+    ::
+
+    (FILTER = 'PASS') | ( na FILTER )  
+
+  - *I want to filter lines with an EFF of 'frameshift_variant' ( for vcf files using Sequence Ontology terms )*:
+
+    ::
+  
+    ( EFF[*].EFFECT = 'frameshift_variant' )
+
+  - *I want to filter lines with an EFF of 'FRAME_SHIFT' ( for vcf files using Classic Effect names )*:
+
+    ::
+  
+    ( EFF[*].EFFECT = 'FRAME_SHIFT' )
+
+  - *I want to filter out samples with quality less than 30*:
+
+    ::
+
+    ( QUAL &gt; 30 )
+
+  - *...but we also want InDels that have quality 20 or more*:
+
+    ::
+
+    (( exists INDEL ) &amp; (QUAL >= 20)) | (QUAL >= 30 )
+  
+  - *...or any homozygous variant present in more than 3 samples*:
+
+    ::
+
+    (countHom() > 3) | (( exists INDEL ) &amp; (QUAL >= 20)) | (QUAL >= 30 )
+  
+  - *...or any heterozygous sample with coverage 25 or more*:
+
+    ::
+
+    ((countHet() > 0) &amp; (DP >= 25)) | (countHom() > 3) | (( exists INDEL ) &amp; (QUAL >= 20)) | (QUAL >= 30 )
+  
+  - *I want to keep samples where the genotype for the first sample is homozygous variant and the genotype for the second sample is reference*:
+
+    ::
+  
+    (isHom( GEN[0] ) &amp; isVariant( GEN[0] ) &amp; isRef( GEN[1] ))
+
+
+**For information regarding HGVS and Sequence Ontology terms versus classic names**:
+
+  - http://snpeff.sourceforge.net/SnpEff_manual.html#cmdline for the options: -classic, -hgvs, and -sequenceOntology
+  - http://snpeff.sourceforge.net/SnpEff_manual.html#input for the table containing the classic name and sequence onology term for each effect
+
+
+@EXTERNAL_DOCUMENTATION@
+	http://snpeff.sourceforge.net/SnpSift.html#filter
+
+@CITATION_SECTION@
+
+    </help>
+</tool>