view snpEff.xml @ 7:b26a1aff7f81

Fix issues with snpEff commandline template and add test cases
author Jim Johnson <jj@umn.edu>
date Thu, 28 Mar 2013 12:29:45 -0500
parents 3b0c657b852b
children 13b6ad2ddace
line wrap: on
line source

<tool id="snpEff" name="SnpEff" version="3.1">
	<description>Variant effect and annotation</description>
	<!-- 
	    You will need to change the path to wherever your installation is.
	    You can change the amount of memory used by snpEff by changing the -Xmx parameter (e.g. use -Xmx2G for 2 GB of memory).
	<command>java -Xmx6G -jar /path/to/your/snpEff/snpEff.jar eff -c /path/to/your/snpEff/snpEff/snpEff.config $inputFormat $offset -upDownStreamLen $udLength $filterIn $filterHomHet -no $filterOut -stats $statsFile $genomeVersion $input > $output </command>
	-->
	<!--
	    Declares the snpEff 3.1 package dependency. The command template locates
	    snpEff.jar and snpEff.config through $JAVA_JAR_PATH
	    (NOTE(review): presumably exported by this package's environment; confirm).
	-->
	<requirements>
		<requirement version="3.1" type="package">snpEff</requirement>
	</requirements>
	<!--
	    Command template (Cheetah, rendered into a shell command line):
	      1. Read the data_dir setting from snpEff.config under $JAVA_JAR_PATH.
	      2. If nothing exists for the selected genome under data_dir, run "snpEff download" for it.
	         The existence test is wrapped in eval so the path read from the config is parsed
	         again by the shell (NOTE(review): presumably to expand a leading ~ in data_dir; confirm).
	      3. Run "snpEff eff" with the selected options and redirect its stdout to snpeff_output.
	    Option handling:
	      * filterIn / filterHomHet are passed as flags unless set to no_filter.
	      * filterOut holds comma-separated flags that are re-joined with spaces.
	      * $generate_stats renders as -noStats when "Produce Summary Stats" is unchecked and as an
	        empty string otherwise, so snpEff is told to skip the summary when it is not wanted.
	      * The offset flag (-0 / -1) is emitted only for an explicit choice; an unset optional
	        select (rendered as 'None') is treated like the empty "use default" value.
	-->
	<command>
SNPEFF_DATA_DIR=`grep '^data_dir' \$JAVA_JAR_PATH/snpEff.config | sed 's/.*data_dir.*[=:]//'`;
eval "if [ ! -e \$SNPEFF_DATA_DIR/$genomeVersion ] ;
then java -Xmx6G -jar \$JAVA_JAR_PATH/snpEff.jar download  -c \$JAVA_JAR_PATH/snpEff.config $genomeVersion ;
fi";
java -Xmx6G -jar \$JAVA_JAR_PATH/snpEff.jar eff -c \$JAVA_JAR_PATH/snpEff.config -i $inputFormat -o $outputFormat -upDownStreamLen $udLength 
#if $filterIn and $filterIn.__str__ != 'no_filter':
  -$filterIn 
#end if
#if $filterHomHet and $filterHomHet.__str__ != 'no_filter':
  -$filterHomHet 
#end if
#if $filterOut and $filterOut.__str__ != '':
  #echo ' '.join($filterOut.__str__.split(','))
#end if
#if $statsFile:
  -stats $statsFile 
#end if
  $generate_stats 
#if $offset.__str__ not in ('', 'None'):
  -${offset} 
#end if
  $genomeVersion $input > $snpeff_output 
</command>
	<inputs>
		<!-- Dataset holding the sequence changes to annotate. -->
		<param name="input" type="data" format="vcf,tabular,pileup,bed" label="Sequence changes (SNPs, MNPs, InDels)"/>

		<!-- Passed to snpEff as the -i argument. -->
		<param name="inputFormat" type="select" label="Input format">
			<option value="vcf">VCF</option>
			<option value="txt">Tabular</option>
			<option value="pileup">Pileup</option>
			<option value="bed">BED</option>
		</param>

		<!-- Passed to snpEff as the -o argument; also selects the datatype of snpeff_output. -->
		<param name="outputFormat" type="select" label="Output format">
			<option value="txt">Tabular</option>
			<option value="vcf">VCF (only if input is VCF)</option>
			<option value="bed">BED</option>
			<option value="bedAnn">BED Annotations</option>
		</param>

		<!-- Genome choices come from snpeffect_genomedb.loc: column 0 is the value, column 1 the displayed name. -->
		<param name="genomeVersion" type="select" label="Genome">
			<options from_file="snpeffect_genomedb.loc">
				<column name="name" index="1"/>
				<column name="value" index="0"/>
			</options>
		</param>

		<!-- Passed to snpEff as the -upDownStreamLen argument. -->
		<param name="udLength" type="select" label="Upstream / Downstream length">
			<option value="0">No upstream / downstream intervals (0 bases)</option>
			<option value="200">200 bases</option>
			<option value="500">500 bases</option>
			<option value="1000">1000 bases</option>
			<option value="2000">2000 bases</option>
			<option value="5000" selected="true">5000 bases</option>
			<option value="10000">10000 bases</option>
			<option value="20000">20000 bases</option>
		</param>

		<!-- The command template turns the chosen value into a flag (-hom / -het) unless it is no_filter. -->
		<param name="filterHomHet" type="select" display="radio" label="Filter homozygous / heterozygous changes">
			<option value="no_filter" selected="true">No filter (analyze everything)</option>
			<option value="hom">Analyze homozygous sequence changes only </option>
			<option value="het">Analyze heterozygous sequence changes only </option>
		</param>

		<!--
		    The command template turns the chosen value into a flag by prefixing "-", unless it is no_filter.
		    NOTE(review): the MNP option therefore produces "-nmp"; verify against "snpEff eff -h"
		    whether snpEff spells this flag -nmp or -mnp.
		-->
		<param name="filterIn" type="select" display="radio" label="Filter sequence changes">
			<option value="no_filter" selected="true">No filter (analyze everything)</option>
			<option value="del">Analyze deletions only </option>
			<option value="ins">Analyze insertions only </option>
			<option value="nmp">Only MNPs (multiple nucleotide polymorphisms) </option>
			<option value="snp">Only SNPs (single nucleotide polymorphisms) </option>
		</param>

		<!-- Each checked value is already a complete snpEff flag; the command template joins them with spaces. -->
		<param name="filterOut" type="select" display="checkboxes" multiple="true" optional="true" label="Filter output">
			<option value="-no-downstream">Do not show DOWNSTREAM changes </option>
			<option value="-no-intergenic">Do not show INTERGENIC changes </option>
			<option value="-no-intron">Do not show INTRON changes </option>
			<option value="-no-upstream">Do not show UPSTREAM changes </option>
			<option value="-no-utr">Do not show 5_PRIME_UTR or 3_PRIME_UTR changes </option>
		</param>

		<!-- A non-empty value is passed to snpEff as the flag -0 or -1; the empty value adds no flag. -->
		<param name="offset" type="select" display="radio" optional="true" label="Chromosomal position">
			<option value="" selected="true">Use default (based on input type)</option>
			<option value="0">Force zero-based positions (both input and output)</option>
			<option value="1">Force one-based positions (both input and output)</option>
		</param>

		<!-- Decides whether the statsFile output is created (see its filter); the false value carries the -noStats flag text. -->
		<param name="generate_stats" type="boolean" truevalue="" falsevalue="-noStats" checked="true" label="Produce Summary Stats"/>
	</inputs>
	<outputs>
		<!-- Annotated sequence changes; the datatype follows the selected outputFormat (tabular by default). -->
		<data name="snpeff_output" format="tabular">
			<change_format>
				<when input="outputFormat" value="txt" format="tabular"/>
				<when input="outputFormat" value="vcf" format="vcf"/>
				<when input="outputFormat" value="bed" format="bed"/>
				<when input="outputFormat" value="bedAnn" format="bed"/>
			</change_format>
		</data>

		<!-- HTML summary report; only created when the generate_stats checkbox is set. -->
		<data name="statsFile" format="html">
			<filter>generate_stats == True</filter>
		</data>
	</outputs>
        <!-- Every non-zero exit code is fatal: negative codes and codes of 1 or more each get their own description. -->
        <stdio>
          <exit_code range=":-1" level="fatal" description="Error: Cannot open file"/>
          <exit_code range="1:" level="fatal" description="Error"/>
        </stdio>
        <tests>
            <!-- Test 1: no filters on the input, stats left at the default (enabled); both outputs are checked. -->
            <test>
                <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
                <param name="inputFormat" value="vcf"/>
                <param name="outputFormat" value="vcf"/>
                <param name="genomeVersion" value="testCase"/>
                <param name="udLength" value="0"/>
                <param name="filterHomHet" value="no_filter"/>
                <param name="filterIn" value="no_filter"/>
                <param name="filterOut" value="-no-upstream"/>
                <output name="snpeff_output">
                    <assert_contents>
                        <!-- Check that an effect was added -->
                        <has_text text="EFF=NON_SYNONYMOUS_CODING"/>
                    </assert_contents>
                </output>
                <output name="statsFile">
                    <assert_contents>
                        <!-- Check for an HTML header indicating that this was successful -->
                        <has_text text="SnpEff: Variant analysis"/>
                    </assert_contents>
                </output>
            </test>

            <!-- Test 2: heterozygous-only filter, stats disabled. -->
            <test>
                <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
                <param name="inputFormat" value="vcf"/>
                <param name="outputFormat" value="vcf"/>
                <param name="genomeVersion" value="testCase"/>
                <param name="udLength" value="0"/>
                <param name="filterHomHet" value="het"/>
                <param name="filterIn" value="no_filter"/>
                <!-- filterOut is left unset here; the explicit empty value is kept commented out:
                <param name="filterOut" value=""/>
                -->
                <param name="generate_stats" value="False"/>
                <output name="snpeff_output">
                    <assert_contents>
                        <!-- Check that NO effects were added since -het is set -->
                        <not_has_text text="EFF=NON_SYNONYMOUS_CODING"/>
                    </assert_contents>
                </output>
            </test>

            <!-- Test 3: deletions-only filter, stats disabled. -->
            <test>
                <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
                <param name="inputFormat" value="vcf"/>
                <param name="outputFormat" value="vcf"/>
                <param name="genomeVersion" value="testCase"/>
                <param name="udLength" value="0"/>
                <param name="filterHomHet" value="no_filter"/>
                <param name="filterIn" value="del"/>
                <!-- filterOut is left unset here; the explicit empty value is kept commented out:
                <param name="filterOut" value=""/>
                -->
                <param name="generate_stats" value="False"/>
                <output name="snpeff_output">
                    <assert_contents>
                        <!-- Check that deletions were evaluated -->
                        <has_text_matching expression="Y\t59030478\t.*EFF=INTERGENIC"/>
                        <!-- Check that insertion on last line was NOT evaluated -->
                        <has_text_matching expression="Y\t59032947\t.*SF=5\tGT"/>
                    </assert_contents>
                </output>
            </test>

            <!-- Test 4: output filter -no-upstream, stats disabled. -->
            <test>
                <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
                <param name="inputFormat" value="vcf"/>
                <param name="outputFormat" value="vcf"/>
                <param name="genomeVersion" value="testCase"/>
                <param name="udLength" value="0"/>
                <param name="filterHomHet" value="no_filter"/>
                <param name="filterIn" value="no_filter"/>
                <param name="filterOut" value="-no-upstream"/>
                <param name="generate_stats" value="False"/>
                <output name="snpeff_output">
                    <assert_contents>
                        <!-- Check that NO UPSTREAM effect was added -->
                        <not_has_text text="UPSTREAM"/>
                    </assert_contents>
                </output>
            </test>

        </tests>
	<help>

This tool calculates the effects of sequence variants (SNPs, MNPs, insertions and deletions).

For details about this tool, please go to http://snpEff.sourceforge.net

	</help>
</tool>