Mercurial > repos > dereeper > sniplay3
view vcfToolsFilter.xml @ 1:b058193a71d0 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 12 Feb 2015 15:41:00 -0500 |
parents | 9dec9f724a50 |
children |
line wrap: on
line source
<!--
    Galaxy tool wrapper: exposes the vcfToolsFilter.sh script as the
    "VCF tools filter" tool. It takes one VCF dataset plus filtering
    parameters and produces a filtered output (VCF, frequency table or
    plink ped/map pair, depending on "export") and a log file.
-->
<tool id="sniplay_vcftoolsfilter" name="VCF tools filter" version="1.1.1">

    <!-- [REQUIRED] Tool description displayed after the tool name -->
    <description> </description>

    <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work -->
    <requirements>
        <!-- NOTE(review): perl is kept as a requirement; presumably the shell wrapper calls a Perl script. Confirm. -->
        <requirement type="binary">perl</requirement>
        <requirement type="package" version="v1.07">VCFtools</requirement>
    </requirements>

    <!-- [OPTIONAL] Command to be executed to get the tool's version string -->
    <version_command>
        <!-- tool_binary -v -->
    </version_command>

    <!-- [REQUIRED] The command to execute -->
    <!--
        The wrapper script has a .sh extension, so it is run with bash rather
        than perl (assumes vcfToolsFilter.sh is a shell script; confirm).
        Positional arguments, in order:
          filein, fileout_label, fileout, filelog, export, frequency, max_freq,
          allow_missing, nb_alleles_min, nb_alleles_max, type_p, bound_start,
          bound_end, samples (or 'None'), chromosomes (or 'None'),
          fileout_map (only for plink export, otherwise an empty argument).
        fileout_label is free text, so it is single-quoted to remain one
        argument even when it contains spaces. samples and chromosomes are
        restricted by their regex validators to word characters and commas.
        The body is in CDATA so the template never needs XML escaping.
    -->
    <command interpreter="bash"><![CDATA[
vcfToolsFilter.sh $filein '$fileout_label' $fileout $filelog $export
    $frequency $max_freq $allow_missing
    $nb_alleles_min $nb_alleles_max
    $type_p $bound_start $bound_end
#if str( $samples ) == ""
    'None'
#else
    $samples
#end if
#if str( $chromosomes ) == ""
    'None'
#else
    $chromosomes
#end if
#if str( $export ) == "plink"
    $fileout_map
#else
    ''
#end if
    ]]></command>

    <!-- [REQUIRED] Input files and tool parameters -->
    <inputs>
        <param name="filein" type="data" format="vcf" optional="false" label="VCF input" />
        <param name="fileout_label" type="text" value="filtered" optional="false" label="Output file basename"/>

        <!-- Optional comma separated lists; an empty value is passed to the script as 'None' -->
        <param name="samples" type="text" optional="true" label="Samples" help="Samples to be analyzed. Comma separated list">
            <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
        </param>
        <param name="chromosomes" type="text" optional="true" label="Chromosomes" help="Chromosomes to be analyzed. Comma separated list">
            <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
        </param>

        <!-- Drives the format of "fileout" and whether "fileout_map" is produced -->
        <param name="export" type="select" label="Output format" >
            <option value="VCF" selected="true">VCF</option>
            <option value="freq">freq</option>
            <option value="plink">plink</option>
        </param>

        <param name="frequency" type="float" value="0.001" label="Minimum MAF." help="Minimum frequency." />
        <param name="max_freq" type="float" value="0.5" label="Maximum MAF." help="Maximum frequency." />
        <param name="allow_missing" type="float" value="1" min="0" max="1" label="Missing data proportion" help="Allowed missing data proportion per site. Must be comprised between 0 and 1." />
        <param name="nb_alleles_min" type="integer" value="2" label="Minimum number of alleles" help="Minimum accepted number of alleles." min="2" max="4" />
        <param name="nb_alleles_max" type="integer" value="2" label="Maximum number of alleles" help="Maximum accepted number of alleles." min="2" max="4" />

        <param name="type_p" type="select" label="Polymorphisms" help="Type of polymorphisms to keep." >
            <option value="ALL" selected="true">All</option>
            <option value="SNP">SNP</option>
            <option value="INDEL">Indel</option>
        </param>

        <param name="bound_start" type="integer" value="1" label="Lower bound" help="Lower bound for a range of sites to be processed." />
        <param name="bound_end" type="integer" value="100000000" label="Upper bound" help="Upper bound for a range of sites to be processed." />
    </inputs>

    <!-- [REQUIRED] Output files -->
    <outputs>
        <!-- Main output: vcf by default, switched to tabular (freq) or txt (plink ped) by "export" -->
        <data name="fileout" format="vcf" label="${fileout_label}.#if str($export)=='plink' then 'ped' else '' # #if str($export)=='freq' then 'frq' else '' # #if str($export)=='VCF' then 'vcf' else '' #" >
            <change_format>
                <when input="export" value="freq" format="tabular" />
                <when input="export" value="plink" format="txt" />
            </change_format>
        </data>
        <!-- Companion map file, only created for plink export -->
        <data name="fileout_map" format="txt" label="${fileout_label}.map">
            <filter>(export == 'plink')</filter>
        </data>
        <data name="filelog" format="txt" label="${fileout_label}.log" />
    </outputs>

    <!-- [STRONGLY RECOMMENDED] Exit code rules -->
    <stdio>
        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
        <exit_code range="1:" level="fatal" />
    </stdio>

    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
    <tests>
        <!-- [HELP] Test files have to be in the ~/test-data directory -->
        <test>
            <param name="filein" value="sample.vcf" />
            <param name="chromosomes" value="chr1" />
            <param name="export" value="VCF" />
            <param name="frequency" value="0.001" />
            <param name="max_freq" value="0.5" />
            <param name="allow_missing" value="1" />
            <param name="nb_alleles_min" value="2" />
            <param name="nb_alleles_max" value="4" />
            <param name="type_p" value="ALL" />
            <param name="bound_start" value="1" />
            <param name="bound_end" value="100000000" />
            <output name="fileout" file="result.vcf" />
            <output name="filelog" file="result.log" />
        </test>
    </tests>

    <!-- [OPTIONAL] Help displayed in Galaxy -->
    <!-- reStructuredText; kept in CDATA because the VCF examples contain literal '<' characters -->
    <help><![CDATA[

.. class:: infomark

**Authors**

---------------------------------------------------

.. class:: infomark

**Please cite** If you use this tool, please cite Dereeper et al. 2015 in prep.

---------------------------------------------------

================
VCF tools filter
================

-----------
Description
-----------

Filter VCF file

-----------------
Workflow position
-----------------

**Upstream tools**

=========== ========================== =======
Name        output file(s)             format
=========== ========================== =======
=========== ========================== =======

**Downstream tools**

=========== ========================== =======
Name        output file(s)             format
=========== ========================== =======
=========== ========================== =======

----------
Input file
----------

VCF file
    VCF file with all SNPs

----------
Parameters
----------

Output file basename
    Prefix for the output VCF file

Samples
    Samples to be analyzed. Comma separated list

Chromosomes
    Chromosomes to be analyzed. Comma separated list

Output format
    VCF/freq/plink

Minimum MAF
    Minimum frequency

Maximum MAF
    Maximum frequency

Missing data proportion
    Allowed missing data proportion per site. Must be comprised between 0 and 1.

Number of alleles
    Accepted number of alleles min and max.

Polymorphisms
    Type of polymorphisms to keep (ALL/SNP/INDEL).

Bounds
    Lower bound and upper bound for a range of sites to be processed.

------------
Output files
------------

VCF file
    VCF file filtered

Log file

---------------------------------------------------

---------------
Working example
---------------

Input files
===========

VCF file
---------

::

    #fileformat=VCFv4.1
    #FILTER=<ID=LowQual,Description="Low quality">
    #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
    [...]
    CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
    chr1 2209 . G T 213.84 . AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0

Parameters
==========

Output name -> filtered_chr1

Chromosomes -> chr1

Output format -> VCF

Minimum MAF -> 0.001

Maximum MAF -> 0.5

Missing data proportion -> 1

Number of alleles min -> 2

Number of alleles max -> 4

Polymorphisms -> All

Lower bound -> 1

Upper bound -> 100000000

Output files
============

filtered_genelist_intron.vcf
----------------------------

::

    #fileformat=VCFv4.1
    #FILTER=<ID=LowQual,Description="Low quality">
    #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
    [...]
    CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
    chr1 5059 . C G 146.84 . AC=2;AF=1.00;AN=2;DP=8;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=24.14;MQ0=1;QD=18.35;EFF=INTRON(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,8:8:18:175,18,0

    ]]></help>
</tool>