Mercurial > repos > dereeper > sniplay3
diff vcfToolsFilter.xml @ 0:9dec9f724a50 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 12 Feb 2015 15:37:31 -0500 |
parents | |
children |
line wrap: on
line diff
<!--
    Galaxy tool wrapper: VCF tools filter (SNiPlay).
    Filters a VCF file by samples, chromosomes, allele frequency, missing-data
    proportion, number of alleles, polymorphism type and position range, and
    writes the result as VCF, frequency table or plink (ped + map) files.
-->
<tool id="sniplay_vcftoolsfilter" name="VCF tools filter" version="1.1.1">

    <!-- [REQUIRED] Tool description displayed after the tool name -->
    <description> </description>

    <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work -->
    <requirements>
        <requirement type="binary">perl</requirement>
        <requirement type="package" version="v1.07">VCFtools</requirement>
    </requirements>

    <!-- [OPTIONAL] Command to be executed to get the tool's version string -->
    <version_command>
<!--
        tool_binary -v
-->
    </version_command>

    <!--
        [REQUIRED] The command to execute.

        The wrapper is a shell script, so it is launched with bash instead of perl.
        NOTE(review): assumes vcfToolsFilter.sh is a bash script, as its extension
        suggests; confirm against the script shipped next to this file.

        The free-text output basename is single-quoted so that a label containing
        spaces stays a single positional argument.

        Positional arguments, in order: input VCF, output basename, main output,
        log output, export format, min MAF, max MAF, allowed missing proportion,
        min alleles, max alleles, polymorphism type, lower bound, upper bound,
        samples (or 'None'), chromosomes (or 'None'), plink map output (or '').
    -->
    <command interpreter="bash">
        vcfToolsFilter.sh $filein '$fileout_label' $fileout $filelog $export $frequency $max_freq $allow_missing $nb_alleles_min $nb_alleles_max $type_p $bound_start $bound_end
        #if str( $samples ) == "":
            'None'
        #else
            $samples
        #end if
        #if str( $chromosomes ) == "":
            'None'
        #else
            $chromosomes
        #end if
        #if str( $export ) == "plink":
            $fileout_map
        #else
            ''
        #end if
    </command>

    <!-- [REQUIRED] Input files and tool parameters -->
    <inputs>
        <param name="filein" type="data" format="vcf" optional="false" label="VCF input" />
        <param name="fileout_label" type="text" value="filtered" optional="false" label="Output file basename"/>
        <!-- Both list parameters only accept word characters separated by commas -->
        <param name="samples" type="text" optional="true" label="Samples" help="Samples to be analyzed. Comma separated list">
            <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
        </param>
        <param name="chromosomes" type="text" optional="true" label="Chromosomes" help="Chromosomes to be analyzed. Comma separated list">
            <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
        </param>
        <!-- The selected value drives the output datatype and the presence of the .map output -->
        <param name="export" type="select" label="Output format" >
            <option value="VCF" selected="true">VCF</option>
            <option value="freq">freq</option>
            <option value="plink">plink</option>
        </param>
        <param name="frequency" type="float" value="0.001" label="Minimum MAF." help="Minimum frequency." />
        <param name="max_freq" type="float" value="0.5" label="Maximum MAF." help="Maximum frequency." />
        <param name="allow_missing" type="float" value="1" min="0" max="1" label="Missing data proportion" help="Allowed missing data proportion per site. Must be comprised between 0 and 1." />
        <param name="nb_alleles_min" type="integer" value="2" label="Minimum number of alleles" help="Minimum accepted number of alleles." min="2" max="4" />
        <param name="nb_alleles_max" type="integer" value="2" label="Maximum number of alleles" help="Maximum accepted number of alleles." min="2" max="4" />
        <param name="type_p" type="select" label="Polymorphisms" help="Type of polymorphisms to keep." >
            <option value="ALL" selected="true">All</option>
            <option value="SNP">SNP</option>
            <option value="INDEL">Indel</option>
        </param>
        <param name="bound_start" type="integer" value="1" label="Lower bound" help="Lower bound for a range of sites to be processed." />
        <param name="bound_end" type="integer" value="100000000" label="Upper bound" help="Upper bound for a range of sites to be processed." />
    </inputs>

    <!-- [REQUIRED] Output files -->
    <outputs>
        <!-- Main output: declared as vcf, switched to tabular (freq) or txt (plink) by the export choice -->
        <data name="fileout" format="vcf" label="${fileout_label}.#if str($export)=='plink' then 'ped' else '' # #if str($export)=='freq' then 'frq' else '' # #if str($export)=='VCF' then 'vcf' else '' #" >
            <change_format>
                <when input="export" value="freq" format="tabular" />
                <when input="export" value="plink" format="txt" />
            </change_format>
        </data>
        <!-- Map output: only created when the plink export is selected -->
        <data name="fileout_map" format="txt" label="${fileout_label}.map">
            <filter>(export == 'plink')</filter>
        </data>
        <data name="filelog" format="txt" label="${fileout_label}.log" />
    </outputs>

    <!-- [STRONGLY RECOMMENDED] Exit code rules -->
    <stdio>
        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
        <exit_code range="1:" level="fatal" />
    </stdio>

    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
    <tests>
        <!-- [HELP] Test files have to be in the ~/test-data directory -->
        <test>
            <param name="filein" value="sample.vcf" />
            <param name="chromosomes" value="chr1" />
            <param name="export" value="VCF" />
            <param name="frequency" value="0.001" />
            <param name="max_freq" value="0.5" />
            <param name="allow_missing" value="1" />
            <param name="nb_alleles_min" value="2" />
            <param name="nb_alleles_max" value="4" />
            <param name="type_p" value="ALL" />
            <param name="bound_start" value="1" />
            <param name="bound_end" value="100000000" />
            <output name="fileout" file="result.vcf" />
            <output name="filelog" file="result.log" />
        </test>
    </tests>

    <!-- [OPTIONAL] Help displayed in Galaxy (reStructuredText).
         Wrapped in CDATA because the VCF header examples contain raw angle brackets. -->
    <help><![CDATA[

.. class:: infomark

**Authors**

---------------------------------------------------

.. class:: infomark

**Please cite** If you use this tool, please cite Dereeper et al. 2015 in prep.

---------------------------------------------------

================
VCF tools filter
================

-----------
Description
-----------

    Filter VCF file

-----------------
Workflow position
-----------------

**Upstream tools**

=========== ========================== =======
Name        output file(s)             format
=========== ========================== =======
=========== ========================== =======


**Downstream tools**

=========== ========================== =======
Name        output file(s)             format
=========== ========================== =======
=========== ========================== =======


----------
Input file
----------

VCF file
    VCF file with all SNPs

----------
Parameters
----------

Output file basename
    Prefix for the output VCF file

Samples
    Samples to be analyzed. Comma separated list

Chromosomes
    Chromosomes to be analyzed. Comma separated list

Output format
    VCF/freq/plink

Minimum MAF
    Minimum frequency

Maximum MAF
    Maximum frequency

Missing data proportion
    Allowed missing data proportion per site. Must be comprised between 0 and 1.

Number of alleles
    Accepted number of alleles min and max.

Polymorphisms
    Type of polymorphisms to keep (ALL/SNP/INDEL).

Bounds
    Lower bound and upper bound for a range of sites to be processed.

------------
Output files
------------

VCF file
    VCF file filtered

Log file

---------------------------------------------------

---------------
Working example
---------------

Input files
===========

VCF file
---------

::

    #fileformat=VCFv4.1
    #FILTER=<ID=LowQual,Description="Low quality">
    #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
    [...]
    CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
    chr1 2209 . G T 213.84 . AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0


Parameters
==========

Output name -> filtered_chr1

Chromosomes -> chr1

Output format -> VCF

Minimum MAF -> 0.001

Maximum MAF -> 0.5

Missing data proportion -> 1

Number of alleles min -> 2

Number of alleles max -> 4

Polymorphisms -> All

Lower bound -> 1

Upper bound -> 100000000


Output files
============

filtered_chr1.vcf
-----------------

::

    #fileformat=VCFv4.1
    #FILTER=<ID=LowQual,Description="Low quality">
    #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
    [...]
    CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
    chr1 5059 . C G 146.84 . AC=2;AF=1.00;AN=2;DP=8;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=24.14;MQ0=1;QD=18.35;EFF=INTRON(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,8:8:18:175,18,0


    ]]></help>

</tool>