Mercurial > repos > artbio > sambamba
diff sambamba_filter.xml @ 0:e3cbb848d8f7 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sambamba commit 1ff1d6786536e134d019c6d6d12ee9885f44b601"
author | artbio |
---|---|
date | Thu, 21 May 2020 09:51:19 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sambamba_filter.xml Thu May 21 09:51:19 2020 -0400 @@ -0,0 +1,234 @@ +<tool id="sambamba_sample_or_filter" name="Sample or Filter BAM" version="0.4"> + <description> + on flags, fields, and tags using Sambamba + </description> + <requirements> + <requirement type="package" version="0.7.1">sambamba</requirement> + </requirements> + <stdio> + <exit_code range="1:" level="fatal" description="Error occured" /> + </stdio> + <!-- <version_command>sambamba 2>&1 | grep "sambamba v" | sed 's/^sambamba v\(.*\)/\1/'</version_command> --> + <command detect_errors="exit_code"><![CDATA[ + ln -s $input input.bam && + ln -s $input.metadata.bam_index input.bai && + #if $sambamba_options.selector == 'filter' + sambamba view -h -t \${GALAXY_SLOTS:-4} + #if $sambamba_options.query != '': + --filter='$sambamba_options.query' + -f '$format' -o $outfile input.bam $sambamba_options.region + #end if + #else + sambamba view -h -t \${GALAXY_SLOTS:-4} -f '$format' + --subsampling-seed='$sambamba_options.seed' + -s '$sambamba_options.fraction' -o '$outfile' input.bam + #end if + ]]></command> + <inputs> + <param name="input" type="data" format="bam" label="BAM or SAM file to filter"/> + <param name="format" type="select" label="format of the tool output"> + <option value="bam">BAM</option> + <option value="sam">SAM</option> + </param> + <conditional name="sambamba_options"> + <param name="selector" type="select" label="Filter or Down-sample alignments"> + <option value="sample">Down-sample bam or sam alignments</option> + <option value="filter" selected="true">Filter bam or sam alignements</option> + </param> + <when value="filter"> + <param name="query" type="text" size="80"> + <sanitizer invalid_char="X"> + <valid initial="string.ascii_letters,string.digits, string.punctuation"> + <add value=" " /> + </valid> + </sanitizer> + <label>Filter expression</label> + <help>See below for query syntax.</help> + </param> + <param name="region" type="text" size="40" label="Region in format chr:beg-end"> + <help> + Regions can be specified as 'chr2' (the whole chr2), 'chr2:1000000' + (region starting from 1,000,000bp) or 'chr2:1,000,000-2,000,000' + (region between 1,000,000 and 2,000,000bp including the end points). + The coordinates are 1-based. + </help> + </param> + </when> + <when value="sample"> + <param name="seed" type="integer" value="123" size="10"> + <label>Seed value for randomisation</label> + <help> + Be careful at selecting different seed values if you + re-subsample a subsample output of this tool + </help> + </param> + <param name="fraction" type="float" value="0.1" max="1" size="10" label="fraction to retrieve after subsampling"> + <help> + Use a real number between 0 and 1 to indicate the relative size of + the fraction you wish to retrieve + </help> + </param> + </when> + </conditional> + </inputs> + <outputs> + <data name="outfile" format="bam"> + <change_format> + <when input="format" value="sam" format="sam" /> + </change_format> + </data> + </outputs> + <tests> + <test> + <param name="input" value="ex1_header.sam" ftype="sam" /> + <param name="selector" value="filter" /> + <param name="query" value="[H0] == 1 and read_name =~ /^EAS51_62/" /> + <param name="format" value="bam" /> + <param name="region" value="" /> + <output name="outfile" file="ex1_header_filtered.bam" ftype="bam" /> + </test> + <test> + <param name="input" value="c1215_fixmate.bam" ftype="bam" /> + <param name="selector" value="filter" /> + <param name="query" value="[MD] =~ /^\d+T\d+A\d+/ and first_of_pair" /> + <param name="format" value="sam" /> + <param name="region" value="AL096846:1000-5000" /> + <output name="outfile" file="c1215_fixmate_filtered.sam" ftype="sam" lines_diff="2"/> + </test> + <test> + <param name="input" value="ex1_header.sam" ftype="sam" /> + <param name="selector" value="sample" /> + <param name="seed" value="123" /> + <param name="fraction" value="0.1" /> + <param name="format" value="bam" /> + <output name="outfile" file="ex1_header_sampled.bam" ftype="bam" /> + </test> + <test> + <param name="input" value="c1215_fixmate.bam" ftype="bam" /> + <param name="selector" value="sample" /> + <param name="seed" value="123" /> + <param name="fraction" value="0.1" /> + <param name="format" value="sam" /> + <output name="outfile" file="c1215_fixmate_sampled.sam" ftype="sam" lines_diff="2"/> + </test> + </tests> + <help> +Sambamba Filter Overview +======================== + +This tool uses the sambamba_ ``view`` command to filter BAM/SAM on flags, fields, tags, and region. Input is SAM or BAM file. + + +Filter Syntax +============= + +A *filter expression* is a number of *basic conditions* linked by ``and``, ``or``, ``not`` logical operators, and enclosed in parentheses where needed. + +*Basic condition* is a one for a single record field, tag, or flag. + +You can use ``==,`` ``!=,`` ``>``, ``<``, ``>=``, ``<=`` comparison operators for both integers and strings. + +Strings are delimited by single quotes, if you need a single quote inside a string, escape it with ``\\``. + +Examples of filter expressions +------------------------------ + +:: + + mapping_quality >= 30 and ([RG] =~ /^abcd/ or [NM] == 7) + read_name == 'abc\'def' + +Basic conditions for flags +-------------------------- + +The following flag names are recognized: + * paired + * proper_pair + * unmapped + * mate_is_unmapped + * reverse_strand + * mate_is_reverse_strand + * first_of_pair + * second_of_pair + * secondary_alignment + * failed_quality_control + * duplicate + +Example +~~~~~~~ + +:: + + not (unmapped or mate_is_unmapped) and first_of_pair + +Basic conditions for fields +--------------------------- + +Conditions for integer and string fields are supported. + +List of integer fields: + * ref_id + * position + * mapping_quality + * sequence_length + * mate_ref_id + * mate_position + * template_length + + +List of string fields: + * read_name + * sequence + * cigar + + +Example +~~~~~~~ + +:: + + ref_id == 3 and mapping_quality >= 50 and sequence_length >= 80 + +Basic conditions for tags +------------------------- + +Tags are denoted by their names in square brackets, for instance, ``[RG]`` or ``[Q2].`` They support conditions for both integers and strings, i.e. the tag must also hold value of the corresponding type. + +In order to do filtering based on the presence of a particular tag, you can use special ``null`` value. + +Example +~~~~~~~ + +:: + + [RG] != null and [AM] == 37 + +Down-sampling +============= + +The tool is using the following sambamba command line for sampling: + +:: + + sambamba view -h -t <number of Galaxy threads configured in job_conf.xml> -f <bam or sam> + --subsampling-seed=<an integer> + -s <a real number between 0 and 1> -o <bam or sam output> input_file + +Warnings +-------- + +The tool does not down-sample at a user given **number of lines**, because sambamba does not +expose this functionality. For tool performances, we decided not to add it in this wrapper. + +If you down-sample a dataset that has been *already down-sampled* with this tool, it is +important that you choose **another seed** for randomisation. Otherwise, the new subsampling +was reported not to conform the indicated fraction. + + +.. _sambamba: http://github.com/lomereiter/sambamba + + </help> + <citations> + <citation type="doi">10.1093/bioinformatics/btv098</citation> + </citations> +</tool>