Mercurial > repos > devteam > samtool_filter2
changeset 0:2d4ae2f8231e draft
Imported from capsule None
author | devteam |
---|---|
date | Thu, 27 Feb 2014 16:16:26 -0500 |
parents | |
children | 94d5786febc4 |
files | samtool_filter2.xml test-data/bam_to_sam_in1.sam test-data/bam_to_sam_in2.sam tool_dependencies.xml |
diffstat | 4 files changed, 277 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/samtool_filter2.xml Thu Feb 27 16:16:26 2014 -0500
@@ -0,0 +1,240 @@
+<tool id="samtool_filter2" name="Filter SAM or BAM, output SAM or BAM" version="1.1.1">
+  <!-- Galaxy wrapper around "samtools view": filters a SAM or BAM dataset on MAPQ, FLAG bits,
+       read group, library, BED intervals or regions, and writes the result as SAM or BAM. -->
+  <description>files on FLAG MAPQ RG LN or by region</description>
+  <requirements>
+    <requirement type="package" version="0.1.18">samtools</requirement>
+  </requirements>
+  <!--
+    samtools view [-bchuHS] [-t in.refList] [-o output] [-f reqFlag] [-F skipFlag] [-q minMapQ] [-l library] [-r readGroup] [-R rgFile] <in.bam>|<in.sam> [region1 [...]]
+    Usage:   samtools view [options] <in.bam>|<in.sam> [region1 [...]]
+
+    Options: -b       output BAM
+             -h       print header for the SAM output
+             -H       print header only (no alignments)
+             -S       input is SAM
+             -u       uncompressed BAM output (force -b)
+             -1       fast compression (force -b)
+             -x       output FLAG in HEX (samtools-C specific)
+             -X       output FLAG in string (samtools-C specific)
+             -c       print only the count of matching records
+             -L FILE  output alignments overlapping the input BED FILE [null]
+             -t FILE  list of reference names and lengths (force -S) [null]
+             -T FILE  reference sequence file (force -S) [null]
+             -o FILE  output file name [stdout]
+             -R FILE  list of read groups to be outputted [null]
+             -f INT   required flag, 0 for unset [0]
+             -F INT   filtering flag, 0 for unset [0]
+             -q INT   minimum mapping quality [0]
+             -l STR   only output reads in library STR [null]
+             -r STR   only output reads in read group STR [null]
+             -?       longer help
+  -->
+  <command>
+## Set up input files: the dataset is symlinked into the working directory under a fixed name.
+## Regions require input.bam and input.bai, so a BAM input also gets its index linked.
+#if isinstance($input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('bam').__class__):
+  #set $input = 'input.bam'
+  ln -s "$input1" $input &amp;&amp;
+  ln -s "$input1.metadata.bam_index" input.bai &amp;&amp;
+#elif isinstance($input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('sam').__class__):
+  #set $input = 'input.sam'
+  ln -s "$input1" $input &amp;&amp;
+#end if
+samtools view -o "$output1" $header
+
+  #if $input1.datatype.file_ext == 'sam':
+    -S
+  #end if
+
+  #if $outputtype.__str__ == "bam":
+    -b
+  #end if
+
+
+  #if $mapq.__str__ != '':
+    -q $mapq
+  #end if
+  #if $flag.filter.__str__ == 'yes':
+    ## Every selected option value is a hex bit mask; the masks are summed into one flag value.
+    #if $flag.reqBits.__str__ != 'None':
+      #set $reqs = $flag.reqBits.__str__.split(',')
+      #set $reqFlag = 0
+      #for $xn in $reqs:
+        #set $reqFlag += int(xn,16)
+      #end for
+      -f $hex($reqFlag)
+    #end if
+    #if $flag.skipBits.__str__ != 'None':
+      #set $skips = $flag.skipBits.__str__.split(',')
+      #set $skipFlag = 0
+      #for $xn in $skips:
+        #set $skipFlag += int(xn,16)
+      #end for
+      -F $hex($skipFlag)
+    #end if
+  #end if
+  ## Free-text values and paths are quoted so a value containing a space stays one argument.
+  #if $read_group.__str__.strip() != '':
+    -r "$read_group"
+  #end if
+  #if $library.__str__.strip() != '':
+    -l "$library"
+  #end if
+  #if $bed_file.__str__ != "None" and len($bed_file.__str__) &gt; 0:
+    -L "$bed_file"
+  #end if
+  $input
+  ## Regions stay unquoted on purpose: several space-separated regions must be separate arguments.
+  #if $regions.__str__.strip() != '' and $input1.datatype.file_ext == 'bam':
+    $regions.__str__.strip()
+  #end if
+  ## need to redirect stderr message so galaxy does not think this failed
+  2&gt;&amp;1
+  </command>
+  <inputs>
+    <param name="input1" type="data" format="sam,bam" label="SAM or BAM File to Filter" />
+    <param name="header" type="select" label="Header in output">
+      <option value="-h">Include Header</option>
+      <option value="">Exclude Header</option>
+      <option value="-H">Only the Header</option>
+    </param>
+    <param name="mapq" type="integer" value="" optional="true" label="Minimum MAPQ quality score">
+      <validator type="in_range" message="The MAPQ quality score can't be negative" min="0"/>
+    </param>
+    <conditional name="flag">
+      <param name="filter" type="select" label="Filter on bitwise flag">
+        <option value="no">no</option>
+        <option value="yes">yes</option>
+      </param>
+      <when value="no"/>
+      <when value="yes">
+        <param name="reqBits" type="select" multiple="true" display="checkboxes" label="Only output alignments with all of these flag bits set" >
+          <option value="0x0001">Read is paired</option>
+          <option value="0x0002">Read is mapped in a proper pair</option>
+          <option value="0x0004">The read is unmapped</option>
+          <option value="0x0008">The mate is unmapped</option>
+          <option value="0x0010">Read strand</option>
+          <option value="0x0020">Mate strand</option>
+          <option value="0x0040">Read is the first in a pair</option>
+          <option value="0x0080">Read is the second in a pair</option>
+          <option value="0x0100">The alignment or this read is not primary</option>
+          <option value="0x0200">The read fails platform/vendor quality checks</option>
+          <option value="0x0400">The read is a PCR or optical duplicate</option>
+        </param>
+        <param name="skipBits" type="select" multiple="true" display="checkboxes" label="Skip alignments with any of these flag bits set" >
+          <option value="0x0001">Read is paired</option>
+          <option value="0x0002">Read is mapped in a proper pair</option>
+          <option value="0x0004">The read is unmapped</option>
+          <option value="0x0008">The mate is unmapped</option>
+          <option value="0x0010">Read strand</option>
+          <option value="0x0020">Mate strand</option>
+          <option value="0x0040">Read is the first in a pair</option>
+          <option value="0x0080">Read is the second in a pair</option>
+          <option value="0x0100">The alignment or this read is not primary</option>
+          <option value="0x0200">The read fails platform/vendor quality checks</option>
+          <option value="0x0400">The read is a PCR or optical duplicate</option>
+        </param>
+      </when>
+    </conditional>
+    <param name="library" type="text" value="" size="20" label="Select alignments from Library"
+           help="Requires headers in the input SAM or BAM, otherwise no alignments will be output."/>
+    <param name="read_group" type="text" value="" size="20" label="Select alignments from Read Group"
+           help="Requires headers in the input SAM or BAM, otherwise no alignments will be output."/>
+    <param name="bed_file" type="data" format="bed" optional="true" label="Output alignments overlapping the regions in the BED FILE"/>
+    <param name="regions" type="text" value="" size="180" label="Select regions (only used when the input is in BAM format)"
+           help="region should be presented in one of the following formats: `chr1', `chr2:1,000' and `chr3:1000-2,000'"/>
+    <param name="outputtype" type="select" label="Select the output format">
+      <option value="bam">bam</option>
+      <option value="sam">sam</option>
+    </param>
+  </inputs>
+  <outputs>
+    <!-- Default to SAM and switch to BAM only on request. Taking the datatype from input1 would
+         type SAM output as BAM whenever a BAM input is written out as SAM. -->
+    <data name="output1" format="sam" label="${tool.name} on ${on_string}: ${outputtype}">
+      <change_format>
+        <when input="outputtype" value="bam" format="bam" />
+      </change_format>
+    </data>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input1" value="bam_to_sam_in2.sam" ftype="sam" />
+      <param name="header" value=""/>
+      <param name="filter" value="yes"/>
+      <param name="reqBits" value="0x0080"/>
+      <param name="outputtype" value="sam"/>
+      <output name="output1" >
+        <assert_contents>
+          <has_text text="141" />
+          <not_has_text text="77" />
+        </assert_contents>
+      </output>
+    </test>
+    <test>
+      <param name="input1" value="bam_to_sam_in2.sam" ftype="sam" />
+      <param name="header" value=""/>
+      <param name="filter" value="no"/>
+      <param name="read_group" value="rg1"/>
+      <param name="outputtype" value="sam"/>
+      <output name="output1" >
+        <assert_contents>
+          <has_text text="rg1" />
+          <not_has_text text="rg2" />
+        </assert_contents>
+      </output>
+    </test>
+    <test>
+      <param name="input1" value="bam_to_sam_in1.sam" ftype="sam" />
+      <param name="header" value=""/>
+      <param name="filter" value="yes"/>
+      <param name="skipBits" value="0x0008"/>
+      <param name="mapq" value="250"/>
+      <param name="outputtype" value="sam"/>
+      <output name="output1" >
+        <assert_contents>
+          <has_text text="both_reads_align_clip_marked" />
+          <not_has_text text="both_reads_present_only_first_aligns" />
+        </assert_contents>
+      </output>
+    </test>
+  </tests>
+  <help>
+
+
+**What it does**
+
+This tool uses the samtools view command in SAMTools_ toolkit to filter a SAM or BAM file on the MAPQ (mapping quality), FLAG bits, Read Group, Library, or region.
+
+**Input**
+
+Input is either a SAM or BAM file.
+
+**Output**
+
+The output file will be SAM or BAM (depending on the chosen option), filtered by the selected options.
+
+**Options**
+
+Filtering by read group or library requires headers in the input SAM or BAM file.
+
+If regions are specified, only alignments overlapping the specified regions will be output. An alignment may be given multiple times if it is overlapping several regions.
+A region can be presented, for example, in the following format::
+
+  chr2 (the whole chr2)
+  chr2:1000000 (region starting from 1,000,000bp)
+  chr2:1,000,000-2,000,000 (region between 1,000,000 and 2,000,000bp including the end points).
+
+Note: The coordinate is 1-based.
+
+Multiple regions may be specified, separated by a space character::
+
+  chr2:1000000-2000000 chr2:1,000,000-2,000,000 chrX
+
+
+
+.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml
+
+  </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bam_to_sam_in1.sam Thu Feb 27 16:16:26 2014 -0500 @@ -0,0 +1,14 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:101 +@SQ SN:chr7 LN:404 +@SQ SN:chr8 LN:202 +@RG ID:0 SM:Hi,Mom! +@PG ID:1 PN:Hey! VN:2.0 +both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_adapter 147 chr7 16 255 101M = 21 -96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_adapter 99 chr7 21 255 101M = 16 96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_marked 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +read_2_too_many_gaps 163 chr7 302 255 10M1D10M5I76M = 1 -201 
NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +both_reads_present_only_first_aligns 165 * 0 0 * chr7 1 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bam_to_sam_in2.sam Thu Feb 27 16:16:26 2014 -0500 @@ -0,0 +1,25 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:10001 +@SQ SN:chr2 LN:100001 +@SQ SN:chr3 LN:10001 +@SQ SN:chr4 LN:1001 +@RG ID:rg1 SM:s1 +@RG ID:rg2 SM:s3 +bar:record:4 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:6 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 +bar:record:1 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:3 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 +bar:record:1 141 chr1 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:7 77 chr1 20 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 +bar:record:8 77 chr1 30 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 +bar:record:4 141 chr1 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:5 77 chr1 40 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 +bar:record:6 141 chr1 50 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg2 +bar:record:2 77 chr2 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:2 141 chr2 30 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg2 +bar:record:3 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:8 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:5 141 chr3 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:9 77 chr4 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:7 141 chr4 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:9 141 chr4 60 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Thu Feb 27 16:16:26 2014 -0500
@@ -0,0 +1,11 @@
+<?xml version="1.0"?>
+<!-- Declares the samtools 0.1.18 package, provided by the devteam package_samtools_0_1_18 repository. -->
+<tool_dependency>
+  <package name="samtools" version="0.1.18">
+    <repository changeset_revision="171cd8bc208d"
+                name="package_samtools_0_1_18"
+                owner="devteam"
+                prior_installation_required="False"
+                toolshed="http://toolshed.g2.bx.psu.edu" />
+  </package>
+</tool_dependency>