Mercurial > repos > devteam > samtools_slice_bam
changeset 0:68ba55e96489 draft
Uploaded tool tarball.
| author | devteam | 
|---|---|
| date | Mon, 26 Aug 2013 14:25:12 -0400 | 
| parents | |
| children | 74a8d2d60258 | 
| files | samtools_slice_bam.py samtools_slice_bam.xml test-data/gatk/fake_phiX_reads_1.bam test-data/gatk/fake_phiX_variant_locations.bed tool_dependencies.xml | 
| diffstat | 5 files changed, 123 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools_slice_bam.py Mon Aug 26 14:25:12 2013 -0400 @@ -0,0 +1,75 @@ +#!/usr/bin/env python +#Dan Blankenberg + +""" +A wrapper script for slicing a BAM file by provided BED file using SAMTools. +%prog input_filename.sam output_filename.bam +""" +#TODO: Confirm that the sort is necessary e.g. if input regions are out of order + + +import sys, optparse, os, tempfile, subprocess, shutil + +CHUNK_SIZE = 2**20 #1mb + +def cleanup_before_exit( tmp_dir ): + if tmp_dir and os.path.exists( tmp_dir ): + shutil.rmtree( tmp_dir ) + +def __main__(): + #Parse Command Line + parser = optparse.OptionParser() + (options, args) = parser.parse_args() + + assert len( args ) == 4, "Invalid command line: samtools_slice_bam.py input.bam input.bam.bai input.interval output.bam" + input_bam_filename, input_index_filename, input_interval_filename, output_bam_filename = args + + tmp_dir = tempfile.mkdtemp( prefix='tmp-samtools_slice_bam-' ) + + tmp_input_bam_filename = os.path.join( tmp_dir, 'input_bam.bam' ) + os.symlink( input_bam_filename, tmp_input_bam_filename ) + os.symlink( input_index_filename, "%s.bai" % tmp_input_bam_filename ) + + #Slice BAM + unsorted_bam_filename = os.path.join( tmp_dir, 'unsorted.bam' ) + unsorted_stderr_filename = os.path.join( tmp_dir, 'unsorted.stderr' ) + cmd = 'samtools view -b -L "%s" "%s" > "%s"' % ( input_interval_filename, tmp_input_bam_filename, unsorted_bam_filename ) + proc = subprocess.Popen( args=cmd, stderr=open( unsorted_stderr_filename, 'wb' ), shell=True, cwd=tmp_dir ) + return_code = proc.wait() + if return_code: + stderr_target = sys.stderr + else: + stderr_target = sys.stdout + stderr = open( unsorted_stderr_filename ) + while True: + chunk = stderr.read( CHUNK_SIZE ) + if chunk: + stderr_target.write( chunk ) + else: + break + stderr.close() + + #sort sam, so indexing will not fail + #TODO: confirm if sorting is necessary (is original BAM order maintained, or does the output follow the order of input intervals?) + sorted_stderr_filename = os.path.join( tmp_dir, 'sorted.stderr' ) + sorting_prefix = os.path.join( tmp_dir, 'sorted_bam' ) + cmd = 'samtools sort -o "%s" "%s" > "%s"' % ( unsorted_bam_filename, sorting_prefix, output_bam_filename ) + proc = subprocess.Popen( args=cmd, stderr=open( sorted_stderr_filename, 'wb' ), shell=True, cwd=tmp_dir ) + return_code = proc.wait() + + if return_code: + stderr_target = sys.stderr + else: + stderr_target = sys.stdout + stderr = open( sorted_stderr_filename ) + while True: + chunk = stderr.read( CHUNK_SIZE ) + if chunk: + stderr_target.write( chunk ) + else: + break + stderr.close() + + cleanup_before_exit( tmp_dir ) + +if __name__=="__main__": __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools_slice_bam.xml Mon Aug 26 14:25:12 2013 -0400 @@ -0,0 +1,40 @@ +<tool id="samtools_slice_bam" name="Slice BAM" version="0.0.1"> + <description>by provided regions</description> + <requirements> + <requirement type="package" version="0.1.18">samtools</requirement> + </requirements> + <command interpreter="python">samtools_slice_bam.py + "${input_bam}" + "${input_bam.metadata.bam_index}" + "${input_interval}" + "${output_bam}" + </command> + <inputs> + <param name="input_bam" type="data" format="bam" label="BAM file" /> + <param name="input_interval" type="data" format="bed" label="BED file" /> + </inputs> + <outputs> + <data format="bam" name="output_bam"/> + </outputs> + <tests> + <test> + <param name="input_bam" value="gatk/fake_phiX_reads_1.bam" ftype="bam" /> + <param name="input_interval" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" /> + <output name="output_bam" file="gatk/fake_phiX_reads_1.bam" ftype="bam" /> + </test> + </tests> + <help> +**What it does** + + Accepts an input BAM file and an input BED file and creates an output BAM file containing only those alignments that overlap the provided BED intervals. + +------ + +**Citation** + +For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gatk/fake_phiX_variant_locations.bed Mon Aug 26 14:25:12 2013 -0400 @@ -0,0 +1,2 @@ +phiX174 1442 1443 +phiX174 1445 1446
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Aug 26 14:25:12 2013 -0400 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="samtools" version="0.1.18"> + <repository changeset_revision="a7936f4ea405" name="package_samtools_0_1_18" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>
