# HG changeset patch
# User devteam
# Date 1377541512 14400
# Node ID 68ba55e9648985f2c84cc54f4748d8337cb55189
Uploaded tool tarball.
diff -r 000000000000 -r 68ba55e96489 samtools_slice_bam.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_slice_bam.py Mon Aug 26 14:25:12 2013 -0400
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+#Dan Blankenberg
+
+"""
+A wrapper script for slicing a BAM file by provided BED file using SAMTools.
+%prog input_filename.sam output_filename.bam
+"""
+#TODO: Confirm that the sort is necessary e.g. if input regions are out of order
+
+
+import sys, optparse, os, tempfile, subprocess, shutil
+
+CHUNK_SIZE = 2**20 #1mb
+
+def cleanup_before_exit( tmp_dir ):
+ if tmp_dir and os.path.exists( tmp_dir ):
+ shutil.rmtree( tmp_dir )
+
+def __main__():
+ #Parse Command Line
+ parser = optparse.OptionParser()
+ (options, args) = parser.parse_args()
+
+ assert len( args ) == 4, "Invalid command line: samtools_slice_bam.py input.bam input.bam.bai input.interval output.bam"
+ input_bam_filename, input_index_filename, input_interval_filename, output_bam_filename = args
+
+ tmp_dir = tempfile.mkdtemp( prefix='tmp-samtools_slice_bam-' )
+
+ tmp_input_bam_filename = os.path.join( tmp_dir, 'input_bam.bam' )
+ os.symlink( input_bam_filename, tmp_input_bam_filename )
+ os.symlink( input_index_filename, "%s.bai" % tmp_input_bam_filename )
+
+ #Slice BAM
+ unsorted_bam_filename = os.path.join( tmp_dir, 'unsorted.bam' )
+ unsorted_stderr_filename = os.path.join( tmp_dir, 'unsorted.stderr' )
+ cmd = 'samtools view -b -L "%s" "%s" > "%s"' % ( input_interval_filename, tmp_input_bam_filename, unsorted_bam_filename )
+ proc = subprocess.Popen( args=cmd, stderr=open( unsorted_stderr_filename, 'wb' ), shell=True, cwd=tmp_dir )
+ return_code = proc.wait()
+ if return_code:
+ stderr_target = sys.stderr
+ else:
+ stderr_target = sys.stdout
+ stderr = open( unsorted_stderr_filename )
+ while True:
+ chunk = stderr.read( CHUNK_SIZE )
+ if chunk:
+ stderr_target.write( chunk )
+ else:
+ break
+ stderr.close()
+
+ #sort sam, so indexing will not fail
+ #TODO: confirm if sorting is necessary (is original BAM order maintained, or does the output follow the order of input intervals?)
+ sorted_stderr_filename = os.path.join( tmp_dir, 'sorted.stderr' )
+ sorting_prefix = os.path.join( tmp_dir, 'sorted_bam' )
+ cmd = 'samtools sort -o "%s" "%s" > "%s"' % ( unsorted_bam_filename, sorting_prefix, output_bam_filename )
+ proc = subprocess.Popen( args=cmd, stderr=open( sorted_stderr_filename, 'wb' ), shell=True, cwd=tmp_dir )
+ return_code = proc.wait()
+
+ if return_code:
+ stderr_target = sys.stderr
+ else:
+ stderr_target = sys.stdout
+ stderr = open( sorted_stderr_filename )
+ while True:
+ chunk = stderr.read( CHUNK_SIZE )
+ if chunk:
+ stderr_target.write( chunk )
+ else:
+ break
+ stderr.close()
+
+ cleanup_before_exit( tmp_dir )
+
+if __name__=="__main__": __main__()
diff -r 000000000000 -r 68ba55e96489 samtools_slice_bam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_slice_bam.xml Mon Aug 26 14:25:12 2013 -0400
@@ -0,0 +1,40 @@
+
+ by provided regions
+
+ samtools
+
+ samtools_slice_bam.py
+ "${input_bam}"
+ "${input_bam.metadata.bam_index}"
+ "${input_interval}"
+ "${output_bam}"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+ Accepts an input BAM file and an input BED file and creates an output BAM file containing only those alignments that overlap the provided BED intervals.
+
+------
+
+**Citation**
+
+For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_
+
+If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.*
+
+
+
diff -r 000000000000 -r 68ba55e96489 test-data/gatk/fake_phiX_reads_1.bam
Binary file test-data/gatk/fake_phiX_reads_1.bam has changed
diff -r 000000000000 -r 68ba55e96489 test-data/gatk/fake_phiX_variant_locations.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gatk/fake_phiX_variant_locations.bed Mon Aug 26 14:25:12 2013 -0400
@@ -0,0 +1,2 @@
+phiX174 1442 1443
+phiX174 1445 1446
diff -r 000000000000 -r 68ba55e96489 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon Aug 26 14:25:12 2013 -0400
@@ -0,0 +1,6 @@
+
+
+
+
+
+