# HG changeset patch # User brad-chapman # Date 1307478456 14400 # Node ID 5a9ada9a3191cc2b85c92e5a359a665520c5d5f7 Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository diff -r 000000000000 -r 5a9ada9a3191 bam_to_fastq/bam_to_fastq-readme.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bam_to_fastq/bam_to_fastq-readme.txt Tue Jun 07 16:27:36 2011 -0400 @@ -0,0 +1,9 @@ +Use Picard's SamToFastq program to convert BAM files to fastq. This makes it +easy to store reads in Galaxy as compressed, accessible BAM files but then +allow them to be extracted to feed into programs requiring fastq. + +Requires: + Picard (http://picard.sourceforge.net/) + The SamToFastq.jar file needs to be linked from this directory or available + in a standard directory like /usr/share/java/picard. + pysam (http://code.google.com/p/pysam/) diff -r 000000000000 -r 5a9ada9a3191 bam_to_fastq/bam_to_fastq.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bam_to_fastq/bam_to_fastq.xml Tue Jun 07 16:27:36 2011 -0400 @@ -0,0 +1,27 @@ + + Convert BAM file to fastq + bam_to_fastq_wrapper.py $in_bam $out $out.id $__new_file_path__ + + + + + + + + +**What it does** + +Extract sequences and quality scores from a BAM file, converting into fastq files. + +**Input** + +A BAM alignment file. + +**Output** + +Fastq files with sequence and quality data. Output qualities are in Sanger format. +For single end data, one fastq file is produced; paired end data will have separate +fastq files for the forward and reverse reads. + + + diff -r 000000000000 -r 5a9ada9a3191 bam_to_fastq/bam_to_fastq_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bam_to_fastq/bam_to_fastq_wrapper.py Tue Jun 07 16:27:36 2011 -0400 @@ -0,0 +1,48 @@ +"""Wrapper script providing conversion from BAM to fastq, handling paired ends. 
"""Wrapper script providing conversion from BAM to fastq, handling paired ends.

Requires:
  Picard (http://picard.sourceforge.net/)
    The SamToFastq.jar file needs to be linked from this directory or available
    in a standard directory like /usr/share/java/picard.
  pysam (http://code.google.com/p/pysam/)

Usage:
  bam_to_fastq_wrapper.py <in_bam> <out_fastq> <out_id> <extra_file_dir>
"""
import os
import sys
import subprocess


def main(in_bam, out_fastq, out_id, extra_file_dir):
    """Convert a BAM file to fastq by running Picard's SamToFastq.

    Args:
        in_bam: Path of the BAM file to convert.
        out_fastq: Path of the fastq file to write (first reads for paired
            data, all reads for single end data).
        out_id: Identifier of the primary output; used to name the second
            fastq file when the input is paired.
        extra_file_dir: Directory in which the second fastq file is created
            for paired input.

    Raises:
        ValueError: If SamToFastq.jar cannot be located.
        subprocess.CalledProcessError: If the java process exits non-zero.
    """
    out_fastq2 = check_for_paired(in_bam, out_id, extra_file_dir)
    picard_jar = find_picard_jar("SamToFastq")
    opts = [("INPUT", in_bam), ("FASTQ", out_fastq),
            ("QUIET", "true"), ("VERBOSITY", "WARNING")]
    if out_fastq2:
        opts.append(("SECOND_END_FASTQ", out_fastq2))
    # Picard takes its options as KEY=value tokens. An argument list (no
    # shell) keeps paths with spaces or metacharacters intact.
    cl = ["java", "-jar", picard_jar]
    cl.extend("%s=%s" % (key, value) for key, value in opts)
    subprocess.check_call(cl)


def find_picard_jar(name, search_dirs=None):
    """Return the path of the first ``<name>.jar`` found in the search dirs.

    Args:
        name: Jar base name without the ``.jar`` extension, e.g. "SamToFastq".
        search_dirs: Optional sequence of directories to look in, in order.
            Defaults to the directory holding this script followed by
            /usr/share/java/picard.

    Raises:
        ValueError: If no directory contains the jar.
    """
    if search_dirs is None:
        # abspath: dirname(__file__) alone is "" when the script is started
        # from its own directory, which made the error message unhelpful.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        search_dirs = [script_dir, "/usr/share/java/picard"]
    test_dirs = list(search_dirs)
    jar_name = "%s.jar" % name
    for directory in test_dirs:
        candidate = os.path.join(directory, jar_name)
        if os.path.exists(candidate):
            return candidate
    raise ValueError("Could not find %s in %s" % (name, test_dirs))


def check_for_paired(in_bam, out_id, extra_file_dir):
    """Return the path for the second-end fastq file, or None if unpaired.

    The file name is built as ``primary_<out_id>_pair2_visible_fastqsanger``
    inside ``extra_file_dir``.
    """
    if not is_paired(in_bam):
        return None
    second_name = "%s_%s_%s_%s_%s" % (
        'primary', out_id, 'pair2', 'visible', 'fastqsanger')
    return os.path.join(extra_file_dir, second_name)


def is_paired(in_bam):
    """Tell whether a BAM file holds paired reads, judged by its first read.

    Only the first record is inspected. A BAM file without any reads is
    reported as unpaired.
    """
    # Imported here, by its only user, so the rest of the module can be
    # imported on machines without pysam.
    import pysam

    samfile = pysam.Samfile(in_bam, "rb")
    try:
        # next(iterator, default) works on Python 2.6+ and 3.x (the iterator's
        # .next() method is Python 2 only) and does not raise StopIteration
        # when the file has no reads.
        first_read = next(samfile.fetch(until_eof=True), None)
    finally:
        # Always release the file handle, even if reading fails.
        samfile.close()
    if first_read is None:
        return False
    return first_read.is_paired


if __name__ == "__main__":
    if len(sys.argv) != 5:
        sys.exit("Usage: %s <in_bam> <out_fastq> <out_id> <extra_file_dir>"
                 % os.path.basename(sys.argv[0]))
    main(*sys.argv[1:])