Mercurial > repos > pjbriggs > macs21
diff macs21_wrapper.py @ 0:06cb587a5e87 draft
Uploaded initial version 2.1.0-4.
author | pjbriggs |
---|---|
date | Tue, 30 Jun 2015 08:16:18 -0400 |
parents | |
children | 00d73c812399 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macs21_wrapper.py Tue Jun 30 08:16:18 2015 -0400 @@ -0,0 +1,262 @@ +#!/bin/env python +# +# Galaxy wrapper to run MACS 2.1 +# +# Completely rewritten from the original macs2 wrapped by Ziru Zhou +# taken from http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2 + +import sys +import os +import subprocess +import tempfile +import shutil + +def move_file(working_dir,name,destination): + """Move a file 'name' from 'working_dir' to 'destination' + + """ + if destination is None: + # Nothing to do + return + source = os.path.join(working_dir,name) + if os.path.exists(source): + shutil.move(source,destination) + +def convert_xls_to_interval(xls_file,interval_file,header=None): + """Convert MACS XLS file to interval + + From the MACS readme: "Coordinates in XLS is 1-based which is different with + BED format." + + However this function no longer performs any coordinate conversions, it + simply ensures that any blank or non-data lines are commented out + + """ + fp = open(interval_file,'wb') + if header: + fp.write('#%s\n' % header) + for line in open(xls_file): + # Keep all existing comment lines + if line.startswith('#'): + fp.write(line) + else: + # Split line into fields and test to see if + # the 'start' field is actually an integer + fields = line.split('\t') + if len(fields) > 1: + try: + int(fields[1]) + except ValueError: + # Integer conversion failed so comment out + # "bad" line instead + fields[0] = "#%s" % fields[0] + fp.write( '\t'.join( fields ) ) + fp.close() + +def make_bigwig_from_bedgraph(bedgraph_file,bigwig_file, + chrom_sizes,working_dir=None): + """Make bigWig file from a bedGraph + + The protocol is: + + $ fetchChromSizes.sh mm9 > mm9.chrom.sizes + $ bedClip treat.bedgraph mm9.chrom.sizes treat.clipped + $ bedGraphToBigWig treat.clipped mm9.chrom.sizes treat.bw + + Get the binaries from + http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/ + + We skip the fetchChromSizes step if the 'chrom_sizes' + argument supplied a valid file with the chromosome sizes + for the genome build in question. + + """ + print "Generating bigWig from bedGraph..." + # Check for chromosome sizes + if not os.path.exists(chrom_sizes): + # Determine genome build + chrom_sizes = os.path.basename(chrom_sizes) + genome_build = chrom_sizes.split('.')[0] + if genome_build == '?': + # No genome build set + sys.stderr.write("ERROR genome build not set, cannot get sizes for '?'\n") + sys.stderr.write("Assign a genome build to your input dataset and rerun\n") + sys.exit(1) + print "Missing chrom sizes file, attempting to fetch for '%s'" % genome_build + # Run fetchChromSizes + chrom_sizes = os.path.join(working_dir,chrom_sizes) + stderr_file = os.path.join(working_dir,"fetchChromSizes.stderr") + cmd = "fetchChromSizes %s" % genome_build + print "Running %s" % cmd + proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir, + stdout=open(chrom_sizes,'wb'), + stderr=open(stderr_file,'wb')) + proc.wait() + # Copy stderr from fetchChromSizes for information only + for line in open(stderr_file,'r'): + print line.strip() + os.remove(stderr_file) + # Check that the sizes file was downloaded + if not os.path.exists(chrom_sizes): + sys.stderr.write("Failed to download chrom sizes for '%s'\n" % genome_build) + sys.exit(1) + # Run bedClip + treat_clipped = "%s.clipped" % os.path.basename(bedgraph_file) + cmd = "bedClip %s %s %s" % (bedgraph_file,chrom_sizes,treat_clipped) + print "Running %s" % cmd + proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) + proc.wait() + # Check that clipped file exists + treat_clipped = os.path.join(working_dir,treat_clipped) + if not os.path.exists(treat_clipped): + sys.stderr.write("Failed to create clipped bed file\n") + sys.exit(1) + # Run bedGraphToBigWig + cmd = "bedGraphToBigWig %s %s %s" % (treat_clipped,chrom_sizes, + bigwig_file) + print "Running %s" % cmd + proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) + proc.wait() + # Clean up temporary chrom length file + if os.path.dirname(chrom_sizes) == working_dir: + print "Removing temporary chrom sizes file" + os.remove(chrom_sizes) + +if __name__ == "__main__": + + # Echo the command line + print ' '.join(sys.argv) + + # Initialise output files - values are set by reading from + # the command line supplied by the Galaxy wrapper + output_extra_html = None + output_extra_path = None + output_broadpeaks = None + output_gappedpeaks = None + output_narrowpeaks = None + output_treat_pileup = None + output_lambda_bedgraph = None + output_bigwig = None + output_xls_to_interval_peaks_file = None + output_peaks = None + output_bdgcmp = None + + # Other initialisations + chrom_sizes_file = None + + # Build the MACS 2.1 command line + # Initial arguments are always the same: command & input ChIP-seq file name + cmdline = ["macs2 %s -t %s" % (sys.argv[1],sys.argv[2])] + + # Process remaining args + for arg in sys.argv[3:]: + if arg.startswith('--format='): + # Convert format to uppercase + format_ = arg.split('=')[1].upper() + cmdline.append("--format=%s" % format_) + elif arg.startswith('--name='): + # Replace whitespace in name with underscores + experiment_name = '_'.join(arg.split('=')[1].split()) + cmdline.append("--name=%s" % experiment_name) + elif arg.startswith('--length='): + # Extract chromosome size file + chrom_sizes_file = arg.split('=')[1] + elif arg.startswith('--output-'): + # Handle destinations for output files + arg0,filen = arg.split('=') + if arg0 == '--output-summits': + output_summits = filen + elif arg0 == '--output-extra-files': + output_extra_html = filen + elif arg0 == '--output-extra-files-path': + output_extra_path = filen + elif arg0 == '--output-broadpeaks': + output_broadpeaks = filen + elif arg0 == '--output-gappedpeaks': + output_gappedpeaks = filen + elif arg0 == '--output-narrowpeaks': + output_narrowpeaks = filen + elif arg0 == '--output-pileup': + output_treat_pileup = filen + elif arg0 == '--output-lambda-bedgraph': + output_lambda_bedgraph = filen + elif arg0 == '--output-bigwig': + output_bigwig = filen + elif arg0 == '--output-xls-to-interval': + output_xls_to_interval_peaks_file = filen + elif arg0 == '--output-peaks': + output_peaks = filen + else: + # Pass remaining args directly to MACS + # command line + cmdline.append(arg) + + cmdline = ' '.join(cmdline) + print "Generated command line:\n%s" % cmdline + + # Execute MACS2 + # + # Make a working directory + working_dir = tempfile.mkdtemp() + # + # Collect stderr in a file for reporting later + stderr_filen = tempfile.NamedTemporaryFile().name + # + # Run MACS2 + proc = subprocess.Popen(args=cmdline,shell=True,cwd=working_dir, + stderr=open(stderr_filen,'wb')) + proc.wait() + + # Run R script to create PDF from model script + if os.path.exists(os.path.join(working_dir,"%s_model.r" % experiment_name)): + cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % \ + (experiment_name, experiment_name) + proc = subprocess.Popen(args=cmdline,shell=True,cwd=working_dir) + proc.wait() + + # Convert XLS to interval, if requested + if output_xls_to_interval_peaks_file is not None: + peaks_xls_file = os.path.join(working_dir,'%s_peaks.xls' % experiment_name ) + if os.path.exists(peaks_xls_file): + convert_xls_to_interval(peaks_xls_file,output_xls_to_interval_peaks_file, + header='peaks file') + + # Create bigWig from bedGraph, if requested + if output_bigwig is not None: + treat_bedgraph_file = os.path.join(working_dir,'%s_treat_pileup.bdg' % experiment_name) + if os.path.exists(treat_bedgraph_file): + make_bigwig_from_bedgraph(treat_bedgraph_file,output_bigwig, + chrom_sizes_file,working_dir) + + # Move MACS2 output files from working dir to their final destinations + move_file(working_dir,"%s_summits.bed" % experiment_name,output_summits) + move_file(working_dir,"%s_peaks.xls" % experiment_name,output_peaks) + move_file(working_dir,"%s_peaks.narrowPeak" % experiment_name,output_narrowpeaks) + move_file(working_dir,"%s_peaks.broadPeak" % experiment_name,output_broadpeaks) + move_file(working_dir,"%s_peaks.gappedPeak" % experiment_name,output_gappedpeaks) + move_file(working_dir,"%s_treat_pileup.bdg" % experiment_name,output_treat_pileup) + move_file(working_dir,"%s_control_lambda.bdg" % experiment_name,output_lambda_bedgraph) + move_file(working_dir,"bdgcmp_out.bdg",output_bdgcmp) + + # Move remaining file to the 'extra files' path and link from the HTML + # file to allow user to access them from within Galaxy + html_file = open(output_extra_html,'wb') + html_file.write('<html><head><title>Additional output created by MACS (%s)</title></head><body><h3>Additional Files:</h3><p><ul>\n' % experiment_name) + # Make the 'extra files' directory + os.mkdir(output_extra_path) + # Move the files + for filen in sorted(os.listdir(working_dir)): + shutil.move(os.path.join(working_dir,filen), + os.path.join(output_extra_path,filen)) + html_file.write( '<li><a href="%s">%s</a></li>\n' % (filen,filen)) + # All files moved, close out HTML + html_file.write( '</ul></p>\n' ) + # Append any stderr output + html_file.write('<h3>Messages from MACS:</h3>\n<p><pre>%s</pre></p>\n' % + open(stderr_filen,'rb').read()) + html_file.write('</body></html>\n') + html_file.close() + + # Clean up the working directory and files + os.unlink(stderr_filen) + os.rmdir(working_dir)