Mercurial > repos > pjbriggs > macs21
view macs21_wrapper.py @ 3:4124781932db draft
Updated to MACS 2.1.1 and use conda for dependency resolution.
author | pjbriggs |
---|---|
date | Tue, 20 Mar 2018 11:25:04 -0400 |
parents | 00d73c812399 |
children |
line wrap: on
line source
#!/bin/env python # # Galaxy wrapper to run MACS 2.1 # # Completely rewritten from the original macs2 wrapped by Ziru Zhou # taken from http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2 import sys import os import subprocess import tempfile import shutil def move_file(working_dir,name,destination): """Move a file 'name' from 'working_dir' to 'destination' """ if destination is None: # Nothing to do return source = os.path.join(working_dir,name) if os.path.exists(source): shutil.move(source,destination) def convert_xls_to_interval(xls_file,interval_file,header=None): """Convert MACS XLS file to interval From the MACS readme: "Coordinates in XLS is 1-based which is different with BED format." However this function no longer performs any coordinate conversions, it simply ensures that any blank or non-data lines are commented out """ fp = open(interval_file,'wb') if header: fp.write('#%s\n' % header) for line in open(xls_file): # Keep all existing comment lines if line.startswith('#'): fp.write(line) else: # Split line into fields and test to see if # the 'start' field is actually an integer fields = line.split('\t') if len(fields) > 1: try: int(fields[1]) except ValueError: # Integer conversion failed so comment out # "bad" line instead fields[0] = "#%s" % fields[0] fp.write( '\t'.join( fields ) ) fp.close() def make_bigwig_from_bedgraph(bedgraph_file,bigwig_file, chrom_sizes,working_dir=None): """Make bigWig file from a bedGraph The protocol is: $ fetchChromSizes.sh mm9 > mm9.chrom.sizes $ bedClip treat.bedgraph mm9.chrom.sizes treat.clipped $ bedSort treat.clipped treat.clipped.sorted $ bedGraphToBigWig treat.clipped.sorted mm9.chrom.sizes treat.bw Get the binaries from http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/ We skip the fetchChromSizes step if the 'chrom_sizes' argument supplied a valid file with the chromosome sizes for the genome build in question. """ print "Generating bigWig from bedGraph..." # Check for chromosome sizes if not os.path.exists(chrom_sizes): # Determine genome build chrom_sizes = os.path.basename(chrom_sizes) genome_build = chrom_sizes.split('.')[0] if genome_build == '?': # No genome build set sys.stderr.write("ERROR genome build not set, cannot get sizes for '?'\n") sys.stderr.write("Assign a genome build to your input dataset and rerun\n") sys.exit(1) print "Missing chrom sizes file, attempting to fetch for '%s'" % genome_build # Run fetchChromSizes chrom_sizes = os.path.join(working_dir,chrom_sizes) stderr_file = os.path.join(working_dir,"fetchChromSizes.stderr") cmd = "fetchChromSizes %s" % genome_build print "Running %s" % cmd proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir, stdout=open(chrom_sizes,'wb'), stderr=open(stderr_file,'wb')) proc.wait() # Copy stderr from fetchChromSizes for information only for line in open(stderr_file,'r'): print line.strip() os.remove(stderr_file) # Check that the sizes file was downloaded if not os.path.exists(chrom_sizes): sys.stderr.write("Failed to download chrom sizes for '%s'\n" % genome_build) sys.exit(1) # Run bedClip treat_clipped = "%s.clipped" % os.path.basename(bedgraph_file) cmd = "bedClip %s %s %s" % (bedgraph_file,chrom_sizes,treat_clipped) print "Running %s" % cmd proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) proc.wait() # Check that clipped file exists treat_clipped = os.path.join(working_dir,treat_clipped) if not os.path.exists(treat_clipped): sys.stderr.write("Failed to create clipped bed file\n") sys.exit(1) # Run bedSort treat_clipped_sorted = "%s.sorted" % os.path.basename(treat_clipped) cmd = "bedSort %s %s" % (treat_clipped,treat_clipped_sorted) print "Running %s" % cmd proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) proc.wait() # Check that sorted file exists treat_clipped_sorted = os.path.join(working_dir,treat_clipped_sorted) if not os.path.exists(treat_clipped_sorted): sys.stderr.write("Failed to create sorted clipped bed file\n") sys.exit(1) # Run bedGraphToBigWig cmd = "bedGraphToBigWig %s %s %s" % (treat_clipped_sorted, chrom_sizes, bigwig_file) print "Running %s" % cmd proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) proc.wait() # Clean up temporary chrom length file if os.path.dirname(chrom_sizes) == working_dir: print "Removing temporary chrom sizes file" os.remove(chrom_sizes) if __name__ == "__main__": # Echo the command line print ' '.join(sys.argv) # Initialise output files - values are set by reading from # the command line supplied by the Galaxy wrapper output_extra_html = None output_extra_path = None output_broadpeaks = None output_gappedpeaks = None output_narrowpeaks = None output_treat_pileup = None output_lambda_bedgraph = None output_bigwig = None output_xls_to_interval_peaks_file = None output_peaks = None output_bdgcmp = None # Other initialisations chrom_sizes_file = None # Build the MACS 2.1 command line # Initial arguments are always the same: command & input ChIP-seq file name cmdline = ["macs2 %s -t %s" % (sys.argv[1],sys.argv[2])] # Process remaining args for arg in sys.argv[3:]: if arg.startswith('--format='): # Convert format to uppercase format_ = arg.split('=')[1].upper() cmdline.append("--format=%s" % format_) elif arg.startswith('--name='): # Replace whitespace in name with underscores experiment_name = '_'.join(arg.split('=')[1].split()) cmdline.append("--name=%s" % experiment_name) elif arg.startswith('--length='): # Extract chromosome size file chrom_sizes_file = arg.split('=')[1] elif arg.startswith('--output-'): # Handle destinations for output files arg0,filen = arg.split('=') if arg0 == '--output-summits': output_summits = filen elif arg0 == '--output-extra-files': output_extra_html = filen elif arg0 == '--output-extra-files-path': output_extra_path = filen elif arg0 == '--output-broadpeaks': output_broadpeaks = filen elif arg0 == '--output-gappedpeaks': output_gappedpeaks = filen elif arg0 == '--output-narrowpeaks': output_narrowpeaks = filen elif arg0 == '--output-pileup': output_treat_pileup = filen elif arg0 == '--output-lambda-bedgraph': output_lambda_bedgraph = filen elif arg0 == '--output-bigwig': output_bigwig = filen elif arg0 == '--output-xls-to-interval': output_xls_to_interval_peaks_file = filen elif arg0 == '--output-peaks': output_peaks = filen else: # Pass remaining args directly to MACS # command line cmdline.append(arg) cmdline = ' '.join(cmdline) print "Generated command line:\n%s" % cmdline # Execute MACS2 # # Make a working directory working_dir = tempfile.mkdtemp() # # Collect stderr in a file for reporting later stderr_filen = tempfile.NamedTemporaryFile().name # # Run MACS2 proc = subprocess.Popen(args=cmdline,shell=True,cwd=working_dir, stderr=open(stderr_filen,'wb')) proc.wait() exit_code = proc.returncode if exit_code != 0: sys.stderr.write(open(stderr_filen,'rb').read()) sys.exit(exit_code) # Run R script to create PDF from model script if os.path.exists(os.path.join(working_dir,"%s_model.r" % experiment_name)): cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % \ (experiment_name, experiment_name) proc = subprocess.Popen(args=cmdline,shell=True,cwd=working_dir) proc.wait() # Convert XLS to interval, if requested if output_xls_to_interval_peaks_file is not None: peaks_xls_file = os.path.join(working_dir,'%s_peaks.xls' % experiment_name ) if os.path.exists(peaks_xls_file): convert_xls_to_interval(peaks_xls_file,output_xls_to_interval_peaks_file, header='peaks file') # Create bigWig from bedGraph, if requested if output_bigwig is not None: treat_bedgraph_file = os.path.join(working_dir,'%s_treat_pileup.bdg' % experiment_name) if os.path.exists(treat_bedgraph_file): make_bigwig_from_bedgraph(treat_bedgraph_file,output_bigwig, chrom_sizes_file,working_dir) # Move MACS2 output files from working dir to their final destinations move_file(working_dir,"%s_summits.bed" % experiment_name,output_summits) move_file(working_dir,"%s_peaks.xls" % experiment_name,output_peaks) move_file(working_dir,"%s_peaks.narrowPeak" % experiment_name,output_narrowpeaks) move_file(working_dir,"%s_peaks.broadPeak" % experiment_name,output_broadpeaks) move_file(working_dir,"%s_peaks.gappedPeak" % experiment_name,output_gappedpeaks) move_file(working_dir,"%s_treat_pileup.bdg" % experiment_name,output_treat_pileup) move_file(working_dir,"%s_control_lambda.bdg" % experiment_name,output_lambda_bedgraph) move_file(working_dir,"bdgcmp_out.bdg",output_bdgcmp) # Move remaining file to the 'extra files' path and link from the HTML # file to allow user to access them from within Galaxy html_file = open(output_extra_html,'wb') html_file.write('<html><head><title>Additional output created by MACS (%s)</title></head><body><h3>Additional Files:</h3><p><ul>\n' % experiment_name) # Make the 'extra files' directory os.mkdir(output_extra_path) # Move the files for filen in sorted(os.listdir(working_dir)): shutil.move(os.path.join(working_dir,filen), os.path.join(output_extra_path,filen)) html_file.write( '<li><a href="%s">%s</a></li>\n' % (filen,filen)) # All files moved, close out HTML html_file.write( '</ul></p>\n' ) # Append any stderr output html_file.write('<h3>Messages from MACS:</h3>\n<p><pre>%s</pre></p>\n' % open(stderr_filen,'rb').read()) html_file.write('</body></html>\n') html_file.close() # Clean up the working directory and files os.unlink(stderr_filen) os.rmdir(working_dir)