| 0 | 1 import glob | 
|  | 2 import gzip | 
|  | 3 import json | 
|  | 4 import os | 
|  | 5 import os.path | 
|  | 6 import shutil | 
|  | 7 import subprocess | 
|  | 8 import sys | 
|  | 9 import tempfile | 
|  | 10 | 
|  | 11 CHUNK_SIZE = 1024 | 
|  | 12 | 
|  | 13 def gunzip_cat_glob_path( glob_path, target_filename, delete = False ): | 
|  | 14     out = open( target_filename, 'wb' ) | 
|  | 15     for filename in glob.glob( glob_path ): | 
|  | 16         fh = gzip.open( filename, 'rb' ) | 
|  | 17         while True: | 
|  | 18             data = fh.read( CHUNK_SIZE ) | 
|  | 19             if data: | 
|  | 20                 out.write( data ) | 
|  | 21             else: | 
|  | 22                 break | 
|  | 23         fh.close() | 
|  | 24         if delete: | 
|  | 25             os.unlink( filename ) | 
|  | 26     out.close() | 
|  | 27 | 
|  | 28 def xls_to_interval( xls_file, interval_file, header = None ): | 
|  | 29     out = open( interval_file, 'wb' ) | 
|  | 30     if header: | 
|  | 31         out.write( '#%s\n' % header ) | 
|  | 32     wrote_header = False | 
|  | 33     #From macs readme: Coordinates in XLS is 1-based which is different with BED format. | 
|  | 34     for line in open( xls_file ): | 
|  | 35         #keep all existing comment lines | 
|  | 36         if line.startswith( '#' ): | 
|  | 37             out.write( line ) | 
|  | 38         elif not wrote_header: | 
|  | 39             out.write( '#%s' % line ) | 
|  | 40             wrote_header = True | 
|  | 41         else: | 
|  | 42             fields = line.split( '\t' ) | 
|  | 43             if len( fields ) > 1: | 
|  | 44                 fields[1] = str( int( fields[1] ) - 1 ) | 
|  | 45             out.write( '\t'.join( fields ) ) | 
|  | 46     out.close() | 
|  | 47 | 
|  | 48 def main(): | 
|  | 49     options = json.load( open( sys.argv[1] ) ) | 
|  | 50     output_bed = sys.argv[2] | 
|  | 51     output_extra_html = sys.argv[3] | 
|  | 52     output_extra_path = sys.argv[4] | 
|  | 53 | 
|  | 54     experiment_name = '_'.join( options['experiment_name'].split() ) #save experiment name here, it will be used by macs for filenames (gzip of wig files will fail with spaces - macs doesn't properly escape them)..need to replace all whitespace, split makes this easier | 
|  | 55     cmdline = "macs -t %s" % ",".join( options['input_chipseq'] ) | 
|  | 56     if options['input_control']: | 
|  | 57         cmdline = "%s -c %s" % ( cmdline, ",".join( options['input_control'] ) ) | 
|  | 58     cmdline = "%s --format='%s' --name='%s' --gsize='%s' --tsize='%s' --bw='%s' --pvalue='%s' --mfold='%s' %s --lambdaset='%s' %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['tsize'], options['bw'], options['pvalue'], options['mfold'], options['nolambda'], options['lambdaset'], options['futurefdr'] ) | 
|  | 59     if 'wig' in options: | 
|  | 60         wigextend = int( options['wig']['wigextend']  ) | 
|  | 61         if wigextend >= 0: | 
|  | 62             wigextend = "--wigextend='%s'" % wigextend | 
|  | 63         else: | 
|  | 64             wigextend = '' | 
|  | 65         cmdline = "%s --wig %s --space='%s'" % ( cmdline, wigextend, options['wig']['space'] ) | 
|  | 66     if 'nomodel' in options: | 
|  | 67         cmdline = "%s --nomodel --shiftsize='%s'" % ( cmdline, options['nomodel'] ) | 
|  | 68     if 'diag' in options: | 
|  | 69         cmdline = "%s --diag --fe-min='%s' --fe-max='%s' --fe-step='%s'" % ( cmdline, options['diag']['fe-min'], options['diag']['fe-max'], options['diag']['fe-step'] ) | 
|  | 70 | 
|  | 71     tmp_dir = tempfile.mkdtemp() #macs makes very messy output, need to contain it into a temp dir, then provide to user | 
|  | 72     stderr_name = tempfile.NamedTemporaryFile().name # redirect stderr here, macs provides lots of info via stderr, make it into a report | 
|  | 73     proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=open( stderr_name, 'wb' ) ) | 
|  | 74     proc.wait() | 
|  | 75     #We don't want to set tool run to error state if only warnings or info, e.g. mfold could be decreased to improve model, but let user view macs log | 
|  | 76     #Do not terminate if error code, allow dataset (e.g. log) creation and cleanup | 
|  | 77     if proc.returncode: | 
|  | 78         stderr_f = open( stderr_name ) | 
|  | 79         while True: | 
|  | 80             chunk = stderr_f.read( CHUNK_SIZE ) | 
|  | 81             if not chunk: | 
|  | 82                 stderr_f.close() | 
|  | 83                 break | 
|  | 84             sys.stderr.write( chunk ) | 
|  | 85 | 
|  | 86     #run R to create pdf from model script | 
|  | 87     if os.path.exists( os.path.join( tmp_dir, "%s_model.r" % experiment_name ) ): | 
|  | 88         cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % ( experiment_name, experiment_name ) | 
|  | 89         proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir ) | 
|  | 90         proc.wait() | 
|  | 91 | 
|  | 92 | 
|  | 93     #move bed out to proper output file | 
|  | 94     created_bed_name =  os.path.join( tmp_dir, "%s_peaks.bed" % experiment_name ) | 
|  | 95     if os.path.exists( created_bed_name ): | 
|  | 96         shutil.move( created_bed_name, output_bed ) | 
|  | 97 | 
|  | 98     #parse xls files to interval files as needed | 
|  | 99     if options['xls_to_interval']: | 
|  | 100         create_peak_xls_file = os.path.join( tmp_dir, '%s_peaks.xls' % experiment_name ) | 
|  | 101         if os.path.exists( create_peak_xls_file ): | 
|  | 102             xls_to_interval( create_peak_xls_file, options['xls_to_interval']['peaks_file'], header = 'peaks file' ) | 
|  | 103         create_peak_xls_file = os.path.join( tmp_dir, '%s_negative_peaks.xls' % experiment_name ) | 
|  | 104         if os.path.exists( create_peak_xls_file ): | 
|  | 105             xls_to_interval( create_peak_xls_file, options['xls_to_interval']['negative_peaks_file'], header = 'negative peaks file' ) | 
|  | 106 | 
|  | 107     #merge and move wig files as needed, delete gz'd files and remove emptied dirs | 
|  | 108     if 'wig' in options: | 
|  | 109         wig_base_dir = os.path.join( tmp_dir, "%s_MACS_wiggle" % experiment_name ) | 
|  | 110         if os.path.exists( wig_base_dir ): | 
|  | 111             #treatment | 
|  | 112             treatment_dir = os.path.join( wig_base_dir, "treat" ) | 
|  | 113             if os.path.exists( treatment_dir ): | 
|  | 114                 gunzip_cat_glob_path( os.path.join( treatment_dir, "*.wig.gz" ), options['wig']['output_treatment_file'], delete = True ) | 
|  | 115                 os.rmdir( treatment_dir ) | 
|  | 116                 #control | 
|  | 117                 if options['input_control']: | 
|  | 118                     control_dir = os.path.join( wig_base_dir, "control" ) | 
|  | 119                     if os.path.exists( control_dir ): | 
|  | 120                         gunzip_cat_glob_path( os.path.join( control_dir, "*.wig.gz" ), options['wig']['output_control_file'], delete = True ) | 
|  | 121                         os.rmdir( control_dir ) | 
|  | 122             os.rmdir( wig_base_dir ) | 
|  | 123 | 
|  | 124     #move all remaining files to extra files path of html file output to allow user download | 
|  | 125     out_html = open( output_extra_html, 'wb' ) | 
|  | 126     out_html.write( '<html><head><title>Additional output created by MACS (%s)</title></head><body><h3>Additional Files:</h3><p><ul>\n' % experiment_name ) | 
|  | 127     os.mkdir( output_extra_path ) | 
|  | 128     for filename in sorted( os.listdir( tmp_dir ) ): | 
|  | 129         shutil.move( os.path.join( tmp_dir, filename ), os.path.join( output_extra_path, filename ) ) | 
|  | 130         out_html.write( '<li><a href="%s">%s</a></li>\n' % ( filename, filename ) ) | 
|  | 131     out_html.write( '</ul></p>\n' ) | 
|  | 132     out_html.write( '<h3>Messages from MACS:</h3>\n<p><pre>%s</pre></p>\n' % open( stderr_name, 'rb' ).read() ) | 
|  | 133     out_html.write( '</body></html>\n' ) | 
|  | 134     out_html.close() | 
|  | 135 | 
|  | 136     os.unlink( stderr_name ) | 
|  | 137     os.rmdir( tmp_dir ) | 
|  | 138 | 
|  | 139 if __name__ == "__main__": main() |