macs: macs_wrapper.py annotate

annotate macs_wrapper.py @ 0:ae2ec275332a draft

Imported from capsule None

author	devteam
date	Mon, 27 Jan 2014 09:28:45 -0500 (2014-01-27)
parents
children	e5f3bfba6812

rev	line source
0 ae2ec275332a Imported from capsule None devteam parents: diff changeset	1 import glob
ae2ec275332a Imported from capsule None devteam parents: diff changeset	2 import gzip
ae2ec275332a Imported from capsule None devteam parents: diff changeset	3 import json
ae2ec275332a Imported from capsule None devteam parents: diff changeset	4 import os
ae2ec275332a Imported from capsule None devteam parents: diff changeset	5 import os.path
ae2ec275332a Imported from capsule None devteam parents: diff changeset	6 import shutil
ae2ec275332a Imported from capsule None devteam parents: diff changeset	7 import subprocess
ae2ec275332a Imported from capsule None devteam parents: diff changeset	8 import sys
ae2ec275332a Imported from capsule None devteam parents: diff changeset	9 import tempfile
ae2ec275332a Imported from capsule None devteam parents: diff changeset	10
ae2ec275332a Imported from capsule None devteam parents: diff changeset	11 CHUNK_SIZE = 1024
ae2ec275332a Imported from capsule None devteam parents: diff changeset	12
ae2ec275332a Imported from capsule None devteam parents: diff changeset	13 def gunzip_cat_glob_path( glob_path, target_filename, delete = False ):
ae2ec275332a Imported from capsule None devteam parents: diff changeset	14 out = open( target_filename, 'wb' )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	15 for filename in glob.glob( glob_path ):
ae2ec275332a Imported from capsule None devteam parents: diff changeset	16 fh = gzip.open( filename, 'rb' )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	17 while True:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	18 data = fh.read( CHUNK_SIZE )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	19 if data:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	20 out.write( data )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	21 else:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	22 break
ae2ec275332a Imported from capsule None devteam parents: diff changeset	23 fh.close()
ae2ec275332a Imported from capsule None devteam parents: diff changeset	24 if delete:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	25 os.unlink( filename )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	26 out.close()
ae2ec275332a Imported from capsule None devteam parents: diff changeset	27
ae2ec275332a Imported from capsule None devteam parents: diff changeset	28 def xls_to_interval( xls_file, interval_file, header = None ):
ae2ec275332a Imported from capsule None devteam parents: diff changeset	29 out = open( interval_file, 'wb' )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	30 if header:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	31 out.write( '#%s\n' % header )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	32 wrote_header = False
ae2ec275332a Imported from capsule None devteam parents: diff changeset	33 #From macs readme: Coordinates in XLS is 1-based which is different with BED format.
ae2ec275332a Imported from capsule None devteam parents: diff changeset	34 for line in open( xls_file ):
ae2ec275332a Imported from capsule None devteam parents: diff changeset	35 #keep all existing comment lines
ae2ec275332a Imported from capsule None devteam parents: diff changeset	36 if line.startswith( '#' ):
ae2ec275332a Imported from capsule None devteam parents: diff changeset	37 out.write( line )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	38 elif not wrote_header:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	39 out.write( '#%s' % line )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	40 wrote_header = True
ae2ec275332a Imported from capsule None devteam parents: diff changeset	41 else:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	42 fields = line.split( '\t' )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	43 if len( fields ) > 1:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	44 fields[1] = str( int( fields[1] ) - 1 )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	45 out.write( '\t'.join( fields ) )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	46 out.close()
ae2ec275332a Imported from capsule None devteam parents: diff changeset	47
ae2ec275332a Imported from capsule None devteam parents: diff changeset	48 def main():
ae2ec275332a Imported from capsule None devteam parents: diff changeset	49 options = json.load( open( sys.argv[1] ) )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	50 output_bed = sys.argv[2]
ae2ec275332a Imported from capsule None devteam parents: diff changeset	51 output_extra_html = sys.argv[3]
ae2ec275332a Imported from capsule None devteam parents: diff changeset	52 output_extra_path = sys.argv[4]
ae2ec275332a Imported from capsule None devteam parents: diff changeset	53
ae2ec275332a Imported from capsule None devteam parents: diff changeset	54 experiment_name = '_'.join( options['experiment_name'].split() ) #save experiment name here, it will be used by macs for filenames (gzip of wig files will fail with spaces - macs doesn't properly escape them)..need to replace all whitespace, split makes this easier
ae2ec275332a Imported from capsule None devteam parents: diff changeset	55 cmdline = "macs -t %s" % ",".join( options['input_chipseq'] )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	56 if options['input_control']:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	57 cmdline = "%s -c %s" % ( cmdline, ",".join( options['input_control'] ) )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	58 cmdline = "%s --format='%s' --name='%s' --gsize='%s' --tsize='%s' --bw='%s' --pvalue='%s' --mfold='%s' %s --lambdaset='%s' %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['tsize'], options['bw'], options['pvalue'], options['mfold'], options['nolambda'], options['lambdaset'], options['futurefdr'] )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	59 if 'wig' in options:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	60 wigextend = int( options['wig']['wigextend'] )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	61 if wigextend >= 0:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	62 wigextend = "--wigextend='%s'" % wigextend
ae2ec275332a Imported from capsule None devteam parents: diff changeset	63 else:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	64 wigextend = ''
ae2ec275332a Imported from capsule None devteam parents: diff changeset	65 cmdline = "%s --wig %s --space='%s'" % ( cmdline, wigextend, options['wig']['space'] )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	66 if 'nomodel' in options:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	67 cmdline = "%s --nomodel --shiftsize='%s'" % ( cmdline, options['nomodel'] )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	68 if 'diag' in options:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	69 cmdline = "%s --diag --fe-min='%s' --fe-max='%s' --fe-step='%s'" % ( cmdline, options['diag']['fe-min'], options['diag']['fe-max'], options['diag']['fe-step'] )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	70
ae2ec275332a Imported from capsule None devteam parents: diff changeset	71 tmp_dir = tempfile.mkdtemp() #macs makes very messy output, need to contain it into a temp dir, then provide to user
ae2ec275332a Imported from capsule None devteam parents: diff changeset	72 stderr_name = tempfile.NamedTemporaryFile().name # redirect stderr here, macs provides lots of info via stderr, make it into a report
ae2ec275332a Imported from capsule None devteam parents: diff changeset	73 proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=open( stderr_name, 'wb' ) )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	74 proc.wait()
ae2ec275332a Imported from capsule None devteam parents: diff changeset	75 #We don't want to set tool run to error state if only warnings or info, e.g. mfold could be decreased to improve model, but let user view macs log
ae2ec275332a Imported from capsule None devteam parents: diff changeset	76 #Do not terminate if error code, allow dataset (e.g. log) creation and cleanup
ae2ec275332a Imported from capsule None devteam parents: diff changeset	77 if proc.returncode:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	78 stderr_f = open( stderr_name )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	79 while True:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	80 chunk = stderr_f.read( CHUNK_SIZE )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	81 if not chunk:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	82 stderr_f.close()
ae2ec275332a Imported from capsule None devteam parents: diff changeset	83 break
ae2ec275332a Imported from capsule None devteam parents: diff changeset	84 sys.stderr.write( chunk )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	85
ae2ec275332a Imported from capsule None devteam parents: diff changeset	86 #run R to create pdf from model script
ae2ec275332a Imported from capsule None devteam parents: diff changeset	87 if os.path.exists( os.path.join( tmp_dir, "%s_model.r" % experiment_name ) ):
ae2ec275332a Imported from capsule None devteam parents: diff changeset	88 cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % ( experiment_name, experiment_name )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	89 proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	90 proc.wait()
ae2ec275332a Imported from capsule None devteam parents: diff changeset	91
ae2ec275332a Imported from capsule None devteam parents: diff changeset	92
ae2ec275332a Imported from capsule None devteam parents: diff changeset	93 #move bed out to proper output file
ae2ec275332a Imported from capsule None devteam parents: diff changeset	94 created_bed_name = os.path.join( tmp_dir, "%s_peaks.bed" % experiment_name )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	95 if os.path.exists( created_bed_name ):
ae2ec275332a Imported from capsule None devteam parents: diff changeset	96 shutil.move( created_bed_name, output_bed )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	97
ae2ec275332a Imported from capsule None devteam parents: diff changeset	98 #parse xls files to interval files as needed
ae2ec275332a Imported from capsule None devteam parents: diff changeset	99 if options['xls_to_interval']:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	100 create_peak_xls_file = os.path.join( tmp_dir, '%s_peaks.xls' % experiment_name )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	101 if os.path.exists( create_peak_xls_file ):
ae2ec275332a Imported from capsule None devteam parents: diff changeset	102 xls_to_interval( create_peak_xls_file, options['xls_to_interval']['peaks_file'], header = 'peaks file' )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	103 create_peak_xls_file = os.path.join( tmp_dir, '%s_negative_peaks.xls' % experiment_name )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	104 if os.path.exists( create_peak_xls_file ):
ae2ec275332a Imported from capsule None devteam parents: diff changeset	105 xls_to_interval( create_peak_xls_file, options['xls_to_interval']['negative_peaks_file'], header = 'negative peaks file' )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	106
ae2ec275332a Imported from capsule None devteam parents: diff changeset	107 #merge and move wig files as needed, delete gz'd files and remove emptied dirs
ae2ec275332a Imported from capsule None devteam parents: diff changeset	108 if 'wig' in options:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	109 wig_base_dir = os.path.join( tmp_dir, "%s_MACS_wiggle" % experiment_name )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	110 if os.path.exists( wig_base_dir ):
ae2ec275332a Imported from capsule None devteam parents: diff changeset	111 #treatment
ae2ec275332a Imported from capsule None devteam parents: diff changeset	112 treatment_dir = os.path.join( wig_base_dir, "treat" )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	113 if os.path.exists( treatment_dir ):
ae2ec275332a Imported from capsule None devteam parents: diff changeset	114 gunzip_cat_glob_path( os.path.join( treatment_dir, "*.wig.gz" ), options['wig']['output_treatment_file'], delete = True )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	115 os.rmdir( treatment_dir )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	116 #control
ae2ec275332a Imported from capsule None devteam parents: diff changeset	117 if options['input_control']:
ae2ec275332a Imported from capsule None devteam parents: diff changeset	118 control_dir = os.path.join( wig_base_dir, "control" )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	119 if os.path.exists( control_dir ):
ae2ec275332a Imported from capsule None devteam parents: diff changeset	120 gunzip_cat_glob_path( os.path.join( control_dir, "*.wig.gz" ), options['wig']['output_control_file'], delete = True )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	121 os.rmdir( control_dir )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	122 os.rmdir( wig_base_dir )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	123
ae2ec275332a Imported from capsule None devteam parents: diff changeset	124 #move all remaining files to extra files path of html file output to allow user download
ae2ec275332a Imported from capsule None devteam parents: diff changeset	125 out_html = open( output_extra_html, 'wb' )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	126 out_html.write( '<html><head><title>Additional output created by MACS (%s)</title></head><body><h3>Additional Files:</h3><p><ul>\n' % experiment_name )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	127 os.mkdir( output_extra_path )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	128 for filename in sorted( os.listdir( tmp_dir ) ):
ae2ec275332a Imported from capsule None devteam parents: diff changeset	129 shutil.move( os.path.join( tmp_dir, filename ), os.path.join( output_extra_path, filename ) )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	130 out_html.write( '<li><a href="%s">%s</a></li>\n' % ( filename, filename ) )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	131 out_html.write( '</ul></p>\n' )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	132 out_html.write( '<h3>Messages from MACS:</h3>\n<p><pre>%s</pre></p>\n' % open( stderr_name, 'rb' ).read() )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	133 out_html.write( '</body></html>\n' )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	134 out_html.close()
ae2ec275332a Imported from capsule None devteam parents: diff changeset	135
ae2ec275332a Imported from capsule None devteam parents: diff changeset	136 os.unlink( stderr_name )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	137 os.rmdir( tmp_dir )
ae2ec275332a Imported from capsule None devteam parents: diff changeset	138
ae2ec275332a Imported from capsule None devteam parents: diff changeset	139 if __name__ == "__main__": main()

Mercurial > repos > devteam > macs

annotate macs_wrapper.py @ 0:ae2ec275332a draft