annotate tools/peak_calling/macs_wrapper.py @ 2:c2a356708570

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:42 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 import sys, subprocess, tempfile, shutil, glob, os, os.path, gzip
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 from galaxy import eggs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 import pkg_resources
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 pkg_resources.require( "simplejson" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 import simplejson
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 CHUNK_SIZE = 1024
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 def gunzip_cat_glob_path( glob_path, target_filename, delete = False ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 out = open( target_filename, 'wb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 for filename in glob.glob( glob_path ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 fh = gzip.open( filename, 'rb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 while True:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 data = fh.read( CHUNK_SIZE )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 if data:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 out.write( data )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 break
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 fh.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 if delete:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 os.unlink( filename )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 out.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 def xls_to_interval( xls_file, interval_file, header = None ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 out = open( interval_file, 'wb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 if header:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 out.write( '#%s\n' % header )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 wrote_header = False
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 #From macs readme: Coordinates in XLS is 1-based which is different with BED format.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 for line in open( xls_file ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 #keep all existing comment lines
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 if line.startswith( '#' ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 out.write( line )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 elif not wrote_header:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 out.write( '#%s' % line )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 wrote_header = True
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 fields = line.split( '\t' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 if len( fields ) > 1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 fields[1] = str( int( fields[1] ) - 1 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 out.write( '\t'.join( fields ) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 out.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 def main():
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 options = simplejson.load( open( sys.argv[1] ) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 output_bed = sys.argv[2]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 output_extra_html = sys.argv[3]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 output_extra_path = sys.argv[4]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 experiment_name = '_'.join( options['experiment_name'].split() ) #save experiment name here, it will be used by macs for filenames (gzip of wig files will fail with spaces - macs doesn't properly escape them)..need to replace all whitespace, split makes this easier
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 cmdline = "macs -t %s" % ",".join( options['input_chipseq'] )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 if options['input_control']:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 cmdline = "%s -c %s" % ( cmdline, ",".join( options['input_control'] ) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 cmdline = "%s --format='%s' --name='%s' --gsize='%s' --tsize='%s' --bw='%s' --pvalue='%s' --mfold='%s' %s --lambdaset='%s' %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['tsize'], options['bw'], options['pvalue'], options['mfold'], options['nolambda'], options['lambdaset'], options['futurefdr'] )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 if 'wig' in options:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 wigextend = int( options['wig']['wigextend'] )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 if wigextend >= 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 wigextend = "--wigextend='%s'" % wigextend
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 wigextend = ''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 cmdline = "%s --wig %s --space='%s'" % ( cmdline, wigextend, options['wig']['space'] )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 if 'nomodel' in options:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 cmdline = "%s --nomodel --shiftsize='%s'" % ( cmdline, options['nomodel'] )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 if 'diag' in options:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 cmdline = "%s --diag --fe-min='%s' --fe-max='%s' --fe-step='%s'" % ( cmdline, options['diag']['fe-min'], options['diag']['fe-max'], options['diag']['fe-step'] )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 tmp_dir = tempfile.mkdtemp() #macs makes very messy output, need to contain it into a temp dir, then provide to user
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 stderr_name = tempfile.NamedTemporaryFile().name # redirect stderr here, macs provides lots of info via stderr, make it into a report
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=open( stderr_name, 'wb' ) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 proc.wait()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 #We don't want to set tool run to error state if only warnings or info, e.g. mfold could be decreased to improve model, but let user view macs log
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 #Do not terminate if error code, allow dataset (e.g. log) creation and cleanup
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 if proc.returncode:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 stderr_f = open( stderr_name )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 while True:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 chunk = stderr_f.read( CHUNK_SIZE )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 if not chunk:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 stderr_f.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 break
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 sys.stderr.write( chunk )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 #run R to create pdf from model script
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 if os.path.exists( os.path.join( tmp_dir, "%s_model.r" % experiment_name ) ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % ( experiment_name, experiment_name )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 proc.wait()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 #move bed out to proper output file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 created_bed_name = os.path.join( tmp_dir, "%s_peaks.bed" % experiment_name )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 if os.path.exists( created_bed_name ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 shutil.move( created_bed_name, output_bed )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 #parse xls files to interval files as needed
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 if options['xls_to_interval']:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 create_peak_xls_file = os.path.join( tmp_dir, '%s_peaks.xls' % experiment_name )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 if os.path.exists( create_peak_xls_file ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 xls_to_interval( create_peak_xls_file, options['xls_to_interval']['peaks_file'], header = 'peaks file' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 create_peak_xls_file = os.path.join( tmp_dir, '%s_negative_peaks.xls' % experiment_name )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 if os.path.exists( create_peak_xls_file ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 xls_to_interval( create_peak_xls_file, options['xls_to_interval']['negative_peaks_file'], header = 'negative peaks file' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 #merge and move wig files as needed, delete gz'd files and remove emptied dirs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 if 'wig' in options:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 wig_base_dir = os.path.join( tmp_dir, "%s_MACS_wiggle" % experiment_name )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 if os.path.exists( wig_base_dir ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 #treatment
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 treatment_dir = os.path.join( wig_base_dir, "treat" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 if os.path.exists( treatment_dir ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 gunzip_cat_glob_path( os.path.join( treatment_dir, "*.wig.gz" ), options['wig']['output_treatment_file'], delete = True )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 os.rmdir( treatment_dir )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 #control
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 if options['input_control']:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 control_dir = os.path.join( wig_base_dir, "control" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 if os.path.exists( control_dir ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 gunzip_cat_glob_path( os.path.join( control_dir, "*.wig.gz" ), options['wig']['output_control_file'], delete = True )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 os.rmdir( control_dir )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 os.rmdir( wig_base_dir )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 #move all remaining files to extra files path of html file output to allow user download
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 out_html = open( output_extra_html, 'wb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 out_html.write( '<html><head><title>Additional output created by MACS (%s)</title></head><body><h3>Additional Files:</h3><p><ul>\n' % experiment_name )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 os.mkdir( output_extra_path )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124 for filename in sorted( os.listdir( tmp_dir ) ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 shutil.move( os.path.join( tmp_dir, filename ), os.path.join( output_extra_path, filename ) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 out_html.write( '<li><a href="%s">%s</a></li>\n' % ( filename, filename ) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 out_html.write( '</ul></p>\n' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 out_html.write( '<h3>Messages from MACS:</h3>\n<p><pre>%s</pre></p>\n' % open( stderr_name, 'rb' ).read() )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 out_html.write( '</body></html>\n' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130 out_html.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 os.unlink( stderr_name )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133 os.rmdir( tmp_dir )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135 if __name__ == "__main__": main()