Mercurial > repos > pjbriggs > macs21
comparison macs21_wrapper.py @ 0:06cb587a5e87 draft
Uploaded initial version 2.1.0-4.
author | pjbriggs |
---|---|
date | Tue, 30 Jun 2015 08:16:18 -0400 |
parents | |
children | 00d73c812399 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:06cb587a5e87 |
---|---|
1 #!/bin/env python | |
2 # | |
3 # Galaxy wrapper to run MACS 2.1 | |
4 # | |
5 # Completely rewritten from the original macs2 wrapped by Ziru Zhou | |
6 # taken from http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2 | |
7 | |
8 import sys | |
9 import os | |
10 import subprocess | |
11 import tempfile | |
12 import shutil | |
13 | |
14 def move_file(working_dir,name,destination): | |
15 """Move a file 'name' from 'working_dir' to 'destination' | |
16 | |
17 """ | |
18 if destination is None: | |
19 # Nothing to do | |
20 return | |
21 source = os.path.join(working_dir,name) | |
22 if os.path.exists(source): | |
23 shutil.move(source,destination) | |
24 | |
25 def convert_xls_to_interval(xls_file,interval_file,header=None): | |
26 """Convert MACS XLS file to interval | |
27 | |
28 From the MACS readme: "Coordinates in XLS is 1-based which is different with | |
29 BED format." | |
30 | |
31 However this function no longer performs any coordinate conversions, it | |
32 simply ensures that any blank or non-data lines are commented out | |
33 | |
34 """ | |
35 fp = open(interval_file,'wb') | |
36 if header: | |
37 fp.write('#%s\n' % header) | |
38 for line in open(xls_file): | |
39 # Keep all existing comment lines | |
40 if line.startswith('#'): | |
41 fp.write(line) | |
42 else: | |
43 # Split line into fields and test to see if | |
44 # the 'start' field is actually an integer | |
45 fields = line.split('\t') | |
46 if len(fields) > 1: | |
47 try: | |
48 int(fields[1]) | |
49 except ValueError: | |
50 # Integer conversion failed so comment out | |
51 # "bad" line instead | |
52 fields[0] = "#%s" % fields[0] | |
53 fp.write( '\t'.join( fields ) ) | |
54 fp.close() | |
55 | |
56 def make_bigwig_from_bedgraph(bedgraph_file,bigwig_file, | |
57 chrom_sizes,working_dir=None): | |
58 """Make bigWig file from a bedGraph | |
59 | |
60 The protocol is: | |
61 | |
62 $ fetchChromSizes.sh mm9 > mm9.chrom.sizes | |
63 $ bedClip treat.bedgraph mm9.chrom.sizes treat.clipped | |
64 $ bedGraphToBigWig treat.clipped mm9.chrom.sizes treat.bw | |
65 | |
66 Get the binaries from | |
67 http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/ | |
68 | |
69 We skip the fetchChromSizes step if the 'chrom_sizes' | |
70 argument supplied a valid file with the chromosome sizes | |
71 for the genome build in question. | |
72 | |
73 """ | |
74 print "Generating bigWig from bedGraph..." | |
75 # Check for chromosome sizes | |
76 if not os.path.exists(chrom_sizes): | |
77 # Determine genome build | |
78 chrom_sizes = os.path.basename(chrom_sizes) | |
79 genome_build = chrom_sizes.split('.')[0] | |
80 if genome_build == '?': | |
81 # No genome build set | |
82 sys.stderr.write("ERROR genome build not set, cannot get sizes for '?'\n") | |
83 sys.stderr.write("Assign a genome build to your input dataset and rerun\n") | |
84 sys.exit(1) | |
85 print "Missing chrom sizes file, attempting to fetch for '%s'" % genome_build | |
86 # Run fetchChromSizes | |
87 chrom_sizes = os.path.join(working_dir,chrom_sizes) | |
88 stderr_file = os.path.join(working_dir,"fetchChromSizes.stderr") | |
89 cmd = "fetchChromSizes %s" % genome_build | |
90 print "Running %s" % cmd | |
91 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir, | |
92 stdout=open(chrom_sizes,'wb'), | |
93 stderr=open(stderr_file,'wb')) | |
94 proc.wait() | |
95 # Copy stderr from fetchChromSizes for information only | |
96 for line in open(stderr_file,'r'): | |
97 print line.strip() | |
98 os.remove(stderr_file) | |
99 # Check that the sizes file was downloaded | |
100 if not os.path.exists(chrom_sizes): | |
101 sys.stderr.write("Failed to download chrom sizes for '%s'\n" % genome_build) | |
102 sys.exit(1) | |
103 # Run bedClip | |
104 treat_clipped = "%s.clipped" % os.path.basename(bedgraph_file) | |
105 cmd = "bedClip %s %s %s" % (bedgraph_file,chrom_sizes,treat_clipped) | |
106 print "Running %s" % cmd | |
107 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) | |
108 proc.wait() | |
109 # Check that clipped file exists | |
110 treat_clipped = os.path.join(working_dir,treat_clipped) | |
111 if not os.path.exists(treat_clipped): | |
112 sys.stderr.write("Failed to create clipped bed file\n") | |
113 sys.exit(1) | |
114 # Run bedGraphToBigWig | |
115 cmd = "bedGraphToBigWig %s %s %s" % (treat_clipped,chrom_sizes, | |
116 bigwig_file) | |
117 print "Running %s" % cmd | |
118 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) | |
119 proc.wait() | |
120 # Clean up temporary chrom length file | |
121 if os.path.dirname(chrom_sizes) == working_dir: | |
122 print "Removing temporary chrom sizes file" | |
123 os.remove(chrom_sizes) | |
124 | |
125 if __name__ == "__main__": | |
126 | |
127 # Echo the command line | |
128 print ' '.join(sys.argv) | |
129 | |
130 # Initialise output files - values are set by reading from | |
131 # the command line supplied by the Galaxy wrapper | |
132 output_extra_html = None | |
133 output_extra_path = None | |
134 output_broadpeaks = None | |
135 output_gappedpeaks = None | |
136 output_narrowpeaks = None | |
137 output_treat_pileup = None | |
138 output_lambda_bedgraph = None | |
139 output_bigwig = None | |
140 output_xls_to_interval_peaks_file = None | |
141 output_peaks = None | |
142 output_bdgcmp = None | |
143 | |
144 # Other initialisations | |
145 chrom_sizes_file = None | |
146 | |
147 # Build the MACS 2.1 command line | |
148 # Initial arguments are always the same: command & input ChIP-seq file name | |
149 cmdline = ["macs2 %s -t %s" % (sys.argv[1],sys.argv[2])] | |
150 | |
151 # Process remaining args | |
152 for arg in sys.argv[3:]: | |
153 if arg.startswith('--format='): | |
154 # Convert format to uppercase | |
155 format_ = arg.split('=')[1].upper() | |
156 cmdline.append("--format=%s" % format_) | |
157 elif arg.startswith('--name='): | |
158 # Replace whitespace in name with underscores | |
159 experiment_name = '_'.join(arg.split('=')[1].split()) | |
160 cmdline.append("--name=%s" % experiment_name) | |
161 elif arg.startswith('--length='): | |
162 # Extract chromosome size file | |
163 chrom_sizes_file = arg.split('=')[1] | |
164 elif arg.startswith('--output-'): | |
165 # Handle destinations for output files | |
166 arg0,filen = arg.split('=') | |
167 if arg0 == '--output-summits': | |
168 output_summits = filen | |
169 elif arg0 == '--output-extra-files': | |
170 output_extra_html = filen | |
171 elif arg0 == '--output-extra-files-path': | |
172 output_extra_path = filen | |
173 elif arg0 == '--output-broadpeaks': | |
174 output_broadpeaks = filen | |
175 elif arg0 == '--output-gappedpeaks': | |
176 output_gappedpeaks = filen | |
177 elif arg0 == '--output-narrowpeaks': | |
178 output_narrowpeaks = filen | |
179 elif arg0 == '--output-pileup': | |
180 output_treat_pileup = filen | |
181 elif arg0 == '--output-lambda-bedgraph': | |
182 output_lambda_bedgraph = filen | |
183 elif arg0 == '--output-bigwig': | |
184 output_bigwig = filen | |
185 elif arg0 == '--output-xls-to-interval': | |
186 output_xls_to_interval_peaks_file = filen | |
187 elif arg0 == '--output-peaks': | |
188 output_peaks = filen | |
189 else: | |
190 # Pass remaining args directly to MACS | |
191 # command line | |
192 cmdline.append(arg) | |
193 | |
194 cmdline = ' '.join(cmdline) | |
195 print "Generated command line:\n%s" % cmdline | |
196 | |
197 # Execute MACS2 | |
198 # | |
199 # Make a working directory | |
200 working_dir = tempfile.mkdtemp() | |
201 # | |
202 # Collect stderr in a file for reporting later | |
203 stderr_filen = tempfile.NamedTemporaryFile().name | |
204 # | |
205 # Run MACS2 | |
206 proc = subprocess.Popen(args=cmdline,shell=True,cwd=working_dir, | |
207 stderr=open(stderr_filen,'wb')) | |
208 proc.wait() | |
209 | |
210 # Run R script to create PDF from model script | |
211 if os.path.exists(os.path.join(working_dir,"%s_model.r" % experiment_name)): | |
212 cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % \ | |
213 (experiment_name, experiment_name) | |
214 proc = subprocess.Popen(args=cmdline,shell=True,cwd=working_dir) | |
215 proc.wait() | |
216 | |
217 # Convert XLS to interval, if requested | |
218 if output_xls_to_interval_peaks_file is not None: | |
219 peaks_xls_file = os.path.join(working_dir,'%s_peaks.xls' % experiment_name ) | |
220 if os.path.exists(peaks_xls_file): | |
221 convert_xls_to_interval(peaks_xls_file,output_xls_to_interval_peaks_file, | |
222 header='peaks file') | |
223 | |
224 # Create bigWig from bedGraph, if requested | |
225 if output_bigwig is not None: | |
226 treat_bedgraph_file = os.path.join(working_dir,'%s_treat_pileup.bdg' % experiment_name) | |
227 if os.path.exists(treat_bedgraph_file): | |
228 make_bigwig_from_bedgraph(treat_bedgraph_file,output_bigwig, | |
229 chrom_sizes_file,working_dir) | |
230 | |
231 # Move MACS2 output files from working dir to their final destinations | |
232 move_file(working_dir,"%s_summits.bed" % experiment_name,output_summits) | |
233 move_file(working_dir,"%s_peaks.xls" % experiment_name,output_peaks) | |
234 move_file(working_dir,"%s_peaks.narrowPeak" % experiment_name,output_narrowpeaks) | |
235 move_file(working_dir,"%s_peaks.broadPeak" % experiment_name,output_broadpeaks) | |
236 move_file(working_dir,"%s_peaks.gappedPeak" % experiment_name,output_gappedpeaks) | |
237 move_file(working_dir,"%s_treat_pileup.bdg" % experiment_name,output_treat_pileup) | |
238 move_file(working_dir,"%s_control_lambda.bdg" % experiment_name,output_lambda_bedgraph) | |
239 move_file(working_dir,"bdgcmp_out.bdg",output_bdgcmp) | |
240 | |
241 # Move remaining file to the 'extra files' path and link from the HTML | |
242 # file to allow user to access them from within Galaxy | |
243 html_file = open(output_extra_html,'wb') | |
244 html_file.write('<html><head><title>Additional output created by MACS (%s)</title></head><body><h3>Additional Files:</h3><p><ul>\n' % experiment_name) | |
245 # Make the 'extra files' directory | |
246 os.mkdir(output_extra_path) | |
247 # Move the files | |
248 for filen in sorted(os.listdir(working_dir)): | |
249 shutil.move(os.path.join(working_dir,filen), | |
250 os.path.join(output_extra_path,filen)) | |
251 html_file.write( '<li><a href="%s">%s</a></li>\n' % (filen,filen)) | |
252 # All files moved, close out HTML | |
253 html_file.write( '</ul></p>\n' ) | |
254 # Append any stderr output | |
255 html_file.write('<h3>Messages from MACS:</h3>\n<p><pre>%s</pre></p>\n' % | |
256 open(stderr_filen,'rb').read()) | |
257 html_file.write('</body></html>\n') | |
258 html_file.close() | |
259 | |
260 # Clean up the working directory and files | |
261 os.unlink(stderr_filen) | |
262 os.rmdir(working_dir) |