Mercurial > repos > bgruening > bismark
comparison bismark_methylation_extractor.py @ 0:62c6da72dd4a draft
Uploaded
author | bgruening |
---|---|
date | Sat, 06 Jul 2013 09:57:36 -0400 |
parents | |
children | 91f07ff056ca |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:62c6da72dd4a |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse, os, shutil, subprocess, sys, tempfile, fileinput | |
4 import zipfile | |
5 from glob import glob | |
6 | |
7 def stop_err( msg ): | |
8 sys.stderr.write( "%s\n" % msg ) | |
9 sys.exit() | |
10 | |
11 def zipper(dir, zip_file): | |
12 zip = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED) | |
13 root_len = len(os.path.abspath(dir)) | |
14 for root, dirs, files in os.walk(dir): | |
15 archive_root = os.path.abspath(root)[root_len:] | |
16 for f in files: | |
17 fullpath = os.path.join(root, f) | |
18 archive_name = os.path.join(archive_root, f) | |
19 zip.write(fullpath, archive_name, zipfile.ZIP_DEFLATED) | |
20 zip.close() | |
21 return zip_file | |
22 | |
23 def __main__(): | |
24 #Parse Command Line | |
25 parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.') | |
26 | |
27 # input options | |
28 parser.add_argument( '--bismark_path', dest='bismark_path', help='Path to the bismark perl scripts' ) | |
29 | |
30 parser.add_argument( '--infile', help='Input file in SAM format.' ) | |
31 parser.add_argument( '--single-end', dest='single_end', action="store_true" ) | |
32 parser.add_argument( '--paired-end', dest='paired_end', action="store_true" ) | |
33 | |
34 parser.add_argument( '--report-file', dest='report_file' ) | |
35 parser.add_argument( '--comprehensive', action="store_true" ) | |
36 parser.add_argument( '--merge-non-cpg', dest='merge_non_cpg', action="store_true" ) | |
37 parser.add_argument( '--no-overlap', dest='no_overlap', action="store_true" ) | |
38 parser.add_argument( '--compress' ) | |
39 parser.add_argument( '--ignore-bps', dest='ignore_bps', type=int ) | |
40 | |
41 # OT - original top strand | |
42 parser.add_argument( '--cpg_ot' ) | |
43 parser.add_argument( '--chg_ot' ) | |
44 parser.add_argument( '--chh_ot' ) | |
45 # CTOT - complementary to original top strand | |
46 parser.add_argument( '--cpg_ctot' ) | |
47 parser.add_argument( '--chg_ctot' ) | |
48 parser.add_argument( '--chh_ctot' ) | |
49 # OB - original bottom strand | |
50 parser.add_argument( '--cpg_ob' ) | |
51 parser.add_argument( '--chg_ob' ) | |
52 parser.add_argument( '--chh_ob' ) | |
53 # CTOT - complementary to original bottom strand | |
54 parser.add_argument( '--cpg_ctob' ) | |
55 parser.add_argument( '--chg_ctob' ) | |
56 parser.add_argument( '--chh_ctob' ) | |
57 | |
58 parser.add_argument( '--cpg_context' ) | |
59 parser.add_argument( '--chg_context' ) | |
60 parser.add_argument( '--chh_context' ) | |
61 | |
62 parser.add_argument( '--non_cpg_context' ) | |
63 | |
64 parser.add_argument( '--non_cpg_context_ot' ) | |
65 parser.add_argument( '--non_cpg_context_ctot' ) | |
66 parser.add_argument( '--non_cpg_context_ob' ) | |
67 parser.add_argument( '--non_cpg_context_ctob' ) | |
68 | |
69 args = parser.parse_args() | |
70 | |
71 | |
72 # Build methylation extractor command | |
73 output_dir = tempfile.mkdtemp() | |
74 cmd = 'bismark_methylation_extractor --no_header -o %s %s %s' | |
75 if args.bismark_path: | |
76 # add the path to the bismark perl scripts, that is needed for galaxy | |
77 cmd = os.path.join(args.bismark_path, cmd) | |
78 | |
79 additional_opts = '' | |
80 # Set up all options | |
81 if args.single_end: | |
82 additional_opts += ' --single-end ' | |
83 else: | |
84 additional_opts += ' --paired-end ' | |
85 if args.no_overlap: | |
86 additional_opts += ' --no_overlap ' | |
87 if args.ignore_bps: | |
88 additional_opts += ' --ignore %s ' % args.ignore_bps | |
89 if args.comprehensive: | |
90 additional_opts += ' --comprehensive ' | |
91 if args.merge_non_cpg: | |
92 additional_opts += ' --merge_non_CpG ' | |
93 if args.report_file: | |
94 additional_opts += ' --report ' | |
95 | |
96 | |
97 # Final command: | |
98 cmd = cmd % (output_dir, additional_opts, args.infile) | |
99 | |
100 # Run | |
101 try: | |
102 tmp_out = tempfile.NamedTemporaryFile().name | |
103 tmp_stdout = open( tmp_out, 'wb' ) | |
104 tmp_err = tempfile.NamedTemporaryFile().name | |
105 tmp_stderr = open( tmp_err, 'wb' ) | |
106 proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr ) | |
107 returncode = proc.wait() | |
108 tmp_stderr.close() | |
109 # get stderr, allowing for case where it's very large | |
110 tmp_stderr = open( tmp_err, 'rb' ) | |
111 stderr = '' | |
112 buffsize = 1048576 | |
113 try: | |
114 while True: | |
115 stderr += tmp_stderr.read( buffsize ) | |
116 if not stderr or len( stderr ) % buffsize != 0: | |
117 break | |
118 except OverflowError: | |
119 pass | |
120 tmp_stdout.close() | |
121 tmp_stderr.close() | |
122 if returncode != 0: | |
123 raise Exception, stderr | |
124 | |
125 # TODO: look for errors in program output. | |
126 except Exception, e: | |
127 stop_err( 'Error in bismark methylation extractor:\n' + str( e ) ) | |
128 | |
129 | |
130 # collect and copy output files | |
131 | |
132 if args.compress: | |
133 zipper(output_dir, args.compress) | |
134 | |
135 | |
136 if args.cpg_ot: | |
137 shutil.move( glob(os.path.join( output_dir, '*CpG_OT_*'))[0], args.cpg_ot ) | |
138 if args.chg_ot: | |
139 shutil.move( glob(os.path.join( output_dir, '*CHG_OT_*'))[0], args.chg_ot ) | |
140 if args.chh_ot: | |
141 shutil.move( glob(os.path.join( output_dir, '*CHH_OT_*'))[0], args.chh_ot ) | |
142 if args.cpg_ctot: | |
143 shutil.move( glob(os.path.join( output_dir, '*CpG_CTOT_*'))[0], args.cpg_ctot ) | |
144 if args.chg_ctot: | |
145 shutil.move( glob(os.path.join( output_dir, '*CHG_CTOT_*'))[0], args.chg_ctot ) | |
146 if args.chh_ctot: | |
147 shutil.move( glob(os.path.join( output_dir, '*CHH_CTOT_*'))[0], args.chh_ctot ) | |
148 if args.cpg_ob: | |
149 shutil.move( glob(os.path.join( output_dir, '*CpG_OB_*'))[0], args.cpg_ob ) | |
150 if args.chg_ob: | |
151 shutil.move( glob(os.path.join( output_dir, '*CHG_OB_*'))[0], args.chg_ob ) | |
152 if args.chh_ob: | |
153 shutil.move( glob(os.path.join( output_dir, '*CHH_OB_*'))[0], args.chh_ob ) | |
154 if args.cpg_ctob: | |
155 shutil.move( glob(os.path.join( output_dir, '*CpG_CTOB_*'))[0], args.cpg_ctob ) | |
156 if args.chg_ctob: | |
157 shutil.move( glob(os.path.join( output_dir, '*CHG_CTOB_*'))[0], args.chg_ctob ) | |
158 if args.chh_ctob: | |
159 shutil.move( glob(os.path.join( output_dir, '*CHH_CTOB_*'))[0], args.chh_ctob ) | |
160 | |
161 # context-dependent methylation output files | |
162 if args.cpg_context: | |
163 shutil.move( glob(os.path.join( output_dir, '*CpG_context_*'))[0], args.cpg_context ) | |
164 if args.chg_context: | |
165 shutil.move( glob(os.path.join( output_dir, '*CHG_context_*'))[0], args.chg_context ) | |
166 if args.chh_context: | |
167 shutil.move( glob(os.path.join( output_dir, '*CHH_context_*'))[0], args.chh_context ) | |
168 | |
169 if args.non_cpg_context: | |
170 shutil.move( glob(os.path.join( output_dir, '*Non_CpG_context_*'))[0], args.non_cpg_context ) | |
171 | |
172 if args.non_cpg_context_ot: | |
173 shutil.move( glob(os.path.join( output_dir, '*Non_CpG_OT_*'))[0], args.non_cpg_context_ot ) | |
174 if args.non_cpg_context_ctot: | |
175 shutil.move( glob(os.path.join( output_dir, '*Non_CpG_CTOT_*'))[0], args.non_cpg_context_ctot ) | |
176 if args.non_cpg_context_ob: | |
177 shutil.move( glob(os.path.join( output_dir, '*Non_CpG_OB_*'))[0], args.non_cpg_context_ob ) | |
178 if args.non_cpg_context_ctob: | |
179 shutil.move( glob(os.path.join( output_dir, '*Non_CpG_CTOB_*'))[0], args.non_cpg_context_ctob ) | |
180 | |
181 | |
182 | |
183 if args.report_file: | |
184 shutil.move( glob(os.path.join( output_dir, '*_splitting_report*'))[0], args.report_file ) | |
185 | |
186 | |
187 # Clean up temp dirs | |
188 if os.path.exists( output_dir ): | |
189 shutil.rmtree( output_dir ) | |
190 | |
191 if __name__=="__main__": __main__() |