comparison bismark_methylation_extractor.py @ 0:36d124f44c0a draft

inital commit
author bjoern-gruening
date Tue, 25 Dec 2012 05:45:46 -0500
parents
children 427fb56f2e41
comparison
equal deleted inserted replaced
-1:000000000000 0:36d124f44c0a
1 #!/usr/bin/env python
2
3 import argparse, os, shutil, subprocess, sys, tempfile, fileinput
4 import zipfile
5 from glob import glob
6
7 def stop_err( msg ):
8 sys.stderr.write( "%s\n" % msg )
9 sys.exit()
10
11 def zipper(dir, zip_file):
12 zip = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED)
13 root_len = len(os.path.abspath(dir))
14 for root, dirs, files in os.walk(dir):
15 archive_root = os.path.abspath(root)[root_len:]
16 for f in files:
17 fullpath = os.path.join(root, f)
18 archive_name = os.path.join(archive_root, f)
19 zip.write(fullpath, archive_name, zipfile.ZIP_DEFLATED)
20 zip.close()
21 return zip_file
22
23 def __main__():
24 #Parse Command Line
25 parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.')
26
27 # input options
28 parser.add_argument( '--infile', help='Input file in SAM format.' )
29 parser.add_argument( '--single-end', dest='single_end', action="store_true" )
30 parser.add_argument( '--paired-end', dest='paired_end', action="store_true" )
31
32 parser.add_argument( '--report-file', dest='report_file' )
33 parser.add_argument( '--comprehensive', action="store_true" )
34 parser.add_argument( '--merge-non-cpg', dest='merge_non_cpg', action="store_true" )
35 parser.add_argument( '--no-overlap', dest='no_overlap', action="store_true" )
36 parser.add_argument( '--compress' )
37 parser.add_argument( '--ignore-bps', dest='ignore_bps', type=int )
38
39 # OT - original top strand
40 parser.add_argument( '--cpg_ot' )
41 parser.add_argument( '--chg_ot' )
42 parser.add_argument( '--chh_ot' )
43 # CTOT - complementary to original top strand
44 parser.add_argument( '--cpg_ctot' )
45 parser.add_argument( '--chg_ctot' )
46 parser.add_argument( '--chh_ctot' )
47 # OB - original bottom strand
48 parser.add_argument( '--cpg_ob' )
49 parser.add_argument( '--chg_ob' )
50 parser.add_argument( '--chh_ob' )
51 # CTOT - complementary to original bottom strand
52 parser.add_argument( '--cpg_ctob' )
53 parser.add_argument( '--chg_ctob' )
54 parser.add_argument( '--chh_ctob' )
55
56 parser.add_argument( '--cpg_context' )
57 parser.add_argument( '--chg_context' )
58 parser.add_argument( '--chh_context' )
59
60 parser.add_argument( '--non_cpg_context' )
61
62 parser.add_argument( '--non_cpg_context_ot' )
63 parser.add_argument( '--non_cpg_context_ctot' )
64 parser.add_argument( '--non_cpg_context_ob' )
65 parser.add_argument( '--non_cpg_context_ctob' )
66
67 args = parser.parse_args()
68
69
70 # Build methylation extractor command
71 output_dir = tempfile.mkdtemp()
72 cmd = 'bismark_methylation_extractor --no_header -o %s %s %s'
73
74 additional_opts = ''
75 # Set up all options
76 if args.single_end:
77 additional_opts += ' --single-end '
78 else:
79 additional_opts += ' --paired-end '
80 if args.no_overlap:
81 additional_opts += ' --no_overlap '
82 if args.ignore_bps:
83 additional_opts += ' --ignore %s ' % args.ignore_bps
84 if args.comprehensive:
85 additional_opts += ' --comprehensive '
86 if args.merge_non_cpg:
87 additional_opts += ' --merge_non_CpG '
88 if args.report_file:
89 additional_opts += ' --report '
90
91
92 # Final command:
93 cmd = cmd % (output_dir, additional_opts, args.infile)
94
95 # Run
96 try:
97 tmp_out = tempfile.NamedTemporaryFile().name
98 tmp_stdout = open( tmp_out, 'wb' )
99 tmp_err = tempfile.NamedTemporaryFile().name
100 tmp_stderr = open( tmp_err, 'wb' )
101 proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )
102 returncode = proc.wait()
103 tmp_stderr.close()
104 # get stderr, allowing for case where it's very large
105 tmp_stderr = open( tmp_err, 'rb' )
106 stderr = ''
107 buffsize = 1048576
108 try:
109 while True:
110 stderr += tmp_stderr.read( buffsize )
111 if not stderr or len( stderr ) % buffsize != 0:
112 break
113 except OverflowError:
114 pass
115 tmp_stdout.close()
116 tmp_stderr.close()
117 if returncode != 0:
118 raise Exception, stderr
119
120 # TODO: look for errors in program output.
121 except Exception, e:
122 stop_err( 'Error in bismark methylation extractor:\n' + str( e ) )
123
124
125 # collect and copy output files
126
127 if args.compress:
128 zipper(output_dir, args.compress)
129
130
131 if args.cpg_ot:
132 shutil.move( glob(os.path.join( output_dir, '*CpG_OT_*'))[0], args.cpg_ot )
133 if args.chg_ot:
134 shutil.move( glob(os.path.join( output_dir, '*CHG_OT_*'))[0], args.chg_ot )
135 if args.chh_ot:
136 shutil.move( glob(os.path.join( output_dir, '*CHH_OT_*'))[0], args.chh_ot )
137 if args.cpg_ctot:
138 shutil.move( glob(os.path.join( output_dir, '*CpG_CTOT_*'))[0], args.cpg_ctot )
139 if args.chg_ctot:
140 shutil.move( glob(os.path.join( output_dir, '*CHG_CTOT_*'))[0], args.chg_ctot )
141 if args.chh_ctot:
142 shutil.move( glob(os.path.join( output_dir, '*CHH_CTOT_*'))[0], args.chh_ctot )
143 if args.cpg_ob:
144 shutil.move( glob(os.path.join( output_dir, '*CpG_OB_*'))[0], args.cpg_ob )
145 if args.chg_ob:
146 shutil.move( glob(os.path.join( output_dir, '*CHG_OB_*'))[0], args.chg_ob )
147 if args.chh_ob:
148 shutil.move( glob(os.path.join( output_dir, '*CHH_OB_*'))[0], args.chh_ob )
149 if args.cpg_ctob:
150 shutil.move( glob(os.path.join( output_dir, '*CpG_CTOB_*'))[0], args.cpg_ctob )
151 if args.chg_ctob:
152 shutil.move( glob(os.path.join( output_dir, '*CHG_CTOB_*'))[0], args.chg_ctob )
153 if args.chh_ctob:
154 shutil.move( glob(os.path.join( output_dir, '*CHH_CTOB_*'))[0], args.chh_ctob )
155
156 # context-dependent methylation output files
157 if args.cpg_context:
158 shutil.move( glob(os.path.join( output_dir, '*CpG_context_*'))[0], args.cpg_context )
159 if args.chg_context:
160 shutil.move( glob(os.path.join( output_dir, '*CHG_context_*'))[0], args.chg_context )
161 if args.chh_context:
162 shutil.move( glob(os.path.join( output_dir, '*CHH_context_*'))[0], args.chh_context )
163
164 if args.non_cpg_context:
165 shutil.move( glob(os.path.join( output_dir, '*Non_CpG_context_*'))[0], args.non_cpg_context )
166
167 if args.non_cpg_context_ot:
168 shutil.move( glob(os.path.join( output_dir, '*Non_CpG_OT_*'))[0], args.non_cpg_context_ot )
169 if args.non_cpg_context_ctot:
170 shutil.move( glob(os.path.join( output_dir, '*Non_CpG_CTOT_*'))[0], args.non_cpg_context_ctot )
171 if args.non_cpg_context_ob:
172 shutil.move( glob(os.path.join( output_dir, '*Non_CpG_OB_*'))[0], args.non_cpg_context_ob )
173 if args.non_cpg_context_ctob:
174 shutil.move( glob(os.path.join( output_dir, '*Non_CpG_CTOB_*'))[0], args.non_cpg_context_ctob )
175
176
177
178 if args.report_file:
179 shutil.move( glob(os.path.join( output_dir, '*_splitting_report*'))[0], args.report_file )
180
181
182 # Clean up temp dirs
183 if os.path.exists( output_dir ):
184 shutil.rmtree( output_dir )
185
186 if __name__=="__main__": __main__()