0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import argparse, os, shutil, subprocess, sys, tempfile, fileinput
|
|
4 import zipfile
|
|
5 from glob import glob
|
|
6
|
|
7 def stop_err( msg ):
|
|
8 sys.stderr.write( "%s\n" % msg )
|
|
9 sys.exit()
|
|
10
|
|
11 def zipper(dir, zip_file):
|
|
12 zip = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED)
|
|
13 root_len = len(os.path.abspath(dir))
|
|
14 for root, dirs, files in os.walk(dir):
|
|
15 archive_root = os.path.abspath(root)[root_len:]
|
|
16 for f in files:
|
|
17 fullpath = os.path.join(root, f)
|
|
18 archive_name = os.path.join(archive_root, f)
|
|
19 zip.write(fullpath, archive_name, zipfile.ZIP_DEFLATED)
|
|
20 zip.close()
|
|
21 return zip_file
|
|
22
|
|
23 def __main__():
|
|
24 #Parse Command Line
|
|
25 parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.')
|
|
26
|
|
27 # input options
|
|
28 parser.add_argument( '--bismark_path', dest='bismark_path', help='Path to the bismark perl scripts' )
|
|
29
|
|
30 parser.add_argument( '--infile', help='Input file in SAM format.' )
|
|
31 parser.add_argument( '--single-end', dest='single_end', action="store_true" )
|
|
32 parser.add_argument( '--paired-end', dest='paired_end', action="store_true" )
|
|
33
|
|
34 parser.add_argument( '--report-file', dest='report_file' )
|
|
35 parser.add_argument( '--comprehensive', action="store_true" )
|
|
36 parser.add_argument( '--merge-non-cpg', dest='merge_non_cpg', action="store_true" )
|
|
37 parser.add_argument( '--no-overlap', dest='no_overlap', action="store_true" )
|
|
38 parser.add_argument( '--compress' )
|
|
39 parser.add_argument( '--ignore-bps', dest='ignore_bps', type=int )
|
|
40
|
|
41 # OT - original top strand
|
|
42 parser.add_argument( '--cpg_ot' )
|
|
43 parser.add_argument( '--chg_ot' )
|
|
44 parser.add_argument( '--chh_ot' )
|
|
45 # CTOT - complementary to original top strand
|
|
46 parser.add_argument( '--cpg_ctot' )
|
|
47 parser.add_argument( '--chg_ctot' )
|
|
48 parser.add_argument( '--chh_ctot' )
|
|
49 # OB - original bottom strand
|
|
50 parser.add_argument( '--cpg_ob' )
|
|
51 parser.add_argument( '--chg_ob' )
|
|
52 parser.add_argument( '--chh_ob' )
|
|
53 # CTOT - complementary to original bottom strand
|
|
54 parser.add_argument( '--cpg_ctob' )
|
|
55 parser.add_argument( '--chg_ctob' )
|
|
56 parser.add_argument( '--chh_ctob' )
|
|
57
|
|
58 parser.add_argument( '--cpg_context' )
|
|
59 parser.add_argument( '--chg_context' )
|
|
60 parser.add_argument( '--chh_context' )
|
|
61
|
|
62 parser.add_argument( '--non_cpg_context' )
|
|
63
|
|
64 parser.add_argument( '--non_cpg_context_ot' )
|
|
65 parser.add_argument( '--non_cpg_context_ctot' )
|
|
66 parser.add_argument( '--non_cpg_context_ob' )
|
|
67 parser.add_argument( '--non_cpg_context_ctob' )
|
|
68
|
|
69 args = parser.parse_args()
|
|
70
|
|
71
|
|
72 # Build methylation extractor command
|
|
73 output_dir = tempfile.mkdtemp()
|
|
74 cmd = 'bismark_methylation_extractor --no_header -o %s %s %s'
|
|
75 if args.bismark_path:
|
|
76 # add the path to the bismark perl scripts, that is needed for galaxy
|
|
77 cmd = os.path.join(args.bismark_path, cmd)
|
|
78
|
|
79 additional_opts = ''
|
|
80 # Set up all options
|
|
81 if args.single_end:
|
|
82 additional_opts += ' --single-end '
|
|
83 else:
|
|
84 additional_opts += ' --paired-end '
|
|
85 if args.no_overlap:
|
|
86 additional_opts += ' --no_overlap '
|
|
87 if args.ignore_bps:
|
|
88 additional_opts += ' --ignore %s ' % args.ignore_bps
|
|
89 if args.comprehensive:
|
|
90 additional_opts += ' --comprehensive '
|
|
91 if args.merge_non_cpg:
|
|
92 additional_opts += ' --merge_non_CpG '
|
|
93 if args.report_file:
|
|
94 additional_opts += ' --report '
|
|
95
|
|
96
|
|
97 # Final command:
|
|
98 cmd = cmd % (output_dir, additional_opts, args.infile)
|
|
99
|
|
100 # Run
|
|
101 try:
|
|
102 tmp_out = tempfile.NamedTemporaryFile().name
|
|
103 tmp_stdout = open( tmp_out, 'wb' )
|
|
104 tmp_err = tempfile.NamedTemporaryFile().name
|
|
105 tmp_stderr = open( tmp_err, 'wb' )
|
|
106 proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )
|
|
107 returncode = proc.wait()
|
|
108 tmp_stderr.close()
|
|
109 # get stderr, allowing for case where it's very large
|
|
110 tmp_stderr = open( tmp_err, 'rb' )
|
|
111 stderr = ''
|
|
112 buffsize = 1048576
|
|
113 try:
|
|
114 while True:
|
|
115 stderr += tmp_stderr.read( buffsize )
|
|
116 if not stderr or len( stderr ) % buffsize != 0:
|
|
117 break
|
|
118 except OverflowError:
|
|
119 pass
|
|
120 tmp_stdout.close()
|
|
121 tmp_stderr.close()
|
|
122 if returncode != 0:
|
|
123 raise Exception, stderr
|
|
124
|
|
125 # TODO: look for errors in program output.
|
|
126 except Exception, e:
|
|
127 stop_err( 'Error in bismark methylation extractor:\n' + str( e ) )
|
|
128
|
|
129
|
|
130 # collect and copy output files
|
|
131
|
|
132 if args.compress:
|
|
133 zipper(output_dir, args.compress)
|
|
134
|
|
135
|
|
136 if args.cpg_ot:
|
|
137 shutil.move( glob(os.path.join( output_dir, '*CpG_OT_*'))[0], args.cpg_ot )
|
|
138 if args.chg_ot:
|
|
139 shutil.move( glob(os.path.join( output_dir, '*CHG_OT_*'))[0], args.chg_ot )
|
|
140 if args.chh_ot:
|
|
141 shutil.move( glob(os.path.join( output_dir, '*CHH_OT_*'))[0], args.chh_ot )
|
|
142 if args.cpg_ctot:
|
|
143 shutil.move( glob(os.path.join( output_dir, '*CpG_CTOT_*'))[0], args.cpg_ctot )
|
|
144 if args.chg_ctot:
|
|
145 shutil.move( glob(os.path.join( output_dir, '*CHG_CTOT_*'))[0], args.chg_ctot )
|
|
146 if args.chh_ctot:
|
|
147 shutil.move( glob(os.path.join( output_dir, '*CHH_CTOT_*'))[0], args.chh_ctot )
|
|
148 if args.cpg_ob:
|
|
149 shutil.move( glob(os.path.join( output_dir, '*CpG_OB_*'))[0], args.cpg_ob )
|
|
150 if args.chg_ob:
|
|
151 shutil.move( glob(os.path.join( output_dir, '*CHG_OB_*'))[0], args.chg_ob )
|
|
152 if args.chh_ob:
|
|
153 shutil.move( glob(os.path.join( output_dir, '*CHH_OB_*'))[0], args.chh_ob )
|
|
154 if args.cpg_ctob:
|
|
155 shutil.move( glob(os.path.join( output_dir, '*CpG_CTOB_*'))[0], args.cpg_ctob )
|
|
156 if args.chg_ctob:
|
|
157 shutil.move( glob(os.path.join( output_dir, '*CHG_CTOB_*'))[0], args.chg_ctob )
|
|
158 if args.chh_ctob:
|
|
159 shutil.move( glob(os.path.join( output_dir, '*CHH_CTOB_*'))[0], args.chh_ctob )
|
|
160
|
|
161 # context-dependent methylation output files
|
|
162 if args.cpg_context:
|
|
163 shutil.move( glob(os.path.join( output_dir, '*CpG_context_*'))[0], args.cpg_context )
|
|
164 if args.chg_context:
|
|
165 shutil.move( glob(os.path.join( output_dir, '*CHG_context_*'))[0], args.chg_context )
|
|
166 if args.chh_context:
|
|
167 shutil.move( glob(os.path.join( output_dir, '*CHH_context_*'))[0], args.chh_context )
|
|
168
|
|
169 if args.non_cpg_context:
|
|
170 shutil.move( glob(os.path.join( output_dir, '*Non_CpG_context_*'))[0], args.non_cpg_context )
|
|
171
|
|
172 if args.non_cpg_context_ot:
|
|
173 shutil.move( glob(os.path.join( output_dir, '*Non_CpG_OT_*'))[0], args.non_cpg_context_ot )
|
|
174 if args.non_cpg_context_ctot:
|
|
175 shutil.move( glob(os.path.join( output_dir, '*Non_CpG_CTOT_*'))[0], args.non_cpg_context_ctot )
|
|
176 if args.non_cpg_context_ob:
|
|
177 shutil.move( glob(os.path.join( output_dir, '*Non_CpG_OB_*'))[0], args.non_cpg_context_ob )
|
|
178 if args.non_cpg_context_ctob:
|
|
179 shutil.move( glob(os.path.join( output_dir, '*Non_CpG_CTOB_*'))[0], args.non_cpg_context_ctob )
|
|
180
|
|
181
|
|
182
|
|
183 if args.report_file:
|
|
184 shutil.move( glob(os.path.join( output_dir, '*_splitting_report*'))[0], args.report_file )
|
|
185
|
|
186
|
|
187 # Clean up temp dirs
|
|
188 if os.path.exists( output_dir ):
|
|
189 shutil.rmtree( output_dir )
|
|
190
|
|
191 if __name__=="__main__": __main__()
|