Mercurial > repos > bgruening > bismark
comparison bismark_methylation_extractor.py @ 8:9bfe38410155 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
author | bgruening |
---|---|
date | Wed, 22 Aug 2018 08:09:42 -0400 |
parents | |
children | ff6ee551b153 |
comparison
equal
deleted
inserted
replaced
7:fcadce4d9a06 | 8:9bfe38410155 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import logging | |
5 import math | |
6 import os | |
7 import re | |
8 import shutil | |
9 import subprocess | |
10 import sys | |
11 import tempfile | |
12 import zipfile | |
13 from glob import glob | |
14 | |
15 | |
16 def stop_err(logger, msg): | |
17 logger.critical(msg) | |
18 sys.exit(1) | |
19 | |
20 | |
21 def log_subprocess_output(logger, pipe): | |
22 for line in iter(pipe.readline, b''): | |
23 logger.debug(line.decode().rstrip()) | |
24 | |
25 | |
26 def zipper(dir, zip_file): | |
27 output_files_regex = re.compile('^(Non_)?C[pH][GH]_.*') | |
28 bedgraph_regex = re.compile('.*bedGraph.gz') | |
29 zip = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED) | |
30 root_len = len(os.path.abspath(dir)) | |
31 for root, dirs, files in os.walk(dir): | |
32 archive_root = os.path.abspath(root)[root_len:] | |
33 for f in files: | |
34 if re.search(output_files_regex, f) or re.search(bedgraph_regex, f): | |
35 fullpath = os.path.join(root, f) | |
36 archive_name = os.path.join(archive_root, f) | |
37 zip.write(fullpath, archive_name, zipfile.ZIP_DEFLATED) | |
38 zip.close() | |
39 return zip_file | |
40 | |
41 | |
42 def build_genome_dir(genome_file): | |
43 tmp_genome_dir = tempfile.mkdtemp(prefix='tmp') | |
44 genome_path = os.path.join(tmp_genome_dir, '.'.join(os.path.split(genome_file)[1].split('.')[:-1])) | |
45 try: | |
46 # Create a hard link pointing to genome_file named 'genome_path'.fa. | |
47 os.symlink(genome_file, genome_path + '.fa') | |
48 except Exception as e: | |
49 if os.path.exists(tmp_genome_dir): | |
50 shutil.rmtree(tmp_genome_dir) | |
51 stop_err('Error in linking the reference database!\n%s' % e) | |
52 return tmp_genome_dir | |
53 | |
54 | |
55 def __main__(): | |
56 # Parse Command Line | |
57 parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.') | |
58 | |
59 # input options | |
60 parser.add_argument('--infile', help='Input file in SAM or BAM format.') | |
61 parser.add_argument('--single-end', dest='single_end', action="store_true") | |
62 parser.add_argument('--paired-end', dest='paired_end', action="store_true") | |
63 | |
64 parser.add_argument('--multicore', dest='multicore', type=int, default=1) | |
65 parser.add_argument('--splitting_report', dest='splitting_report') | |
66 parser.add_argument('--mbias_report', dest='mbias_report') | |
67 parser.add_argument('--cytosine_report', dest="cytosine_report") | |
68 parser.add_argument('--genome_file', dest="genome_file") | |
69 parser.add_argument('--cx_context', action="store_true") | |
70 | |
71 parser.add_argument('--comprehensive', action="store_true") | |
72 parser.add_argument('--merge-non-cpg', dest='merge_non_cpg', action="store_true") | |
73 parser.add_argument('--no-overlap', dest='no_overlap', action="store_true") | |
74 parser.add_argument('--compress', dest='compress') | |
75 parser.add_argument('--ignore', dest='ignore', type=int) | |
76 parser.add_argument('--ignore_r2', dest='ignore_r2', type=int) | |
77 parser.add_argument('--ignore_3prime', dest='ignore_3prime', type=int) | |
78 parser.add_argument('--ignore_3prime_r2', dest='ignore_3prime_r2', type=int) | |
79 parser.add_argument('--log_report', dest='log_report', metavar='log_filename', type=str) | |
80 args = parser.parse_args() | |
81 | |
82 logger = logging.getLogger('bismark_methylation_extractor_wrapper') | |
83 logger.setLevel(logging.DEBUG) | |
84 ch = logging.StreamHandler(sys.stdout) | |
85 if args.log_report: | |
86 ch.setLevel(logging.WARNING) | |
87 handler = logging.FileHandler(args.log_report) | |
88 handler.setLevel(logging.DEBUG) | |
89 logger.addHandler(handler) | |
90 else: | |
91 ch.setLevel(logging.DEBUG) | |
92 logger.addHandler(ch) | |
93 | |
94 # Build methylation extractor command | |
95 output_dir = tempfile.mkdtemp() | |
96 cmd = ['bismark_methylation_extractor', '--no_header', '-o', output_dir] | |
97 # Set up all options | |
98 if args.multicore > 3: | |
99 # divide multicore by 3 here since bismark will spawn ~3 jobs. | |
100 cmd.extend(['--multicore', str(math.ceil(args.multicore / 3))]) | |
101 if args.single_end: | |
102 cmd.append('--single-end') | |
103 else: | |
104 cmd.append('--paired-end') | |
105 if args.no_overlap: | |
106 cmd.append('--no_overlap') | |
107 if args.ignore: | |
108 cmd.extend(['--ignore', str(args.ignore)]) | |
109 if args.ignore_r2: | |
110 cmd.extend(['--ignore_r2', str(args.ignore_r2)]) | |
111 if args.ignore_3prime: | |
112 cmd.extend(['--ignore_3prime', str(args.ignore_3prime)]) | |
113 if args.ignore_3prime_r2: | |
114 cmd.extend(['--ignore_3prime_r2', str(args.ignore_3prime_r2)]) | |
115 if args.comprehensive: | |
116 cmd.append('--comprehensive') | |
117 if args.merge_non_cpg: | |
118 cmd.append('--merge_non_CpG') | |
119 if args.splitting_report: | |
120 cmd.append('--report') | |
121 tmp_genome_dir = None | |
122 if args.cytosine_report: | |
123 tmp_genome_dir = build_genome_dir(args.genome_file) | |
124 if args.cx_context: | |
125 cmd.extend( | |
126 ['--bedGraph', '--CX_context', '--cytosine_report', '--CX_context', '--genome_folder', tmp_genome_dir]) | |
127 else: | |
128 cmd.extend(['--bedGraph', '--cytosine_report', '--genome_folder', tmp_genome_dir]) | |
129 | |
130 cmd.append(args.infile) | |
131 | |
132 # Run | |
133 logger.info("Methylation extractor run with: '%s'", " ".join(cmd)) | |
134 prev_dir = os.getcwd() | |
135 os.chdir(output_dir) # needed due to a bug in bismark where the coverage file cannot be found | |
136 process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) | |
137 with process.stdout: | |
138 log_subprocess_output(logger, process.stdout) | |
139 exitcode = process.wait() | |
140 if exitcode != 0: | |
141 stop_err(logger, "Bismark methylation extractor error (also check the log file if any)!\n%s" % process.stderr) | |
142 logger.info("Finished methylation extractor.") | |
143 # collect and copy output files | |
144 logger.debug("Zip output files to '%s'.", args.compress) | |
145 os.chdir(prev_dir) | |
146 zipper(output_dir, args.compress) | |
147 | |
148 # cytosine report | |
149 if args.cytosine_report: | |
150 logger.debug("Collecting cytosine report.") | |
151 if args.cx_context: | |
152 shutil.move(glob(os.path.join(output_dir, '*CX_report.txt'))[0], args.cytosine_report) | |
153 else: | |
154 shutil.move(glob(os.path.join(output_dir, '*CpG_report.txt'))[0], args.cytosine_report) | |
155 # splitting report | |
156 if args.splitting_report: | |
157 logger.debug("Collecting splitting report.") | |
158 shutil.move(glob(os.path.join(output_dir, '*_splitting_report.txt'))[0], args.splitting_report) | |
159 if args.mbias_report: | |
160 logger.debug("Collecting M-Bias file.") | |
161 shutil.move(glob(os.path.join(output_dir, '*M-bias.txt'))[0], args.mbias_report) | |
162 | |
163 # Clean up temp dirs | |
164 logger.debug("Cleanup temp dirs.") | |
165 if os.path.exists(output_dir): | |
166 shutil.rmtree(output_dir) | |
167 if tmp_genome_dir and os.path.exists(tmp_genome_dir): | |
168 shutil.rmtree(tmp_genome_dir) | |
169 logger.info("Done.") | |
170 | |
171 if __name__ == "__main__": __main__() |