Mercurial > repos > bgruening > bismark
comparison bismark_methylation_extractor.py @ 21:120b7b35e442 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
author | bgruening |
---|---|
date | Thu, 22 Apr 2021 17:05:46 +0000 |
parents | ff6ee551b153 |
children | 8c191acde702 |
comparison
equal
deleted
inserted
replaced
20:ff6ee551b153 | 21:120b7b35e442 |
---|---|
17 logger.critical(msg) | 17 logger.critical(msg) |
18 sys.exit(1) | 18 sys.exit(1) |
19 | 19 |
20 | 20 |
21 def log_subprocess_output(logger, pipe): | 21 def log_subprocess_output(logger, pipe): |
22 for line in iter(pipe.readline, b''): | 22 for line in iter(pipe.readline, b""): |
23 logger.debug(line.decode().rstrip()) | 23 logger.debug(line.decode().rstrip()) |
24 | 24 |
25 | 25 |
26 def zipper(dir, zip_file): | 26 def zipper(dir, zip_file): |
27 output_files_regex = re.compile('^(Non_)?C[pH][GH]_.*') | 27 output_files_regex = re.compile("^(Non_)?C[pH][GH]_.*") |
28 bedgraph_regex = re.compile('.*bedGraph.gz') | 28 bedgraph_regex = re.compile(".*bedGraph.gz") |
29 zip = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED) | 29 zip = zipfile.ZipFile(zip_file, "w", compression=zipfile.ZIP_DEFLATED) |
30 root_len = len(os.path.abspath(dir)) | 30 root_len = len(os.path.abspath(dir)) |
31 for root, dirs, files in os.walk(dir): | 31 for root, dirs, files in os.walk(dir): |
32 archive_root = os.path.abspath(root)[root_len:] | 32 archive_root = os.path.abspath(root)[root_len:] |
33 for f in files: | 33 for f in files: |
34 if re.search(output_files_regex, f) or re.search(bedgraph_regex, f): | 34 if re.search(output_files_regex, f) or re.search(bedgraph_regex, f): |
38 zip.close() | 38 zip.close() |
39 return zip_file | 39 return zip_file |
40 | 40 |
41 | 41 |
42 def build_genome_dir(genome_file): | 42 def build_genome_dir(genome_file): |
43 tmp_genome_dir = tempfile.mkdtemp(prefix='tmp') | 43 tmp_genome_dir = tempfile.mkdtemp(prefix="tmp") |
44 genome_path = os.path.join(tmp_genome_dir, '.'.join(os.path.split(genome_file)[1].split('.')[:-1])) | 44 genome_path = os.path.join( |
45 tmp_genome_dir, ".".join(os.path.split(genome_file)[1].split(".")[:-1]) | |
46 ) | |
45 try: | 47 try: |
46 # Create a hard link pointing to genome_file named 'genome_path'.fa. | 48 # Create a hard link pointing to genome_file named 'genome_path'.fa. |
47 os.symlink(genome_file, genome_path + '.fa') | 49 os.symlink(genome_file, genome_path + ".fa") |
48 except Exception as e: | 50 except Exception as e: |
49 if os.path.exists(tmp_genome_dir): | 51 if os.path.exists(tmp_genome_dir): |
50 shutil.rmtree(tmp_genome_dir) | 52 shutil.rmtree(tmp_genome_dir) |
51 stop_err('Error in linking the reference database!\n%s' % e) | 53 stop_err("Error in linking the reference database!\n%s" % e) |
52 return tmp_genome_dir | 54 return tmp_genome_dir |
53 | 55 |
54 | 56 |
55 def __main__(): | 57 def __main__(): |
56 # Parse Command Line | 58 # Parse Command Line |
57 parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.') | 59 parser = argparse.ArgumentParser( |
60 description="Wrapper for the bismark methylation caller." | |
61 ) | |
58 | 62 |
59 # input options | 63 # input options |
60 parser.add_argument('--infile', help='Input file in SAM or BAM format.') | 64 parser.add_argument("--infile", help="Input file in SAM or BAM format.") |
61 parser.add_argument('--single-end', dest='single_end', action="store_true") | 65 parser.add_argument("--single-end", dest="single_end", action="store_true") |
62 parser.add_argument('--paired-end', dest='paired_end', action="store_true") | 66 parser.add_argument("--paired-end", dest="paired_end", action="store_true") |
63 | 67 |
64 parser.add_argument('--multicore', dest='multicore', type=int, default=1) | 68 parser.add_argument("--multicore", dest="multicore", type=int, default=1) |
65 parser.add_argument('--splitting_report', dest='splitting_report') | 69 parser.add_argument("--splitting_report", dest="splitting_report") |
66 parser.add_argument('--mbias_report', dest='mbias_report') | 70 parser.add_argument("--mbias_report", dest="mbias_report") |
67 parser.add_argument('--cytosine_report', dest="cytosine_report") | 71 parser.add_argument("--cytosine_report", dest="cytosine_report") |
68 parser.add_argument('--genome_file', dest="genome_file") | 72 parser.add_argument("--genome_file", dest="genome_file") |
69 parser.add_argument('--cx_context', action="store_true") | 73 parser.add_argument("--cx_context", action="store_true") |
70 | 74 |
71 parser.add_argument('--comprehensive', action="store_true") | 75 parser.add_argument("--comprehensive", action="store_true") |
72 parser.add_argument('--merge-non-cpg', dest='merge_non_cpg', action="store_true") | 76 parser.add_argument("--merge-non-cpg", dest="merge_non_cpg", action="store_true") |
73 parser.add_argument('--no-overlap', dest='no_overlap', action="store_true") | 77 parser.add_argument("--no-overlap", dest="no_overlap", action="store_true") |
74 parser.add_argument('--compress', dest='compress') | 78 parser.add_argument("--compress", dest="compress") |
75 parser.add_argument('--ignore', dest='ignore', type=int) | 79 parser.add_argument("--ignore", dest="ignore", type=int) |
76 parser.add_argument('--ignore_r2', dest='ignore_r2', type=int) | 80 parser.add_argument("--ignore_r2", dest="ignore_r2", type=int) |
77 parser.add_argument('--ignore_3prime', dest='ignore_3prime', type=int) | 81 parser.add_argument("--ignore_3prime", dest="ignore_3prime", type=int) |
78 parser.add_argument('--ignore_3prime_r2', dest='ignore_3prime_r2', type=int) | 82 parser.add_argument("--ignore_3prime_r2", dest="ignore_3prime_r2", type=int) |
79 parser.add_argument('--log_report', dest='log_report', metavar='log_filename', type=str) | 83 parser.add_argument( |
84 "--log_report", dest="log_report", metavar="log_filename", type=str | |
85 ) | |
80 args = parser.parse_args() | 86 args = parser.parse_args() |
81 | 87 |
82 logger = logging.getLogger('bismark_methylation_extractor_wrapper') | 88 logger = logging.getLogger("bismark_methylation_extractor_wrapper") |
83 logger.setLevel(logging.DEBUG) | 89 logger.setLevel(logging.DEBUG) |
84 ch = logging.StreamHandler(sys.stdout) | 90 ch = logging.StreamHandler(sys.stdout) |
85 if args.log_report: | 91 if args.log_report: |
86 ch.setLevel(logging.WARNING) | 92 ch.setLevel(logging.WARNING) |
87 handler = logging.FileHandler(args.log_report) | 93 handler = logging.FileHandler(args.log_report) |
91 ch.setLevel(logging.DEBUG) | 97 ch.setLevel(logging.DEBUG) |
92 logger.addHandler(ch) | 98 logger.addHandler(ch) |
93 | 99 |
94 # Build methylation extractor command | 100 # Build methylation extractor command |
95 output_dir = tempfile.mkdtemp() | 101 output_dir = tempfile.mkdtemp() |
96 cmd = ['bismark_methylation_extractor', '--no_header', '-o', output_dir] | 102 cmd = ["bismark_methylation_extractor", "--no_header", "-o", output_dir] |
97 # Set up all options | 103 # Set up all options |
98 if args.multicore > 3: | 104 if args.multicore > 3: |
99 # divide multicore by 3 here since bismark will spawn ~3 jobs. | 105 # divide multicore by 3 here since bismark will spawn ~3 jobs. |
100 cmd.extend(['--multicore', str(int(math.floor(args.multicore / 3)))]) | 106 cmd.extend(["--multicore", str(int(math.floor(args.multicore / 3)))]) |
101 if args.single_end: | 107 if args.single_end: |
102 cmd.append('--single-end') | 108 cmd.append("--single-end") |
103 else: | 109 else: |
104 cmd.append('--paired-end') | 110 cmd.append("--paired-end") |
105 if args.no_overlap: | 111 if args.no_overlap: |
106 cmd.append('--no_overlap') | 112 cmd.append("--no_overlap") |
107 if args.ignore: | 113 if args.ignore: |
108 cmd.extend(['--ignore', str(args.ignore)]) | 114 cmd.extend(["--ignore", str(args.ignore)]) |
109 if args.ignore_r2: | 115 if args.ignore_r2: |
110 cmd.extend(['--ignore_r2', str(args.ignore_r2)]) | 116 cmd.extend(["--ignore_r2", str(args.ignore_r2)]) |
111 if args.ignore_3prime: | 117 if args.ignore_3prime: |
112 cmd.extend(['--ignore_3prime', str(args.ignore_3prime)]) | 118 cmd.extend(["--ignore_3prime", str(args.ignore_3prime)]) |
113 if args.ignore_3prime_r2: | 119 if args.ignore_3prime_r2: |
114 cmd.extend(['--ignore_3prime_r2', str(args.ignore_3prime_r2)]) | 120 cmd.extend(["--ignore_3prime_r2", str(args.ignore_3prime_r2)]) |
115 if args.comprehensive: | 121 if args.comprehensive: |
116 cmd.append('--comprehensive') | 122 cmd.append("--comprehensive") |
117 if args.merge_non_cpg: | 123 if args.merge_non_cpg: |
118 cmd.append('--merge_non_CpG') | 124 cmd.append("--merge_non_CpG") |
119 if args.splitting_report: | 125 if args.splitting_report: |
120 cmd.append('--report') | 126 cmd.append("--report") |
121 tmp_genome_dir = None | 127 tmp_genome_dir = None |
122 if args.cytosine_report: | 128 if args.cytosine_report: |
123 tmp_genome_dir = build_genome_dir(args.genome_file) | 129 tmp_genome_dir = build_genome_dir(args.genome_file) |
124 if args.cx_context: | 130 if args.cx_context: |
125 cmd.extend( | 131 cmd.extend( |
126 ['--bedGraph', '--CX_context', '--cytosine_report', '--CX_context', '--genome_folder', tmp_genome_dir]) | 132 [ |
133 "--bedGraph", | |
134 "--CX_context", | |
135 "--cytosine_report", | |
136 "--CX_context", | |
137 "--genome_folder", | |
138 tmp_genome_dir, | |
139 ] | |
140 ) | |
127 else: | 141 else: |
128 cmd.extend(['--bedGraph', '--cytosine_report', '--genome_folder', tmp_genome_dir]) | 142 cmd.extend( |
143 ["--bedGraph", "--cytosine_report", "--genome_folder", tmp_genome_dir] | |
144 ) | |
129 | 145 |
130 cmd.append(args.infile) | 146 cmd.append(args.infile) |
131 | 147 |
132 # Run | 148 # Run |
133 logger.info("Methylation extractor run with: '%s'", " ".join(cmd)) | 149 logger.info("Methylation extractor run with: '%s'", " ".join(cmd)) |
134 prev_dir = os.getcwd() | 150 prev_dir = os.getcwd() |
135 os.chdir(output_dir) # needed due to a bug in bismark where the coverage file cannot be found | 151 os.chdir( |
152 output_dir | |
153 ) # needed due to a bug in bismark where the coverage file cannot be found | |
136 process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) | 154 process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) |
137 with process.stdout: | 155 with process.stdout: |
138 log_subprocess_output(logger, process.stdout) | 156 log_subprocess_output(logger, process.stdout) |
139 exitcode = process.wait() | 157 exitcode = process.wait() |
140 if exitcode != 0: | 158 if exitcode != 0: |
141 stop_err(logger, "Bismark methylation extractor error (also check the log file if any)!\n%s" % process.stderr) | 159 stop_err( |
160 logger, | |
161 "Bismark methylation extractor error (also check the log file if any)!\n%s" | |
162 % process.stderr, | |
163 ) | |
142 logger.info("Finished methylation extractor.") | 164 logger.info("Finished methylation extractor.") |
143 # collect and copy output files | 165 # collect and copy output files |
144 logger.debug("Zip output files to '%s'.", args.compress) | 166 logger.debug("Zip output files to '%s'.", args.compress) |
145 os.chdir(prev_dir) | 167 os.chdir(prev_dir) |
146 zipper(output_dir, args.compress) | 168 zipper(output_dir, args.compress) |
147 | 169 |
148 # cytosine report | 170 # cytosine report |
149 if args.cytosine_report: | 171 if args.cytosine_report: |
150 logger.debug("Collecting cytosine report.") | 172 logger.debug("Collecting cytosine report.") |
151 if args.cx_context: | 173 if args.cx_context: |
152 shutil.move(glob(os.path.join(output_dir, '*CX_report.txt'))[0], args.cytosine_report) | 174 shutil.move( |
175 glob(os.path.join(output_dir, "*CX_report.txt"))[0], | |
176 args.cytosine_report, | |
177 ) | |
153 else: | 178 else: |
154 shutil.move(glob(os.path.join(output_dir, '*CpG_report.txt'))[0], args.cytosine_report) | 179 shutil.move( |
180 glob(os.path.join(output_dir, "*CpG_report.txt"))[0], | |
181 args.cytosine_report, | |
182 ) | |
155 # splitting report | 183 # splitting report |
156 if args.splitting_report: | 184 if args.splitting_report: |
157 logger.debug("Collecting splitting report.") | 185 logger.debug("Collecting splitting report.") |
158 shutil.move(glob(os.path.join(output_dir, '*_splitting_report.txt'))[0], args.splitting_report) | 186 shutil.move( |
187 glob(os.path.join(output_dir, "*_splitting_report.txt"))[0], | |
188 args.splitting_report, | |
189 ) | |
159 if args.mbias_report: | 190 if args.mbias_report: |
160 logger.debug("Collecting M-Bias file.") | 191 logger.debug("Collecting M-Bias file.") |
161 shutil.move(glob(os.path.join(output_dir, '*M-bias.txt'))[0], args.mbias_report) | 192 shutil.move(glob(os.path.join(output_dir, "*M-bias.txt"))[0], args.mbias_report) |
162 | 193 |
163 # Clean up temp dirs | 194 # Clean up temp dirs |
164 logger.debug("Cleanup temp dirs.") | 195 logger.debug("Cleanup temp dirs.") |
165 if os.path.exists(output_dir): | 196 if os.path.exists(output_dir): |
166 shutil.rmtree(output_dir) | 197 shutil.rmtree(output_dir) |
167 if tmp_genome_dir and os.path.exists(tmp_genome_dir): | 198 if tmp_genome_dir and os.path.exists(tmp_genome_dir): |
168 shutil.rmtree(tmp_genome_dir) | 199 shutil.rmtree(tmp_genome_dir) |
169 logger.info("Done.") | 200 logger.info("Done.") |
170 | 201 |
171 if __name__ == "__main__": __main__() | 202 |
203 if __name__ == "__main__": | |
204 __main__() |