comparison bismark_methylation_extractor.py @ 21:120b7b35e442 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
author bgruening
date Thu, 22 Apr 2021 17:05:46 +0000
parents ff6ee551b153
children 8c191acde702
comparison
equal deleted inserted replaced
20:ff6ee551b153 21:120b7b35e442
17 logger.critical(msg) 17 logger.critical(msg)
18 sys.exit(1) 18 sys.exit(1)
19 19
20 20
21 def log_subprocess_output(logger, pipe): 21 def log_subprocess_output(logger, pipe):
22 for line in iter(pipe.readline, b''): 22 for line in iter(pipe.readline, b""):
23 logger.debug(line.decode().rstrip()) 23 logger.debug(line.decode().rstrip())
24 24
25 25
26 def zipper(dir, zip_file): 26 def zipper(dir, zip_file):
27 output_files_regex = re.compile('^(Non_)?C[pH][GH]_.*') 27 output_files_regex = re.compile("^(Non_)?C[pH][GH]_.*")
28 bedgraph_regex = re.compile('.*bedGraph.gz') 28 bedgraph_regex = re.compile(".*bedGraph.gz")
29 zip = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED) 29 zip = zipfile.ZipFile(zip_file, "w", compression=zipfile.ZIP_DEFLATED)
30 root_len = len(os.path.abspath(dir)) 30 root_len = len(os.path.abspath(dir))
31 for root, dirs, files in os.walk(dir): 31 for root, dirs, files in os.walk(dir):
32 archive_root = os.path.abspath(root)[root_len:] 32 archive_root = os.path.abspath(root)[root_len:]
33 for f in files: 33 for f in files:
34 if re.search(output_files_regex, f) or re.search(bedgraph_regex, f): 34 if re.search(output_files_regex, f) or re.search(bedgraph_regex, f):
38 zip.close() 38 zip.close()
39 return zip_file 39 return zip_file
40 40
41 41
42 def build_genome_dir(genome_file): 42 def build_genome_dir(genome_file):
43 tmp_genome_dir = tempfile.mkdtemp(prefix='tmp') 43 tmp_genome_dir = tempfile.mkdtemp(prefix="tmp")
44 genome_path = os.path.join(tmp_genome_dir, '.'.join(os.path.split(genome_file)[1].split('.')[:-1])) 44 genome_path = os.path.join(
45 tmp_genome_dir, ".".join(os.path.split(genome_file)[1].split(".")[:-1])
46 )
45 try: 47 try:
46 # Create a hard link pointing to genome_file named 'genome_path'.fa. 48 # Create a hard link pointing to genome_file named 'genome_path'.fa.
47 os.symlink(genome_file, genome_path + '.fa') 49 os.symlink(genome_file, genome_path + ".fa")
48 except Exception as e: 50 except Exception as e:
49 if os.path.exists(tmp_genome_dir): 51 if os.path.exists(tmp_genome_dir):
50 shutil.rmtree(tmp_genome_dir) 52 shutil.rmtree(tmp_genome_dir)
51 stop_err('Error in linking the reference database!\n%s' % e) 53 stop_err("Error in linking the reference database!\n%s" % e)
52 return tmp_genome_dir 54 return tmp_genome_dir
53 55
54 56
55 def __main__(): 57 def __main__():
56 # Parse Command Line 58 # Parse Command Line
57 parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.') 59 parser = argparse.ArgumentParser(
60 description="Wrapper for the bismark methylation caller."
61 )
58 62
59 # input options 63 # input options
60 parser.add_argument('--infile', help='Input file in SAM or BAM format.') 64 parser.add_argument("--infile", help="Input file in SAM or BAM format.")
61 parser.add_argument('--single-end', dest='single_end', action="store_true") 65 parser.add_argument("--single-end", dest="single_end", action="store_true")
62 parser.add_argument('--paired-end', dest='paired_end', action="store_true") 66 parser.add_argument("--paired-end", dest="paired_end", action="store_true")
63 67
64 parser.add_argument('--multicore', dest='multicore', type=int, default=1) 68 parser.add_argument("--multicore", dest="multicore", type=int, default=1)
65 parser.add_argument('--splitting_report', dest='splitting_report') 69 parser.add_argument("--splitting_report", dest="splitting_report")
66 parser.add_argument('--mbias_report', dest='mbias_report') 70 parser.add_argument("--mbias_report", dest="mbias_report")
67 parser.add_argument('--cytosine_report', dest="cytosine_report") 71 parser.add_argument("--cytosine_report", dest="cytosine_report")
68 parser.add_argument('--genome_file', dest="genome_file") 72 parser.add_argument("--genome_file", dest="genome_file")
69 parser.add_argument('--cx_context', action="store_true") 73 parser.add_argument("--cx_context", action="store_true")
70 74
71 parser.add_argument('--comprehensive', action="store_true") 75 parser.add_argument("--comprehensive", action="store_true")
72 parser.add_argument('--merge-non-cpg', dest='merge_non_cpg', action="store_true") 76 parser.add_argument("--merge-non-cpg", dest="merge_non_cpg", action="store_true")
73 parser.add_argument('--no-overlap', dest='no_overlap', action="store_true") 77 parser.add_argument("--no-overlap", dest="no_overlap", action="store_true")
74 parser.add_argument('--compress', dest='compress') 78 parser.add_argument("--compress", dest="compress")
75 parser.add_argument('--ignore', dest='ignore', type=int) 79 parser.add_argument("--ignore", dest="ignore", type=int)
76 parser.add_argument('--ignore_r2', dest='ignore_r2', type=int) 80 parser.add_argument("--ignore_r2", dest="ignore_r2", type=int)
77 parser.add_argument('--ignore_3prime', dest='ignore_3prime', type=int) 81 parser.add_argument("--ignore_3prime", dest="ignore_3prime", type=int)
78 parser.add_argument('--ignore_3prime_r2', dest='ignore_3prime_r2', type=int) 82 parser.add_argument("--ignore_3prime_r2", dest="ignore_3prime_r2", type=int)
79 parser.add_argument('--log_report', dest='log_report', metavar='log_filename', type=str) 83 parser.add_argument(
84 "--log_report", dest="log_report", metavar="log_filename", type=str
85 )
80 args = parser.parse_args() 86 args = parser.parse_args()
81 87
82 logger = logging.getLogger('bismark_methylation_extractor_wrapper') 88 logger = logging.getLogger("bismark_methylation_extractor_wrapper")
83 logger.setLevel(logging.DEBUG) 89 logger.setLevel(logging.DEBUG)
84 ch = logging.StreamHandler(sys.stdout) 90 ch = logging.StreamHandler(sys.stdout)
85 if args.log_report: 91 if args.log_report:
86 ch.setLevel(logging.WARNING) 92 ch.setLevel(logging.WARNING)
87 handler = logging.FileHandler(args.log_report) 93 handler = logging.FileHandler(args.log_report)
91 ch.setLevel(logging.DEBUG) 97 ch.setLevel(logging.DEBUG)
92 logger.addHandler(ch) 98 logger.addHandler(ch)
93 99
94 # Build methylation extractor command 100 # Build methylation extractor command
95 output_dir = tempfile.mkdtemp() 101 output_dir = tempfile.mkdtemp()
96 cmd = ['bismark_methylation_extractor', '--no_header', '-o', output_dir] 102 cmd = ["bismark_methylation_extractor", "--no_header", "-o", output_dir]
97 # Set up all options 103 # Set up all options
98 if args.multicore > 3: 104 if args.multicore > 3:
99 # divide multicore by 3 here since bismark will spawn ~3 jobs. 105 # divide multicore by 3 here since bismark will spawn ~3 jobs.
100 cmd.extend(['--multicore', str(int(math.floor(args.multicore / 3)))]) 106 cmd.extend(["--multicore", str(int(math.floor(args.multicore / 3)))])
101 if args.single_end: 107 if args.single_end:
102 cmd.append('--single-end') 108 cmd.append("--single-end")
103 else: 109 else:
104 cmd.append('--paired-end') 110 cmd.append("--paired-end")
105 if args.no_overlap: 111 if args.no_overlap:
106 cmd.append('--no_overlap') 112 cmd.append("--no_overlap")
107 if args.ignore: 113 if args.ignore:
108 cmd.extend(['--ignore', str(args.ignore)]) 114 cmd.extend(["--ignore", str(args.ignore)])
109 if args.ignore_r2: 115 if args.ignore_r2:
110 cmd.extend(['--ignore_r2', str(args.ignore_r2)]) 116 cmd.extend(["--ignore_r2", str(args.ignore_r2)])
111 if args.ignore_3prime: 117 if args.ignore_3prime:
112 cmd.extend(['--ignore_3prime', str(args.ignore_3prime)]) 118 cmd.extend(["--ignore_3prime", str(args.ignore_3prime)])
113 if args.ignore_3prime_r2: 119 if args.ignore_3prime_r2:
114 cmd.extend(['--ignore_3prime_r2', str(args.ignore_3prime_r2)]) 120 cmd.extend(["--ignore_3prime_r2", str(args.ignore_3prime_r2)])
115 if args.comprehensive: 121 if args.comprehensive:
116 cmd.append('--comprehensive') 122 cmd.append("--comprehensive")
117 if args.merge_non_cpg: 123 if args.merge_non_cpg:
118 cmd.append('--merge_non_CpG') 124 cmd.append("--merge_non_CpG")
119 if args.splitting_report: 125 if args.splitting_report:
120 cmd.append('--report') 126 cmd.append("--report")
121 tmp_genome_dir = None 127 tmp_genome_dir = None
122 if args.cytosine_report: 128 if args.cytosine_report:
123 tmp_genome_dir = build_genome_dir(args.genome_file) 129 tmp_genome_dir = build_genome_dir(args.genome_file)
124 if args.cx_context: 130 if args.cx_context:
125 cmd.extend( 131 cmd.extend(
126 ['--bedGraph', '--CX_context', '--cytosine_report', '--CX_context', '--genome_folder', tmp_genome_dir]) 132 [
133 "--bedGraph",
134 "--CX_context",
135 "--cytosine_report",
136 "--CX_context",
137 "--genome_folder",
138 tmp_genome_dir,
139 ]
140 )
127 else: 141 else:
128 cmd.extend(['--bedGraph', '--cytosine_report', '--genome_folder', tmp_genome_dir]) 142 cmd.extend(
143 ["--bedGraph", "--cytosine_report", "--genome_folder", tmp_genome_dir]
144 )
129 145
130 cmd.append(args.infile) 146 cmd.append(args.infile)
131 147
132 # Run 148 # Run
133 logger.info("Methylation extractor run with: '%s'", " ".join(cmd)) 149 logger.info("Methylation extractor run with: '%s'", " ".join(cmd))
134 prev_dir = os.getcwd() 150 prev_dir = os.getcwd()
135 os.chdir(output_dir) # needed due to a bug in bismark where the coverage file cannot be found 151 os.chdir(
152 output_dir
153 ) # needed due to a bug in bismark where the coverage file cannot be found
136 process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 154 process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
137 with process.stdout: 155 with process.stdout:
138 log_subprocess_output(logger, process.stdout) 156 log_subprocess_output(logger, process.stdout)
139 exitcode = process.wait() 157 exitcode = process.wait()
140 if exitcode != 0: 158 if exitcode != 0:
141 stop_err(logger, "Bismark methylation extractor error (also check the log file if any)!\n%s" % process.stderr) 159 stop_err(
160 logger,
161 "Bismark methylation extractor error (also check the log file if any)!\n%s"
162 % process.stderr,
163 )
142 logger.info("Finished methylation extractor.") 164 logger.info("Finished methylation extractor.")
143 # collect and copy output files 165 # collect and copy output files
144 logger.debug("Zip output files to '%s'.", args.compress) 166 logger.debug("Zip output files to '%s'.", args.compress)
145 os.chdir(prev_dir) 167 os.chdir(prev_dir)
146 zipper(output_dir, args.compress) 168 zipper(output_dir, args.compress)
147 169
148 # cytosine report 170 # cytosine report
149 if args.cytosine_report: 171 if args.cytosine_report:
150 logger.debug("Collecting cytosine report.") 172 logger.debug("Collecting cytosine report.")
151 if args.cx_context: 173 if args.cx_context:
152 shutil.move(glob(os.path.join(output_dir, '*CX_report.txt'))[0], args.cytosine_report) 174 shutil.move(
175 glob(os.path.join(output_dir, "*CX_report.txt"))[0],
176 args.cytosine_report,
177 )
153 else: 178 else:
154 shutil.move(glob(os.path.join(output_dir, '*CpG_report.txt'))[0], args.cytosine_report) 179 shutil.move(
180 glob(os.path.join(output_dir, "*CpG_report.txt"))[0],
181 args.cytosine_report,
182 )
155 # splitting report 183 # splitting report
156 if args.splitting_report: 184 if args.splitting_report:
157 logger.debug("Collecting splitting report.") 185 logger.debug("Collecting splitting report.")
158 shutil.move(glob(os.path.join(output_dir, '*_splitting_report.txt'))[0], args.splitting_report) 186 shutil.move(
187 glob(os.path.join(output_dir, "*_splitting_report.txt"))[0],
188 args.splitting_report,
189 )
159 if args.mbias_report: 190 if args.mbias_report:
160 logger.debug("Collecting M-Bias file.") 191 logger.debug("Collecting M-Bias file.")
161 shutil.move(glob(os.path.join(output_dir, '*M-bias.txt'))[0], args.mbias_report) 192 shutil.move(glob(os.path.join(output_dir, "*M-bias.txt"))[0], args.mbias_report)
162 193
163 # Clean up temp dirs 194 # Clean up temp dirs
164 logger.debug("Cleanup temp dirs.") 195 logger.debug("Cleanup temp dirs.")
165 if os.path.exists(output_dir): 196 if os.path.exists(output_dir):
166 shutil.rmtree(output_dir) 197 shutil.rmtree(output_dir)
167 if tmp_genome_dir and os.path.exists(tmp_genome_dir): 198 if tmp_genome_dir and os.path.exists(tmp_genome_dir):
168 shutil.rmtree(tmp_genome_dir) 199 shutil.rmtree(tmp_genome_dir)
169 logger.info("Done.") 200 logger.info("Done.")
170 201
171 if __name__ == "__main__": __main__() 202
203 if __name__ == "__main__":
204 __main__()