Mercurial > repos > bgruening > bismark
diff bismark_methylation_extractor.py @ 21:120b7b35e442 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
author | bgruening |
---|---|
date | Thu, 22 Apr 2021 17:05:46 +0000 |
parents | ff6ee551b153 |
children | 8c191acde702 |
line wrap: on
line diff
--- a/bismark_methylation_extractor.py Fri Oct 04 11:33:27 2019 -0400 +++ b/bismark_methylation_extractor.py Thu Apr 22 17:05:46 2021 +0000 @@ -19,14 +19,14 @@ def log_subprocess_output(logger, pipe): - for line in iter(pipe.readline, b''): + for line in iter(pipe.readline, b""): logger.debug(line.decode().rstrip()) def zipper(dir, zip_file): - output_files_regex = re.compile('^(Non_)?C[pH][GH]_.*') - bedgraph_regex = re.compile('.*bedGraph.gz') - zip = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED) + output_files_regex = re.compile("^(Non_)?C[pH][GH]_.*") + bedgraph_regex = re.compile(".*bedGraph.gz") + zip = zipfile.ZipFile(zip_file, "w", compression=zipfile.ZIP_DEFLATED) root_len = len(os.path.abspath(dir)) for root, dirs, files in os.walk(dir): archive_root = os.path.abspath(root)[root_len:] @@ -40,46 +40,52 @@ def build_genome_dir(genome_file): - tmp_genome_dir = tempfile.mkdtemp(prefix='tmp') - genome_path = os.path.join(tmp_genome_dir, '.'.join(os.path.split(genome_file)[1].split('.')[:-1])) + tmp_genome_dir = tempfile.mkdtemp(prefix="tmp") + genome_path = os.path.join( + tmp_genome_dir, ".".join(os.path.split(genome_file)[1].split(".")[:-1]) + ) try: # Create a hard link pointing to genome_file named 'genome_path'.fa. - os.symlink(genome_file, genome_path + '.fa') + os.symlink(genome_file, genome_path + ".fa") except Exception as e: if os.path.exists(tmp_genome_dir): shutil.rmtree(tmp_genome_dir) - stop_err('Error in linking the reference database!\n%s' % e) + stop_err("Error in linking the reference database!\n%s" % e) return tmp_genome_dir def __main__(): # Parse Command Line - parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.') + parser = argparse.ArgumentParser( + description="Wrapper for the bismark methylation caller." + ) # input options - parser.add_argument('--infile', help='Input file in SAM or BAM format.') - parser.add_argument('--single-end', dest='single_end', action="store_true") - parser.add_argument('--paired-end', dest='paired_end', action="store_true") + parser.add_argument("--infile", help="Input file in SAM or BAM format.") + parser.add_argument("--single-end", dest="single_end", action="store_true") + parser.add_argument("--paired-end", dest="paired_end", action="store_true") + + parser.add_argument("--multicore", dest="multicore", type=int, default=1) + parser.add_argument("--splitting_report", dest="splitting_report") + parser.add_argument("--mbias_report", dest="mbias_report") + parser.add_argument("--cytosine_report", dest="cytosine_report") + parser.add_argument("--genome_file", dest="genome_file") + parser.add_argument("--cx_context", action="store_true") - parser.add_argument('--multicore', dest='multicore', type=int, default=1) - parser.add_argument('--splitting_report', dest='splitting_report') - parser.add_argument('--mbias_report', dest='mbias_report') - parser.add_argument('--cytosine_report', dest="cytosine_report") - parser.add_argument('--genome_file', dest="genome_file") - parser.add_argument('--cx_context', action="store_true") - - parser.add_argument('--comprehensive', action="store_true") - parser.add_argument('--merge-non-cpg', dest='merge_non_cpg', action="store_true") - parser.add_argument('--no-overlap', dest='no_overlap', action="store_true") - parser.add_argument('--compress', dest='compress') - parser.add_argument('--ignore', dest='ignore', type=int) - parser.add_argument('--ignore_r2', dest='ignore_r2', type=int) - parser.add_argument('--ignore_3prime', dest='ignore_3prime', type=int) - parser.add_argument('--ignore_3prime_r2', dest='ignore_3prime_r2', type=int) - parser.add_argument('--log_report', dest='log_report', metavar='log_filename', type=str) + parser.add_argument("--comprehensive", action="store_true") + parser.add_argument("--merge-non-cpg", dest="merge_non_cpg", action="store_true") + parser.add_argument("--no-overlap", dest="no_overlap", action="store_true") + parser.add_argument("--compress", dest="compress") + parser.add_argument("--ignore", dest="ignore", type=int) + parser.add_argument("--ignore_r2", dest="ignore_r2", type=int) + parser.add_argument("--ignore_3prime", dest="ignore_3prime", type=int) + parser.add_argument("--ignore_3prime_r2", dest="ignore_3prime_r2", type=int) + parser.add_argument( + "--log_report", dest="log_report", metavar="log_filename", type=str + ) args = parser.parse_args() - logger = logging.getLogger('bismark_methylation_extractor_wrapper') + logger = logging.getLogger("bismark_methylation_extractor_wrapper") logger.setLevel(logging.DEBUG) ch = logging.StreamHandler(sys.stdout) if args.log_report: @@ -93,52 +99,68 @@ # Build methylation extractor command output_dir = tempfile.mkdtemp() - cmd = ['bismark_methylation_extractor', '--no_header', '-o', output_dir] + cmd = ["bismark_methylation_extractor", "--no_header", "-o", output_dir] # Set up all options if args.multicore > 3: # divide multicore by 3 here since bismark will spawn ~3 jobs. - cmd.extend(['--multicore', str(int(math.floor(args.multicore / 3)))]) + cmd.extend(["--multicore", str(int(math.floor(args.multicore / 3)))]) if args.single_end: - cmd.append('--single-end') + cmd.append("--single-end") else: - cmd.append('--paired-end') + cmd.append("--paired-end") if args.no_overlap: - cmd.append('--no_overlap') + cmd.append("--no_overlap") if args.ignore: - cmd.extend(['--ignore', str(args.ignore)]) + cmd.extend(["--ignore", str(args.ignore)]) if args.ignore_r2: - cmd.extend(['--ignore_r2', str(args.ignore_r2)]) + cmd.extend(["--ignore_r2", str(args.ignore_r2)]) if args.ignore_3prime: - cmd.extend(['--ignore_3prime', str(args.ignore_3prime)]) + cmd.extend(["--ignore_3prime", str(args.ignore_3prime)]) if args.ignore_3prime_r2: - cmd.extend(['--ignore_3prime_r2', str(args.ignore_3prime_r2)]) + cmd.extend(["--ignore_3prime_r2", str(args.ignore_3prime_r2)]) if args.comprehensive: - cmd.append('--comprehensive') + cmd.append("--comprehensive") if args.merge_non_cpg: - cmd.append('--merge_non_CpG') + cmd.append("--merge_non_CpG") if args.splitting_report: - cmd.append('--report') + cmd.append("--report") tmp_genome_dir = None if args.cytosine_report: tmp_genome_dir = build_genome_dir(args.genome_file) if args.cx_context: cmd.extend( - ['--bedGraph', '--CX_context', '--cytosine_report', '--CX_context', '--genome_folder', tmp_genome_dir]) + [ + "--bedGraph", + "--CX_context", + "--cytosine_report", + "--CX_context", + "--genome_folder", + tmp_genome_dir, + ] + ) else: - cmd.extend(['--bedGraph', '--cytosine_report', '--genome_folder', tmp_genome_dir]) + cmd.extend( + ["--bedGraph", "--cytosine_report", "--genome_folder", tmp_genome_dir] + ) cmd.append(args.infile) # Run logger.info("Methylation extractor run with: '%s'", " ".join(cmd)) prev_dir = os.getcwd() - os.chdir(output_dir) # needed due to a bug in bismark where the coverage file cannot be found + os.chdir( + output_dir + ) # needed due to a bug in bismark where the coverage file cannot be found process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) with process.stdout: log_subprocess_output(logger, process.stdout) exitcode = process.wait() if exitcode != 0: - stop_err(logger, "Bismark methylation extractor error (also check the log file if any)!\n%s" % process.stderr) + stop_err( + logger, + "Bismark methylation extractor error (also check the log file if any)!\n%s" + % process.stderr, + ) logger.info("Finished methylation extractor.") # collect and copy output files logger.debug("Zip output files to '%s'.", args.compress) @@ -149,16 +171,25 @@ if args.cytosine_report: logger.debug("Collecting cytosine report.") if args.cx_context: - shutil.move(glob(os.path.join(output_dir, '*CX_report.txt'))[0], args.cytosine_report) + shutil.move( + glob(os.path.join(output_dir, "*CX_report.txt"))[0], + args.cytosine_report, + ) else: - shutil.move(glob(os.path.join(output_dir, '*CpG_report.txt'))[0], args.cytosine_report) + shutil.move( + glob(os.path.join(output_dir, "*CpG_report.txt"))[0], + args.cytosine_report, + ) # splitting report if args.splitting_report: logger.debug("Collecting splitting report.") - shutil.move(glob(os.path.join(output_dir, '*_splitting_report.txt'))[0], args.splitting_report) + shutil.move( + glob(os.path.join(output_dir, "*_splitting_report.txt"))[0], + args.splitting_report, + ) if args.mbias_report: logger.debug("Collecting M-Bias file.") - shutil.move(glob(os.path.join(output_dir, '*M-bias.txt'))[0], args.mbias_report) + shutil.move(glob(os.path.join(output_dir, "*M-bias.txt"))[0], args.mbias_report) # Clean up temp dirs logger.debug("Cleanup temp dirs.") @@ -168,4 +199,6 @@ shutil.rmtree(tmp_genome_dir) logger.info("Done.") -if __name__ == "__main__": __main__() + +if __name__ == "__main__": + __main__()