diff bismark_methylation_extractor.py @ 21:120b7b35e442 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
author bgruening
date Thu, 22 Apr 2021 17:05:46 +0000
parents ff6ee551b153
children 8c191acde702
line wrap: on
line diff
--- a/bismark_methylation_extractor.py	Fri Oct 04 11:33:27 2019 -0400
+++ b/bismark_methylation_extractor.py	Thu Apr 22 17:05:46 2021 +0000
@@ -19,14 +19,14 @@
 
 
 def log_subprocess_output(logger, pipe):
-    for line in iter(pipe.readline, b''):
+    for line in iter(pipe.readline, b""):
         logger.debug(line.decode().rstrip())
 
 
 def zipper(dir, zip_file):
-    output_files_regex = re.compile('^(Non_)?C[pH][GH]_.*')
-    bedgraph_regex = re.compile('.*bedGraph.gz')
-    zip = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED)
+    output_files_regex = re.compile("^(Non_)?C[pH][GH]_.*")
+    bedgraph_regex = re.compile(".*bedGraph.gz")
+    zip = zipfile.ZipFile(zip_file, "w", compression=zipfile.ZIP_DEFLATED)
     root_len = len(os.path.abspath(dir))
     for root, dirs, files in os.walk(dir):
         archive_root = os.path.abspath(root)[root_len:]
@@ -40,46 +40,52 @@
 
 
 def build_genome_dir(genome_file):
-    tmp_genome_dir = tempfile.mkdtemp(prefix='tmp')
-    genome_path = os.path.join(tmp_genome_dir, '.'.join(os.path.split(genome_file)[1].split('.')[:-1]))
+    tmp_genome_dir = tempfile.mkdtemp(prefix="tmp")
+    genome_path = os.path.join(
+        tmp_genome_dir, ".".join(os.path.split(genome_file)[1].split(".")[:-1])
+    )
     try:
         # Create a hard link pointing to genome_file named 'genome_path'.fa.
-        os.symlink(genome_file, genome_path + '.fa')
+        os.symlink(genome_file, genome_path + ".fa")
     except Exception as e:
         if os.path.exists(tmp_genome_dir):
             shutil.rmtree(tmp_genome_dir)
-        stop_err('Error in linking the reference database!\n%s' % e)
+        stop_err("Error in linking the reference database!\n%s" % e)
     return tmp_genome_dir
 
 
 def __main__():
     # Parse Command Line
-    parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.')
+    parser = argparse.ArgumentParser(
+        description="Wrapper for the bismark methylation caller."
+    )
 
     # input options
-    parser.add_argument('--infile', help='Input file in SAM or BAM format.')
-    parser.add_argument('--single-end', dest='single_end', action="store_true")
-    parser.add_argument('--paired-end', dest='paired_end', action="store_true")
+    parser.add_argument("--infile", help="Input file in SAM or BAM format.")
+    parser.add_argument("--single-end", dest="single_end", action="store_true")
+    parser.add_argument("--paired-end", dest="paired_end", action="store_true")
+
+    parser.add_argument("--multicore", dest="multicore", type=int, default=1)
+    parser.add_argument("--splitting_report", dest="splitting_report")
+    parser.add_argument("--mbias_report", dest="mbias_report")
+    parser.add_argument("--cytosine_report", dest="cytosine_report")
+    parser.add_argument("--genome_file", dest="genome_file")
+    parser.add_argument("--cx_context", action="store_true")
 
-    parser.add_argument('--multicore', dest='multicore', type=int, default=1)
-    parser.add_argument('--splitting_report', dest='splitting_report')
-    parser.add_argument('--mbias_report', dest='mbias_report')
-    parser.add_argument('--cytosine_report', dest="cytosine_report")
-    parser.add_argument('--genome_file', dest="genome_file")
-    parser.add_argument('--cx_context', action="store_true")
-
-    parser.add_argument('--comprehensive', action="store_true")
-    parser.add_argument('--merge-non-cpg', dest='merge_non_cpg', action="store_true")
-    parser.add_argument('--no-overlap', dest='no_overlap', action="store_true")
-    parser.add_argument('--compress', dest='compress')
-    parser.add_argument('--ignore', dest='ignore', type=int)
-    parser.add_argument('--ignore_r2', dest='ignore_r2', type=int)
-    parser.add_argument('--ignore_3prime', dest='ignore_3prime', type=int)
-    parser.add_argument('--ignore_3prime_r2', dest='ignore_3prime_r2', type=int)
-    parser.add_argument('--log_report', dest='log_report', metavar='log_filename', type=str)
+    parser.add_argument("--comprehensive", action="store_true")
+    parser.add_argument("--merge-non-cpg", dest="merge_non_cpg", action="store_true")
+    parser.add_argument("--no-overlap", dest="no_overlap", action="store_true")
+    parser.add_argument("--compress", dest="compress")
+    parser.add_argument("--ignore", dest="ignore", type=int)
+    parser.add_argument("--ignore_r2", dest="ignore_r2", type=int)
+    parser.add_argument("--ignore_3prime", dest="ignore_3prime", type=int)
+    parser.add_argument("--ignore_3prime_r2", dest="ignore_3prime_r2", type=int)
+    parser.add_argument(
+        "--log_report", dest="log_report", metavar="log_filename", type=str
+    )
     args = parser.parse_args()
 
-    logger = logging.getLogger('bismark_methylation_extractor_wrapper')
+    logger = logging.getLogger("bismark_methylation_extractor_wrapper")
     logger.setLevel(logging.DEBUG)
     ch = logging.StreamHandler(sys.stdout)
     if args.log_report:
@@ -93,52 +99,68 @@
 
     # Build methylation extractor command
     output_dir = tempfile.mkdtemp()
-    cmd = ['bismark_methylation_extractor', '--no_header', '-o', output_dir]
+    cmd = ["bismark_methylation_extractor", "--no_header", "-o", output_dir]
     # Set up all options
     if args.multicore > 3:
         # divide multicore by 3 here since bismark will spawn ~3 jobs.
-        cmd.extend(['--multicore', str(int(math.floor(args.multicore / 3)))])
+        cmd.extend(["--multicore", str(int(math.floor(args.multicore / 3)))])
     if args.single_end:
-        cmd.append('--single-end')
+        cmd.append("--single-end")
     else:
-        cmd.append('--paired-end')
+        cmd.append("--paired-end")
     if args.no_overlap:
-        cmd.append('--no_overlap')
+        cmd.append("--no_overlap")
     if args.ignore:
-        cmd.extend(['--ignore', str(args.ignore)])
+        cmd.extend(["--ignore", str(args.ignore)])
     if args.ignore_r2:
-        cmd.extend(['--ignore_r2', str(args.ignore_r2)])
+        cmd.extend(["--ignore_r2", str(args.ignore_r2)])
     if args.ignore_3prime:
-        cmd.extend(['--ignore_3prime', str(args.ignore_3prime)])
+        cmd.extend(["--ignore_3prime", str(args.ignore_3prime)])
     if args.ignore_3prime_r2:
-        cmd.extend(['--ignore_3prime_r2', str(args.ignore_3prime_r2)])
+        cmd.extend(["--ignore_3prime_r2", str(args.ignore_3prime_r2)])
     if args.comprehensive:
-        cmd.append('--comprehensive')
+        cmd.append("--comprehensive")
     if args.merge_non_cpg:
-        cmd.append('--merge_non_CpG')
+        cmd.append("--merge_non_CpG")
     if args.splitting_report:
-        cmd.append('--report')
+        cmd.append("--report")
     tmp_genome_dir = None
     if args.cytosine_report:
         tmp_genome_dir = build_genome_dir(args.genome_file)
         if args.cx_context:
             cmd.extend(
-                ['--bedGraph', '--CX_context', '--cytosine_report', '--CX_context', '--genome_folder', tmp_genome_dir])
+                [
+                    "--bedGraph",
+                    "--CX_context",
+                    "--cytosine_report",
+                    "--CX_context",
+                    "--genome_folder",
+                    tmp_genome_dir,
+                ]
+            )
         else:
-            cmd.extend(['--bedGraph', '--cytosine_report', '--genome_folder', tmp_genome_dir])
+            cmd.extend(
+                ["--bedGraph", "--cytosine_report", "--genome_folder", tmp_genome_dir]
+            )
 
     cmd.append(args.infile)
 
     # Run
     logger.info("Methylation extractor run with: '%s'", " ".join(cmd))
     prev_dir = os.getcwd()
-    os.chdir(output_dir) # needed due to a bug in bismark where the coverage file cannot be found
+    os.chdir(
+        output_dir
+    )  # needed due to a bug in bismark where the coverage file cannot be found
     process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     with process.stdout:
         log_subprocess_output(logger, process.stdout)
     exitcode = process.wait()
     if exitcode != 0:
-        stop_err(logger, "Bismark methylation extractor error (also check the log file if any)!\n%s" % process.stderr)
+        stop_err(
+            logger,
+            "Bismark methylation extractor error (also check the log file if any)!\n%s"
+            % process.stderr,
+        )
     logger.info("Finished methylation extractor.")
     # collect and copy output files
     logger.debug("Zip output files to '%s'.", args.compress)
@@ -149,16 +171,25 @@
     if args.cytosine_report:
         logger.debug("Collecting cytosine report.")
         if args.cx_context:
-            shutil.move(glob(os.path.join(output_dir, '*CX_report.txt'))[0], args.cytosine_report)
+            shutil.move(
+                glob(os.path.join(output_dir, "*CX_report.txt"))[0],
+                args.cytosine_report,
+            )
         else:
-            shutil.move(glob(os.path.join(output_dir, '*CpG_report.txt'))[0], args.cytosine_report)
+            shutil.move(
+                glob(os.path.join(output_dir, "*CpG_report.txt"))[0],
+                args.cytosine_report,
+            )
     # splitting report
     if args.splitting_report:
         logger.debug("Collecting splitting report.")
-        shutil.move(glob(os.path.join(output_dir, '*_splitting_report.txt'))[0], args.splitting_report)
+        shutil.move(
+            glob(os.path.join(output_dir, "*_splitting_report.txt"))[0],
+            args.splitting_report,
+        )
     if args.mbias_report:
         logger.debug("Collecting M-Bias file.")
-        shutil.move(glob(os.path.join(output_dir, '*M-bias.txt'))[0], args.mbias_report)
+        shutil.move(glob(os.path.join(output_dir, "*M-bias.txt"))[0], args.mbias_report)
 
     # Clean up temp dirs
     logger.debug("Cleanup temp dirs.")
@@ -168,4 +199,6 @@
         shutil.rmtree(tmp_genome_dir)
     logger.info("Done.")
 
-if __name__ == "__main__": __main__()
+
+if __name__ == "__main__":
+    __main__()