annotate bismark_deduplicate_wrapper.py @ 21:120b7b35e442 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
author bgruening
date Thu, 22 Apr 2021 17:05:46 +0000
parents 9bfe38410155
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
1 #!/usr/bin/python
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
2
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
3 import argparse
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
4 import logging
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
5 import os
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
6 import shutil
21
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
7 import signal
8
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
8 import subprocess
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
9 import sys
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
10 import tempfile
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
11
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
12
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
13 def stop_err(logger, msg):
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
14 logger.critical(msg)
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
15 sys.exit(1)
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
16
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
17
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
18 def restore_sigpipe():
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
19 """
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
20 Needed to handle samtools view
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
21 """
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
22 signal.signal(signal.SIGPIPE, signal.SIG_DFL)
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
23
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
24
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
25 def log_subprocess_output(logger, pipe):
21
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
26 for line in iter(pipe.readline, b""):
8
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
27 logger.debug(line.decode().rstrip())
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
28
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
29
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
30 def get_arg():
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
31 parser = argparse.ArgumentParser()
21
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
32 parser.add_argument("--single_or_paired", dest="single_or_paired")
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
33 parser.add_argument("--input", dest="input", metavar="input")
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
34 parser.add_argument(
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
35 "--output_report", dest="output_report", metavar="output_report"
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
36 )
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
37 parser.add_argument("--output_bam", dest="output_bam", metavar="output_report")
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
38 parser.add_argument(
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
39 "--log_report", dest="log_report", metavar="log_filename", type=str
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
40 )
8
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
41 args = parser.parse_args()
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
42 return args
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
43
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
44
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
45 def __main__():
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
46 args = get_arg()
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
47
21
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
48 logger = logging.getLogger("bismark_deduplicate_wrapper")
8
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
49 logger.setLevel(logging.DEBUG)
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
50 ch = logging.StreamHandler(sys.stdout)
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
51 if args.log_report:
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
52 ch.setLevel(logging.WARNING)
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
53 handler = logging.FileHandler(args.log_report)
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
54 handler.setLevel(logging.DEBUG)
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
55 logger.addHandler(handler)
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
56 else:
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
57 ch.setLevel(logging.DEBUG)
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
58 logger.addHandler(ch)
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
59
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
60 # ensure the input has a .bam suffix
21
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
61 tmp_dir = tempfile.mkdtemp(prefix="tmp", suffix="")
8
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
62 os.chdir(tmp_dir)
21
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
63 default_reads_name = "submitted_reads.bam"
8
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
64 os.symlink(args.input, default_reads_name)
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
65
21
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
66 single_or_paired = "-s" if args.single_or_paired == "single" else "-p"
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
67 cmd = ["deduplicate_bismark", single_or_paired, default_reads_name, "--bam"]
8
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
68 logger.info("Deduplicating with: '%s'", " ".join(cmd))
21
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
69 process = subprocess.Popen(
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
70 cmd,
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
71 stdout=subprocess.PIPE,
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
72 stderr=subprocess.STDOUT,
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
73 preexec_fn=restore_sigpipe,
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
74 )
8
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
75 proc_out, proc_err = process.communicate()
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
76 logger.info(proc_out)
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
77 if process.returncode != 0:
21
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
78 stop_err(
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
79 logger,
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
80 "Bismark deduplication error (also check the log file if any)!\n%s"
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
81 % proc_err,
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
82 )
8
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
83
21
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
84 deduplicated_out_name = "submitted_reads.deduplicated.bam"
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
85 deduplicated_report_name = "submitted_reads.deduplication_report.txt"
8
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
86 logger.debug("Moving '%s' to galaxy: '%s'.", deduplicated_out_name, args.output_bam)
21
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
87 shutil.move(deduplicated_out_name, args.output_bam)
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
88 logger.debug(
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
89 "Moving '%s' to galaxy: '%s'.", deduplicated_report_name, args.output_report
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
90 )
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
91 shutil.move("submitted_reads.deduplication_report.txt", args.output_report)
8
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
92 logger.debug("Done.")
9bfe38410155 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 51299fa62f0566a4a897b1c149db564631282fff
bgruening
parents:
diff changeset
93
21
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
94
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
95 if __name__ == "__main__":
120b7b35e442 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
bgruening
parents: 8
diff changeset
96 __main__()