diff bin/bsf-call @ 0:06f8460885ff

migrate from GitHub
author yutaka-saito
date Sun, 19 Apr 2015 20:51:13 +0900
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/bsf-call	Sun Apr 19 20:51:13 2015 +0900
@@ -0,0 +1,204 @@
+#!/usr/bin/env python
+"""
+Bisulfighter::bsf-call
+
+Bisulfighter (http://epigenome.cbrc.jp/bisulfighter)
+by National Institute of Advanced Industrial Science and Technology (AIST)
+is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License.
+http://creativecommons.org/licenses/by-nc-sa/3.0/
+"""
+
+__version__= "1.3"
+
+from optparse import OptionParser
+import os
+import sys
+import re
+
+prog = 'bsf-call'
+usage = """%prog [options] refgenome read1 read2 ...
+  example: %prog -o experiment.txt hg38.fa paired-sample1-1.fastq,paired-sample1-2.fastq"""
+description = "A mapping of the read bisulfite treated by LAST, to detect methylated cytosine (mC) of the results, and outputs the detection result to the file."
+
+op = OptionParser(prog=prog, usage=usage, description=description, version="%s-%s" % (prog, __version__))
+
+op.add_option("-c", "--coverage", type="int", default=5, metavar="C",
+              help="threshold of read coverate (default: %default)")
+
+# op.add_option("-d", "--pe-direction", type="string", default="ff",
+#               help="direction of paired-end probes: ff, fr, rf, rr (default: %default)")
+# 
+op.add_option("-l", "--lower-bound", type="float", default=0.05, metavar="L",
+              help="threshold of mC ratio (default: %default)")
+
+op.add_option("-p", "--multi-thread", type="int", default=1, metavar="P",
+              help="number of threads (default: %default)")
+
+op.add_option("-s", "", type="int", default=150, metavar="S",
+              help="threshold of the alignment score at filtering (default: %default)")
+
+op.add_option("-m", "", type="float", default=1e-9, metavar="M",
+              help="threshold of the mismap probability at filtering (default: %default)")
+
+# op.add_option("", "--last", type="string", default="", metavar="OPT1,OPT2,...",
+#               help="options for LAST (lastal command)")
+# 
+op.add_option("-o", "", type="string", default="bsf-call.out", metavar="FILE",
+              help="output file (default: bsf-call.out)")
+
+op.add_option("-W", "", type="string", default="./bsfwork", metavar="WORKDIR",
+              help="work directory (default: ./bsfwork)")
+
+op.add_option("", "--work-auto", action="store_true", dest="auto_create_work_dir", default=False,
+              help="create work directory automatically")
+
+# op.add_option("-n", "", action="store_true", dest="use_cluster", default=False,
+#               help="run bsf-call on pc cluster")
+# 
+# op.add_option("-q", "", type="string", default="", metavar="QUEUE_LIST",
+#               help="queue list")
+# 
+op.add_option("-M", "", type="string", metavar="MAPPING_DIR",
+              help="mapping result directory")
+
+op.add_option("-T", "", type="string", metavar="LOCAL_DIR",
+              help="local directory")
+
+# op.add_option("-r", "", type="string", default="100M", metavar="SPLIT_READ_SIZE",
+#               help="split read size")
+# 
+# op.add_option("", "--bam", action="store_true", dest="read_bam", default=False,
+#               help="read BAM file for mC detection")
+# 
+# op.add_option("", "--sam", action="store_true", dest="read_sam", default=False,
+#               help="read SAM file for mC detection")
+# 
+# op.add_option("-z", "--compress-prog", type="string", dest="z", metavar="COMPRESS_PROG", default="bzip2", 
+#               help="compression program")
+
+options, args = op.parse_args()
+
+errors = []
+
+work_dir = None
+if options.W:
+    work_dir = options.W
+else:
+    if not options.auto_create_work_dir:
+        work_dir = "bsfwork"
+
+if options.M:
+    if len(args) < 1:
+        op.error("\n  Reference genome is not specified.")
+
+    for result_dir in options.M.split(","):
+        if not os.path.exists(result_dir):
+            errors.append("Mapping result directory: '%s' does not exist." % options.M)
+
+    # if options.read_bam and options.read_sam:
+    #     errors.append("--bam and --sam cannot be placed simultaneously.")
+
+    ref_genome = args[0]
+    reads = None
+else:
+    if len(args) < 2:
+        op.error("\n  Reference genome and read sequence is not specified.")
+
+    # if options.read_bam:
+    #     errors.append("--bam option is specified but -M option is not specified.")
+
+    # if options.read_sam:
+    #     errors.append("--sam option is specified but -M option is not specified.")
+
+    ref_genome = args[0]
+    reads = args[1:]
+
+    for read_files in reads:
+        for read_file in read_files.split(','):
+            if not os.path.exists(read_file):
+                errors.append("Read file: '%s' does not exists." % read_file)
+
+    if work_dir and os.path.exists(work_dir):
+        errors.append("Working directory: '%s' already exists." % work_dir)
+
+if not os.path.exists(ref_genome):
+    errors.append("Reference genome: '%s' does not exists." % ref_genome)
+
+# if options.read_bam or options.read_sam:
+#     try:
+#         import pysam
+#     except:
+#         errors.append("--bam or --sam is specified but pysam is not installed.")
+
+if len(errors) > 0:
+    op.error("\n  " + "\n  ".join(errors))
+
+cmd_opts = {}
+cmd_opts["coverage"] = options.coverage
+# cmd_opts["pe_direction"] = options.pe_direction
+cmd_opts["num_threads"] = options.multi_thread
+cmd_opts["lower_bound"] = options.lower_bound
+cmd_opts["aln_score_thres"] = options.s
+cmd_opts["aln_mismap_prob_thres"] = options.m
+cmd_opts["output"] = options.o
+# cmd_opts["last_opts"] = options.last
+cmd_opts["work_dir"] = work_dir
+# cmd_opts["use_cluster"] = options.use_cluster
+# cmd_opts["queue_list"] = options.q
+cmd_opts["mapping_dir"] = options.M
+cmd_opts["local_dir"] = options.T
+# cmd_opts["split_read_size"] = options.r
+# cmd_opts["read_bam"] = options.read_bam
+# cmd_opts["read_sam"] = options.read_sam
+# cmd_opts["compress_prog"] = options.z
+
+try:
+    sys.path.append('.');
+    import bsfcall
+except ImportError:
+    errors.append("\"import bsfcall\" failed. Please be sure you have bsfcall.py in your python library path.");
+
+import subprocess
+
+# if not checkRunnable('last-map-probs'):
+#     sys.exit(1)
+# if not checkRunnable('last-pair-probs'):
+#     sys.exit(1)
+# if not checkRunnable(options.z):
+#     sys.exit(1)
+
+if len(errors) > 0:
+    op.error("\n  " + "\n  ".join(errors))
+
+# if cmd_opts["use_cluster"]:
+#     cmd_opts["num_threads"] = 1
+#     bsf_call = bsfcall.BsfCallCluster(ref_genome, reads, cmd_opts)
+# else:
+#     bsf_call = bsfcall.BsfCall(ref_genome, reads, cmd_opts)
+bsf_call = bsfcall.BsfCall(ref_genome, reads, cmd_opts)
+
+bsf_call.execute()
+
+sys.exit(0)
+
+def checkRunnable(cmd):
+    for outline, errline in runProcess(cmd.split()):
+        if (re.match(r'error: subProcess:', errline) is not None):
+            print >>sys.stderr, '\"%s\" is not found.' % cmd
+            return False
+    return True
+
+def runProcess(exe):
+    try:
+        p = subprocess.Popen(exe, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    except:
+        yield (None, 'error: runProcess: \'%s\' failed.' % ' '.join(exe))
+        return
+    while (True):
+        retcode = p.poll()
+        if (retcode is not None):
+           break
+        outline = p.stdout.readline()
+        errline = p.stderr.readline()
+        yield (outline, errline)
+