Mercurial > repos > bioitcore > chimerascan

--- a/chimerascan.xml	Tue Sep 12 14:13:20 2017 -0400
+++ b/chimerascan.xml	Wed Sep 13 14:42:12 2017 -0400
@@ -1,8 +1,8 @@
 <tool id="chimerascan" name="ChimeraScan">
     <description>A tool for identifying chimeric transcription in sequencing data.</description>
     <command detect_errors="exit_code"><![CDATA[
-            bash $__tool_directory__/run.sh &&
-            python $__tool_directory__/chimerascan_run.py -p 8 $__tool_directory__/myindex
+            bash $__tool_directory__/run.sh $__tool_directory__ &&
+            python chimerascan_run.py -p 8 myindex
             #if $input_type_conditional.chimerascan_input_type == "paired"
                     $input_type_conditional.input_1 $input_type_conditional.input_2
             #else
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chimerascan/chimerascan_run.py	Wed Sep 13 14:42:12 2017 -0400
@@ -0,0 +1,1032 @@
+#!/usr/bin/env python
+'''
+Created on Jan 5, 2011
+
+@author: mkiyer
+
+chimerascan: chimeric transcript discovery using RNA-seq
+
+Copyright (C) 2011 Matthew Iyer
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+'''
+from chimerascan import __version__
+
+__author__ = "Matthew Iyer"
+__copyright__ = "Copyright 2011, chimerascan project"
+__credits__ = ["Matthew Iyer", "Christopher Maher"]
+__license__ = "GPL"
+__maintainer__ = "Matthew Iyer"
+__email__ = "mkiyer@med.umich.edu"
+__status__ = "beta"
+
+###
+#
+# Modified by
+#	Baekdoo Kim(baegi7942@gmail.com)
+#
+###
+
+import logging
+import os
+import subprocess
+import sys
+import shutil
+from optparse import OptionParser, OptionGroup
+import xml.etree.ElementTree as etree
+
+# check for python version 2.6.0 or greater
+if sys.version_info < (2,6,0):
+    sys.stderr.write("You need python 2.6 or later to run chimerascan\n")
+    sys.exit(1)
+
+# local imports
+from chimerascan import pysam
+import chimerascan.lib.config as config
+from chimerascan.lib.config import JOB_SUCCESS, JOB_ERROR, MIN_SEGMENT_LENGTH
+from chimerascan.lib.base import LibraryTypes, check_executable, \
+    parse_bool, indent_xml, up_to_date
+from chimerascan.lib.seq import FASTQ_QUAL_FORMATS, SANGER_FORMAT
+from chimerascan.lib.fragment_size_distribution import InsertSizeDistribution
+
+from chimerascan.pipeline.fastq_inspect_reads import inspect_reads, detect_read_length, get_min_max_read_lengths
+from chimerascan.pipeline.align_bowtie import align_pe, align_sr, trim_align_pe_sr
+from chimerascan.pipeline.find_discordant_reads import find_discordant_fragments
+from chimerascan.pipeline.discordant_reads_to_bedpe import discordant_reads_to_bedpe, sort_bedpe
+from chimerascan.pipeline.nominate_chimeras import nominate_chimeras
+from chimerascan.pipeline.chimeras_to_breakpoints import chimeras_to_breakpoints
+from chimerascan.pipeline.nominate_spanning_reads import nominate_encomp_spanning_reads, extract_single_mapped_reads, nominate_single_mapped_spanning_reads
+from chimerascan.pipeline.merge_spanning_alignments import merge_spanning_alignments
+from chimerascan.pipeline.resolve_discordant_reads import resolve_discordant_reads
+from chimerascan.pipeline.filter_chimeras import filter_chimeras, filter_highest_coverage_isoforms, filter_encompassing_chimeras
+from chimerascan.pipeline.filter_homologous_genes import filter_homologous_genes
+from chimerascan.pipeline.write_output import write_output
+
+# defaults for bowtie
+DEFAULT_NUM_PROCESSORS = config.BASE_PROCESSORS
+DEFAULT_BOWTIE_PATH = ""
+DEFAULT_BOWTIE_ARGS = "--best --strata"
+DEFAULT_DISCORD_BOWTIE_ARGS = "--best"
+DEFAULT_MULTIHITS = 100
+DEFAULT_MISMATCHES = 2
+DEFAULT_DISCORD_MISMATCHES = 3
+DEFAULT_SEGMENT_LENGTH = 25
+DEFAULT_TRIM5 = 0
+DEFAULT_TRIM3 = 0
+DEFAULT_MIN_FRAG_LENGTH = 0
+DEFAULT_MAX_FRAG_LENGTH = 1000
+DEFAULT_NUM_SAMPLES_TO_DETERMINE_READ_LENGTHS = 10000
+DEFAULT_FASTQ_QUAL_FORMAT = SANGER_FORMAT
+DEFAULT_LIBRARY_TYPE = LibraryTypes.FR_UNSTRANDED
+
+DEFAULT_ISIZE_MEAN = 200
+DEFAULT_ISIZE_STDEV = 40
+DEFAULT_HOMOLOGY_MISMATCHES = config.BREAKPOINT_HOMOLOGY_MISMATCHES
+DEFAULT_ANCHOR_MIN = 4
+DEFAULT_ANCHOR_LENGTH = 8
+DEFAULT_ANCHOR_MISMATCHES = 0
+DEFAULT_FILTER_ISIZE_PROB = 0.01
+DEFAULT_FILTER_UNIQUE_FRAGS = 2.0
+DEFAULT_FILTER_ISOFORM_FRACTION = 0.01
+NUM_POSITIONAL_ARGS = 4
+DEFAULT_KEEP_TMP = True
+
+class RunConfig(object):
+
+    attrs = (("num_processors", int, DEFAULT_NUM_PROCESSORS),
+             ("quals", str, DEFAULT_FASTQ_QUAL_FORMAT),
+             ("keep_tmp", parse_bool, DEFAULT_KEEP_TMP),
+             ("bowtie_path", str, DEFAULT_BOWTIE_PATH),
+             ("bowtie_args", str, DEFAULT_BOWTIE_ARGS),
+             ("discord_bowtie_args", str, DEFAULT_DISCORD_BOWTIE_ARGS),
+             ("multihits", int, DEFAULT_MULTIHITS),
+             ("mismatches", int, DEFAULT_MISMATCHES),
+             ("discord_mismatches", int, DEFAULT_DISCORD_MISMATCHES),
+             ("segment_length", int, DEFAULT_SEGMENT_LENGTH),
+             ("trim5", int, DEFAULT_TRIM5),
+             ("trim3", int, DEFAULT_TRIM3),
+             ("min_fragment_length", int, DEFAULT_MIN_FRAG_LENGTH),
+             ("max_fragment_length", int, DEFAULT_MAX_FRAG_LENGTH),
+             ("num_samples_to_determine_read_lengths", int,
+                    DEFAULT_NUM_SAMPLES_TO_DETERMINE_READ_LENGTHS),
+             ("library_type", str, DEFAULT_LIBRARY_TYPE),
+             ("isize_mean", int, DEFAULT_ISIZE_MEAN),
+             ("isize_stdev", float, DEFAULT_ISIZE_STDEV),
+             ("homology_mismatches", int, DEFAULT_HOMOLOGY_MISMATCHES),
+             ("anchor_min", int, DEFAULT_ANCHOR_MIN),
+             ("anchor_length", int, DEFAULT_ANCHOR_LENGTH),
+             ("anchor_mismatches", int, DEFAULT_ANCHOR_MISMATCHES),
+             ("filter_unique_frags", float, DEFAULT_FILTER_UNIQUE_FRAGS),
+             ("filter_isize_prob", float, DEFAULT_FILTER_ISIZE_PROB),
+             ("filter_isoform_fraction", float, DEFAULT_FILTER_ISOFORM_FRACTION),
+             ("filter_false_pos_file", float, ""))
+
+    def __init__(self):
+        self.output_dir = None
+        self.fastq_files = None
+        self.index_dir = None
+        self.output_file_name = None
+	self.output_file_path = None
+	for attrname, attrtype, attrdefault in self.attrs:
+            setattr(self, attrname, None)
+
+    def from_xml(self, xmlfile):
+        tree = etree.parse(xmlfile)
+        root = tree.getroot()
+        # required arguments
+        self.output_dir = root.findtext('output_dir')
+        fastq_files = {}
+        for mate_elem in root.findall("fastq_files/*"):
+            mate = int(mate_elem.get("mate"))
+            fastq_files[mate] = mate_elem.text
+        self.fastq_files = [fastq_files[mate] for mate in xrange(len(fastq_files))]
+        self.index_dir = root.findtext('index')
+        # optional arguments
+        for attrname, attrtype, attrdefault in self.attrs:
+            val = root.findtext(attrname, attrdefault)
+            setattr(self, attrname, attrtype(val))
+
+    def to_xml(self):
+        root = etree.Element("chimerascan_config")
+        # output dir
+        elem = etree.SubElement(root, "output_dir")
+        elem.text = self.output_dir
+        # fastq files
+        elem = etree.SubElement(root, "fastq_files")
+        for mate,fastq_file in enumerate(self.fastq_files):
+            file_elem = etree.SubElement(elem, "file", mate=str(mate))
+            file_elem.text = fastq_file
+        # index
+        elem = etree.SubElement(root, "index")
+        elem.text = self.index_dir
+        # optional arguments
+        for attrname, attrtype, attrdefault in self.attrs:
+            val = getattr(self, attrname)
+            elem = etree.SubElement(root, attrname)
+            elem.text = str(val)
+        # indent for pretty printing
+        indent_xml(root)
+        return etree.tostring(root)
+
+    @staticmethod
+    def get_option_parser():
+        parser = OptionParser(usage="%prog [options] [--config <config_file> "
+                              " | <index> <mate1.fq> <mate2.fq> <output_dir>]",
+                              version="%s" % __version__)
+        # standard options
+        parser.add_option("--config", dest="config_file",
+                          help="Path to configuration XML file")
+        parser.add_option("-v", "--verbose", dest="verbose",
+                          action="store_true", default=False,
+                          help="enable verbose logging output "
+                          "[default=%default]")
+        parser.add_option("-p", "--processors", dest="num_processors",
+                          type="int", default=DEFAULT_NUM_PROCESSORS,
+                          help="Number of processor cores to allocate to "
+                          "chimerascan [default=%default]")
+        parser.add_option("--keep-tmp", dest="keep_tmp", action="store_true",
+                          default=DEFAULT_KEEP_TMP,
+                          help="DO NOT delete intermediate files after run "
+                          "[default=%default]")
+        parser.add_option("--rm-tmp", dest="keep_tmp", action="store_false",
+                          help="Delete intermediate files after run "
+                          "[default=%default]")
+        parser.add_option("--quals", dest="quals",
+                          choices=FASTQ_QUAL_FORMATS,
+                          default=DEFAULT_FASTQ_QUAL_FORMAT, metavar="FMT",
+                          help="FASTQ quality score format [default=%default]")
+        parser.add_option('--library-type', dest="library_type",
+                          choices=LibraryTypes.choices(),
+                          default=DEFAULT_LIBRARY_TYPE,
+                          help="Library type [default=%default]")
+        parser.add_option("--isize-mean", dest="isize_mean", type="int",
+                          default=DEFAULT_ISIZE_MEAN, metavar="N",
+                          help="Mean insert size to sample from when "
+                          "insert size distribution cannot be determined "
+                          "empirically [default=%default]")
+        parser.add_option("--isize-stdev", dest="isize_stdev", type="float",
+                          default=DEFAULT_ISIZE_STDEV, metavar="N",
+                          help="Insert size standard deviation to sample "
+                          "from when insert size distribution cannot be "
+                          "determined empirically [default=%default]")
+        parser.add_option("-n", "--num-samples-to-determine-read-lengths",
+                dest="num_samples_to_determine_read_lengths",
+                default=10000, type="int",
+                help="Number of samples that will be read from the input "
+                     "fastqs in order to determine the min/max read lengths")
+        # alignment options
+        bowtie_group = OptionGroup(parser, "Bowtie options",
+                                   "Adjust these options to change "
+                                   "bowtie alignment settings")
+        bowtie_group.add_option("--bowtie-path",
+                                dest="bowtie_path",
+                                default=DEFAULT_BOWTIE_PATH,
+                                help="Path to directory containing 'bowtie' "
+                                "[default: assumes bowtie is in the current "
+                                "PATH]")
+        bowtie_group.add_option("--trim5", type="int", dest="trim5",
+                                default=DEFAULT_TRIM5, metavar="N",
+                                help="Trim N bases from 5' end of read")
+        bowtie_group.add_option("--trim3", type="int", dest="trim3",
+                                default=DEFAULT_TRIM3, metavar="N",
+                                help="Trim N bases from 3' end of read")
+        bowtie_group.add_option("--min-fragment-length", type="int",
+                                dest="min_fragment_length",
+                                default=DEFAULT_MIN_FRAG_LENGTH,
+                                help="Smallest expected fragment length "
+                                "[default=%default]")
+        bowtie_group.add_option("--max-fragment-length", type="int",
+                                dest="max_fragment_length",
+                                default=DEFAULT_MAX_FRAG_LENGTH,
+                                help="Largest expected fragment length (reads less"
+                                " than this fragment length are assumed to be"
+                                " unspliced and contiguous) [default=%default]")
+        bowtie_group.add_option("--multihits", type="int", dest="multihits",
+                                default=DEFAULT_MULTIHITS, metavar="N",
+                                help="Ignore reads that map to more than N "
+                                "locations [default=%default]")
+        bowtie_group.add_option("--initial-mismatches", type="int", dest="mismatches",
+                                default=DEFAULT_MISMATCHES, metavar="N",
+                                help="Aligned reads must have <= N mismatches "
+                                "[default=%default]")
+        bowtie_group.add_option("--initial-bowtie-args", dest="bowtie_args",
+                                default=DEFAULT_BOWTIE_ARGS,
+                                help="Additional arguments to pass to bowtie "
+                                "aligner (see Bowtie manual for options) "
+                                "[default='%default']")
+        bowtie_group.add_option("--discord-bowtie-args", dest="discord_bowtie_args",
+                                default=DEFAULT_DISCORD_BOWTIE_ARGS,
+                                help="Additional arguments to pass to bowtie "
+                                "aligner for discordant alignment phase (see "
+                                "Bowtie manual for options) [default='%default']")
+        bowtie_group.add_option("--discord-mismatches", type="int", dest="discord_mismatches",
+                                default=DEFAULT_DISCORD_MISMATCHES, metavar="N",
+                                help="Discordant aligned reads must have <= N mismatches "
+                                "[default=%default]")
+        bowtie_group.add_option("--segment-length", type="int",
+                                dest="segment_length",
+                                default=DEFAULT_SEGMENT_LENGTH,
+                                metavar="N",
+                                help="Size of read segments during discordant "
+                                "alignment phase [default=%default]")
+        parser.add_option_group(bowtie_group)
+        # filtering options
+        filter_group = OptionGroup(parser, "Filtering options",
+                                   "Adjust these options to change "
+                                   "filtering behavior")
+        filter_group.add_option("--homology-mismatches", type="int",
+                                dest="homology_mismatches",
+                                default=DEFAULT_HOMOLOGY_MISMATCHES,
+                                help="Number of mismatches to tolerate at "
+                                "breakpoints when computing homology "
+                                "[default=%default]", metavar="N")
+        filter_group.add_option("--anchor-min", type="int", dest="anchor_min",
+                                default=DEFAULT_ANCHOR_MIN, metavar="N",
+                                help="Minimum breakpoint overlap (bp) required "
+                                "to call spanning reads [default=%default]")
+        filter_group.add_option("--anchor-length", type="int", dest="anchor_length",
+                                default=DEFAULT_ANCHOR_LENGTH, metavar="N",
+                                help="Size (bp) of anchor region where mismatch "
+                                "checks are enforced [default=%default]")
+        filter_group.add_option("--anchor-mismatches", type="int", dest="anchor_mismatches",
+                                default=DEFAULT_ANCHOR_MISMATCHES,
+                                metavar="N",
+                                help="Number of mismatches allowed within anchor "
+                                "region [default=%default]")
+        filter_group.add_option("--filter-unique-frags", type="float",
+                                default=DEFAULT_FILTER_UNIQUE_FRAGS,
+                                dest="filter_unique_frags", metavar="N",
+                                help="Filter chimeras with less than N unique "
+                                "aligned fragments [default=%default]")
+        filter_group.add_option("--filter-isize-prob", type="float",
+                                default=DEFAULT_FILTER_ISIZE_PROB,
+                                dest="filter_isize_prob", metavar="X",
+                                help="Filter chimeras when probability of "
+                                "observing the putative insert size is less "
+                                "than X (0.0-1.0) [default=%default]")
+        filter_group.add_option("--filter-isoform-fraction", type="float",
+                                default=DEFAULT_FILTER_ISOFORM_FRACTION, metavar="X",
+                                help="Filter chimeras with expression ratio "
+                                "less than X (0.0-1.0) relative to the wild-type "
+                                "transcript levels [default=%default]")
+        filter_group.add_option("--filter-false-pos", default="",
+                                dest="filter_false_pos_file",
+                                help="File containing known false positive "
+                                "chimeric transcript pairs to filter out")
+        parser.add_option_group(filter_group)
+        return parser
+
+    def from_args(self, args, parser=None):
+        if parser is None:
+            parser = self.get_option_parser()
+        options, args = parser.parse_args(args=args)
+        # parse config file options/args
+        if options.config_file is not None:
+            self.from_xml(options.config_file)
+        # reset logging to verbose
+        if options.verbose:
+            logging.getLogger().setLevel(logging.DEBUG)
+        # check command line arguments
+        if self.index_dir is None:
+            self.index_dir = args[0]
+        if self.fastq_files is None:
+            if len(args) < 2:
+                parser.error("fastq files not specified in config file or command line")
+            self.fastq_files = args[1:3]
+        if self.output_dir is None:
+            if len(args) < 3:
+                parser.error("output dir not specified in config file or command line")
+            self.output_file_path = args[3]
+	    self.output_file_name = os.path.basename(self.output_file_path)
+	    self.output_dir = "%s/chimerascan_tmp" % os.path.dirname(self.output_file_path)
+        # optional arguments
+        # set rest of options, overriding if attribute is undefined
+        # or set to something other than the default
+        for attrname, attrtype, attrdefault in self.attrs:
+            if ((getattr(self, attrname) is None) or
+                (getattr(options, attrname) != attrdefault)):
+                setattr(self, attrname, getattr(options, attrname))
+
+    def check_config(self, min_read_length, max_read_length):
+        config_passed = True
+        # check that seed length < read length
+        if self.segment_length > min_read_length:
+            logging.error("seed length (%d) cannot be longer than "
+                    "read length (%d)" % (self.segment_length, min_read_length))
+            config_passed = False
+        # check that output dir is not a regular file
+        if os.path.exists(self.output_dir) and (not os.path.isdir(self.output_dir)):
+            logging.error("Output directory name '%s' exists and is not a valid directory" %
+                          (self.output_dir))
+            config_passed = False
+        if check_executable(os.path.join(self.bowtie_path, "bowtie-build")):
+            logging.debug("Checking for 'bowtie-build' binary... found")
+        else:
+            logging.error("bowtie-build binary not found or not executable")
+            config_passed = False
+        # check that bowtie program exists
+        if check_executable(os.path.join(self.bowtie_path, "bowtie")):
+            logging.debug("Checking for 'bowtie' binary... found")
+        else:
+            logging.error("bowtie binary not found or not executable")
+            config_passed = False
+        # check that alignment index exists
+        if os.path.isdir(self.index_dir):
+            logging.debug("Checking for chimerascan index directory... found")
+            # check that alignment index file exists
+            align_index_file = os.path.join(self.index_dir, config.BOWTIE_INDEX_FILE)
+            if os.path.isfile(align_index_file):
+                logging.debug("Checking for bowtie index file... found")
+            else:
+                logging.error("chimerascan bowtie index file '%s' invalid" % (align_index_file))
+                config_passed = False
+        else:
+            logging.error("chimerascan alignment index directory '%s' not valid" %
+                          (self.index_dir))
+            config_passed = False
+        # check for sufficient processors
+        if self.num_processors < config.BASE_PROCESSORS:
+            logging.warning("Please specify >=2 processes using '-p' to allow program to run efficiently")
+        return config_passed
+
+def run_chimerascan(runconfig):
+    """
+    main function for running the chimerascan pipeline
+    """
+    # print a welcome message
+    title_string = "Running chimerascan version %s" % (__version__)
+    logging.info(title_string)
+    logging.info("-" * len(title_string))
+
+    # check that input fastq files exist
+    for mate, fastq_file in enumerate(runconfig.fastq_files):
+        if not os.path.isfile(fastq_file):
+            logging.error("mate '%d' fastq file '%s' is not valid" %
+                          (mate, fastq_file))
+            logging.error("Invalid run configuration, aborting.")
+            return JOB_ERROR
+
+    # determine min/max read lengths
+    min_read_length, max_read_length = get_min_max_read_lengths(
+            runconfig.fastq_files,
+            num_samples=runconfig.num_samples_to_determine_read_lengths)
+
+    trimming = runconfig.trim5 + runconfig.trim3
+    min_read_length = min_read_length - trimming
+    max_read_length = max_read_length - trimming
+    logging.debug("min_read_length after trimming: %s" % (min_read_length))
+    logging.debug("max_read_length after trimming: %s" % (max_read_length))
+
+    # validate run configuration
+    config_passed = runconfig.check_config(min_read_length, max_read_length)
+    if not config_passed:
+        logging.error("Invalid run configuration, aborting.")
+        return JOB_ERROR
+
+    # create output dir if it does not exist
+    if not os.path.exists(runconfig.output_dir):
+        os.makedirs(runconfig.output_dir)
+        logging.info("Created output directory: %s" % (runconfig.output_dir))
+    # create log dir if it does not exist
+    log_dir = os.path.join(runconfig.output_dir, config.LOG_DIR)
+    if not os.path.exists(log_dir):
+        os.makedirs(log_dir)
+        logging.debug("Created directory for log files: %s" % (log_dir))
+    # create tmp dir if it does not exist
+    tmp_dir = os.path.join(runconfig.output_dir, config.TMP_DIR)
+    if not os.path.exists(tmp_dir):
+        os.makedirs(tmp_dir)
+        logging.debug("Created directory for tmp files: %s" % (tmp_dir))
+    # write the run config to a file
+    xmlstring = runconfig.to_xml()
+    runconfig_xml_file = os.path.join(runconfig.output_dir, config.RUNCONFIG_XML_FILE)
+    logging.info("Writing run configuration to XML file: %s" % (runconfig_xml_file))
+    fh = open(runconfig_xml_file, "w")
+    print >>fh, xmlstring
+    fh.close()
+    # gather and parse run parameters
+    bowtie_index = os.path.join(runconfig.index_dir, config.ALIGN_INDEX)
+    bowtie_bin = os.path.join(runconfig.bowtie_path, "bowtie")
+    bowtie_build_bin = os.path.join(runconfig.bowtie_path, "bowtie-build")
+
+    # minimum fragment length cannot be smaller than the read length
+    min_fragment_length = max(runconfig.min_fragment_length,
+                              min_read_length)
+    #
+    # Process and inspect the FASTQ files, performing several alterations
+    # to the reads:
+    #
+    # 1) rename them from long string to numbers to save space throughout
+    # the pipeline
+    # 2) ensure the "/1" and "/2" suffixes exist to denote reads in a pair
+    # 3) convert quality scores to sanger format
+    # 4) #TODO# trim poor quality reads
+    # (requires support for variable length reads)
+    #
+    converted_fastq_files = [os.path.join(tmp_dir, fq)
+                             for fq in config.CONVERTED_FASTQ_FILES]
+    msg = "Processing FASTQ files"
+    if (all(up_to_date(cfq, fq) for cfq,fq in
+            zip(converted_fastq_files, runconfig.fastq_files))):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        converted_fastq_prefix = \
+            os.path.join(tmp_dir, config.CONVERTED_FASTQ_PREFIX)
+        try:
+            retcode = inspect_reads(runconfig.fastq_files,
+                                    converted_fastq_prefix,
+                                    quals=runconfig.quals)
+            if retcode != config.JOB_SUCCESS:
+                logging.error("%s step failed" % (msg))
+                return JOB_ERROR
+        except Exception as e:
+            logging.info("Cleaning up after error %s" % (str(e)))
+            for fq in converted_fastq_files:
+                if os.path.isfile(fq):
+                    os.remove(fq)
+    #
+    # Initial Bowtie alignment step
+    #
+    # align in paired-end mode, trying to resolve as many reads as possible
+    # this effectively rules out the vast majority of reads as candidate
+    # fusions
+    #
+    unaligned_fastq_param = os.path.join(tmp_dir, config.UNALIGNED_FASTQ_PARAM)
+    unaligned_fastq_files = [os.path.join(tmp_dir, fq)
+                             for fq in config.UNALIGNED_FASTQ_FILES]
+    maxmultimap_fastq_param = os.path.join(tmp_dir, config.MAXMULTIMAP_FASTQ_PARAM)
+    aligned_bam_file = os.path.join(runconfig.output_dir, config.ALIGNED_READS_BAM_FILE)
+    aligned_log_file = os.path.join(log_dir, "bowtie_alignment.log")
+    msg = "Aligning reads with bowtie"
+    print [up_to_date(aligned_bam_file, fq) for fq in converted_fastq_files]
+    print [up_to_date(a,b) for a,b in zip(unaligned_fastq_files, converted_fastq_files)]
+    if (all(up_to_date(aligned_bam_file, fq) for fq in converted_fastq_files) and
+        all(up_to_date(a,b) for a,b in zip(unaligned_fastq_files, converted_fastq_files))):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        retcode = align_pe(converted_fastq_files,
+                           bowtie_index,
+                           aligned_bam_file,
+                           unaligned_fastq_param,
+                           maxmultimap_fastq_param,
+                           min_fragment_length=min_fragment_length,
+                           max_fragment_length=runconfig.max_fragment_length,
+                           trim5=runconfig.trim5,
+                           trim3=runconfig.trim3,
+                           library_type=runconfig.library_type,
+                           num_processors=runconfig.num_processors,
+                           quals=SANGER_FORMAT,
+                           multihits=runconfig.multihits,
+                           mismatches=runconfig.mismatches,
+                           bowtie_bin=bowtie_bin,
+                           bowtie_args=runconfig.bowtie_args,
+                           log_file=aligned_log_file,
+                           keep_unmapped=False)
+        if retcode != 0:
+            logging.error("Bowtie failed with error code %d" % (retcode))
+            return JOB_ERROR
+    #
+    # Sort aligned reads by position
+    #
+    msg = "Sorting aligned reads"
+    sorted_aligned_bam_file = os.path.join(runconfig.output_dir,
+                                           config.SORTED_ALIGNED_READS_BAM_FILE)
+    if (up_to_date(sorted_aligned_bam_file, aligned_bam_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        sorted_aligned_bam_prefix = os.path.splitext(sorted_aligned_bam_file)[0]
+        pysam.sort("-m", str(int(1e9)), aligned_bam_file, sorted_aligned_bam_prefix)
+    #
+    # Index BAM file
+    #
+    msg = "Indexing BAM file"
+    sorted_aligned_bam_index_file = sorted_aligned_bam_file + ".bai"
+    if (up_to_date(sorted_aligned_bam_index_file, sorted_aligned_bam_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        pysam.index(sorted_aligned_bam_file)
+    #
+    # Get insert size distribution
+    #
+    isize_dist_file = os.path.join(runconfig.output_dir,
+                                   config.ISIZE_DIST_FILE)
+    if up_to_date(isize_dist_file, aligned_bam_file):
+        logging.info("[SKIPPED] Profiling insert size distribution")
+        isize_dist = InsertSizeDistribution.from_file(open(isize_dist_file, "r"))
+    else:
+        logging.info("Profiling insert size distribution")
+        max_isize_samples = config.ISIZE_MAX_SAMPLES
+        bamfh = pysam.Samfile(aligned_bam_file, "rb")
+        isize_dist = InsertSizeDistribution.from_bam(bamfh, min_isize=min_fragment_length,
+                                                     max_isize=runconfig.max_fragment_length,
+                                                     max_samples=max_isize_samples)
+        bamfh.close()
+        # if not enough samples, use a normal distribution instead
+        # of the empirical distribution
+        if isize_dist.n < config.ISIZE_MIN_SAMPLES:
+            logging.warning("Not enough fragments to sample insert size "
+                            "distribution empirically.  Using mean=%d "
+                            "stdev=%f instead" %
+                            (runconfig.isize_mean,
+                             runconfig.isize_stdev))
+            isize_dist = InsertSizeDistribution.from_random(runconfig.isize_mean,
+                                                            runconfig.isize_stdev,
+                                                            min_isize=runconfig.min_fragment_length,
+                                                            max_isize=runconfig.max_fragment_length,
+                                                            samples=max_isize_samples)
+        isize_dist.to_file(open(isize_dist_file, "w"))
+    logging.info("Insert size samples=%d mean=%f std=%f median=%d mode=%d" %
+                 (isize_dist.n, isize_dist.mean(), isize_dist.std(),
+                  isize_dist.isize_at_percentile(50.0), isize_dist.mode()))
+    #
+    # Realignment step
+    #
+    # trim and realign all the initially unaligned reads, in order to
+    # increase sensitivity to detect reads spanning fusion junctions
+    #
+    realigned_bam_file = os.path.join(tmp_dir, config.REALIGNED_BAM_FILE)
+    realigned_log_file = os.path.join(log_dir, "bowtie_trimmed_realignment.log")
+    msg = "Trimming and re-aligning initially unmapped reads"
+    if all(up_to_date(realigned_bam_file, fq)
+           for fq in unaligned_fastq_files):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        trim_align_pe_sr(unaligned_fastq_files,
+                         bowtie_index,
+                         realigned_bam_file,
+                         unaligned_fastq_param=None,
+                         maxmultimap_fastq_param=None,
+                         trim5=runconfig.trim5,
+                         library_type=runconfig.library_type,
+                         num_processors=runconfig.num_processors,
+                         quals=SANGER_FORMAT,
+                         multihits=runconfig.multihits,
+                         mismatches=runconfig.discord_mismatches,
+                         bowtie_bin=bowtie_bin,
+                         bowtie_args=runconfig.discord_bowtie_args,
+                         log_file=realigned_log_file,
+                         segment_length=runconfig.segment_length,
+                         keep_unmapped=True)
+    #
+    # Find discordant reads
+    #
+    # iterate through realigned reads and divide them into groups of
+    # concordant, discordant within a gene (isoforms), discordant
+    # between different genes, and discordant in the genome
+    #
+    gene_paired_bam_file = os.path.join(tmp_dir, config.GENE_PAIRED_BAM_FILE)
+    genome_paired_bam_file = os.path.join(tmp_dir, config.GENOME_PAIRED_BAM_FILE)
+    realigned_unmapped_bam_file = os.path.join(tmp_dir, config.REALIGNED_UNMAPPED_BAM_FILE)
+    realigned_complex_bam_file = os.path.join(tmp_dir, config.REALIGNED_COMPLEX_BAM_FILE)
+    msg = "Classifying concordant and discordant read pairs"
+    if (up_to_date(gene_paired_bam_file, realigned_bam_file) and
+        up_to_date(genome_paired_bam_file, realigned_bam_file) and
+        up_to_date(realigned_unmapped_bam_file, realigned_bam_file) and
+        up_to_date(realigned_complex_bam_file, realigned_bam_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        find_discordant_fragments(realigned_bam_file,
+                                  gene_paired_bam_file=gene_paired_bam_file,
+                                  genome_paired_bam_file=genome_paired_bam_file,
+                                  unmapped_bam_file=realigned_unmapped_bam_file,
+                                  complex_bam_file=realigned_complex_bam_file,
+                                  index_dir=runconfig.index_dir,
+                                  max_isize=runconfig.max_fragment_length,
+                                  library_type=runconfig.library_type)
+
+    #
+    # Write a BEDPE file from discordant reads
+    #
+    discordant_bedpe_file = os.path.join(tmp_dir, config.DISCORDANT_BEDPE_FILE)
+    msg = "Converting discordant reads to BEDPE format"
+    if (up_to_date(discordant_bedpe_file, gene_paired_bam_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        discordant_reads_to_bedpe(runconfig.index_dir,
+                                  gene_paired_bam_file,
+                                  discordant_bedpe_file)
+    #
+    # Sort BEDPE file
+    #
+    sorted_discordant_bedpe_file = os.path.join(tmp_dir, config.SORTED_DISCORDANT_BEDPE_FILE)
+    msg = "Sorting BEDPE file"
+    if (up_to_date(sorted_discordant_bedpe_file, discordant_bedpe_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        # sort BEDPE file by paired chromosome/position
+        sort_bedpe(input_file=discordant_bedpe_file,
+                   output_file=sorted_discordant_bedpe_file,
+                   tmp_dir=tmp_dir)
+    #
+    # Nominate chimeric genes
+    #
+    # iterate through pairs of reads and nominate chimeras
+    #
+    encompassing_chimera_file = os.path.join(tmp_dir, config.ENCOMPASSING_CHIMERA_FILE)
+    msg = "Nominating chimeras from discordant reads"
+    if (up_to_date(encompassing_chimera_file, sorted_discordant_bedpe_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        retcode = nominate_chimeras(runconfig.index_dir,
+                                    isize_dist_file=isize_dist_file,
+                                    input_file=sorted_discordant_bedpe_file,
+                                    output_file=encompassing_chimera_file,
+                                    trim_bp=config.EXON_JUNCTION_TRIM_BP,
+                                    max_read_length=max_read_length,
+                                    homology_mismatches=runconfig.homology_mismatches)
+        if retcode != config.JOB_SUCCESS:
+            logging.error("Failed with error code %d" % (retcode))
+            return config.JOB_ERROR
+    #
+    # Filter chimeras with few supporting fragments
+    #
+    filtered_encompassing_chimera_file = os.path.join(tmp_dir, config.FILTERED_ENCOMPASSING_CHIMERA_FILE)
+    msg = "Filtering encompassing chimeras with few supporting reads"
+    if (up_to_date(filtered_encompassing_chimera_file, encompassing_chimera_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        retcode = filter_encompassing_chimeras(encompassing_chimera_file,
+                                               filtered_encompassing_chimera_file,
+                                               runconfig.filter_unique_frags)
+        if retcode != config.JOB_SUCCESS:
+            logging.error("Failed with error code %d" % (retcode))
+            return config.JOB_ERROR
+    #
+    # Make a breakpoint FASTA file and map of breakpoints to chimeras
+    # for use in spanning read alignment
+    #
+    breakpoint_chimera_file = os.path.join(tmp_dir, config.BREAKPOINT_CHIMERA_FILE)
+    breakpoint_map_file = os.path.join(tmp_dir, config.BREAKPOINT_MAP_FILE)
+    breakpoint_fasta_file = os.path.join(tmp_dir, config.BREAKPOINT_FASTA_FILE)
+    msg = "Extracting breakpoint sequences from chimeras"
+    if (up_to_date(breakpoint_chimera_file, filtered_encompassing_chimera_file) and
+        up_to_date(breakpoint_map_file, filtered_encompassing_chimera_file) and
+        up_to_date(breakpoint_fasta_file, filtered_encompassing_chimera_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        chimeras_to_breakpoints(input_file=filtered_encompassing_chimera_file,
+                                breakpoint_sorted_chimera_file=breakpoint_chimera_file,
+                                breakpoint_map_file=breakpoint_map_file,
+                                breakpoint_fasta_file=breakpoint_fasta_file,
+                                tmp_dir=tmp_dir)
+    #
+    # Build a bowtie index to align and detect spanning reads
+    #
+    breakpoint_bowtie_index = os.path.join(tmp_dir, config.BREAKPOINT_BOWTIE_INDEX)
+    breakpoint_bowtie_index_file = os.path.join(tmp_dir, config.BREAKPOINT_BOWTIE_INDEX_FILE)
+    msg = "Building bowtie index of breakpoint spanning sequences"
+    if (up_to_date(breakpoint_bowtie_index_file, breakpoint_fasta_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        args = [bowtie_build_bin, breakpoint_fasta_file,
+                breakpoint_bowtie_index]
+        f = open(os.path.join(log_dir, "breakpoint_bowtie_index.log"), "w")
+        retcode = subprocess.call(args, stdout=f, stderr=f)
+        f.close()
+        if retcode != config.JOB_SUCCESS:
+            logging.error("'bowtie-build' failed to create breakpoint index")
+            if os.path.exists(breakpoint_bowtie_index_file):
+                os.remove(breakpoint_bowtie_index_file)
+            return config.JOB_ERROR
+    #
+    # Extract reads that were encompassing when trimmed but may span
+    # breakpoints when extended.
+    #
+    encomp_spanning_fastq_file = os.path.join(tmp_dir, config.ENCOMP_SPANNING_FASTQ_FILE)
+    msg = "Extracting encompassing reads that may extend past breakpoints"
+    if (up_to_date(encomp_spanning_fastq_file, filtered_encompassing_chimera_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        retcode = nominate_encomp_spanning_reads(filtered_encompassing_chimera_file,
+                                                 encomp_spanning_fastq_file)
+        if retcode != config.JOB_SUCCESS:
+            logging.error("Failed to extract breakpoint-spanning reads")
+            if os.path.exists(encomp_spanning_fastq_file):
+                os.remove(encomp_spanning_fastq_file)
+            return config.JOB_ERROR
+    #
+    # Process unmapped reads to predict spanning reads where one read
+    # maps to a chimera and the other is unmapped
+    #
+    # First, annotate the single-mapped reads in the BAM file
+    #
+    single_mapped_bam_file = os.path.join(tmp_dir, config.SINGLE_MAPPED_BAM_FILE)
+    unaligned_spanning_fastq_file = os.path.join(tmp_dir, config.UNALIGNED_SPANNING_FASTQ_FILE)
+    msg = "Separating unmapped and single-mapping reads that may span breakpoints"
+    if (up_to_date(single_mapped_bam_file, realigned_unmapped_bam_file) and
+        up_to_date(single_mapped_bam_file, filtered_encompassing_chimera_file) and
+        up_to_date(unaligned_spanning_fastq_file, filtered_encompassing_chimera_file, nzsize=False)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        retcode = extract_single_mapped_reads(chimera_file=filtered_encompassing_chimera_file,
+                                              unmapped_bam_file=realigned_unmapped_bam_file,
+                                              single_mapped_bam_file=single_mapped_bam_file,
+                                              unmapped_fastq_file=unaligned_spanning_fastq_file,
+                                              library_type=runconfig.library_type,
+                                              tmp_dir=tmp_dir)
+        if retcode != config.JOB_SUCCESS:
+            logging.error("\tFailed")
+            if os.path.exists(unaligned_spanning_fastq_file):
+                os.remove(unaligned_spanning_fastq_file)
+            if os.path.exists(single_mapped_bam_file):
+                os.remove(single_mapped_bam_file)
+            return config.JOB_ERROR
+    #
+    # Parse the single-mapped reads and choose putative spanning reads
+    #
+    msg = "Extracting single-mapped reads that may span breakpoints"
+    single_mapped_spanning_fastq_file = os.path.join(tmp_dir, config.SINGLEMAP_SPANNING_FASTQ_FILE)
+    if (up_to_date(single_mapped_spanning_fastq_file, filtered_encompassing_chimera_file, nzsize=False)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        retcode = nominate_single_mapped_spanning_reads(chimera_file=filtered_encompassing_chimera_file,
+                                                        single_mapped_bam_file=single_mapped_bam_file,
+                                                        single_mapped_fastq_file=single_mapped_spanning_fastq_file,
+                                                        tmp_dir=tmp_dir)
+        if retcode != config.JOB_SUCCESS:
+            logging.error("Failed to extract unmapped reads")
+            if os.path.exists(single_mapped_spanning_fastq_file):
+                os.remove(single_mapped_spanning_fastq_file)
+            return config.JOB_ERROR
+    #
+    # Re-align encompassing reads that overlap breakpoint junctions
+    #
+    encomp_spanning_bam_file = os.path.join(tmp_dir, config.ENCOMP_SPANNING_BAM_FILE)
+    encomp_spanning_log_file = os.path.join(log_dir, "bowtie_encomp_spanning.log")
+    msg = "Realigning encompassing reads to breakpoints"
+    if (up_to_date(encomp_spanning_bam_file, breakpoint_bowtie_index_file) and
+        up_to_date(encomp_spanning_bam_file, encomp_spanning_fastq_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        retcode = align_sr(encomp_spanning_fastq_file,
+                           bowtie_index=breakpoint_bowtie_index,
+                           output_bam_file=encomp_spanning_bam_file,
+                           unaligned_fastq_param=None,
+                           maxmultimap_fastq_param=None,
+                           trim5=0,
+                           trim3=0,
+                           library_type=runconfig.library_type,
+                           num_processors=runconfig.num_processors,
+                           quals=SANGER_FORMAT,
+                           multihits=runconfig.multihits,
+                           mismatches=runconfig.mismatches,
+                           bowtie_bin=bowtie_bin,
+                           bowtie_args=runconfig.bowtie_args,
+                           log_file=encomp_spanning_log_file,
+                           keep_unmapped=False)
+        if retcode != config.JOB_SUCCESS:
+            logging.error("Bowtie failed with error code %d" % (retcode))
+            return config.JOB_ERROR
+    #
+    # Sort encomp/spanning reads by reference/position
+    #
+    msg = "Sorting/indexing encompassing/spanning alignments"
+    sorted_encomp_spanning_bam_file = os.path.join(tmp_dir, config.SORTED_ENCOMP_SPANNING_BAM_FILE)
+    if (up_to_date(sorted_encomp_spanning_bam_file, encomp_spanning_bam_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        sorted_encomp_spanning_bam_prefix = os.path.splitext(sorted_encomp_spanning_bam_file)[0]
+        pysam.sort("-m", str(int(1e9)), encomp_spanning_bam_file, sorted_encomp_spanning_bam_prefix)
+        pysam.index(sorted_encomp_spanning_bam_file)
+    #
+    # Align single-mapping reads that may overlap breakpoint junctions
+    #
+    singlemap_spanning_bam_file = os.path.join(tmp_dir, config.SINGLEMAP_SPANNING_BAM_FILE)
+    singlemap_spanning_log_file = os.path.join(log_dir, "bowtie_singlemap_spanning.log")
+    msg = "Realigning single-mapping reads to breakpoints"
+    if (up_to_date(singlemap_spanning_bam_file, breakpoint_bowtie_index_file) and
+        up_to_date(singlemap_spanning_bam_file, single_mapped_spanning_fastq_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        retcode= align_sr(single_mapped_spanning_fastq_file,
+                          bowtie_index=breakpoint_bowtie_index,
+                          output_bam_file=singlemap_spanning_bam_file,
+                          unaligned_fastq_param=None,
+                          maxmultimap_fastq_param=None,
+                          trim5=0,
+                          trim3=0,
+                          library_type=runconfig.library_type,
+                          num_processors=runconfig.num_processors,
+                          quals=SANGER_FORMAT,
+                          multihits=runconfig.multihits,
+                          mismatches=runconfig.mismatches,
+                          bowtie_bin=bowtie_bin,
+                          bowtie_args=runconfig.bowtie_args,
+                          log_file=singlemap_spanning_log_file,
+                          keep_unmapped=False)
+        if retcode != config.JOB_SUCCESS:
+            logging.error("Bowtie failed with error code %d" % (retcode))
+            return config.JOB_ERROR
+    #
+    # Sort encomp/spanning reads by reference/position
+    #
+    msg = "Sorting/indexing single-mapping/spanning alignments"
+    sorted_singlemap_spanning_bam_file = os.path.join(tmp_dir, config.SORTED_SINGLEMAP_SPANNING_BAM_FILE)
+    if (up_to_date(sorted_singlemap_spanning_bam_file, singlemap_spanning_bam_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        sorted_singlemap_spanning_bam_prefix = os.path.splitext(sorted_singlemap_spanning_bam_file)[0]
+        pysam.sort("-m", str(int(1e9)), singlemap_spanning_bam_file, sorted_singlemap_spanning_bam_prefix)
+        pysam.index(sorted_singlemap_spanning_bam_file)
+    #
+    # Merge spanning read alignment information
+    #
+    spanning_chimera_file = os.path.join(tmp_dir, config.SPANNING_CHIMERA_FILE)
+    msg = "Merging spanning read information"
+    if (up_to_date(spanning_chimera_file, breakpoint_chimera_file) and
+        up_to_date(spanning_chimera_file, encomp_spanning_bam_file) and
+        up_to_date(spanning_chimera_file, sorted_singlemap_spanning_bam_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        merge_spanning_alignments(breakpoint_chimera_file=breakpoint_chimera_file,
+                                  encomp_bam_file=sorted_encomp_spanning_bam_file,
+                                  singlemap_bam_file=sorted_singlemap_spanning_bam_file,
+                                  output_chimera_file=spanning_chimera_file,
+                                  anchor_min=runconfig.anchor_min,
+                                  anchor_length=runconfig.anchor_length,
+                                  anchor_mismatches=runconfig.anchor_mismatches,
+                                  library_type=runconfig.library_type,
+                                  tmp_dir=tmp_dir)
+    #
+    # Resolve reads mapping to multiple chimeras
+    # TODO: work in progress
+    #
+    resolved_spanning_chimera_file = os.path.join(tmp_dir, config.RESOLVED_SPANNING_CHIMERA_FILE)
+    msg = "Resolving ambiguous read mappings"
+    if (up_to_date(resolved_spanning_chimera_file, spanning_chimera_file)):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        resolve_discordant_reads(input_file=spanning_chimera_file,
+                                 output_file=resolved_spanning_chimera_file,
+                                 isize_dist=isize_dist,
+                                 min_isize_prob=runconfig.filter_isize_prob,
+                                 tmp_dir=tmp_dir)
+    #
+    # Filter chimeras
+    #
+    filtered_chimera_file = os.path.join(tmp_dir, config.FILTERED_CHIMERA_FILE)
+    msg = "Filtering chimeras"
+    if up_to_date(filtered_chimera_file, resolved_spanning_chimera_file):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        # get insert size at prob
+        filter_chimeras(input_file=resolved_spanning_chimera_file,
+                        output_file=filtered_chimera_file,
+                        index_dir=runconfig.index_dir,
+                        bam_file=sorted_aligned_bam_file,
+                        unique_frags=runconfig.filter_unique_frags,
+                        isoform_fraction=runconfig.filter_isoform_fraction,
+                        false_pos_file=runconfig.filter_false_pos_file)
+    #
+    # Filter homologous genes
+    #
+    homolog_filtered_chimera_file = os.path.join(tmp_dir, config.HOMOLOG_FILTERED_CHIMERA_FILE)
+    msg = "Filtering homologous chimeras"
+    if up_to_date(homolog_filtered_chimera_file, filtered_chimera_file):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        min_isize = isize_dist.isize_at_percentile(1.0)
+        max_isize = isize_dist.isize_at_percentile(99.0)
+        filter_homologous_genes(input_file=filtered_chimera_file,
+                                output_file=homolog_filtered_chimera_file,
+                                index_dir=runconfig.index_dir,
+                                homolog_segment_length=runconfig.segment_length-1,
+                                min_isize=min_isize,
+                                max_isize=max_isize,
+                                bowtie_bin=bowtie_bin,
+                                num_processors=runconfig.num_processors,
+                                tmp_dir=tmp_dir)
+    #
+    # Choose best isoform for chimeras that share the same breakpoint
+    #
+    best_isoform_chimera_file = os.path.join(tmp_dir, config.BEST_FILTERED_CHIMERA_FILE)
+    msg = "Choosing best isoform for each chimera"
+    if up_to_date(best_isoform_chimera_file, homolog_filtered_chimera_file):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        retcode = filter_highest_coverage_isoforms(index_dir=runconfig.index_dir,
+                                                   input_file=homolog_filtered_chimera_file,
+                                                   output_file=best_isoform_chimera_file)
+    #
+    # Write user-friendly output file
+    #
+    chimera_output_file = os.path.join(runconfig.output_dir, config.CHIMERA_OUTPUT_FILE)
+    #msg = "Writing chimeras to file %s" % (chimera_output_file)
+    if up_to_date(chimera_output_file, best_isoform_chimera_file):
+        logging.info("[SKIPPED] %s" % (msg))
+    else:
+        logging.info(msg)
+        write_output(best_isoform_chimera_file,
+                     bam_file=sorted_aligned_bam_file,
+                     output_file=chimera_output_file,
+                     index_dir=runconfig.index_dir)
+
+    #
+    # Move output to Galaxy data file
+    #
+    cmd = "mv %s/chimerascan_tmp/chimeras.bedpe %s/%s" % (os.path.dirname(runconfig.output_file_path), os.path.dirname(runconfig.output_file_path), runconfig.output_file_name)
+    p = subprocess.check_output(cmd.split())
+
+    #
+    # Cleanup
+    #
+    if not runconfig.keep_tmp:
+        logging.info("Cleaning up temporary files")
+        shutil.rmtree(tmp_dir)
+    cmd_rm = "rm -r %s/chimerascan_tmp" % os.path.dirname(runconfig.output_file_path)
+    p = subprocess.check_output(cmd_rm.split())
+
+    #
+    # Done
+    #
+    logging.info("Finished run.")
+    return JOB_SUCCESS
+
+
+def main():
+    logging.basicConfig(level=logging.INFO,
+                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+    # parse run parameters in config file and command line
+    runconfig = RunConfig()
+    runconfig.from_args(sys.argv[1:])
+    # run chimerascan
+    sys.exit(run_chimerascan(runconfig))
+
+if __name__ == '__main__':
+    main()
+