Mercurial > repos > lparsons > cutadapt
diff cutadapt_galaxy_wrapper.py @ 0:8b064ea16722
Initial version with multiple adapter support
author | Lance Parsons <lparsons@princeton.edu> |
---|---|
date | Fri, 13 May 2011 15:54:01 -0400 |
parents | |
children | 7ed26fc9fa8a |
line wrap: on
line diff
#!/usr/bin/env python
"""
SYNOPSIS

    cutadapt_galaxy_wrapper.py
        -i input_file
        -o output_file
        [-f format (fasta/fastq/etc.)]
        [-a 3' adapter sequence]
        [-b 3' or 5' anywhere adapter sequence]
        [-e error_rate]
        [-n count]
        [-O overlap_length]
        [--discard discard trimmed reads]
        [-m minimum read length]
        [-M maximum read length]
        [-h,--help] [--version]

DESCRIPTION

    Wrapper for cutadapt running as a galaxy tool

AUTHOR

    Lance Parsons <lparsons@princeton.edu>

LICENSE

    This script is in the public domain, free from copyrights or restrictions.

VERSION

    $Id$
"""

import sys
import os
import traceback
import optparse
import shutil
import subprocess
import tempfile
import re


def stop_err(msg):
    """Write *msg* to stderr and terminate with a non-zero exit status."""
    sys.stderr.write('%s\n' % msg)
    # A bare sys.exit() would report success (status 0) to the caller.
    sys.exit(1)


def _build_command(opts, input_file):
    """Return the cutadapt argument vector for the parsed options.

    opts       -- optparse values object produced by the parser below
    input_file -- path of the sequence file cutadapt should read

    Every option value is passed as its own list element, so the command
    can be executed without a shell and values containing spaces or shell
    metacharacters reach cutadapt unchanged.
    """
    cmd = ['cutadapt']
    for adapter in opts.adapters or []:
        cmd.extend(['-a', adapter])
    for adapter in opts.anywhere_adapters or []:
        cmd.extend(['-b', adapter])
    if opts.output_file is not None:
        # cutadapt is started with the temp dir as its working directory,
        # so a relative output path must be anchored to our own cwd first.
        cmd.extend(['-o', os.path.abspath(opts.output_file)])
    if opts.error_rate is not None:
        cmd.extend(['-e', str(opts.error_rate)])
    if opts.count is not None:
        cmd.extend(['-n', str(opts.count)])
    if opts.overlap_length is not None:
        cmd.extend(['-O', str(opts.overlap_length)])
    if opts.discard_trimmed:
        cmd.append('--discard')
    if opts.minimum_length is not None:
        cmd.extend(['-m', str(opts.minimum_length)])
    if opts.maximum_length is not None:
        cmd.extend(['-M', str(opts.maximum_length)])
    cmd.append(input_file)
    return cmd


def main():
    """Run cutadapt on the input named by the module-level ``options``.

    The input is exposed to cutadapt through a symlink, created in a
    private temporary directory, whose name carries the requested format
    as its extension. On success cutadapt's stderr text is echoed to
    stdout.

    Raises Exception if cutadapt cannot be started or exits non-zero.
    """
    # cutadapt relies on the extension to determine file format:
    # .fasta or .fastq
    link_name = os.path.basename('%s.%s' % (options.input, options.format))
    tmp_dir = tempfile.mkdtemp()

    try:
        # The link target must be absolute: a relative target would be
        # resolved against tmp_dir and leave a dangling link.
        input_file = os.path.join(tmp_dir, link_name)
        os.symlink(os.path.abspath(options.input), input_file)

        cmd = _build_command(options, input_file)
        try:
            proc = subprocess.Popen(args=cmd, cwd=tmp_dir,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError as err:
            raise Exception('Execution of cutadapt failed.\n%s' % err)
        (stdoutdata, stderrdata) = proc.communicate()
        if proc.returncode != 0:
            raise Exception('Execution of cutadapt failed.\n%s' % stderrdata)
        sys.stdout.write('%s\n' % stderrdata)

    finally:
        # Only the temp dir (and the symlink inside it) belongs to us;
        # nothing next to the real input file is ever removed.
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)


if __name__ == '__main__':
    try:
        parser = optparse.OptionParser(
            formatter=optparse.TitledHelpFormatter(),
            usage=globals()['__doc__'], version='$Id$')
        parser.add_option('-i', '--input', dest='input',
                          help='The sequence input file')
        parser.add_option('-f', '--format', dest='format', default='fastq',
                          help='The sequence input file format '
                               '(default: fastq)')
        parser.add_option('-a', '--adapter', action='append',
                          dest='adapters', help='3\' adapter sequence(s)')
        parser.add_option('-b', '--anywhere', action='append',
                          dest='anywhere_adapters',
                          help='5\' or 3\' "anywhere" adapter sequence(s)')
        parser.add_option('-e', '--error-rate', dest='error_rate',
                          help='Maximum allowed error rate')
        parser.add_option('-n', '--times', dest='count',
                          help='Try to remove adapters COUNT times')
        parser.add_option('-O', '--overlap', dest='overlap_length',
                          help='Minimum overlap length')
        parser.add_option('--discard', '--discard-trimmed',
                          dest='discard_trimmed', action='store_true',
                          default=False,
                          help='Discard reads that contain the adapter')
        parser.add_option('-m', '--minimum-length', dest='minimum_length',
                          help='Discard reads that are shorter than LENGTH')
        parser.add_option('-M', '--maximum-length', dest='maximum_length',
                          help='Discard reads that are longer than LENGTH')
        parser.add_option('-o', '--output', dest='output_file',
                          help='The modified sequences are written to the '
                               'file')
        (options, args) = parser.parse_args()
        if options.input is None:
            stop_err("Missing option --input")
        if options.output_file is None:
            stop_err("Missing option --output")
        if not os.path.exists(options.input):
            stop_err("Unable to read input file: %s" % options.input)
        main()
        sys.exit(0)
    except (KeyboardInterrupt, SystemExit):
        # Ctrl-C and sys.exit() must propagate untouched.
        raise
    except Exception as e:
        sys.stdout.write('ERROR, UNEXPECTED EXCEPTION\n')
        sys.stdout.write('%s\n' % str(e))
        traceback.print_exc()
        # sys.exit (unlike os._exit) flushes the buffered messages above.
        sys.exit(1)