Mercurial > repos > lparsons > cutadapt
comparison cutadapt_galaxy_wrapper.py @ 0:8b064ea16722
Initial version with multiple adapter support
author | Lance Parsons <lparsons@princeton.edu> |
---|---|
date | Fri, 13 May 2011 15:54:01 -0400 |
parents | |
children | 7ed26fc9fa8a |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8b064ea16722 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 SYNOPSIS | |
4 | |
5 cutadapt_galaxy_wrapper.py | |
6 -i input_file | |
7 -o output_file | |
8 [-f format (fasta/fastq/etc.)] | |
9 [-a 3' adapter sequence] | |
10 [-b 3' or 5' anywhere adapter sequence] | |
11 [-e error_rate] | |
12 [-n count] | |
13 [-O overlap_length] | |
14 [--discard discard trimmed reads] | |
15 [-m minimum read length] | |
16 [-M maximum read length] | |
17 [-h,--help] [-v,--verbose] [--version] | |
18 | |
19 DESCRIPTION | |
20 | |
21 Wrapper for cutadapt running as a galaxy tool | |
22 | |
23 AUTHOR | |
24 | |
25 Lance Parsons <lparsons@princeton.edu> | |
26 | |
27 LICENSE | |
28 | |
29 This script is in the public domain, free from copyrights or restrictions. | |
30 | |
31 VERSION | |
32 | |
33 $Id$ | |
34 """ | |
35 | |
36 import sys, os, traceback, optparse, shutil, subprocess, tempfile | |
37 import re | |
38 #from pexpect import run, spawn | |
39 | |
def stop_err(msg):
    """Write *msg* to stderr and terminate the script with a failure status.

    Args:
        msg: Human-readable description of the problem.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write('%s\n' % msg)
    # A bare sys.exit() exits with status 0, which would report an error
    # condition as success to the calling process.
    sys.exit(1)
43 | |
def main():
    """Run cutadapt on the input file described by the parsed options.

    Reads the module-level ``options`` object (populated by the option
    parser in the ``__main__`` block), translates it into a cutadapt
    argument list, runs cutadapt on a temporary symlink to the input that
    carries the requested format as its file extension, and writes
    cutadapt's stderr output to stdout.

    Raises:
        Exception: if cutadapt exits with a non-zero return code; the
            message contains cutadapt's stderr output.
    """
    global options

    # Build the command as an argument list (no shell), so adapter sequences
    # and paths containing spaces or shell metacharacters are passed verbatim.
    cmd = ['cutadapt']
    for adapter in options.adapters or []:
        cmd.extend(['-a', str(adapter)])
    for adapter in options.anywhere_adapters or []:
        cmd.extend(['-b', str(adapter)])
    if options.output_file is not None:
        # cutadapt runs with cwd=tmp_dir, so a relative output path would be
        # created inside the temp dir and deleted during cleanup.
        cmd.extend(['-o', os.path.abspath(options.output_file)])
    if options.error_rate is not None:
        cmd.extend(['-e', str(options.error_rate)])
    if options.count is not None:
        cmd.extend(['-n', str(options.count)])
    if options.overlap_length is not None:
        cmd.extend(['-O', str(options.overlap_length)])
    if options.discard_trimmed:
        cmd.append('--discard')
    if options.minimum_length is not None:
        cmd.extend(['-m', str(options.minimum_length)])
    if options.maximum_length is not None:
        cmd.extend(['-M', str(options.maximum_length)])

    # cutadapt relies on the extension to determine file format:
    # .fasta or .fastq
    link_name = '%s.%s' % (os.path.basename(options.input), options.format)
    # Resolve the input up front: a relative symlink target would be
    # interpreted relative to the temp dir and dangle.
    input_path = os.path.abspath(options.input)

    tmp_dir = tempfile.mkdtemp()
    try:
        # Link the input into the temp dir under a name with the extension.
        input_file = os.path.join(tmp_dir, link_name)
        os.symlink(input_path, input_file)
        cmd.append(input_file)

        proc = subprocess.Popen(cmd, cwd=tmp_dir,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                # text mode, so the captured output is a str
                                # on Python 3 as well
                                universal_newlines=True)
        (stdoutdata, stderrdata) = proc.communicate()
        if proc.returncode != 0:
            raise Exception('Execution of cutadapt failed.\n%s' % stderrdata)
        sys.stdout.write('%s\n' % stderrdata)
    finally:
        # Only the temp dir (and the symlink inside it) belongs to this
        # script; nothing next to the real input file is ever created here,
        # so nothing there may be removed.
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
95 | |
if __name__ == '__main__':
    # Script entry point: parse the command line, validate the required
    # options, then hand over to main(). `options` and `args` are
    # module-level names that main() reads.
    try:
        parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), usage=__doc__, version='$Id$')
        parser.add_option('-i', '--input', dest='input', help='The sequence input file')
        parser.add_option('-f', '--format', dest='format', default='fastq',
                          help='The sequence input file format (default: fastq)')
        parser.add_option('-a', '--adapter', action='append', dest='adapters', help='3\' adapter sequence(s)')
        parser.add_option('-b', '--anywhere', action='append', dest='anywhere_adapters', help='5\' or 3\' "anywhere" adapter sequence(s)')
        parser.add_option('-e', '--error-rate', dest='error_rate', help='Maximum allowed error rate')
        parser.add_option('-n', '--times', dest='count', help='Try to remove adapters COUNT times')
        parser.add_option('-O', '--overlap', dest='overlap_length', help='Minimum overlap length')
        parser.add_option('--discard', '--discard-trimmed', dest='discard_trimmed', action='store_true', default=False, help='Discard reads that contain the adapter')
        parser.add_option('-m', '--minimum-length', dest='minimum_length', help='Discard reads that are shorter than LENGTH')
        parser.add_option('-M', '--maximum-length', dest='maximum_length', help='Discard reads that are longer than LENGTH')
        parser.add_option('-o', '--output', dest='output_file', help='The modified sequences are written to the file')
        (options, args) = parser.parse_args()
        if options.input is None:
            stop_err("Missing option --input")
        if options.output_file is None:
            stop_err("Missing option --output")
        if not os.path.exists(options.input):
            stop_err("Unable to read input file: %s" % options.input)
        main()
        sys.exit(0)
    except (KeyboardInterrupt, SystemExit):
        # Ctrl-C and sys.exit() must propagate untouched; a bare `raise`
        # keeps the original traceback.
        raise
    except Exception as e:
        print('ERROR, UNEXPECTED EXCEPTION')
        print(str(e))
        traceback.print_exc()
        # sys.exit (unlike os._exit) flushes stdio buffers on the way out,
        # so the messages printed above are not lost when stdout is a pipe.
        sys.exit(1)
131 |