Mercurial > repos > lparsons > cutadapt
annotate cutadapt_galaxy_wrapper.py @ 4:0a872e59164c
Added discard_stderr_wrapper.sh script to catch report and redirect to stdout
author | Lance Parsons <lparsons@princeton.edu> |
---|---|
date | Wed, 25 May 2011 19:33:40 -0400 |
parents | 7ed26fc9fa8a |
children |
rev | line source |
---|---|
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
2 """ |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
3 SYNOPSIS |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
4 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
5 cutadapt_galaxy_wrapper.py |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
6 -i input_file |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
7 -o output_file |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
8 [-f format (fasta/fastq/etc.)] |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
9 [-a 3' adapter sequence] |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
10 [-b 3' or 5' anywhere adapter sequence] |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
11 [-e error_rate] |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
12 [-n count] |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
13 [-O overlap_length] |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
14 [--discard discard trimmed reads] |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
15 [-m minimum read length] |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
16 [-M maximum read length] |
3
7ed26fc9fa8a
Updated for cutadapt 0.9.4, no longer need python wrapper
Lance Parsons <lparsons@princeton.edu>
parents:
0
diff
changeset
|
17 [-q quality cutoff] |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
18 [-h,--help] [-v,--verbose] [--version] |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
19 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
20 DESCRIPTION |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
21 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
22 Wrapper for cutadapt running as a galaxy tool |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
23 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
24 AUTHOR |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
25 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
26 Lance Parsons <lparsons@princeton.edu> |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
27 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
28 LICENSE |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
29 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
30 This script is in the public domain, free from copyrights or restrictions. |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
31 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
32 VERSION |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
33 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
34 $Id$ |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
35 """ |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
36 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
37 import sys, os, traceback, optparse, shutil, subprocess, tempfile |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
38 import re |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
39 #from pexpect import run, spawn |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
40 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
def stop_err(msg):
    """Write ``msg`` to stderr and terminate the script with a failure status.

    msg -- text to report; a trailing newline is appended.

    Raises SystemExit with exit code 1, so that callers checking the exit
    status (and not only stderr) see the run as failed.
    """
    sys.stderr.write('%s\n' % msg)
    # A bare sys.exit() would exit with status 0, i.e. report success.
    sys.exit(1)
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
44 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
def _build_cutadapt_args(opts):
    """Translate the parsed wrapper options into a cutadapt argument list.

    opts -- object exposing the attributes set by the option parser
            (adapters, anywhere_adapters, output_file, error_rate, count,
            overlap_length, discard_trimmed, minimum_length,
            maximum_length, cutoff).

    Returns a list of strings, one element per argument, without the
    program name and without the input file.
    """
    argv = []
    # -a / -b may be given several times; each value gets its own flag.
    for adapter in opts.adapters or []:
        argv.extend(['-a', adapter])
    for adapter in opts.anywhere_adapters or []:
        argv.extend(['-b', adapter])
    if opts.output_file is not None:
        # cutadapt is started inside a temporary directory, so a relative
        # output path must be anchored to the caller's working directory.
        argv.extend(['-o', os.path.abspath(opts.output_file)])
    for flag, value in (('-e', opts.error_rate),
                        ('-n', opts.count),
                        ('-O', opts.overlap_length)):
        if value is not None:
            argv.extend([flag, str(value)])
    if opts.discard_trimmed:
        argv.append('--discard')
    for flag, value in (('-m', opts.minimum_length),
                        ('-M', opts.maximum_length),
                        ('-q', opts.cutoff)):
        if value is not None:
            argv.extend([flag, str(value)])
    return argv


def main():
    """Run cutadapt on the input file described by the module-level ``options``.

    The input is exposed to cutadapt through a symlink named
    ``<input basename>.<format>`` inside a fresh temporary directory,
    because cutadapt relies on the extension to determine the file format
    (.fasta or .fastq).  What cutadapt writes to stderr is echoed to stdout
    on success.

    Raises Exception (with cutadapt's stderr in the message) when cutadapt
    exits with a non-zero status.  The temporary directory is removed in
    every case.
    """
    global options, args

    # cutadapt relies on the extension to determine file format: .fasta or .fastq
    link_name = '.'.join((os.path.basename(options.input), options.format))
    # make temp directory
    tmp_dir = tempfile.mkdtemp()

    try:
        # make a link to the input file in the tmp_dir; the target is made
        # absolute so the link is valid regardless of where it lives
        input_file = os.path.join(tmp_dir, link_name)
        os.symlink(os.path.abspath(options.input), input_file)

        # Run without a shell: every value is passed as its own argument,
        # so spaces or shell metacharacters in paths and adapter strings
        # cannot alter the command.
        cmd = ['cutadapt'] + _build_cutadapt_args(options) + [input_file]
        proc = subprocess.Popen(cmd, cwd=tmp_dir,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
        (stdoutdata, stderrdata) = proc.communicate()
        if proc.returncode != 0:
            raise Exception('Execution of cutadapt failed.\n%s' % stderrdata)
        sys.stdout.write('%s\n' % stderrdata)
    finally:
        # clean up temp dir; this also removes the symlink.  Nothing
        # outside tmp_dir is deleted (in particular not a file that merely
        # happens to be called "<input>.<format>" next to the input).
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
99 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
# Command-line entry point: parse the options, validate the required ones,
# then hand over to main().  Exit status is 0 on success and non-zero on an
# unexpected exception.
if __name__ == '__main__':
    try:
        parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), usage=globals()['__doc__'], version='$Id$')
        parser.add_option('-i', '--input', dest='input', help='The sequence input file')
        parser.add_option('-f', '--format', dest='format', default='fastq',
                          help='The sequence input file format (default: fastq)')
        parser.add_option('-a', '--adapter', action='append', dest='adapters', help='3\' adapter sequence(s)')
        parser.add_option('-b', '--anywhere', action='append', dest='anywhere_adapters', help='5\' or 3\' "anywhere" adapter sequence(s)')
        parser.add_option('-e', '--error-rate', dest='error_rate', help='Maximum allowed error rate')
        parser.add_option('-n', '--times', dest='count', help='Try to remove adapters COUNT times')
        parser.add_option('-O', '--overlap', dest='overlap_length', help='Minimum overlap length')
        parser.add_option('--discard', '--discard-trimmed', dest='discard_trimmed', action='store_true', default=False, help='Discard reads that contain the adapter')
        parser.add_option('-m', '--minimum-length', dest='minimum_length', help='Discard reads that are shorter than LENGTH')
        parser.add_option('-M', '--maximum-length', dest='maximum_length', help='Discard reads that are longer than LENGTH')
        # The help text must stay on one logical line: a single-quoted
        # literal cannot contain a raw line break.
        parser.add_option('-q', '--quality-cutoff', dest='cutoff',
                          help='Trim low quality ends from reads before adapter removal')
        parser.add_option('-o', '--output', dest='output_file', help='The modified sequences are written to the file')
        (options, args) = parser.parse_args()
        if options.input is None:
            stop_err("Missing option --input")
        if options.output_file is None:
            stop_err("Missing option --output")
        if not os.path.exists(options.input):
            stop_err("Unable to read input file: %s" % options.input)
        main()
        sys.exit(0)
    except (KeyboardInterrupt, SystemExit):
        # Ctrl-C and sys.exit() pass through untouched.
        raise
    except Exception as e:
        sys.stdout.write('ERROR, UNEXPECTED EXCEPTION\n')
        sys.stdout.write('%s\n' % str(e))
        traceback.print_exc()
        # sys.exit (unlike os._exit) flushes the buffered output written
        # above before the interpreter terminates.
        sys.exit(1)
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
137 |