annotate trimmer.py @ 2:3983fbf107b6 draft default tip

planemo upload commit 4ec9eb1570ea116d83f5464a786af6e14fb5b57d
author devteam
date Fri, 09 Oct 2015 15:49:26 -0400
parents f862a6e4d096
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f862a6e4d096 Uploaded trimmer tarball.
devteam
parents:
diff changeset
1 #!/usr/bin/env python
f862a6e4d096 Uploaded trimmer tarball.
devteam
parents:
diff changeset
2
f862a6e4d096 Uploaded trimmer tarball.
devteam
parents:
diff changeset
3 import sys
f862a6e4d096 Uploaded trimmer tarball.
devteam
parents:
diff changeset
4 import optparse
f862a6e4d096 Uploaded trimmer tarball.
devteam
parents:
diff changeset
5
f862a6e4d096 Uploaded trimmer tarball.
devteam
parents:
diff changeset
def stop_err(msg):
    """Report a fatal error and terminate the program.

    Writes *msg* verbatim to standard error (no newline is appended, so the
    caller supplies one) and then raises ``SystemExit`` with status 1, so
    that shells and workflow engines see the run as failed.

    :param msg: text to write to standard error.
    :raises SystemExit: always, with exit status 1.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would exit with status 0, i.e. report success.
    sys.exit(1)
f862a6e4d096 Uploaded trimmer tarball.
devteam
parents:
diff changeset
9
f862a6e4d096 Uploaded trimmer tarball.
devteam
parents:
diff changeset
def _trim_text(text, start, end):
    """Return the part of *text* between 1-based positions *start* and *end*.

    :param text: string to cut.
    :param start: 1-based position of the first character to keep; any value
        below 1 keeps the text from its first character.
    :param end: 1-based position of the last character to keep; any value
        below 1 keeps the text through its last character.
    :returns: the selected substring (possibly empty).
    """
    # Clamp so that start == 0 does not become index -1, which would select
    # only the last character of the text.
    begin = max(start - 1, 0)
    if end > 0:
        return text[begin:end]
    return text[begin:]


def main():
    """Trim every line (or one tab-separated column of every line) of the input.

    Options are read from the command line with optparse; see the ``help``
    text of each option below.  Input comes from the file given with ``-f``
    or from standard input, and the result is printed to standard output.

    Per input line, after stripping the trailing line terminator:

    * empty lines are dropped;
    * with ``--fastq``, lines at an even zero-based index are printed
      unchanged;
    * lines whose first character is listed in ``--ignore`` are printed
      unchanged;
    * all other lines are cut to the ``--start``..``--end`` range, either as
      a whole (``--column 0``) or within the selected column only.

    Calls ``stop_err`` (which terminates the program) when the requested
    column does not exist in a line.
    """
    usage = """%prog [options]

options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-a', '--ascii',
        dest='ascii',
        action='store_true',
        default=False,
        help='Use ascii codes to define ignored beginnings instead of raw characters')

    parser.add_option(
        '-q', '--fastq',
        dest='fastq',
        action='store_true',
        default=False,
        help='The input data in fastq format. If selected the script skips every even line since they contain sequence ids')

    parser.add_option(
        '-i', '--ignore',
        dest='ignore',
        help='A comma separated list of ignored beginnings (e.g., ">,@"), or its ascii codes (e.g., "62,64") if option -a is enabled')

    parser.add_option(
        '-s', '--start',
        dest='start',
        type='int',
        default=0,
        help='Trim from beginning to here (1-based)')

    parser.add_option(
        '-e', '--end',
        dest='end',
        type='int',
        default=0,
        help='Trim from here to the end (1-based)')

    parser.add_option(
        '-f', '--file',
        dest='input_txt',
        default=False,
        help='Name of file to be chopped. STDIN is default')

    parser.add_option(
        '-c', '--column',
        dest='col',
        type='int',
        default=0,
        help='Column to chop. If 0 = chop the whole line')

    options, args = parser.parse_args()

    # The literal string "None" is treated the same as an omitted option.
    invalid_starts = []
    if options.ignore and options.ignore != "None":
        invalid_starts = options.ignore.split(',')
        if options.ascii:
            invalid_starts = [chr(int(code)) for code in invalid_starts]

    # Numeric options are converted once by optparse, not once per line.
    start = options.start
    end = options.end
    col = options.col

    if options.input_txt:
        infile = open(options.input_txt, 'r')
    else:
        infile = sys.stdin

    try:
        for i, line in enumerate(infile):
            line = line.rstrip('\r\n')
            if not line:
                continue

            if options.fastq and i % 2 == 0:
                # Parenthesised single-argument print works on Python 2 and 3.
                print(line)
                continue

            if line[0] not in invalid_starts:
                if col == 0:
                    line = _trim_text(line, start, end)
                else:
                    fields = line.split('\t')
                    # col is 1-based, so the valid range is 1..len(fields).
                    if not 1 <= col <= len(fields):
                        stop_err('Column %d does not exist. Check input parameters\n' % col)
                    fields[col - 1] = _trim_text(fields[col - 1], start, end)
                    line = '\t'.join(fields)
            print(line)
    finally:
        # Close only what this function opened; leave standard input alone.
        if infile is not sys.stdin:
            infile.close()
f862a6e4d096 Uploaded trimmer tarball.
devteam
parents:
diff changeset
104
f862a6e4d096 Uploaded trimmer tarball.
devteam
parents:
diff changeset
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
f862a6e4d096 Uploaded trimmer tarball.
devteam
parents:
diff changeset
106