Mercurial > repos > devteam > trimmer
comparison trimmer.py @ 0:f862a6e4d096
Uploaded trimmer tarball.
| author | devteam |
|---|---|
| date | Tue, 04 Dec 2012 11:10:05 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:f862a6e4d096 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import sys | |
| 4 import optparse | |
| 5 | |
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes *msg* verbatim to standard error (no newline is appended, so the
    caller supplies one) and raises ``SystemExit``.

    The exit status is 1: a bare ``sys.exit()`` would exit with status 0,
    which tells the calling shell or workflow engine that the run succeeded
    even though an error was just reported.
    """
    sys.stderr.write(msg)
    sys.exit(1)
| 9 | |
def _trim(text, start, end):
    """Return the part of *text* between 1-based positions start and end.

    ``start`` values below 1 mean "from the first character"; without the
    clamp a start of 0 would become index -1 and keep only the last
    character.  ``end`` values of 0 or less mean "through the end".
    """
    first = max(start - 1, 0)
    if end > 0:
        return text[first:end]
    return text[first:]


def main():
    """Trim every input line (or one tab-separated column of it).

    Command-line options:
      -f/--file    input file; standard input is read when omitted
      -s/--start   1-based first position to keep (default 0 = from start)
      -e/--end     1-based last position to keep (default 0 = to the end)
      -c/--column  1-based column to trim; 0 trims the whole line
      -i/--ignore  comma separated first characters marking lines that are
                   echoed untouched; the literal value "None" disables it
      -a/--ascii   interpret the -i values as decimal character codes
      -q/--fastq   echo rows with an even zero-based index unchanged

    Empty lines are dropped from the output.  When the requested column is
    missing from a line the script aborts through ``stop_err``.
    """
    usage = """%prog [options]

options (listed below) default to 'None' if omitted
"""
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-a', '--ascii',
        dest='ascii',
        action='store_true',
        default=False,
        help='Use ascii codes to defined ignored beginnings instead of raw characters')

    parser.add_option(
        '-q', '--fastq',
        dest='fastq',
        action='store_true',
        default=False,
        help='The input data in fastq format. It selected the script skips every even line since they contain sequence ids')

    # The example codes really are the codes of ">" (62) and "@" (64).
    parser.add_option(
        '-i', '--ignore',
        dest='ignore',
        help='A comma separated list on ignored beginnings (e.g., ">,@"), or its ascii codes (e.g., "62,64") if option -a is enabled')

    parser.add_option(
        '-s', '--start',
        dest='start',
        default='0',
        help='Trim from beginning to here (1-based)')

    parser.add_option(
        '-e', '--end',
        dest='end',
        default='0',
        help='Trim from here to the ned (1-based)')

    parser.add_option(
        '-f', '--file',
        dest='input_txt',
        default=False,
        help='Name of file to be chopped. STDIN is default')

    parser.add_option(
        '-c', '--column',
        dest='col',
        default='0',
        help='Column to chop. If 0 = chop the whole line')

    options, args = parser.parse_args()

    # Resolve every option once, before any input is opened or read.
    invalid_starts = []
    if options.ignore and options.ignore != "None":
        invalid_starts = options.ignore.split(',')
        if options.ascii:
            invalid_starts = [chr(int(code)) for code in invalid_starts]

    col = int(options.col)
    start = int(options.start)
    end = int(options.end)

    if options.input_txt:
        infile = open(options.input_txt, 'r')
    else:
        infile = sys.stdin

    try:
        for i, line in enumerate(infile):
            line = line.rstrip('\r\n')
            if not line:
                continue

            # In fastq mode rows 0, 2, 4, ... are passed through untouched.
            if options.fastq and i % 2 == 0:
                sys.stdout.write(line + '\n')
                continue

            if line[0] not in invalid_starts:
                if col == 0:
                    line = _trim(line, start, end)
                else:
                    fields = line.split('\t')
                    # col is 1-based, so it must not exceed the field count.
                    if col > len(fields):
                        stop_err('Column %d does not exist. Check input parameters\n' % col)
                    fields[col - 1] = _trim(fields[col - 1], start, end)
                    line = '\t'.join(fields)
            sys.stdout.write(line + '\n')
    finally:
        # Only close what this function opened; stdin belongs to the caller.
        if infile is not sys.stdin:
            infile.close()
| 104 | |
# Run the trimmer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
| 106 |
