| 0 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 import sys | 
|  | 4 import optparse | 
|  | 5 | 
def stop_err( msg ):
    """Write *msg* to standard error and terminate the script.

    The message is written verbatim (no newline is appended), so callers
    supply their own line terminator.

    Raises:
        SystemExit: always, with exit status 1 so that shells and calling
            pipelines can detect the failure.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() exits with status 0, which reports success even
    # though an error was just printed; exit non-zero instead.
    sys.exit( 1 )
|  | 9 | 
def main():
    """Trim characters from each input line, or from one tab-separated column.

    Command-line options are read with optparse from sys.argv. Input comes
    from the file named by -f/--file, or from standard input when no file is
    given. Every non-empty input line is written to standard output, trimmed
    or unchanged; empty lines are dropped.

    A line is passed through unchanged when:
      * --fastq is set and the line has an even 0-based index, or
      * its first character is one of the --ignore entries.

    Otherwise the text (the whole line when --column is 0, else the selected
    1-based column) is reduced to the 1-based inclusive range
    [--start, --end]; an --end of 0 means "through the end of the text".

    Raises:
        ValueError: if --start, --end, --column, or an ascii code given with
            -a is not an integer.
        SystemExit: via stop_err() when the requested column does not exist
            in a line, and from optparse on invalid command-line usage.
    """
    usage = """%prog [options]

options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-a','--ascii',
        dest='ascii',
        action='store_true',
        default = False,
        help='Use ascii codes to defined ignored beginnings instead of raw characters')

    parser.add_option(
        '-q','--fastq',
        dest='fastq',
        action='store_true',
        default = False,
        help='The input data in fastq format. It selected the script skips every even line since they contain sequence ids')

    parser.add_option(
        '-i','--ignore',
        dest='ignore',
        help='A comma separated list on ignored beginnings (e.g., ">,@"), or its ascii codes (e.g., "60,42") if option -a is enabled')

    parser.add_option(
        '-s','--start',
        dest='start',
        default = '0',
        help='Trim from beginning to here (1-based)')

    parser.add_option(
        '-e','--end',
        dest='end',
        default = '0',
        help='Trim from here to the ned (1-based)')

    parser.add_option(
        '-f','--file',
        dest='input_txt',
        default = False,
        help='Name of file to be chopped. STDIN is default')

    parser.add_option(
        '-c','--column',
        dest='col',
        default = '0',
        help='Column to chop. If 0 = chop the whole line')

    options, args = parser.parse_args()

    # Convert the numeric options once, before any input is opened, instead
    # of re-parsing them for every line.
    col = int( options.col )
    end = int( options.end )
    # The 1-based start becomes a 0-based slice index. It is clamped at 0
    # because the default start of 0 would otherwise yield index -1, which
    # slices from the last character instead of from the beginning.
    first = max( int( options.start ) - 1, 0 )
    # A slice bound of None means "through the end of the text".
    if end > 0:
        last = end
    else:
        last = None

    # The literal string "None" is treated the same as an omitted option.
    invalid_starts = []
    if options.ignore and options.ignore != "None":
        invalid_starts = options.ignore.split( ',' )
        if options.ascii:
            invalid_starts = [ chr( int( code ) ) for code in invalid_starts ]

    if options.input_txt:
        infile = open( options.input_txt, 'r' )
    else:
        infile = sys.stdin

    try:
        for i, line in enumerate( infile ):
            line = line.rstrip( '\r\n' )
            if not line:
                continue

            # In fastq mode, lines with an even 0-based index are echoed
            # untouched.
            if options.fastq and i % 2 == 0:
                print( line )
                continue

            if line[0] not in invalid_starts:
                if col == 0:
                    line = line[ first : last ]
                else:
                    fields = line.split( '\t' )
                    # col is 1-based, so any value above the field count is
                    # out of range.
                    if col > len( fields ):
                        stop_err('Column %d does not exist. Check input parameters\n' % col)
                    fields[col - 1] = fields[col - 1][ first : last ]
                    line = '\t'.join( fields )
            print( line )
    finally:
        # Close only what was opened here; stdin belongs to the caller.
        if infile is not sys.stdin:
            infile.close()
|  | 104 | 
# Run the trimmer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|  | 106 |