| 
0
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 import sys
 | 
| 
 | 
     4 import optparse
 | 
| 
 | 
     5 
 | 
| 
 | 
     6 def stop_err( msg ):
 | 
| 
 | 
     7     sys.stderr.write( msg )
 | 
| 
 | 
     8     sys.exit()
 | 
| 
 | 
     9 
 | 
| 
 | 
    10 def main():
 | 
| 
 | 
    11     usage = """%prog [options]
 | 
| 
 | 
    12     
 | 
| 
 | 
    13 options (listed below) default to 'None' if omitted
 | 
| 
 | 
    14     """
 | 
| 
 | 
    15     parser = optparse.OptionParser(usage=usage)
 | 
| 
 | 
    16     
 | 
| 
 | 
    17     parser.add_option(
 | 
| 
 | 
    18         '-a','--ascii',
 | 
| 
 | 
    19         dest='ascii',
 | 
| 
 | 
    20         action='store_true',
 | 
| 
 | 
    21         default = False,
 | 
| 
 | 
    22         help='Use ascii codes to defined ignored beginnings instead of raw characters')
 | 
| 
 | 
    23         
 | 
| 
 | 
    24     parser.add_option(
 | 
| 
 | 
    25         '-q','--fastq',
 | 
| 
 | 
    26         dest='fastq',
 | 
| 
 | 
    27         action='store_true',
 | 
| 
 | 
    28         default = False,
 | 
| 
 | 
    29         help='The input data in fastq format. It selected the script skips every even line since they contain sequence ids')
 | 
| 
 | 
    30 
 | 
| 
 | 
    31     parser.add_option(
 | 
| 
 | 
    32         '-i','--ignore',
 | 
| 
 | 
    33         dest='ignore',
 | 
| 
 | 
    34         help='A comma separated list on ignored beginnings (e.g., ">,@"), or its ascii codes (e.g., "60,42") if option -a is enabled')
 | 
| 
 | 
    35 
 | 
| 
 | 
    36     parser.add_option(
 | 
| 
 | 
    37         '-s','--start',
 | 
| 
 | 
    38         dest='start',
 | 
| 
 | 
    39         default = '0',
 | 
| 
 | 
    40         help='Trim from beginning to here (1-based)')
 | 
| 
 | 
    41 
 | 
| 
 | 
    42     parser.add_option(
 | 
| 
 | 
    43         '-e','--end',
 | 
| 
 | 
    44         dest='end',
 | 
| 
 | 
    45         default = '0',
 | 
| 
 | 
    46         help='Trim from here to the ned (1-based)')
 | 
| 
 | 
    47 
 | 
| 
 | 
    48     parser.add_option(
 | 
| 
 | 
    49         '-f','--file',
 | 
| 
 | 
    50         dest='input_txt',
 | 
| 
 | 
    51         default = False,
 | 
| 
 | 
    52         help='Name of file to be chopped. STDIN is default')
 | 
| 
 | 
    53             
 | 
| 
 | 
    54     parser.add_option(
 | 
| 
 | 
    55         '-c','--column',
 | 
| 
 | 
    56         dest='col',
 | 
| 
 | 
    57         default = '0',
 | 
| 
 | 
    58         help='Column to chop. If 0 = chop the whole line')
 | 
| 
 | 
    59        
 | 
| 
 | 
    60 
 | 
| 
 | 
    61     options, args = parser.parse_args()
 | 
| 
 | 
    62     invalid_starts = []
 | 
| 
 | 
    63 
 | 
| 
 | 
    64     if options.input_txt:
 | 
| 
 | 
    65 		infile = open ( options.input_txt, 'r')
 | 
| 
 | 
    66     else:
 | 
| 
 | 
    67     	infile = sys.stdin
 | 
| 
 | 
    68     	
 | 
| 
 | 
    69     if options.ignore and options.ignore != "None":
 | 
| 
 | 
    70         invalid_starts = options.ignore.split(',')
 | 
| 
 | 
    71         
 | 
| 
 | 
    72     if options.ascii and options.ignore and options.ignore != "None":
 | 
| 
 | 
    73         for i, item in enumerate( invalid_starts ):
 | 
| 
 | 
    74             invalid_starts[i] = chr( int( item ) )
 | 
| 
 | 
    75 
 | 
| 
 | 
    76     col = int( options.col )
 | 
| 
 | 
    77  
 | 
| 
 | 
    78     for i, line in enumerate( infile ):
 | 
| 
 | 
    79         line = line.rstrip( '\r\n' )
 | 
| 
 | 
    80         if line:
 | 
| 
 | 
    81             
 | 
| 
 | 
    82             if options.fastq and i % 2 == 0:
 | 
| 
 | 
    83                 print line
 | 
| 
 | 
    84                 continue
 | 
| 
 | 
    85                 
 | 
| 
 | 
    86 
 | 
| 
 | 
    87             if line[0] not in invalid_starts:
 | 
| 
 | 
    88                 if col == 0:
 | 
| 
 | 
    89                     if int( options.end ) > 0:
 | 
| 
 | 
    90                         line = line[ int( options.start )-1 : int( options.end ) ]
 | 
| 
 | 
    91                     else:
 | 
| 
 | 
    92                         line = line[ int( options.start )-1 : ]
 | 
| 
 | 
    93                 else:
 | 
| 
 | 
    94                     fields = line.split( '\t' )
 | 
| 
 | 
    95                     if col-1 > len( fields ):
 | 
| 
 | 
    96                         stop_err('Column %d does not exist. Check input parameters\n' % col)
 | 
| 
 | 
    97                         
 | 
| 
 | 
    98                     if int( options.end ) > 0:
 | 
| 
 | 
    99                         fields[col - 1] = fields[col - 1][ int( options.start )-1 : int( options.end ) ]
 | 
| 
 | 
   100                     else:
 | 
| 
 | 
   101                         fields[col - 1] = fields[col - 1][ int( options.start )-1 : ]
 | 
| 
 | 
   102                     line = '\t'.join(fields)
 | 
| 
 | 
   103             print line   
 | 
| 
 | 
   104 
 | 
| 
 | 
   105 if __name__ == "__main__": main()
 | 
| 
 | 
   106 
 |