| 
0
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 """
 | 
| 
 | 
     4 Condenses pileup format into ranges of bases.
 | 
| 
 | 
     5 
 | 
| 
 | 
     6 usage: %prog [options]
 | 
| 
 | 
     7    -i, --input=i: Input pileup file
 | 
| 
 | 
     8    -o, --output=o: Output pileup
 | 
| 
 | 
     9    -c, --coverage=c: Coverage
 | 
| 
 | 
    10    -f, --format=f: Pileup format
 | 
| 
 | 
    11    -b, --base=b: Base to select
 | 
| 
 | 
    12    -s, --seq_column=s: Sequence column
 | 
| 
 | 
    13    -l, --loc_column=l: Base location column
 | 
| 
 | 
    14    -r, --base_column=r: Reference base column
 | 
| 
 | 
    15    -C, --cvrg_column=C: Coverage column
 | 
| 
 | 
    16 """
 | 
| 
 | 
    17 
 | 
| 
 | 
    18 from galaxy import eggs
 | 
| 
 | 
    19 import pkg_resources; pkg_resources.require( "bx-python" )
 | 
| 
 | 
    20 from bx.cookbook import doc_optparse
 | 
| 
 | 
    21 import sys
 | 
| 
 | 
    22 
 | 
| 
 | 
    23 def stop_err( msg ):
 | 
| 
 | 
    24     sys.stderr.write( msg )
 | 
| 
 | 
    25     sys.exit()
 | 
| 
 | 
    26 
 | 
| 
 | 
    27 def __main__():
 | 
| 
 | 
    28     strout = ''
 | 
| 
 | 
    29     #Parse Command Line
 | 
| 
 | 
    30     options, args = doc_optparse.parse( __doc__ )
 | 
| 
 | 
    31     coverage = int(options.coverage)
 | 
| 
 | 
    32     fin = file(options.input, 'r')
 | 
| 
 | 
    33     fout = file(options.output, 'w')
 | 
| 
 | 
    34     inLine = fin.readline()
 | 
| 
 | 
    35     if options.format == 'six':
 | 
| 
 | 
    36         seqIndex = 0
 | 
| 
 | 
    37         locIndex = 1
 | 
| 
 | 
    38         baseIndex = 2
 | 
| 
 | 
    39         covIndex = 3
 | 
| 
 | 
    40     elif options.format == 'ten':
 | 
| 
 | 
    41         seqIndex = 0
 | 
| 
 | 
    42         locIndex = 1
 | 
| 
 | 
    43         if options.base == 'first':
 | 
| 
 | 
    44             baseIndex = 2
 | 
| 
 | 
    45         else:
 | 
| 
 | 
    46             baseIndex = 3
 | 
| 
 | 
    47         covIndex = 7
 | 
| 
 | 
    48     else:
 | 
| 
 | 
    49         seqIndex = int(options.seq_column) - 1
 | 
| 
 | 
    50         locIndex = int(options.loc_column) - 1
 | 
| 
 | 
    51         baseIndex = int(options.base_column) - 1
 | 
| 
 | 
    52         covIndex = int(options.cvrg_column) - 1
 | 
| 
 | 
    53     lastSeq = ''
 | 
| 
 | 
    54     lastLoc = -1
 | 
| 
 | 
    55     locs = []
 | 
| 
 | 
    56     startLoc = -1
 | 
| 
 | 
    57     bases = []
 | 
| 
 | 
    58     while inLine.strip() != '':
 | 
| 
 | 
    59         lineParts = inLine.split('\t')
 | 
| 
 | 
    60         try:
 | 
| 
 | 
    61             seq, loc, base, cov = lineParts[seqIndex], int(lineParts[locIndex]), lineParts[baseIndex], int(lineParts[covIndex])
 | 
| 
 | 
    62         except IndexError, ei:
 | 
| 
 | 
    63             if options.format == 'ten':
 | 
| 
 | 
    64                 stop_err( 'It appears that you have selected 10 columns while your file has 6. Make sure that the number of columns you specify matches the number in your file.\n' + str( ei ) )
 | 
| 
 | 
    65             else:
 | 
| 
 | 
    66                 stop_err( 'There appears to be something wrong with your column index values.\n' + str( ei ) )
 | 
| 
 | 
    67         except ValueError, ev:
 | 
| 
 | 
    68             if options.format == 'six':
 | 
| 
 | 
    69                 stop_err( 'It appears that you have selected 6 columns while your file has 10. Make sure that the number of columns you specify matches the number in your file.\n' + str( ev ) )
 | 
| 
 | 
    70             else:
 | 
| 
 | 
    71                 stop_err( 'There appears to be something wrong with your column index values.\n' + str( ev ) )
 | 
| 
 | 
    72 #        strout += str(startLoc) + '\n'
 | 
| 
 | 
    73 #        strout += str(bases) + '\n'
 | 
| 
 | 
    74 #        strout += '%s\t%s\t%s\t%s\n' % (seq, loc, base, cov)
 | 
| 
 | 
    75         if loc == lastLoc+1 or lastLoc == -1:
 | 
| 
 | 
    76             if cov >= coverage:
 | 
| 
 | 
    77                 if seq == lastSeq or lastSeq == '':
 | 
| 
 | 
    78                     if startLoc == -1:
 | 
| 
 | 
    79                         startLoc = loc
 | 
| 
 | 
    80                     locs.append(loc)
 | 
| 
 | 
    81                     bases.append(base)
 | 
| 
 | 
    82                 else:
 | 
| 
 | 
    83                     if len(bases) > 0:
 | 
| 
 | 
    84                         fout.write('%s\t%s\t%s\t%s\n' % (lastSeq, startLoc-1, lastLoc, ''.join(bases)))
 | 
| 
 | 
    85                     startLoc = loc
 | 
| 
 | 
    86                     locs = [loc]
 | 
| 
 | 
    87                     bases = [base]
 | 
| 
 | 
    88             else:
 | 
| 
 | 
    89                 if len(bases) > 0:
 | 
| 
 | 
    90                     fout.write('%s\t%s\t%s\t%s\n' % (lastSeq, startLoc-1, lastLoc, ''.join(bases)))
 | 
| 
 | 
    91                 startLoc = -1
 | 
| 
 | 
    92                 locs = []
 | 
| 
 | 
    93                 bases = []
 | 
| 
 | 
    94         else:
 | 
| 
 | 
    95             if len(bases) > 0:
 | 
| 
 | 
    96                 fout.write('%s\t%s\t%s\t%s\n' % (lastSeq, startLoc-1, lastLoc, ''.join(bases)))
 | 
| 
 | 
    97             if cov >= coverage:
 | 
| 
 | 
    98                 startLoc = loc
 | 
| 
 | 
    99                 locs = [loc]
 | 
| 
 | 
   100                 bases = [base]
 | 
| 
 | 
   101             else:
 | 
| 
 | 
   102                 startLoc = -1
 | 
| 
 | 
   103                 locs = []
 | 
| 
 | 
   104                 bases = []
 | 
| 
 | 
   105         lastSeq = seq
 | 
| 
 | 
   106         lastLoc = loc
 | 
| 
 | 
   107         inLine = fin.readline()
 | 
| 
 | 
   108     if len(bases) > 0:
 | 
| 
 | 
   109         fout.write('%s\t%s\t%s\t%s\n' % (lastSeq, startLoc-1, lastLoc, ''.join(bases)))
 | 
| 
 | 
   110     fout.close()
 | 
| 
 | 
   111     fin.close()
 | 
| 
 | 
   112     
 | 
| 
 | 
   113 #    import sys
 | 
| 
 | 
   114 #    strout += file(fout.name,'r').read()
 | 
| 
 | 
   115 #    sys.stderr.write(strout)
 | 
| 
 | 
   116 
 | 
| 
 | 
   117 if __name__ == "__main__" : __main__()
 |