Mercurial > repos > xuebing > sharplabtool
comparison tools/samtools/pileup_interval.py @ 0:9071e359b9a3
Uploaded
| author | xuebing |
|---|---|
| date | Fri, 09 Mar 2012 19:37:19 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:9071e359b9a3 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 """ | |
| 4 Condenses pileup format into ranges of bases. | |
| 5 | |
| 6 usage: %prog [options] | |
| 7 -i, --input=i: Input pileup file | |
| 8 -o, --output=o: Output pileup | |
| 9 -c, --coverage=c: Coverage | |
| 10 -f, --format=f: Pileup format | |
| 11 -b, --base=b: Base to select | |
| 12 -s, --seq_column=s: Sequence column | |
| 13 -l, --loc_column=l: Base location column | |
| 14 -r, --base_column=r: Reference base column | |
| 15 -C, --cvrg_column=C: Coverage column | |
| 16 """ | |
| 17 | |
| 18 from galaxy import eggs | |
| 19 import pkg_resources; pkg_resources.require( "bx-python" ) | |
| 20 from bx.cookbook import doc_optparse | |
| 21 import sys | |
| 22 | |
def stop_err(msg):
    """Write *msg* to standard error and terminate the process as failed.

    Args:
        msg: Text written verbatim to ``sys.stderr`` (no newline is added).

    Raises:
        SystemExit: Always, with exit status 1.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report status 0 (success) despite the error.
    sys.exit(1)
| 26 | |
def _column_indices(options):
    """Return zero-based (sequence, location, base, coverage) column indices.

    The 'six' and 'ten' formats use fixed column layouts; any other format
    value takes the one-based column numbers supplied in the options.
    """
    if options.format == 'six':
        return 0, 1, 2, 3
    if options.format == 'ten':
        # options.base == 'first' selects column index 2, anything else 3.
        base_index = 2 if options.base == 'first' else 3
        return 0, 1, base_index, 7
    return (int(options.seq_column) - 1,
            int(options.loc_column) - 1,
            int(options.base_column) - 1,
            int(options.cvrg_column) - 1)


def _write_interval(fout, seq, start_loc, end_loc, bases):
    """Write one condensed run as 'seq<TAB>start-1<TAB>end<TAB>bases'."""
    # NOTE(review): start is emitted as start_loc - 1, presumably converting a
    # 1-based position into a 0-based half-open interval -- confirm.
    fout.write('%s\t%s\t%s\t%s\n' % (seq, start_loc - 1, end_loc, ''.join(bases)))


def __main__():
    """Condense a pileup file into runs of consecutive, sufficiently covered bases.

    Options are parsed from the module docstring via ``doc_optparse``. The
    input is read line by line until the first blank line or end of file.
    Consecutive locations on the same sequence whose coverage is at least
    ``--coverage`` are merged into one run, and each run is written to the
    output file as a tab-separated line (sequence, start - 1, end, bases).

    Malformed rows (missing columns or non-integer location/coverage) are
    reported through ``stop_err``, which terminates the program.
    """
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    coverage = int(options.coverage)
    seq_index, loc_index, base_index, cov_index = _column_indices(options)

    last_seq = ''
    last_loc = -1
    start_loc = -1
    bases = []

    # Context managers close both files even when stop_err() exits mid-loop.
    with open(options.input, 'r') as fin:
        with open(options.output, 'w') as fout:
            for line in fin:
                if line.strip() == '':
                    # The first blank line ends processing.
                    break
                # Drop the line terminator so the last column never carries it
                # into the output when it is the selected base column.
                parts = line.rstrip('\r\n').split('\t')
                try:
                    seq = parts[seq_index]
                    loc = int(parts[loc_index])
                    base = parts[base_index]
                    cov = int(parts[cov_index])
                except IndexError as ei:
                    if options.format == 'ten':
                        stop_err('It appears that you have selected 10 columns while your file has 6. Make sure that the number of columns you specify matches the number in your file.\n' + str(ei))
                    else:
                        stop_err('There appears to be something wrong with your column index values.\n' + str(ei))
                except ValueError as ev:
                    if options.format == 'six':
                        stop_err('It appears that you have selected 6 columns while your file has 10. Make sure that the number of columns you specify matches the number in your file.\n' + str(ev))
                    else:
                        stop_err('There appears to be something wrong with your column index values.\n' + str(ev))

                adjacent = loc == last_loc + 1 or last_loc == -1
                same_seq = seq == last_seq or last_seq == ''
                if adjacent and same_seq and cov >= coverage:
                    # Extend the current run, or open one if none is active.
                    if start_loc == -1:
                        start_loc = loc
                    bases.append(base)
                else:
                    # The run is broken by a gap, a new sequence or low
                    # coverage: flush what was collected so far.
                    if bases:
                        _write_interval(fout, last_seq, start_loc, last_loc, bases)
                    if cov >= coverage:
                        start_loc = loc
                        bases = [base]
                    else:
                        start_loc = -1
                        bases = []
                last_seq = seq
                last_loc = loc

            # Flush the run still open at end of input.
            if bases:
                _write_interval(fout, last_seq, start_loc, last_loc, bases)
| 116 | |
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()
