0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import os, sys
|
|
4
|
|
5 assert sys.version_info[:2] >= ( 2, 4 )
|
|
6
|
|
7 def reverse_complement(s):
|
|
8 complement_dna = {"A":"T", "T":"A", "C":"G", "G":"C", "a":"t", "t":"a", "c":"g", "g":"c", "N":"N", "n":"n" , ".":"."}
|
|
9 reversed_s = []
|
|
10 for i in s:
|
|
11 reversed_s.append(complement_dna[i])
|
|
12 reversed_s.reverse()
|
|
13 return "".join(reversed_s)
|
|
14
|
|
15 def __main__():
|
|
16 nuc_index = {'a':0,'t':1,'c':2,'g':3,'n':4}
|
|
17 coverage = {} # key = (chrom, index)
|
|
18 invalid_lines = 0
|
|
19 invalid_chrom = 0
|
|
20 infile = sys.argv[1]
|
|
21 outfile = sys.argv[2]
|
|
22
|
|
23 for i, line in enumerate( open( infile ) ):
|
|
24 line = line.rstrip('\r\n')
|
|
25 if not line or line.startswith('#'):
|
|
26 continue
|
|
27 fields = line.split()
|
|
28 if len(fields) < 21: # standard number of pslx columns
|
|
29 invalid_lines += 1
|
|
30 continue
|
|
31 if not fields[0].isdigit():
|
|
32 invalid_lines += 1
|
|
33 continue
|
|
34 chrom = fields[13]
|
|
35 if not chrom.startswith( 'chr' ):
|
|
36 invalid_lines += 1
|
|
37 invalid_chrom += 1
|
|
38 continue
|
|
39 try:
|
|
40 block_count = int(fields[17])
|
|
41 except:
|
|
42 invalid_lines += 1
|
|
43 continue
|
|
44 block_size = fields[18].split(',')
|
|
45 chrom_start = fields[20].split(',')
|
|
46
|
|
47 for j in range( block_count ):
|
|
48 try:
|
|
49 this_block_size = int(block_size[j])
|
|
50 this_chrom_start = int(chrom_start[j])
|
|
51 except:
|
|
52 invalid_lines += 1
|
|
53 break
|
|
54 # brut force coverage
|
|
55 for k in range( this_block_size ):
|
|
56 cur_index = this_chrom_start + k
|
|
57 if coverage.has_key( ( chrom, cur_index ) ):
|
|
58 coverage[(chrom, cur_index)] += 1
|
|
59 else:
|
|
60 coverage[(chrom, cur_index)] = 1
|
|
61
|
|
62 # generate a index file
|
|
63 outputfh = open(outfile, 'w')
|
|
64 keys = coverage.keys()
|
|
65 keys.sort()
|
|
66 previous_chrom = ''
|
|
67 for i in keys:
|
|
68 (chrom, location) = i
|
|
69 sum = coverage[(i)]
|
|
70 if chrom != previous_chrom:
|
|
71 outputfh.write( 'variableStep chrom=%s\n' % ( chrom ) )
|
|
72 previous_chrom = chrom
|
|
73 outputfh.write( "%s\t%s\n" % ( location, sum ) )
|
|
74 outputfh.close()
|
|
75
|
|
76 if invalid_lines:
|
|
77 invalid_msg = "Skipped %d invalid lines" % invalid_lines
|
|
78 if invalid_chrom:
|
|
79 invalid_msg += ", including %d lines with chrom id errors which must begin with 'chr' to map correctly to the UCSC Genome Browser. "
|
|
80
|
|
81 if __name__ == '__main__': __main__() |