Mercurial > repos > devteam > blat_mapping
comparison blat_mapping.py @ 0:807e3e50845a draft default tip
Imported from capsule None
author | devteam |
---|---|
date | Mon, 19 May 2014 12:33:35 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:807e3e50845a |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import os, sys | |
4 | |
5 assert sys.version_info[:2] >= ( 2, 4 ) | |
6 | |
7 def reverse_complement(s): | |
8 complement_dna = {"A":"T", "T":"A", "C":"G", "G":"C", "a":"t", "t":"a", "c":"g", "g":"c", "N":"N", "n":"n" , ".":"."} | |
9 reversed_s = [] | |
10 for i in s: | |
11 reversed_s.append(complement_dna[i]) | |
12 reversed_s.reverse() | |
13 return "".join(reversed_s) | |
14 | |
15 def __main__(): | |
16 nuc_index = {'a':0,'t':1,'c':2,'g':3,'n':4} | |
17 coverage = {} # key = (chrom, index) | |
18 invalid_lines = 0 | |
19 invalid_chrom = 0 | |
20 infile = sys.argv[1] | |
21 outfile = sys.argv[2] | |
22 | |
23 for i, line in enumerate( open( infile ) ): | |
24 line = line.rstrip('\r\n') | |
25 if not line or line.startswith('#'): | |
26 continue | |
27 fields = line.split() | |
28 if len(fields) < 21: # standard number of pslx columns | |
29 invalid_lines += 1 | |
30 continue | |
31 if not fields[0].isdigit(): | |
32 invalid_lines += 1 | |
33 continue | |
34 chrom = fields[13] | |
35 if not chrom.startswith( 'chr' ): | |
36 invalid_lines += 1 | |
37 invalid_chrom += 1 | |
38 continue | |
39 try: | |
40 block_count = int(fields[17]) | |
41 except: | |
42 invalid_lines += 1 | |
43 continue | |
44 block_size = fields[18].split(',') | |
45 chrom_start = fields[20].split(',') | |
46 | |
47 for j in range( block_count ): | |
48 try: | |
49 this_block_size = int(block_size[j]) | |
50 this_chrom_start = int(chrom_start[j]) | |
51 except: | |
52 invalid_lines += 1 | |
53 break | |
54 # brut force coverage | |
55 for k in range( this_block_size ): | |
56 cur_index = this_chrom_start + k | |
57 if coverage.has_key( ( chrom, cur_index ) ): | |
58 coverage[(chrom, cur_index)] += 1 | |
59 else: | |
60 coverage[(chrom, cur_index)] = 1 | |
61 | |
62 # generate a index file | |
63 outputfh = open(outfile, 'w') | |
64 keys = coverage.keys() | |
65 keys.sort() | |
66 previous_chrom = '' | |
67 for i in keys: | |
68 (chrom, location) = i | |
69 sum = coverage[(i)] | |
70 if chrom != previous_chrom: | |
71 outputfh.write( 'variableStep chrom=%s\n' % ( chrom ) ) | |
72 previous_chrom = chrom | |
73 outputfh.write( "%s\t%s\n" % ( location, sum ) ) | |
74 outputfh.close() | |
75 | |
76 if invalid_lines: | |
77 invalid_msg = "Skipped %d invalid lines" % invalid_lines | |
78 if invalid_chrom: | |
79 invalid_msg += ", including %d lines with chrom id errors which must begin with 'chr' to map correctly to the UCSC Genome Browser. " | |
80 | |
81 if __name__ == '__main__': __main__() |