# HG changeset patch # User devteam # Date 1400517215 14400 # Node ID 807e3e50845ae0df640e3dc31461eca6389312b7 Imported from capsule None diff -r 000000000000 -r 807e3e50845a blat_mapping.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blat_mapping.py Mon May 19 12:33:35 2014 -0400 @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +import os, sys + +assert sys.version_info[:2] >= ( 2, 4 ) + +def reverse_complement(s): + complement_dna = {"A":"T", "T":"A", "C":"G", "G":"C", "a":"t", "t":"a", "c":"g", "g":"c", "N":"N", "n":"n" , ".":"."} + reversed_s = [] + for i in s: + reversed_s.append(complement_dna[i]) + reversed_s.reverse() + return "".join(reversed_s) + +def __main__(): + nuc_index = {'a':0,'t':1,'c':2,'g':3,'n':4} + coverage = {} # key = (chrom, index) + invalid_lines = 0 + invalid_chrom = 0 + infile = sys.argv[1] + outfile = sys.argv[2] + + for i, line in enumerate( open( infile ) ): + line = line.rstrip('\r\n') + if not line or line.startswith('#'): + continue + fields = line.split() + if len(fields) < 21: # standard number of pslx columns + invalid_lines += 1 + continue + if not fields[0].isdigit(): + invalid_lines += 1 + continue + chrom = fields[13] + if not chrom.startswith( 'chr' ): + invalid_lines += 1 + invalid_chrom += 1 + continue + try: + block_count = int(fields[17]) + except: + invalid_lines += 1 + continue + block_size = fields[18].split(',') + chrom_start = fields[20].split(',') + + for j in range( block_count ): + try: + this_block_size = int(block_size[j]) + this_chrom_start = int(chrom_start[j]) + except: + invalid_lines += 1 + break + # brut force coverage + for k in range( this_block_size ): + cur_index = this_chrom_start + k + if coverage.has_key( ( chrom, cur_index ) ): + coverage[(chrom, cur_index)] += 1 + else: + coverage[(chrom, cur_index)] = 1 + + # generate a index file + outputfh = open(outfile, 'w') + keys = coverage.keys() + keys.sort() + previous_chrom = '' + for i in keys: + (chrom, location) = i + sum = coverage[(i)] + if chrom != previous_chrom: + outputfh.write( 'variableStep chrom=%s\n' % ( chrom ) ) + previous_chrom = chrom + outputfh.write( "%s\t%s\n" % ( location, sum ) ) + outputfh.close() + + if invalid_lines: + invalid_msg = "Skipped %d invalid lines" % invalid_lines + if invalid_chrom: + invalid_msg += ", including %d lines with chrom id errors which must begin with 'chr' to map correctly to the UCSC Genome Browser. " + +if __name__ == '__main__': __main__() \ No newline at end of file diff -r 000000000000 -r 807e3e50845a blat_mapping.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blat_mapping.xml Mon May 19 12:33:35 2014 -0400 @@ -0,0 +1,42 @@ + + in wiggle format + blat_mapping.py $input1 $output1 + + + + + + + + + + + + + + +.. class:: warningmark + + To generate acceptable files, please use alignment program **BLAT** with option **-out=pslx**. + +.. class:: warningmark + + Please edit the database information by click on the pencil icon next to your dataset. Select the corresponding genome build. + +----- + +**What it does** + + This tool takes **BLAT pslx** output and returns a wig-like file showing the number of reads (coverage) mapped at each chromosome location. Use **Graph/Display Data --> Build custom track** tool to show the coverage mapping in UCSC Genome Browser. + +----- + +**Example** + + Showing reads coverage on human chromosome 22 (partial result) in UCSC Genome Browser Custom Track: + + .. image:: blat_mapping_example.png + :width: 600 + + + diff -r 000000000000 -r 807e3e50845a blat_mapping_example.png Binary file blat_mapping_example.png has changed diff -r 000000000000 -r 807e3e50845a test-data/blat_mapping_test1.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blat_mapping_test1.out Mon May 19 12:33:35 2014 -0400 @@ -0,0 +1,17 @@ +variableStep chrom=chrI +9704438 1 +9704439 1 +9704440 1 +9704441 1 +9704442 1 +9704443 1 +9704444 1 +9704445 1 +9704446 1 +9704447 1 +variableStep chrom=chrII +9704440 1 +9704441 1 +9704442 1 +9704443 1 +9704444 1 diff -r 000000000000 -r 807e3e50845a test-data/blat_mapping_test1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blat_mapping_test1.txt Mon May 19 12:33:35 2014 -0400 @@ -0,0 +1,2 @@ +10 1 0 0 0 0 0 0 + seq1 10 1 10 chrI 15080483 9704438 9704448 1 10, 1, 9704438, ggtttttttt, ggtttttatt, +5 1 0 0 0 0 0 0 + seq2 5 1 5 chrII 15080483 9704440 9704445 1 5, 1, 9704440, ttttt, tttta,