Mercurial > repos > xuebing > sharplabtool
diff tools/maf/maf_to_bed.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
line wrap: on
line diff
# Provenance (Mercurial diff header of the captured changeset):
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/tools/maf/maf_to_bed.py Fri Mar 09 19:37:19 2012 -0500
#!/usr/bin/env python

"""
Read a maf and output intervals for specified list of species.

Command line (positional):
    1. input MAF file
    2. primary output file (receives the intervals of the first species)
    3. comma-separated species list; if it contains "None" the species are
       discovered by scanning the MAF file instead
    4. partial mode; "partial_disallowed" skips blocks that have fewer
       components than the number of species
    5. directory in which the additional (non-primary) outputs are created

Each output line is tab separated:
    chrom, start, end, <species>_<block number>, 0, strand

After conversion one "#FILE1" line (primary species) and one "#FILE" line per
additional species are written to stdout, naming the file that was produced.
"""
import sys, os, tempfile
from galaxy import eggs
import pkg_resources; pkg_resources.require( "bx-python" )
from bx.align import maf

assert sys.version_info[:2] >= ( 2, 4 )


def _split_src( src ):
    """Return ( species, chrom ) for a component source string.

    When maf.src_split() leaves either part empty, the whole source string is
    used for both values.
    """
    spec, chrom = maf.src_split( src )
    if not spec or not chrom:
        spec = chrom = src
    return spec, chrom


def _open_temp_output( tmp_dir ):
    """Create a uniquely named '.maf_to_bed' file in tmp_dir, opened for writing.

    NamedTemporaryFile is used only to pick a unique name: closing it removes
    the file, so the name is reopened with plain open() to get a file that
    persists after this script exits.
    NOTE(review): there is a small window between the close and the reopen in
    which the name is free again; kept as-is to preserve the resulting file
    permissions and Python 2.4 compatibility.
    """
    handle = tempfile.NamedTemporaryFile( mode='w', dir=tmp_dir, suffix='.maf_to_bed' )
    filename = handle.name
    handle.close()
    return open( filename, 'w' )


def _discover_species( input_filename ):
    """Return the list of distinct species found in the MAF file.

    Any error raised while opening or parsing the file propagates to the
    caller; the input file is always closed.
    """
    found = {}
    file_in = open( input_filename, 'r' )
    try:
        for block in maf.Reader( file_in ):
            for comp in block.components:
                spec, chrom = _split_src( comp.src )
                found[spec] = ""
    finally:
        file_in.close()
    return list( found )


def __main__():
    """Convert the MAF file named on the command line into per-species interval files."""
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = sys.argv[3].split( ',' )
    partial = sys.argv[4]
    #where to store files that become additional output
    database_tmp_dir = sys.argv[5]

    if "None" in species:
        # No explicit species list: use every species present in the file.
        try:
            species = _discover_species( input_filename )
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
            sys.stderr.write( "Invalid MAF file specified\n" )
            return

    if "?" in species:
        sys.stderr.write( "Invalid dbkey specified\n" )
        return

    out_files = {}
    primary_spec = None
    file_in = None
    try:
        for i, spec in enumerate( species ):
            if spec in out_files:
                # Species listed twice: keep the handle that is already open
                # instead of leaking it (and clobbering the primary output).
                continue
            if i == 0:
                # The first species is written to the primary output.
                out_files[spec] = open( output_filename, 'w' )
                primary_spec = spec
            else:
                out_files[spec] = _open_temp_output( database_tmp_dir )
        num_species = len( species )

        sys.stdout.write( "Restricted to species: " + ",".join( species ) + "\n" )

        file_in = open( input_filename, 'r' )
        # block_num counts every block read, including the ones skipped below.
        for block_num, block in enumerate( maf.Reader( file_in ) ):
            if "None" not in species:
                block = block.limit_to_species( species )
            components = block.components
            if len( components ) < num_species and partial == "partial_disallowed":
                continue
            for comp in components:
                spec, chrom = _split_src( comp.src )
                if spec not in out_files:
                    # Species that was not requested up front gets its own file.
                    out_files[spec] = _open_temp_output( database_tmp_dir )

                if comp.strand == "-":
                    # Minus-strand components are measured from the other end
                    # of the source sequence; flip them using src_size.
                    start = comp.src_size - comp.end
                    end = comp.src_size - comp.start
                else:
                    start = comp.start
                    end = comp.end
                fields = [ chrom, str( start ), str( end ), spec + "_" + str( block_num ), "0", comp.strand ]
                out_files[spec].write( "\t".join( fields ) + "\n" )
    finally:
        # Release every handle even when conversion fails part-way through.
        if file_in is not None:
            file_in.close()
        for handle in out_files.values():
            handle.close()

    # Report the produced files on stdout (the .name attribute is still
    # available on a closed file object).
    for spec in out_files:
        if spec != primary_spec:
            sys.stdout.write( "#FILE\t" + spec + "\t" + os.path.join( database_tmp_dir, os.path.split( out_files[spec].name )[1] ) + "\n" )
        else:
            sys.stdout.write( "#FILE1\t" + spec + "\t" + out_files[spec].name + "\n" )

if __name__ == "__main__": __main__()