Mercurial > repos > xuebing > sharplabtool
comparison tools/maf/maf_to_bed.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 Read a maf and output intervals for specified list of species. | |
5 """ | |
6 import sys, os, tempfile | |
7 from galaxy import eggs | |
8 import pkg_resources; pkg_resources.require( "bx-python" ) | |
9 from bx.align import maf | |
10 | |
11 assert sys.version_info[:2] >= ( 2, 4 ) | |
12 | |
def _open_species_tempfile( tmp_dir ):
    """
    Return a new file, open for writing, with a unique '.maf_to_bed' name in tmp_dir.

    NamedTemporaryFile is used only to reserve a unique name: closing it
    removes the file, and the same path is then reopened as an ordinary file
    so that it is still on disk after this script exits.
    """
    placeholder = tempfile.NamedTemporaryFile( mode='w', dir=tmp_dir, suffix='.maf_to_bed' )
    filename = placeholder.name
    placeholder.close()
    # NOTE(review): there is a small window between close() and open() in which
    # another process could claim the same name; kept as in the original tool.
    return open( filename, 'w' )


def __main__():
    """
    Write one interval file per species from the blocks of a MAF file.

    Positional command-line arguments:
        sys.argv[1]  input MAF filename
        sys.argv[2]  output filename used for the first species in the list
        sys.argv[3]  comma-separated species list; if it contains "None", the
                     species are collected from the MAF file itself
        sys.argv[4]  if equal to "partial_disallowed", blocks with fewer
                     components than there are species are skipped
        sys.argv[5]  directory in which the files for every other species
                     are created

    Each component is written as a tab-separated line:
        chrom, start, end, "<species>_<block number>", "0", strand
    For "-" strand components the coordinates written are
    src_size - end and src_size - start.

    On stdout a "Restricted to species:" line is printed, followed by one
    "#FILE1" line for the first species and one "#FILE" line for each other
    species, giving the species name and the path of its file.  Problems with
    the input are reported on stderr and the function returns without raising.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    # where to store files that become additional output
    database_tmp_dir = sys.argv[5]

    species = sys.argv[3].split( ',' )
    partial = sys.argv[4]
    out_files = {}
    primary_spec = None

    if "None" in species:
        # No explicit species requested: scan the whole MAF once and collect
        # every species name that occurs in it.
        found = {}
        try:
            maf_in = open( input_filename, 'r' )
            try:
                for block in maf.Reader( maf_in ):
                    for c in block.components:
                        spec, chrom = maf.src_split( c.src )
                        if not spec or not chrom:
                            # src could not be split; use it whole as the species name
                            spec = chrom = c.src
                        found[spec] = ""
            finally:
                maf_in.close()
        except Exception:
            # Any failure while opening or parsing is reported as a bad input
            # file; SystemExit / KeyboardInterrupt are no longer swallowed.
            sys.stderr.write( "Invalid MAF file specified\n" )
            return
        species = list( found )

    if "?" in species:
        sys.stderr.write( "Invalid dbkey specified\n" )
        return

    # The first species goes to the requested output file; every other species
    # gets its own file in database_tmp_dir.
    for i, spec in enumerate( species ):
        if i == 0:
            out_files[spec] = open( output_filename, 'w' )
            primary_spec = spec
        else:
            out_files[spec] = _open_species_tempfile( database_tmp_dir )
    num_species = len( species )

    sys.stdout.write( "Restricted to species: " + ",".join( species ) + "\n" )

    restrict_species = "None" not in species
    try:
        file_in = open( input_filename, 'r' )
        try:
            for block_num, block in enumerate( maf.Reader( file_in ) ):
                if restrict_species:
                    block = block.limit_to_species( species )
                components = block.components
                if len( components ) < num_species and partial == "partial_disallowed":
                    continue
                for c in components:
                    spec, chrom = maf.src_split( c.src )
                    if not spec or not chrom:
                        spec = chrom = c.src
                    if spec not in out_files:
                        # Species seen in the data that has no file yet
                        out_files[spec] = _open_species_tempfile( database_tmp_dir )

                    if c.strand == "-":
                        start = c.src_size - c.end
                        end = c.src_size - c.start
                    else:
                        start = c.start
                        end = c.end
                    fields = [ chrom, str( start ), str( end ), spec + "_" + str( block_num ), "0", c.strand ]
                    out_files[spec].write( "\t".join( fields ) + "\n" )
        finally:
            file_in.close()
    finally:
        # Close every output file, even if the conversion failed part-way.
        for handle in out_files.values():
            handle.close()

    for spec in out_files:
        if spec != primary_spec:
            path = os.path.join( database_tmp_dir, os.path.split( out_files[spec].name )[1] )
            sys.stdout.write( "#FILE\t" + spec + "\t" + path + "\n" )
        else:
            sys.stdout.write( "#FILE1\t" + spec + "\t" + out_files[spec].name + "\n" )


if __name__ == "__main__": __main__()