0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 Read a maf and output intervals for specified list of species.
|
|
5 """
|
|
6 import sys, os, tempfile
|
|
7 from galaxy import eggs
|
|
8 import pkg_resources; pkg_resources.require( "bx-python" )
|
|
9 from bx.align import maf
|
|
10
|
|
11 assert sys.version_info[:2] >= ( 2, 4 )
|
|
12
|
|
13 def __main__():
|
|
14
|
|
15 input_filename = sys.argv[1]
|
|
16 output_filename = sys.argv[2]
|
|
17 #where to store files that become additional output
|
|
18 database_tmp_dir = sys.argv[5]
|
|
19
|
|
20 species = sys.argv[3].split(',')
|
|
21 partial = sys.argv[4]
|
|
22 out_files = {}
|
|
23 primary_spec = None
|
|
24
|
|
25 if "None" in species:
|
|
26 species = {}
|
|
27 try:
|
|
28 for i, m in enumerate( maf.Reader( open( input_filename, 'r' ) ) ):
|
|
29 for c in m.components:
|
|
30 spec,chrom = maf.src_split( c.src )
|
|
31 if not spec or not chrom:
|
|
32 spec = chrom = c.src
|
|
33 species[spec] = ""
|
|
34 species = species.keys()
|
|
35 except:
|
|
36 print >>sys.stderr, "Invalid MAF file specified"
|
|
37 return
|
|
38
|
|
39 if "?" in species:
|
|
40 print >>sys.stderr, "Invalid dbkey specified"
|
|
41 return
|
|
42
|
|
43
|
|
44 for i in range( 0, len( species ) ):
|
|
45 spec = species[i]
|
|
46 if i == 0:
|
|
47 out_files[spec] = open( output_filename, 'w' )
|
|
48 primary_spec = spec
|
|
49 else:
|
|
50 out_files[spec] = tempfile.NamedTemporaryFile( mode = 'w', dir = database_tmp_dir, suffix = '.maf_to_bed' )
|
|
51 filename = out_files[spec].name
|
|
52 out_files[spec].close()
|
|
53 out_files[spec] = open( filename, 'w' )
|
|
54 num_species = len( species )
|
|
55
|
|
56 print "Restricted to species:", ",".join( species )
|
|
57
|
|
58 file_in = open( input_filename, 'r' )
|
|
59 maf_reader = maf.Reader( file_in )
|
|
60
|
|
61 block_num = -1
|
|
62
|
|
63 for i, m in enumerate( maf_reader ):
|
|
64 block_num += 1
|
|
65 if "None" not in species:
|
|
66 m = m.limit_to_species( species )
|
|
67 l = m.components
|
|
68 if len(l) < num_species and partial == "partial_disallowed": continue
|
|
69 for c in l:
|
|
70 spec,chrom = maf.src_split( c.src )
|
|
71 if not spec or not chrom:
|
|
72 spec = chrom = c.src
|
|
73 if spec not in out_files.keys():
|
|
74 out_files[spec] = tempfile.NamedTemporaryFile( mode='w', dir = database_tmp_dir, suffix = '.maf_to_bed' )
|
|
75 filename = out_files[spec].name
|
|
76 out_files[spec].close()
|
|
77 out_files[spec] = open( filename, 'w' )
|
|
78
|
|
79 if c.strand == "-":
|
|
80 out_files[spec].write( chrom + "\t" + str( c.src_size - c.end ) + "\t" + str( c.src_size - c.start ) + "\t" + spec + "_" + str( block_num ) + "\t" + "0\t" + c.strand + "\n" )
|
|
81 else:
|
|
82 out_files[spec].write( chrom + "\t" + str( c.start ) + "\t" + str( c.end ) + "\t" + spec + "_" + str( block_num ) + "\t" + "0\t" + c.strand + "\n" )
|
|
83
|
|
84 file_in.close()
|
|
85 for file_out in out_files.keys():
|
|
86 out_files[file_out].close()
|
|
87
|
|
88 for spec in out_files.keys():
|
|
89 if spec != primary_spec:
|
|
90 print "#FILE\t" + spec + "\t" + os.path.join( database_tmp_dir, os.path.split( out_files[spec].name )[1] )
|
|
91 else:
|
|
92 print "#FILE1\t" + spec + "\t" + out_files[spec].name
|
|
93
|
|
94 if __name__ == "__main__": __main__()
|