Mercurial > repos > xuebing > sharplabtool
comparison tools/maf/maf_to_interval.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 Read a maf and output intervals for specified list of species. | |
5 """ | |
6 import sys, os | |
7 from galaxy import eggs | |
8 import pkg_resources; pkg_resources.require( "bx-python" ) | |
9 from bx.align import maf | |
10 from galaxy.tools.util import maf_utilities | |
11 | |
# Refuse to run on interpreters older than Python 2.4.  An explicit raise is
# used instead of a bare assert so the check is not stripped by "python -O".
if sys.version_info[:2] < ( 2, 4 ):
    raise AssertionError( 'Python 2.4 or newer is required' )
13 | |
def __main__():
    """Read a MAF file and write one tab-separated interval file per species.

    Command-line arguments (read from sys.argv):
        1: input MAF filename
        2: output filename used for the primary species
        3: output id, embedded in the names of the additional output files
        4: directory where the additional output files are written
        5: primary species
        6: comma-separated species to write interval files for; if the list
           contains "None" it is treated as empty
        7: comma-separated species whose sequences become the trailing
           columns of every output line
        8: "partial_disallowed" to skip blocks that have fewer components
           than there are sequence-column species
        9: "remove_gaps" to strip '-' characters from the sequence text

    The primary species is always added to both species lists.  Each output
    file starts with a '#chrom ...' header line followed by one line per
    matching component.

    Raises:
        AssertionError: if the component returned for a species reports a
            different species name in its src.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    output_id = sys.argv[3]
    # where to store files that become additional output
    database_tmp_dir = sys.argv[4]
    primary_spec = sys.argv[5]
    species = sys.argv[6].split( ',' )
    all_species = sys.argv[7].split( ',' )
    partial = sys.argv[8]
    keep_gaps = sys.argv[9]
    out_files = {}
    file_in = None

    if "None" in species:
        species = []

    if primary_spec not in species:
        species.append( primary_spec )
    if primary_spec not in all_species:
        all_species.append( primary_spec )

    all_species.sort()
    num_species = len( all_species )
    remove_gaps = ( keep_gaps == 'remove_gaps' )
    # The header is identical for every output file, so build it once.
    header = '#chrom\tstart\tend\tstrand\tscore\tname\t%s\n' % ( '\t'.join( all_species ) )

    # try/finally (rather than "with") keeps the script valid on Python 2.4
    # while guaranteeing that every opened file is closed on any error.
    try:
        for spec in species:
            if spec == primary_spec:
                out_path = output_filename
            else:
                out_path = os.path.join( database_tmp_dir, 'primary_%s_%s_visible_interval_%s' % ( output_id, spec, spec ) )
            out_files[ spec ] = open( out_path, 'wb+' )
            out_files[ spec ].write( header )

        file_in = open( input_filename, 'r' )
        maf_reader = maf.Reader( file_in )

        for i, m in enumerate( maf_reader ):
            for j, block in enumerate( maf_utilities.iter_blocks_split_by_species( m ) ):
                if partial == "partial_disallowed" and len( block.components ) < num_species:
                    continue
                # Map species name -> sequence text for this block.
                sequences = {}
                for c in block.components:
                    spec, chrom = maf_utilities.src_split( c.src )
                    if remove_gaps:
                        sequences[ spec ] = c.text.replace( '-', '' )
                    else:
                        sequences[ spec ] = c.text
                # One column per species in all_species; empty when the block
                # has no component for that species.
                sequence_columns = '\t'.join( [ sequences.get( spec, '' ) for spec in all_species ] )
                for spec in species:
                    c = block.get_component_by_src_start( spec )
                    if c is not None:
                        spec2, chrom = maf_utilities.src_split( c.src )
                        # Explicit raise so the check survives "python -O".
                        if spec2 != spec:
                            raise AssertionError( 'Species name inconsistancy found in component: %s != %s' % ( spec, spec2 ) )
                        out_files[ spec ].write( "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ( chrom, c.forward_strand_start, c.forward_strand_end, c.strand, m.score, "%s_%s_%s" % ( spec, i, j ), sequence_columns ) )
    finally:
        if file_in is not None:
            file_in.close()
        for file_out in out_files.values():
            file_out.close()
67 | |
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    __main__()