annotate tools/maf/maf_to_fasta_multiple_sets.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 Read a maf and output a multiple block fasta file.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 #Dan Blankenberg
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 import sys
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 from galaxy import eggs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 import pkg_resources; pkg_resources.require( "bx-python" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 from bx.align import maf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 from galaxy.tools.util import maf_utilities
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 assert sys.version_info[:2] >= ( 2, 4 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def _write_fasta_blocks( maf_reader, file_out, species, num_species, partial ):
    """
    Write every alignment block from maf_reader to file_out as FASTA records.

    maf_reader  -- iterable yielding alignment blocks
    file_out    -- writable file object that receives the FASTA text
    species     -- species to keep; a false value leaves blocks untouched
    num_species -- number of requested species (0 when unrestricted)
    partial     -- when equal to "partial_disallowed", blocks holding fewer
                   than num_species species are skipped
    """
    for block_num, block in enumerate( maf_reader ):
        if species:
            block = block.limit_to_species( species )
            if len( maf_utilities.get_species_in_block( block ) ) < num_species and partial == "partial_disallowed":
                continue
        # Per-block counter: the first sequence of a species gets index 0,
        # each further sequence of the same species the next integer.
        spec_counts = {}
        for component in block.components:
            # Only the species half of the source name is needed here.
            spec, _chrom = maf_utilities.src_split( component.src )
            if spec not in spec_counts:
                spec_counts[ spec ] = 0
            else:
                spec_counts[ spec ] += 1
            seq_index = spec_counts[ spec ]
            header = maf_utilities.get_fasta_header(
                component,
                { 'block_index' : block_num, 'species' : spec, 'sequence_index' : seq_index },
                suffix = "%s_%i_%i" % ( spec, block_num, seq_index ) )
            file_out.write( "%s\n" % header )
            file_out.write( "%s\n" % component.text )
        # A blank line separates consecutive blocks in the output.
        file_out.write( "\n" )


def __main__():
    """
    Read a MAF file and write it out as a multiple block FASTA file.

    Command line arguments (taken from sys.argv):
      1 -- path of the input MAF file
      2 -- path of the FASTA file to create
      3 -- species option, parsed by maf_utilities.parse_species_option();
           a false result means the output is not restricted by species
      4 -- partial-block policy; "partial_disallowed" drops blocks that do
           not contain all requested species

    Problems opening the files or reading the arguments are reported through
    maf_utilities.tool_fail().
    NOTE(review): tool_fail() is presumed to terminate the process -- confirm
    in maf_utilities; the code after each handler relies on it.

    Both file handles are closed on every exit path, including errors raised
    while the blocks are being converted.
    """
    # sys.exc_info() is used instead of "except Exception, e" (Python 2 only)
    # or "except Exception as e" (2.6+) so this parses on Python 2.4 - 3.x.
    # For the same reason there is no "with" and no conditional expression.
    maf_in = None
    file_out = None
    try:
        try:
            maf_in = open( sys.argv[1] )
            maf_reader = maf.Reader( maf_in )
        except Exception:
            maf_utilities.tool_fail( "Error opening input MAF: %s" % sys.exc_info()[1] )
        try:
            file_out = open( sys.argv[2], 'w' )
        except Exception:
            maf_utilities.tool_fail( "Error opening file for output: %s" % sys.exc_info()[1] )
        try:
            species = maf_utilities.parse_species_option( sys.argv[3] )
            if species:
                num_species = len( species )
            else:
                num_species = 0
        except Exception:
            maf_utilities.tool_fail( "Error determining species value: %s" % sys.exc_info()[1] )
        try:
            partial = sys.argv[4]
        except Exception:
            maf_utilities.tool_fail( "Error determining keep partial value: %s" % sys.exc_info()[1] )

        # print with one parenthesised argument behaves the same as the
        # Python 2 print statement and as the Python 3 print function.
        if species:
            print( "Restricted to species: %s" % ', '.join( species ) )
        else:
            print( "Not restricted to species." )

        _write_fasta_blocks( maf_reader, file_out, species, num_species, partial )
    finally:
        # Close the output first so buffered FASTA text is flushed even when
        # the conversion was interrupted by an exception.
        for handle in ( file_out, maf_in ):
            if handle is not None:
                handle.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    __main__()