annotate tools/maf/maf_to_fasta_concat.py @ 2:c2a356708570

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:42 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 Read a maf and output a single block fasta file, concatenating blocks
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 usage %prog species1,species2 maf_file out_file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 #Dan Blankenberg
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 import sys
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 from galaxy import eggs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 import pkg_resources; pkg_resources.require( "bx-python" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 from bx.align import maf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 from galaxy.tools.util import maf_utilities
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 assert sys.version_info[:2] >= ( 2, 4 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def __main__():
    """
    Write one FASTA record per species, concatenating its text from every MAF block.

    Command-line arguments (read from sys.argv):
      1: species option string, parsed by maf_utilities.parse_species_option
      2: path of the input MAF file
      3: path of the FASTA file to write

    When the parsed species value is empty, the species list is taken from the
    MAF file itself via maf_utilities.get_species_in_maf.  The MAF file is
    re-read from the start once for every species.  For each block, the text of
    the species' component is appended to the record; a block without a
    component for the species contributes block.text_size '-' characters.

    Every failure is reported through maf_utilities.tool_fail.
    NOTE(review): the code after each tool_fail call relies on tool_fail not
    returning (presumably it exits the process) -- confirm in maf_utilities.
    """
    # sys.exc_info()[1] is used instead of "except Exception, e" so that the
    # function parses on Python 2.4 (the minimum asserted by this file) as well
    # as on later interpreters.
    try:
        species = maf_utilities.parse_species_option( sys.argv[1] )
    except Exception:
        maf_utilities.tool_fail( "Error determining species value: %s" % sys.exc_info()[1] )
    try:
        input_filename = sys.argv[2]
    except Exception:
        maf_utilities.tool_fail( "Error reading MAF filename: %s" % sys.exc_info()[1] )
    try:
        file_out = open( sys.argv[3], 'w' )
    except Exception:
        maf_utilities.tool_fail( "Error opening file for output: %s" % sys.exc_info()[1] )

    # From here on the output handle is released on every exit path.
    try:
        if species:
            sys.stdout.write( "Restricted to species: %s" % ', '.join( species ) + "\n" )
        else:
            sys.stdout.write( "Not restricted to species." + "\n" )
            # No explicit restriction: use every species found in the MAF.
            try:
                species = maf_utilities.get_species_in_maf( input_filename )
            except Exception:
                maf_utilities.tool_fail( "Error determining species in input MAF: %s" % sys.exc_info()[1] )

        for spec in species:
            file_out.write( ">" + spec + "\n" )
            try:
                maf_in = open( input_filename, 'r' )
                # Nested try/finally (rather than try/except/finally) keeps
                # Python 2.4 compatibility while guaranteeing the per-species
                # input handle is closed instead of leaked.
                try:
                    for start_block in maf.Reader( maf_in ):
                        for block in maf_utilities.iter_blocks_split_by_species( start_block ):
                            block.remove_all_gap_columns() #remove extra gaps
                            component = block.get_component_by_src_start( spec ) #blocks only have one occurrence of a particular species, so this is safe
                            if component:
                                file_out.write( component.text )
                            else:
                                # Species absent from this block: pad with gaps
                                file_out.write( "-" * block.text_size )
                finally:
                    maf_in.close()
            except Exception:
                maf_utilities.tool_fail( "Your MAF file appears to be malformed: %s" % sys.exc_info()[1] )
            file_out.write( "\n" )
    finally:
        file_out.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    __main__()