annotate tools/maf/maf_limit_to_species.py @ 2:c2a356708570

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:42 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 Read a maf file and write out a new maf with only blocks having the
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 required species, after dropping any other species and removing
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 columns containing only gaps.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 usage: %prog species1,species2,... input_maf output_maf allow_partial min_species_per_block
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 #Dan Blankenberg
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 from galaxy import eggs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 import pkg_resources; pkg_resources.require( "bx-python" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 import bx.align.maf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 from galaxy.tools.util import maf_utilities
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 import sys
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Refuse to run on interpreters older than Python 2.4.
assert sys.version_info[:2] >= (2, 4)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def main():
    """Write a copy of a MAF file restricted to the requested species.

    Arguments are read positionally from ``sys.argv``:

    1. species -- handed to ``maf_utilities.parse_species_option``; when the
       parsed result is falsy no species restriction is applied.
    2. path of the MAF file to read.
    3. path of the MAF file to write.
    4. allow_partial -- integer flag; non-zero keeps blocks even when they
       do not contain every requested species.
    5. min_species_per_block -- integer; a block is kept only when its
       species count is strictly greater than this value.

    Each block is limited to the requested species (if any), stripped of
    all-gap columns, and written out when it passes the checks above.  A
    short summary is printed to stdout at the end.

    Exits with status 1 and a message on stderr if the reader or writer
    cannot be created.
    """
    species = maf_utilities.parse_species_option(sys.argv[1])
    spec_len = len(species) if species else 0

    # Parse the numeric arguments before touching any file so that a bad
    # invocation does not truncate the output path.
    allow_partial = bool(int(sys.argv[4]))
    min_species_per_block = int(sys.argv[5])

    try:
        maf_reader = bx.align.maf.Reader(open(sys.argv[2], 'r'))
        maf_writer = bx.align.maf.Writer(open(sys.argv[3], 'w'))
    except Exception:
        # Not a bare ``except:`` -- SystemExit / KeyboardInterrupt must
        # propagate.  Exit non-zero so callers can detect the failure.
        sys.stderr.write("Your MAF file appears to be malformed.\n")
        sys.exit(1)

    maf_blocks_kept = 0
    try:
        for m in maf_reader:
            if species:
                m = m.limit_to_species(species)
            m.remove_all_gap_columns()
            spec_in_block_len = len(maf_utilities.get_species_in_block(m))
            species_ok = (not species or allow_partial
                          or spec_in_block_len == spec_len)
            # NOTE(review): the comparison is strict, so a threshold of N
            # keeps blocks with at least N + 1 species -- confirm this is
            # what callers expect from "min_species_per_block".
            if species_ok and spec_in_block_len > min_species_per_block:
                maf_writer.write(m)
                maf_blocks_kept += 1
    finally:
        # Always release both files, even if a block fails to parse.
        maf_reader.close()
        maf_writer.close()

    # ``species`` may be falsy (no restriction); joining it unconditionally
    # would raise TypeError on None after the output was already written.
    if species:
        sys.stdout.write("Restricted to species: %s.\n" % ", ".join(species))
    sys.stdout.write("%i MAF blocks have been kept.\n" % maf_blocks_kept)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Run the filter only when this file is executed as a script.
if __name__ == "__main__":
    main()