0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 Read a maf file and write out a new maf with only blocks having the
|
|
5 required species, after dropping any other species and removing
|
|
6 columns containing only gaps.
|
|
7
|
|
8 usage: %prog species1,species2,... input_maf output_maf allow_partial min_species_per_block
|
|
9 """
|
|
10 #Dan Blankenberg
|
|
11 from galaxy import eggs
|
|
12 import pkg_resources; pkg_resources.require( "bx-python" )
|
|
13 import bx.align.maf
|
|
14 from galaxy.tools.util import maf_utilities
|
|
15 import sys
|
|
16
|
|
17 assert sys.version_info[:2] >= ( 2, 4 )
|
|
18
|
|
def main():
    """
    Copy a MAF file, keeping only the blocks that satisfy the species filter.

    Positional command-line arguments (read from sys.argv):
      1. species: species option string, parsed by
         maf_utilities.parse_species_option
      2. input_maf: path of the MAF file to read
      3. output_maf: path of the MAF file to write
      4. allow_partial: integer flag; non-zero keeps blocks that do not
         contain every requested species
      5. min_species_per_block: integer; a block is written only when it
         holds strictly more species than this value

    Each block is limited to the requested species (when any were given) and
    stripped of all-gap columns before it is tested and written.

    Writes a message to stderr and exits with status 1 if the reader or
    writer cannot be created. On success prints a short summary to stdout.
    """
    species = maf_utilities.parse_species_option( sys.argv[1] )
    if species:
        spec_len = len( species )
    else:
        spec_len = 0

    # Parse the numeric options before touching any file, so a bad argument
    # fails before the output file has been opened for writing.
    allow_partial = bool( int( sys.argv[4] ) )
    min_species_per_block = int( sys.argv[5] )

    try:
        maf_reader = bx.align.maf.Reader( open( sys.argv[2], 'r' ) )
        maf_writer = bx.align.maf.Writer( open( sys.argv[3], 'w' ) )
    except Exception:
        # Exception rather than a bare except, so SystemExit and (on newer
        # interpreters) KeyboardInterrupt are not swallowed here.
        sys.stderr.write( "Your MAF file appears to be malformed.\n" )
        # Non-zero status so callers can detect the failure.
        sys.exit( 1 )

    maf_blocks_kept = 0
    try:
        for m in maf_reader:
            if species:
                m = m.limit_to_species( species )
            m.remove_all_gap_columns()
            spec_in_block_len = len( maf_utilities.get_species_in_block( m ) )
            # With no species filter, or with partial blocks allowed, the
            # block does not need to contain every requested species.
            has_required_species = ( not species or allow_partial or spec_in_block_len == spec_len )
            # NOTE(review): the comparison is strict, so the threshold acts as
            # "more than N species" although the option is named as a minimum.
            # Confirm against the values the caller passes before changing it.
            if has_required_species and spec_in_block_len > min_species_per_block:
                maf_writer.write( m )
                maf_blocks_kept += 1
    finally:
        # Close both ends even when a block fails to parse or to write.
        maf_reader.close()
        maf_writer.close()

    # species may be falsy (see the checks above); joining a None value would
    # raise TypeError after all the real work had already been done.
    if species:
        print( "Restricted to species: %s." % ", ".join( species ) )
    else:
        print( "No species restriction was applied." )
    print( "%i MAF blocks have been kept." % maf_blocks_kept )
|
|
51
|
|
# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|