Mercurial > repos > xuebing > sharplabtool
diff tools/maf/maf_limit_to_species.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python

"""
Read a maf file and write out a new maf with only blocks having the
required species, after dropping any other species and removing
columns containing only gaps.

usage: %prog species,species2,... input_maf output_maf allow_partial min_species_per_block
"""
#Dan Blankenberg
from galaxy import eggs
import pkg_resources; pkg_resources.require( "bx-python" )
import bx.align.maf
from galaxy.tools.util import maf_utilities
import sys

assert sys.version_info[:2] >= ( 2, 4 )


def _fail( message ):
    """Write message to stderr and terminate with a non-zero exit status."""
    sys.stderr.write( message + "\n" )
    sys.exit( 1 )


def main():
    """
    Filter the blocks of a MAF file by species and write the survivors.

    Command-line arguments (read from sys.argv):
      1. species: passed to maf_utilities.parse_species_option
      2. input_maf: path opened for reading
      3. output_maf: path opened for writing
      4. allow_partial: integer; non-zero keeps blocks that do not
         contain every requested species
      5. min_species_per_block: integer; a block is kept only when it
         has strictly more species than this value

    Exits with status 1 (message on stderr) on bad arguments, on files
    that cannot be opened, or when the MAF reader cannot be constructed.
    """
    if len( sys.argv ) < 6:
        _fail( "usage: %s species,species2,... input_maf output_maf allow_partial min_species_per_block" % sys.argv[0] )

    species = maf_utilities.parse_species_option( sys.argv[1] )
    if species:
        spec_len = len( species )
    else:
        spec_len = 0

    # Validate the numeric options before touching any file so that a bad
    # argument cannot leave a truncated output file behind.
    try:
        allow_partial = bool( int( sys.argv[4] ) )
        min_species_per_block = int( sys.argv[5] )
    except ValueError:
        _fail( "allow_partial and min_species_per_block must be integers." )

    # sys.exc_info() is used instead of "except ... as e" / "except ..., e"
    # so the script stays valid on both old and new interpreters.
    try:
        maf_in = open( sys.argv[2], 'r' )
    except IOError:
        _fail( "Unable to open input MAF file '%s': %s" % ( sys.argv[2], sys.exc_info()[1] ) )

    maf_blocks_kept = 0
    try:
        # Only the reader construction is reported as a malformed MAF;
        # I/O problems get their own messages.
        try:
            maf_reader = bx.align.maf.Reader( maf_in )
        except Exception:
            _fail( "Your MAF file appears to be malformed." )

        try:
            maf_out = open( sys.argv[3], 'w' )
        except IOError:
            _fail( "Unable to open output MAF file '%s': %s" % ( sys.argv[3], sys.exc_info()[1] ) )

        try:
            maf_writer = bx.align.maf.Writer( maf_out )
            for m in maf_reader:
                if species:
                    m = m.limit_to_species( species )
                m.remove_all_gap_columns()
                spec_in_block_len = len( maf_utilities.get_species_in_block( m ) )
                # NOTE(review): the comparison is a strict ">", so a block
                # needs MORE than min_species_per_block species to be kept.
                # Left unchanged; confirm against what the caller passes.
                if ( not species or allow_partial or spec_in_block_len == spec_len ) and spec_in_block_len > min_species_per_block:
                    maf_writer.write( m )
                    maf_blocks_kept += 1

            maf_reader.close()
            maf_writer.close()
        finally:
            # Safety net for the error path; closing an already-closed
            # file object is a no-op.
            maf_out.close()
    finally:
        maf_in.close()

    # species may be falsy (the code above already guards for that), and
    # joining None would raise TypeError.
    sys.stdout.write( "Restricted to species: %s.\n" % ", ".join( species or [] ) )
    sys.stdout.write( "%i MAF blocks have been kept.\n" % maf_blocks_kept )


if __name__ == "__main__":
    main()