Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
diff alignment/fasta_concatenate_by_species.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
#Dan Blankenberg
"""
Takes a Multiple Alignment FASTA file and concatenates
sequences for each species, resulting in one sequence
alignment per species.

Usage: fasta_concatenate_by_species.py <input_fasta> <output_fasta>
"""

import sys
import tempfile

# `eggs` is not referenced below; presumably it is imported for its side
# effects and must precede the other galaxy imports -- keep this order.
from galaxy import eggs
from galaxy.tools.util.maf_utilities import iter_fasta_alignment
from galaxy.util.odict import odict


def __main__():
    """
    Concatenate the per-block sequences of each species into one record.

    Reads the input path from sys.argv[1] and the output path from
    sys.argv[2]. Every block yielded by iter_fasta_alignment() contributes
    one column range to the result: a species present in the block gets its
    text appended, a species absent from the block gets '-' padding of the
    block's width, and a species first seen in a later block is left-padded
    with '-' for all the columns that came before it.

    Each species' growing sequence is kept in its own temporary file, and
    the output is written as ">species\\nsequence\\n" records in the
    iteration order of the odict.

    Raises IndexError if fewer than two command-line arguments are given.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = odict()  # species name -> temporary file holding its sequence
    cur_size = 0  # total width of all blocks processed so far
    try:
        for components in iter_fasta_alignment( input_filename ):
            if not components:
                # Defensive: an empty block has no width to pad with.
                continue
            # The first component defines the block width; this assumes all
            # components of a block have equal length -- not checked here.
            block_size = len( components[0].text )
            # Snapshot of the species known before this block; whatever is
            # left in it after the loop was absent from the block.
            species_not_written = list( species.keys() )
            for component in components:
                if component.species not in species:
                    # New species: pad it for every column it missed so far.
                    species[component.species] = tempfile.TemporaryFile()
                    species[component.species].write( "-" * cur_size )
                # NOTE(review): a species occurring more than once in a block
                # has its text appended each time -- confirm that is intended.
                species[component.species].write( component.text )
                if component.species in species_not_written:
                    species_not_written.remove( component.species )
            for spec in species_not_written:
                species[spec].write( "-" * block_size )
            cur_size += block_size
        with open( output_filename, 'wb' ) as out:
            for spec, f in species.items():
                f.seek( 0 )
                out.write( ">%s\n%s\n" % ( spec, f.read() ) )
    finally:
        # Close the temporary files even when reading or writing failed.
        for f in species.values():
            f.close()


if __name__ == "__main__":
    __main__()