comparison alignment/fasta_concatenate_by_species.py @ 0:5b9a38ec4a39 draft default tip

First commit of old repositories
author osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
date Tue, 11 Mar 2014 12:19:13 -0700
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5b9a38ec4a39
1 #!/usr/bin/env python
2 #Dan Blankenberg
3 """
4 Takes a Multiple Alignment FASTA file and concatenates
5 sequences for each species, resulting in one sequence
6 alignment per species.
7 """
8
9 import sys, tempfile
10 from galaxy import eggs
11 from galaxy.tools.util.maf_utilities import iter_fasta_alignment
12 from galaxy.util.odict import odict
13
def __main__():
    """Concatenate the sequences of each species across all alignment blocks.

    Command-line arguments:
        sys.argv[1] -- path of the multiple-alignment FASTA input file.
        sys.argv[2] -- path of the FASTA output file to create.

    The input is consumed one alignment block at a time via
    iter_fasta_alignment().  Every species gets its own temporary file to
    which the text of its component in each block is appended.  A species
    that is missing from a block (or that first shows up after earlier
    blocks were processed) is padded with "-" so that all species end up
    with sequences of equal length.  The width of a block is taken from
    the text of its first component.

    The output holds one ">species" record per species, in the iteration
    order of the odict (presumably order of first appearance -- confirm
    against galaxy.util.odict).
    """
    # Size of the pieces copied from a temporary file to the output, so a
    # whole concatenated sequence never has to sit in memory at once.
    chunk_size = 1024 * 1024

    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = odict()  # species name -> temporary file with its sequence
    cur_size = 0  # number of alignment columns written so far
    try:
        for components in iter_fasta_alignment( input_filename ):
            block_size = len( components[0].text )
            # Species known before this block; whatever is still in this
            # set after the inner loop did not occur in the block.
            species_not_written = set( species.keys() )
            for component in components:
                if component.species not in species:
                    # New species: gap-fill all the blocks it has missed.
                    species[component.species] = tempfile.TemporaryFile()
                    species[component.species].write( "-" * cur_size )
                species[component.species].write( component.text )
                species_not_written.discard( component.species )
            for spec in species_not_written:
                species[spec].write( "-" * block_size )
            cur_size += block_size

        with open( output_filename, 'wb' ) as out:
            for spec, f in species.iteritems():
                f.seek( 0 )
                out.write( ">%s\n" % spec )
                while True:
                    chunk = f.read( chunk_size )
                    if not chunk:
                        break
                    out.write( chunk )
                out.write( "\n" )
    finally:
        # Release the temporary files even if reading or writing failed.
        for spec, f in species.iteritems():
            f.close()
39
# Run the concatenation only when this file is executed as a script.
if __name__ == "__main__":
    __main__()