0
|
1 #!/usr/bin/env python
|
|
2 #Dan Blankenberg
|
|
3 """
|
|
4 Takes a Multiple Alignment FASTA file and concatenates
|
|
5 sequences for each species, resulting in one sequence
|
|
6 alignment per species.
|
|
7 """
|
|
8
|
|
9 import sys, tempfile
|
|
10 from galaxy import eggs
|
|
11 from galaxy.tools.util.maf_utilities import iter_fasta_alignment
|
|
12 from galaxy.util.odict import odict
|
|
13
|
|
def __main__():
    """
    Concatenate the sequences of each species across all alignment blocks.

    Command-line arguments:
        sys.argv[1] -- path of the Multiple Alignment FASTA input file
        sys.argv[2] -- path of the FASTA output file to create

    Every block yielded by iter_fasta_alignment() is appended to one
    temporary file per species. A species that is missing from a block, or
    that first appears after earlier blocks, is padded with "-" so that all
    per-species sequences stay the same length. The output receives one
    ">species" record per species, in the iteration order of the odict.

    NOTE(review): the width of a block is taken from its first component;
    this assumes every component of a block has the same text length --
    confirm against iter_fasta_alignment().
    """
    # Local import: only needed here, keeps the file-level imports untouched.
    import shutil

    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = odict()  # species name -> temporary file holding its sequence
    cur_size = 0  # total number of alignment columns written so far
    try:
        for components in iter_fasta_alignment( input_filename ):
            if not components:
                # An empty block contributes no columns; skipping it avoids
                # an IndexError on components[0] below.
                continue
            block_size = len( components[0].text )
            # Species known before this block that still need text or padding.
            species_not_written = set( species.keys() )
            for component in components:
                if component.species not in species:
                    # New species: back-fill gaps for all earlier blocks.
                    species[component.species] = tempfile.TemporaryFile()
                    species[component.species].write( "-" * cur_size )
                species[component.species].write( component.text )
                # discard() is a no-op for a species first seen in this block.
                species_not_written.discard( component.species )
            for spec in species_not_written:
                species[spec].write( "-" * block_size )
            cur_size += block_size

        out = open( output_filename, 'wb' )
        try:
            for spec, f in species.iteritems():
                f.seek( 0 )
                out.write( ">%s\n" % spec )
                # Stream in chunks rather than reading the whole sequence
                # into memory at once.
                shutil.copyfileobj( f, out )
                out.write( "\n" )
        finally:
            out.close()
    finally:
        # Release the temporary files even if reading or writing failed.
        for spec in species.keys():
            species[spec].close()
|
|
39
|
|
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    __main__()
|