Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison alignment/fasta_concatenate_by_species.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5b9a38ec4a39 |
---|---|
1 #!/usr/bin/env python | |
2 #Dan Blankenberg | |
3 """ | |
4 Takes a Multiple Alignment FASTA file and concatenates | |
5 sequences for each species, resulting in one sequence | |
6 alignment per species. | |
7 """ | |
8 | |
9 import sys, tempfile | |
10 from galaxy import eggs | |
11 from galaxy.tools.util.maf_utilities import iter_fasta_alignment | |
12 from galaxy.util.odict import odict | |
13 | |
def __main__():
    """Concatenate the alignment blocks of a FASTA file, one sequence per species.

    Command-line arguments:
        sys.argv[1] -- path of the multiple-alignment FASTA input.
        sys.argv[2] -- path of the FASTA output to create.

    Every block yielded by ``iter_fasta_alignment`` is appended to a
    per-species temporary file.  A species that is missing from a block is
    padded with ``-`` for the width of that block, and a species first seen
    in a later block is back-filled with ``-`` for everything already
    written, so all output sequences end up with the same length.

    NOTE(review): the width of a block is taken from its first component;
    this assumes every component in a block has the same text length.
    NOTE(review): the binary-mode handles are written with ``str`` values,
    which relies on Python 2 string semantics -- confirm before porting.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    # Species name -> temporary file accumulating that species' sequence.
    # An ordered dict keeps the output in first-seen species order.
    species = odict()
    # Number of alignment columns written so far for every known species.
    cur_size = 0
    try:
        for components in iter_fasta_alignment( input_filename ):
            block_size = len( components[0].text )
            # Species known before this block; whatever is still in the set
            # after the block has been processed needs gap padding.
            species_not_written = set( species.keys() )
            for component in components:
                if component.species not in species:
                    # New species: back-fill the columns it has missed.
                    species[component.species] = tempfile.TemporaryFile()
                    species[component.species].write( "-" * cur_size )
                species[component.species].write( component.text )
                # discard() is a no-op for a species first seen in this block.
                species_not_written.discard( component.species )
            for spec in species_not_written:
                species[spec].write( "-" * block_size )
            cur_size += block_size
        # The with-block closes the output even if a write fails part-way.
        with open( output_filename, 'wb' ) as out:
            for spec, f in species.items():
                f.seek( 0 )
                out.write( ">%s\n%s\n" % ( spec, f.read() ) )
    finally:
        # Release the temporary files deterministically instead of relying
        # on garbage collection.
        for f in species.values():
            f.close()
39 | |
# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    __main__()