Mercurial > repos > devteam > fasta_concatenate_by_species
comparison fasta_concatenate_by_species.py @ 0:2126e1b833a2
Imported from capsule None
| author | devteam |
|---|---|
| date | Mon, 19 May 2014 12:33:30 -0400 |
| parents | |
| children | 16df616b39e5 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:2126e1b833a2 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 #Dan Blankenberg | |
| 3 """ | |
| 4 Takes a Multiple Alignment FASTA file and concatenates | |
| 5 sequences for each species, resulting in one sequence | |
| 6 alignment per species. | |
| 7 """ | |
| 8 | |
| 9 import sys, tempfile | |
| 10 from utils.maf_utilities import iter_fasta_alignment | |
| 11 from utils.odict import odict | |
| 12 | |
def __main__():
    """Concatenate the alignment blocks of a FASTA file, one record per species.

    Command-line arguments:
        sys.argv[1]: path of the multiple-alignment FASTA input file.
        sys.argv[2]: path of the FASTA output file to create.

    Each block yielded by ``iter_fasta_alignment`` is appended to a
    per-species temporary file. A species that is missing from a block is
    padded with "-" for the width of that block (taken from the block's
    first component), and a species first seen in a later block is padded
    for everything that came before it. The output holds one
    ``>species`` header and one sequence line per species, in the order
    ``odict`` reports its keys.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = odict()
    cur_size = 0  # total width of all blocks processed so far
    try:
        for components in iter_fasta_alignment(input_filename):
            if not components:
                # Defensive: an empty block has no width to add.
                continue
            block_size = len(components[0].text)
            # Take a real list copy so .remove() works even when keys()
            # returns a view (Python 3 dict-likes).
            species_not_written = list(species.keys())
            for component in components:
                if component.species not in species:
                    # Text mode so that str can be written on both
                    # Python 2 and Python 3.
                    species[component.species] = tempfile.TemporaryFile(mode='w+')
                    species[component.species].write("-" * cur_size)
                species[component.species].write(component.text)
                try:
                    species_not_written.remove(component.species)
                except ValueError:
                    # Not in the pending list: the species was created in
                    # this block, or it already appeared in this block.
                    pass
            # Pad every known species that this block did not mention.
            for spec in species_not_written:
                species[spec].write("-" * block_size)
            cur_size += block_size
        # The context manager closes the output even if a write fails.
        with open(output_filename, 'w') as out:
            for spec in species.keys():
                f = species[spec]
                f.seek(0)
                out.write(">%s\n%s\n" % (spec, f.read()))
    finally:
        # Release the temporary files instead of leaving them to the
        # garbage collector.
        for spec in species.keys():
            species[spec].close()
| 38 | |
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
