| 
0
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 #Dan Blankenberg
 | 
| 
 | 
     3 """
 | 
| 
 | 
     4 Takes a Multiple Alignment FASTA file and concatenates 
 | 
| 
 | 
     5 sequences for each species, resulting in one sequence 
 | 
| 
 | 
     6 alignment per species.
 | 
| 
 | 
     7 """
 | 
| 
 | 
     8 
 | 
| 
 | 
     9 import sys, tempfile
 | 
| 
 | 
    10 from utils.maf_utilities import iter_fasta_alignment
 | 
| 
 | 
    11 from utils.odict import odict
 | 
| 
 | 
    12 
 | 
| 
 | 
    13 def __main__():
 | 
| 
 | 
    14     input_filename = sys.argv[1]
 | 
| 
 | 
    15     output_filename = sys.argv[2]
 | 
| 
 | 
    16     species = odict()
 | 
| 
 | 
    17     cur_size = 0
 | 
| 
 | 
    18     for components in iter_fasta_alignment( input_filename ):
 | 
| 
 | 
    19         species_not_written = species.keys()
 | 
| 
 | 
    20         for component in components:
 | 
| 
 | 
    21             if component.species not in species:
 | 
| 
 | 
    22                 species[component.species] = tempfile.TemporaryFile()
 | 
| 
 | 
    23                 species[component.species].write( "-" * cur_size )
 | 
| 
 | 
    24             species[component.species].write( component.text )
 | 
| 
 | 
    25             try:
 | 
| 
 | 
    26                 species_not_written.remove( component.species )
 | 
| 
 | 
    27             except ValueError:
 | 
| 
 | 
    28                 #this is a new species
 | 
| 
 | 
    29                 pass
 | 
| 
 | 
    30         for spec in species_not_written:
 | 
| 
 | 
    31             species[spec].write( "-" * len( components[0].text ) )
 | 
| 
 | 
    32         cur_size += len( components[0].text )
 | 
| 
 | 
    33     out = open( output_filename, 'wb' )
 | 
| 
 | 
    34     for spec, f in species.iteritems():
 | 
| 
 | 
    35         f.seek( 0 )
 | 
| 
 | 
    36         out.write( ">%s\n%s\n" % ( spec, f.read() ) )
 | 
| 
 | 
    37     out.close()
 | 
| 
 | 
    38 
 | 
| 
 | 
    39 if __name__ == "__main__" : __main__()
 |