annotate tools/fasta_tools/fasta_concatenate_by_species.py @ 2:c2a356708570

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:42 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 #Dan Blankenberg
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 Takes a Multiple Alignment FASTA file and concatenates
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 sequences for each species, resulting in one sequence
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 alignment per species.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 import sys, tempfile
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 from galaxy import eggs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 from galaxy.tools.util.maf_utilities import iter_fasta_alignment
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 from galaxy.util.odict import odict
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def __main__():
    """Concatenate the sequences of a multiple-alignment FASTA file per species.

    Command-line arguments:
        sys.argv[1]: path of the input multiple-alignment FASTA file.
        sys.argv[2]: path of the output FASTA file (one record per species).

    Each alignment block yielded by ``iter_fasta_alignment`` is appended to a
    per-species temporary file.  A species missing from a block (or first
    seen in a later block) is padded with ``-`` so that every output sequence
    covers the same columns.  Species are written in the order in which they
    were first encountered.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    import shutil

    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = odict()  # species name -> temporary file holding its sequence
    cur_size = 0  # number of alignment columns written so far
    try:
        for components in iter_fasta_alignment( input_filename ):
            if not components:
                # Defensive: nothing to index or append for an empty block.
                continue
            # The first component's length is taken as the block width
            # (assumes all components of a block are equally long -- TODO
            # confirm against iter_fasta_alignment).
            block_size = len( components[0].text )
            # Snapshot of the species known before this block; whatever is
            # left in it after the loop below needs gap padding.  list()
            # guarantees a mutable copy regardless of what keys() returns.
            species_not_written = list( species.keys() )
            for component in components:
                name = component.species
                if name not in species:
                    # New species: pad the columns of all earlier blocks.
                    species[name] = tempfile.TemporaryFile()
                    species[name].write( "-" * cur_size )
                species[name].write( component.text )
                try:
                    species_not_written.remove( name )
                except ValueError:
                    # New species (or one already handled in this block).
                    pass
            for spec in species_not_written:
                species[spec].write( "-" * block_size )
            cur_size += block_size

        with open( output_filename, 'wb' ) as out:
            for spec, f in species.items():
                f.seek( 0 )
                out.write( ">%s\n" % spec )
                # Stream the sequence instead of loading it all into memory.
                shutil.copyfileobj( f, out )
                out.write( "\n" )
    finally:
        # Release the temporary files even if reading or writing failed.
        for f in species.values():
            f.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()