Mercurial > repos > devteam > fasta_concatenate_by_species
annotate fasta_concatenate_by_species.py @ 3:25b8736c627a draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit 34a6c9f94a5722bb7d2f887618aafa410a770e91"
| author | devteam | 
|---|---|
| date | Mon, 02 Mar 2020 06:47:07 -0500 | 
| parents | 16df616b39e5 | 
| children | 
| rev | line source | 
|---|---|
| 0 | 1 #!/usr/bin/env python | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
0diff
changeset | 2 # Dan Blankenberg | 
| 0 | 3 """ | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
0diff
changeset | 4 Takes a Multiple Alignment FASTA file and concatenates | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
0diff
changeset | 5 sequences for each species, resulting in one sequence | 
| 0 | 6 alignment per species. | 
| 7 """ | |
| 8 | |
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
0diff
changeset | 9 import sys | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
0diff
changeset | 10 import tempfile | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
0diff
changeset | 11 from collections import OrderedDict | 
| 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
0diff
changeset | 12 | 
| 0 | 13 from utils.maf_utilities import iter_fasta_alignment | 
| 2 
16df616b39e5
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
 devteam parents: 
0diff
changeset | 14 | 
| 0 | 15 | 
def __main__():
    """Concatenate the sequences of each species across all alignment blocks.

    Command-line arguments:
        sys.argv[1]: path of the multiple alignment FASTA input file.
        sys.argv[2]: path of the FASTA output file.

    Every alignment block yielded by ``iter_fasta_alignment`` is appended to
    one temporary file per species.  A species that is absent from a block is
    padded with ``-`` for that block, and a species first seen in a later
    block is left-padded with ``-`` for all the blocks that came before it.
    The output holds one record per species, in order of first appearance.

    Raises:
        IndexError: if fewer than two command-line arguments are given.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    # Copy the temporary files to the output in bounded pieces so that a
    # whole concatenated sequence never has to be held in memory.
    chunk_size = 1024 * 1024
    species = OrderedDict()
    cur_size = 0
    try:
        for components in iter_fasta_alignment(input_filename):
            if not components:
                # Nothing to measure or write; indexing [0] would fail.
                continue
            # The first component's length is taken as the block width.
            # NOTE(review): assumes every component of a block has the same
            # text length -- confirm against iter_fasta_alignment.
            block_size = len(components[0].text)
            species_not_written = set(species)
            for component in components:
                if component.species not in species:
                    species[component.species] = tempfile.TemporaryFile(mode="r+")
                    # New species: pad for every block seen before this one.
                    species[component.species].write("-" * cur_size)
                species[component.species].write(component.text)
                # discard() is a no-op for a species that is new in this
                # block or that occurs more than once in it.
                species_not_written.discard(component.species)
            for spec in species_not_written:
                species[spec].write("-" * block_size)
            cur_size += block_size
        with open(output_filename, 'w') as out:
            for spec, f in species.items():
                f.seek(0)
                out.write(">%s\n" % spec)
                for chunk in iter(lambda: f.read(chunk_size), ""):
                    out.write(chunk)
                out.write("\n")
    finally:
        # Release the temporary files on success and on failure alike.
        for f in species.values():
            f.close()


if __name__ == "__main__":
    __main__()
