Mercurial > repos > earlhaminst > ete
annotate ete_species_tree_generator.py @ 14:d40b9a7debe5 draft default tip
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit 71b9c1035f713be174bfcf5ecb20804495f39258
author | earlhaminst |
---|---|
date | Thu, 07 Mar 2024 19:39:30 +0000 |
parents | 03c10736e497 |
children |
rev | line source |
---|---|
1
a4ba317fc713
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit df73a2add6dba8550867034e157ed0699b3b2f53
earlhaminst
parents:
diff
changeset
|
1 import optparse |
2
03c10736e497
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit 91b634b8f9b131045bbbbf43cc8edbea59ac686b-dirty
earlhaminst
parents:
1
diff
changeset
|
2 import sys |
1
a4ba317fc713
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit df73a2add6dba8550867034e157ed0699b3b2f53
earlhaminst
parents:
diff
changeset
|
3 |
a4ba317fc713
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit df73a2add6dba8550867034e157ed0699b3b2f53
earlhaminst
parents:
diff
changeset
|
4 from ete3 import NCBITaxa |
a4ba317fc713
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit df73a2add6dba8550867034e157ed0699b3b2f53
earlhaminst
parents:
diff
changeset
|
5 |
a4ba317fc713
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit df73a2add6dba8550867034e157ed0699b3b2f53
earlhaminst
parents:
diff
changeset
|
6 |
a4ba317fc713
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit df73a2add6dba8550867034e157ed0699b3b2f53
earlhaminst
parents:
diff
changeset
|
def build_parser():
    """Build the command-line parser for the species tree generator.

    The option names, destinations, choices and defaults are the script's
    public command-line interface and must stay stable.
    """
    parser = optparse.OptionParser()
    parser.add_option('-s', '--species', dest="input_species_filename",
                      help='Species list in text format one species in each line')
    parser.add_option('-d', '--database', dest="database", default=None,
                      help='ETE sqlite data base to use (default: ~/.etetoolkit/taxa.sqlite)')
    parser.add_option('-o', '--output', dest="output", help='output file name (default: stdout)')
    parser.add_option('-f', '--format', type='choice', choices=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '100'], dest="format",
                      default='8', help='output format for tree')
    parser.add_option('-t', '--treebest', type='choice', choices=['yes', 'no'], dest="treebest",
                      default='no', help='To be used in TreeBest')
    return parser


def read_species_names(path):
    """Read species names from ``path``, one name per line.

    Surrounding whitespace is stripped and underscores are replaced by
    spaces. Blank lines are skipped, so a trailing newline or an empty line
    in the list does not end up as an empty species name.
    """
    with open(path) as handle:
        stripped = (line.strip() for line in handle)
        return [name.replace('_', ' ') for name in stripped if name]


def find_unknown_species(species_names, name2taxid):
    """Return the names (in input order) that have no taxid in ``name2taxid``.

    ``name2taxid`` maps a species name to a list of taxids; a name that is
    absent or mapped to an empty list counts as unknown.
    """
    return [name for name in species_names if not name2taxid.get(name)]


def build_leaf_labels(name2taxid, treebest=False):
    """Map each species' first taxid (as a string) to its leaf label.

    With ``treebest`` set, the label is the species name with spaces removed
    and a trailing ``*``; otherwise it is the species name unchanged.
    """
    if treebest:
        return {str(taxids[0]): name.replace(" ", "") + "*" for name, taxids in name2taxid.items()}
    return {str(taxids[0]): name for name, taxids in name2taxid.items()}


def to_treebest_newick(newick):
    """Label the root node ``root`` by inserting it before the final ``;``."""
    return newick.rstrip(';') + "root;"


def write_output(text, output=None):
    """Write ``text`` to the file ``output``, or to stdout when it is not given.

    The file is managed by a context manager so it is closed even if the
    write fails; stdout is deliberately left open for the interpreter.
    """
    if not output:  # if filename is not given
        sys.stdout.write(text)
        return
    with open(output, "w") as handle:
        handle.write(text)


def main():
    """Generate a species tree from a list of species names.

    Reads the species list, resolves the names to taxids through the ETE
    taxonomy database, builds the topology, relabels the leaves with the
    species names and writes the tree in the requested format.

    Exits with an error message when the ``-s`` option is missing, the
    species list is empty, or a species name cannot be resolved.
    """
    parser = build_parser()
    options, _ = parser.parse_args()
    if options.input_species_filename is None:
        parser.error("-s option must be specified, Species list in text format one species in each line")

    species_names = read_species_names(options.input_species_filename)
    if not species_names:
        sys.exit("No species names found in %s" % options.input_species_filename)

    ncbi = NCBITaxa(dbfile=options.database)
    name2taxid = ncbi.get_name_translator(species_names)

    # Fail with the offending names rather than a bare KeyError below.
    unknown = find_unknown_species(species_names, name2taxid)
    if unknown:
        sys.exit("Species not found in the taxonomy database: %s" % ", ".join(unknown))

    # A name may map to several taxids; only the first one is used.
    taxids = [name2taxid[name][0] for name in species_names]
    tree = ncbi.get_topology(taxids)

    use_treebest = options.treebest == "yes"
    labels = build_leaf_labels(name2taxid, treebest=use_treebest)
    # Leaves are looked up by their current name, which is expected to be the
    # taxid as a string (the key type used by build_leaf_labels).
    for leaf in tree:
        leaf.name = labels[leaf.name]

    newick = tree.write(format=int(options.format))
    if use_treebest:
        newick = to_treebest_newick(newick)

    write_output(newick, options.output)


if __name__ == "__main__":
    main()