annotate ete_species_tree_generator.py @ 4:87b6de3ef63e draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit 41e40314a9b25a9f3c06a13422d367b68334f593
author earlhaminst
date Thu, 22 Mar 2018 13:25:38 -0400
parents 03c10736e497
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
a4ba317fc713 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit df73a2add6dba8550867034e157ed0699b3b2f53
earlhaminst
parents:
diff changeset
1 import optparse
2
03c10736e497 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit 91b634b8f9b131045bbbbf43cc8edbea59ac686b-dirty
earlhaminst
parents: 1
diff changeset
2 import sys
1
a4ba317fc713 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit df73a2add6dba8550867034e157ed0699b3b2f53
earlhaminst
parents:
diff changeset
3
a4ba317fc713 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit df73a2add6dba8550867034e157ed0699b3b2f53
earlhaminst
parents:
diff changeset
4 from ete3 import NCBITaxa
a4ba317fc713 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit df73a2add6dba8550867034e157ed0699b3b2f53
earlhaminst
parents:
diff changeset
5
a4ba317fc713 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit df73a2add6dba8550867034e157ed0699b3b2f53
earlhaminst
parents:
diff changeset
6
a4ba317fc713 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit df73a2add6dba8550867034e157ed0699b3b2f53
earlhaminst
parents:
diff changeset
def build_parser():
    """Return the command-line parser for the species tree generator."""
    parser = optparse.OptionParser()
    parser.add_option('-s', '--species', dest="input_species_filename",
                      help='Species list in text format one species in each line')
    parser.add_option('-d', '--database', dest="database", default=None,
                      help='ETE sqlite data base to use (default: ~/.etetoolkit/taxa.sqlite)')
    parser.add_option('-o', '--output', dest="output", help='output file name (default: stdout)')
    parser.add_option('-f', '--format', type='choice', choices=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '100'], dest="format",
                      default='8', help='outpur format for tree')
    parser.add_option('-t', '--treebest', type='choice', choices=['yes', 'no'], dest="treebest",
                      default='no', help='To be used in TreeBest')
    return parser


def clean_species_names(lines):
    """Normalise raw input lines into species names.

    Surrounding whitespace is stripped and underscores become spaces.
    Lines that are empty after stripping are dropped, so a trailing
    newline or a spacer line in the input no longer turns into an
    empty "species" that cannot be looked up.
    """
    names = (line.strip().replace('_', ' ') for line in lines)
    return [name for name in names if name]


def resolve_taxids(species_names, name2taxid):
    """Return the first taxid of every species, in input order.

    ``name2taxid`` maps a species name to a list of taxids (the shape
    the script reads from ``ncbi.get_name_translator``).

    Raises:
        ValueError: if any name is absent from ``name2taxid``; the
            message lists every missing name instead of failing with a
            bare ``KeyError`` on the first one.
    """
    missing = [name for name in species_names if name not in name2taxid]
    if missing:
        raise ValueError("species not found in the NCBI taxonomy database: %s"
                         % ", ".join(missing))
    return [name2taxid[name][0] for name in species_names]


def leaf_labels(name2taxid, treebest):
    """Map ``str(taxid)`` to the label to be written on the tree leaf.

    With ``treebest == "yes"`` the label is the species name with the
    spaces removed and a trailing ``*``; otherwise it is the species
    name unchanged.
    """
    if treebest == "yes":
        return {str(ids[0]): name.replace(" ", "") + "*" for name, ids in name2taxid.items()}
    return {str(ids[0]): name for name, ids in name2taxid.items()}


def finalise_newick(newick, treebest):
    """Return ``newick``, labelling the root ``root`` in TreeBest mode."""
    if treebest == "yes":
        # Drop the terminating ';' so the root label lands before it.
        return newick.rstrip(';') + "root;"
    return newick


def write_output(text, output):
    """Write ``text`` to the file ``output``, or to stdout if it is unset."""
    if not output:  # if filename is not given
        # stdout is not ours to close; just write to it.
        sys.stdout.write(text)
        return
    with open(output, "w") as handle:
        handle.write(text)


def main(argv=None):
    """Build a Newick species tree from a list of species names.

    Reads the species list named by ``-s``, translates the names to
    NCBI taxids, builds the topology with ``NCBITaxa.get_topology``,
    relabels the leaves with species names and writes the tree in the
    requested format to ``-o`` (or stdout).

    ``argv`` defaults to ``sys.argv[1:]`` when ``None``.
    """
    parser = build_parser()
    options, args = parser.parse_args(argv)
    if options.input_species_filename is None:
        parser.error("-s option must be specified, Species list in text format one species in each line")

    with open(options.input_species_filename) as handle:
        species_names = clean_species_names(handle)
    if not species_names:
        parser.error("no species names found in %s" % options.input_species_filename)

    ncbi = NCBITaxa(dbfile=options.database)
    name2taxid = ncbi.get_name_translator(species_names)

    try:
        taxids = resolve_taxids(species_names, name2taxid)
    except ValueError as err:
        # Exit with a readable message rather than a traceback.
        sys.exit(str(err))

    tree = ncbi.get_topology(taxids)

    # The script looks leaves up by stringified taxid, so swap in names.
    labels = leaf_labels(name2taxid, options.treebest)
    for leaf in tree:
        leaf.name = labels[leaf.name]

    newick_tree = finalise_newick(tree.write(format=int(options.format)), options.treebest)
    write_output(newick_tree, options.output)


if __name__ == "__main__":
    main()