annotate ete_tree_generator.py @ 0:276e3ee68c37 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
author earlhaminst
date Thu, 15 Dec 2016 12:55:02 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
276e3ee68c37 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff changeset
1 import optparse
276e3ee68c37 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff changeset
2
276e3ee68c37 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff changeset
3 from ete3 import NCBITaxa
276e3ee68c37 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff changeset
4
276e3ee68c37 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff changeset
def build_parser():
    """Build the command-line option parser for the species tree generator.

    Options:
        -s/--species   path to a text file with one species name per line
        -f/--format    tree output format code passed to ``tree.write``
                       (default '8')
        -t/--treebest  'yes' to produce TreeBest-style labels and root
                       (default 'no')
        -d/--database  'yes' to update the taxonomy database before use
                       (default 'no')
    """
    parser = optparse.OptionParser()
    parser.add_option('-s', '--species', dest="input_species_filename",
                      help='Species list in text format one species in each line')
    parser.add_option('-f', '--format', type='choice',
                      choices=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '100'],
                      dest="format", default='8', help='output format for tree')
    parser.add_option('-t', '--treebest', type='choice', choices=['yes', 'no'],
                      dest="treebest", default='no', help='To be used in TreeBest')
    parser.add_option('-d', '--database', type='choice', choices=['yes', 'no'],
                      dest="database", default='no', help='Update database')
    return parser


def read_species_names(lines):
    """Return the cleaned species names found in an iterable of text lines.

    Each line is stripped of surrounding whitespace and its underscores are
    replaced by spaces. Lines that are empty after cleaning are dropped so a
    blank or trailing line in the input does not become a bogus species name.
    """
    cleaned = (line.strip().replace('_', ' ') for line in lines)
    return [name for name in cleaned if name]


def missing_species(species_names, name2taxid):
    """Return the names from ``species_names`` that have no entry in ``name2taxid``.

    Input order is preserved so the resulting error message matches the file.
    """
    return [name for name in species_names if name not in name2taxid]


def build_leaf_labels(name2taxid, treebest):
    """Map each species' first taxid (as a string) to its leaf label.

    With ``treebest`` true the label is the species name with spaces removed
    and a trailing '*'; otherwise it is the species name unchanged.
    """
    if treebest:
        return {str(taxids[0]): name.replace(" ", "") + "*"
                for name, taxids in name2taxid.items()}
    return {str(taxids[0]): name for name, taxids in name2taxid.items()}


def to_treebest_newick(newick):
    """Label the root of a Newick string 'root', as done for TreeBest output.

    Trailing ';' characters are removed and 'root;' is appended.
    """
    return newick.rstrip(';') + "root;"


def main():
    """Generate a species tree from a list of names and write it to 'newickTree.nhx'.

    Raises:
        Exception: if the -s option is not given.
        ValueError: if the species file contains no names, or if any name
            cannot be translated to a taxid.
    """
    # Parse options first so that --help and invalid options exit before the
    # NCBITaxa object is constructed.
    options, args = build_parser().parse_args()

    ncbi = NCBITaxa()

    if options.database == "yes":
        # The update stays best-effort: a failure must not abort tree
        # generation, but it is reported instead of being silently discarded.
        try:
            ncbi.update_taxonomy_database()
        except Exception as exc:
            # NOTE(review): reported on stdout because some tool runners treat
            # any stderr output as a job failure - confirm preferred channel.
            print('Warning: could not update the taxonomy database: %s' % exc)

    if options.input_species_filename is None:
        raise Exception('-s option must be specified, Species list in text format one species in each line')

    with open(options.input_species_filename) as f:
        species_name = read_species_names(f)

    if not species_name:
        raise ValueError('No species names found in %s' % options.input_species_filename)

    name2taxid = ncbi.get_name_translator(species_name)

    # Fail with an explicit list of unresolved names rather than a bare KeyError.
    unknown = missing_species(species_name, name2taxid)
    if unknown:
        raise ValueError('Species not found in the taxonomy database: %s' % ', '.join(unknown))

    taxid = [name2taxid[name][0] for name in species_name]

    tree = ncbi.get_topology(taxid)

    use_treebest = options.treebest == "yes"
    inv_map = build_leaf_labels(name2taxid, use_treebest)

    # Leaves are looked up by their current name, which is expected to be the
    # taxid string used as key in inv_map.
    for leaf in tree:
        leaf.name = inv_map[leaf.name]

    newickTree = tree.write(format=int(options.format))

    if use_treebest:
        newickTree = to_treebest_newick(newickTree)

    with open('newickTree.nhx', 'w') as newickFile:
        newickFile.write(newickTree)


if __name__ == "__main__":
    main()