Mercurial > repos > earlhaminst > ete
annotate ete_tree_generator.py @ 0:276e3ee68c37 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
author | earlhaminst |
---|---|
date | Thu, 15 Dec 2016 12:55:02 -0500 |
parents | |
children |
rev | line source |
---|---|
0
276e3ee68c37
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff
changeset
|
"""Build a species tree from a list of species names using the NCBI taxonomy.

The script reads species names (one per line) from the file given with
``-s``, translates them to NCBI taxids through ``ete3.NCBITaxa``, asks for the
topology connecting those taxids, relabels the leaves with the species names
and writes the result to ``newickTree.nhx`` in the current directory.
"""
import optparse


def read_species_names(filename):
    """Return the species names listed in *filename*, one per line.

    Surrounding whitespace is stripped and underscores are replaced by
    spaces. Blank lines are skipped so that an empty or trailing line does
    not end up as an empty species name that can never be translated.
    """
    with open(filename) as handle:
        cleaned = [line.strip().replace('_', ' ') for line in handle]
    return [name for name in cleaned if name]


def find_missing_names(species_names, name2taxid):
    """Return the names in *species_names* that have no entry in *name2taxid*.

    The input order is preserved so the error message mirrors the file.
    """
    return [name for name in species_names if name not in name2taxid]


def build_label_map(name2taxid, treebest):
    """Return a mapping from ``str(taxid)`` to the label to put on a leaf.

    Only the first taxid listed for each name is used. When *treebest* is
    true the spaces are removed from the name and ``*`` is appended.
    """
    labels = {}
    for name, taxids in name2taxid.items():
        if treebest:
            label = name.replace(" ", "") + "*"
        else:
            label = name
        labels[str(taxids[0])] = label
    return labels


def to_treebest_newick(newick):
    """Return *newick* with the trailing ``;`` replaced by ``root;``."""
    return newick.rstrip(';') + "root;"


def main():
    """Parse the command line, build the topology and write ``newickTree.nhx``.

    Exits through ``parser.error`` when ``-s`` is missing and raises
    ``ValueError`` when the species file is empty or contains names that
    could not be translated to a taxid.
    """
    parser = optparse.OptionParser()
    parser.add_option('-s', '--species', dest="input_species_filename",
                      help='Species list in text format one species in each line')

    parser.add_option('-f', '--format', type='choice',
                      choices=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '100'],
                      dest="format", default='8', help='output format for tree')

    parser.add_option('-t', '--treebest', type='choice', choices=['yes', 'no'],
                      dest="treebest", default='no', help='To be used in TreeBest')

    parser.add_option('-d', '--database', type='choice', choices=['yes', 'no'],
                      dest="database", default='no', help='Update database')

    options, _ = parser.parse_args()

    # Imported and instantiated only after the options were parsed, so that
    # --help and invalid options exit before any taxonomy work is started.
    from ete3 import NCBITaxa
    ncbi = NCBITaxa()

    if options.database == "yes":
        try:
            ncbi.update_taxonomy_database()
        except Exception as exc:
            # The update stays best-effort: report the failure and carry on
            # with whatever database is already available.
            print("Warning: could not update the NCBI taxonomy database: %s" % exc)

    # Checked after the optional update, as before, so "-d yes" on its own
    # still refreshes the database before the missing -s is reported.
    if options.input_species_filename is None:
        parser.error('-s option must be specified, Species list in text format one species in each line')

    species_names = read_species_names(options.input_species_filename)
    if not species_names:
        raise ValueError('No species names found in %s' % options.input_species_filename)

    name2taxid = ncbi.get_name_translator(species_names)

    missing = find_missing_names(species_names, name2taxid)
    if missing:
        raise ValueError('Species not found in the NCBI taxonomy database: %s' % ', '.join(missing))

    # Only the first taxid returned for each name is used.
    taxids = [name2taxid[name][0] for name in species_names]

    tree = ncbi.get_topology(taxids)

    use_treebest = options.treebest == "yes"
    labels = build_label_map(name2taxid, use_treebest)

    # Leaves are looked up by their current name, which is expected to be the
    # taxid rendered as a string (the keys of the label map).
    for leaf in tree:
        leaf.name = labels[leaf.name]

    newick = tree.write(format=int(options.format))

    if use_treebest:
        newick = to_treebest_newick(newick)

    with open('newickTree.nhx', 'w') as newick_file:
        newick_file.write(newick)


if __name__ == "__main__":
    main()