Mercurial > repos > earlhaminst > ete
annotate ete_homology_classifier.py @ 5:817031b8486d draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit b97aee603b9acf29981719160e963a1efe2946d0
author | earlhaminst |
---|---|
date | Thu, 10 May 2018 06:15:17 -0400 |
parents | |
children | f1eca1158f21 |
rev | line source |
---|---|
5
817031b8486d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit b97aee603b9acf29981719160e963a1efe2946d0
earlhaminst
parents:
diff
changeset
|
1 from __future__ import print_function |
817031b8486d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit b97aee603b9acf29981719160e963a1efe2946d0
earlhaminst
parents:
diff
changeset
|
2 |
817031b8486d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit b97aee603b9acf29981719160e963a1efe2946d0
earlhaminst
parents:
diff
changeset
|
3 import optparse |
817031b8486d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit b97aee603b9acf29981719160e963a1efe2946d0
earlhaminst
parents:
diff
changeset
|
4 |
817031b8486d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit b97aee603b9acf29981719160e963a1efe2946d0
earlhaminst
parents:
diff
changeset
|
5 from ete3 import PhyloTree |
817031b8486d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit b97aee603b9acf29981719160e963a1efe2946d0
earlhaminst
parents:
diff
changeset
|
6 |
817031b8486d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit b97aee603b9acf29981719160e963a1efe2946d0
earlhaminst
parents:
diff
changeset
|
7 |
817031b8486d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit b97aee603b9acf29981719160e963a1efe2946d0
earlhaminst
parents:
diff
changeset
|
def _gene_id(leaf_name):
    """Return the gene identifier of a leaf name, dropping a ``prefix:`` part if present."""
    return leaf_name.split(":")[1] if ":" in leaf_name else leaf_name


def _species_of(gene_id):
    """Return the species part of a ``gene_species`` identifier (second ``_``-separated field)."""
    return gene_id.split("_")[1]


def _classify_homologies(gene_ids):
    """Group every unordered pair of gene ids by homology type.

    A species represented by a single gene is labelled "one", a species
    represented by several genes is labelled "many". Two genes of the same
    species are 'paralogs'; otherwise the type is '<label1>-to-<label2>'.

    Returns a dict mapping each homology type to a list of (id1, id2) tuples.
    """
    species = [_species_of(gene_id) for gene_id in gene_ids]

    # "one" on first sighting of a species, "many" on any later sighting.
    copies = {}
    for name in species:
        copies[name] = "many" if name in copies else "one"

    homologies = {
        'one-to-one': [],
        'one-to-many': [],
        'many-to-one': [],
        'many-to-many': [],
        'paralogs': [],
    }

    for i, (id1, species1) in enumerate(zip(gene_ids, species)):
        for id2, species2 in zip(gene_ids[i + 1:], species[i + 1:]):
            if species1 == species2:
                homology_type = 'paralogs'
            else:
                homology_type = copies[species1] + "-to-" + copies[species2]
            homologies[homology_type].append((id1, id2))
    return homologies


def main():
    """Classify the homology relationships between the leaves of a gene tree.

    Command-line options:
      --genetree    path of the gene tree (required; the option help says nhx format)
      --out_format  'tabular' (default) or 'csv'
      --filters     comma-separated list of homology types

    With 'tabular' output, one ``gene1<TAB>gene2<TAB>homology_type`` line is
    printed for every pair whose homology type is listed in --filters.
    With 'csv' output, the comma-joined leaf names of the tree are printed,
    but only if every homology type that occurs in the tree is listed in
    --filters. Any other --out_format value prints nothing.

    Exits through ``parser.error`` when --genetree is missing.
    """
    # The parser defines no --speciestree option, so the usage line must not advertise one.
    usage = "usage: %prog --genetree <genetree-file> [options]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--genetree', help='GeneTree in nhx format')
    parser.add_option('--out_format', type='string', default='tabular', help='Choose output format')
    parser.add_option('--filters', default='', help='Filter families')

    options, _ = parser.parse_args()

    if options.genetree is None:
        parser.error("--genetree option must be specified, GeneTree in nhx format")

    # reads single gene tree
    genetree = PhyloTree(options.genetree)

    # Genetree nodes are required to be in gene_species format. The check is
    # made on the gene id (after any "prefix:" is removed) because that is the
    # string the species is later extracted from; checking the raw leaf name
    # could let through leaves that then fail with IndexError/KeyError.
    leaves_list = [leaf for leaf in genetree.get_leaf_names() if '_' in _gene_id(leaf)]
    gene_ids = [_gene_id(leaf) for leaf in leaves_list]

    # stores relevant homology types in dict
    homologies = _classify_homologies(gene_ids)

    filters = options.filters.split(",")

    if options.out_format == 'tabular':
        for homology_type, homologs_list in homologies.items():
            # checks if homology type is in filter
            if homology_type in filters:
                for (gene1, gene2) in homologs_list:
                    print("%s\t%s\t%s" % (gene1, gene2, homology_type))
    elif options.out_format == 'csv':
        # The family is printed only when no homology type present in the
        # tree falls outside the filter list.
        print_family = True
        for homology_type, homologs_list in homologies.items():
            if homologs_list and homology_type not in filters:
                print_family = False
                break

        # prints family if homology type is not found in filter
        if print_family:
            print(','.join(leaves_list))
817031b8486d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit b97aee603b9acf29981719160e963a1efe2946d0
earlhaminst
parents:
diff
changeset
|
75 |
817031b8486d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit b97aee603b9acf29981719160e963a1efe2946d0
earlhaminst
parents:
diff
changeset
|
76 |
817031b8486d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit b97aee603b9acf29981719160e963a1efe2946d0
earlhaminst
parents:
diff
changeset
|
# Run the classifier only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()