Mercurial > repos > earlhaminst > ete
comparison ete_homology_classifier.py @ 5:817031b8486d draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit b97aee603b9acf29981719160e963a1efe2946d0
author | earlhaminst |
---|---|
date | Thu, 10 May 2018 06:15:17 -0400 |
parents | |
children | f1eca1158f21 |
comparison
equal
deleted
inserted
replaced
4:87b6de3ef63e | 5:817031b8486d |
---|---|
1 from __future__ import print_function | |
2 | |
3 import optparse | |
4 | |
5 from ete3 import PhyloTree | |
6 | |
7 | |
8 def main(): | |
9 usage = "usage: %prog --genetree <genetree-file> --speciestree <speciestree-file> [options]" | |
10 parser = optparse.OptionParser(usage=usage) | |
11 parser.add_option('--genetree', help='GeneTree in nhx format') | |
12 parser.add_option('--out_format', type='string', default='tabular', help='Choose output format') | |
13 parser.add_option('--filters', default='', help='Filter families') | |
14 | |
15 options, args = parser.parse_args() | |
16 | |
17 if options.genetree is None: | |
18 parser.error("--genetree option must be specified, GeneTree in nhx format") | |
19 | |
20 # reads single gene tree | |
21 genetree = PhyloTree(options.genetree) | |
22 | |
23 leaves_list = genetree.get_leaf_names() | |
24 # Genetree nodes are required to be in gene_species format | |
25 leaves_list = [_ for _ in leaves_list if '_' in _] | |
26 | |
27 species_list = [_.split("_")[1] for _ in leaves_list] | |
28 | |
29 species_dict = {} | |
30 for species in species_list: | |
31 count = "one" | |
32 if species in species_dict: | |
33 count = "many" | |
34 species_dict[species] = count | |
35 | |
36 homologies = { | |
37 'one-to-one': [], | |
38 'one-to-many': [], | |
39 'many-to-one': [], | |
40 'many-to-many': [], | |
41 'paralogs': [] | |
42 } | |
43 | |
44 # stores relevant homology types in dict | |
45 for i, leaf1 in enumerate(leaves_list): | |
46 for leaf2 in leaves_list[i + 1:]: | |
47 id1 = leaf1.split(":")[1] if ":" in leaf1 else leaf1 | |
48 id2 = leaf2.split(":")[1] if ":" in leaf2 else leaf2 | |
49 species1 = id1.split("_")[1] | |
50 species2 = id2.split("_")[1] | |
51 if species1 == species2: | |
52 homology_type = 'paralogs' | |
53 else: | |
54 homology_type = species_dict[species1] + "-to-" + species_dict[species2] | |
55 homologies[homology_type].append((id1, id2)) | |
56 | |
57 options.filters = options.filters.split(",") | |
58 | |
59 if options.out_format == 'tabular': | |
60 for homology_type, homologs_list in homologies.items(): | |
61 # checks if homology type is in filter | |
62 if homology_type in options.filters: | |
63 for (gene1, gene2) in homologs_list: | |
64 print("%s\t%s\t%s" % (gene1, gene2, homology_type)) | |
65 elif options.out_format == 'csv': | |
66 print_family = True | |
67 for homology_type, homologs_list in homologies.items(): | |
68 if homologs_list and homology_type not in options.filters: | |
69 print_family = False | |
70 break | |
71 | |
72 # prints family if homology type is not found in filter | |
73 if print_family: | |
74 print(','.join(leaves_list)) | |
75 | |
76 | |
77 if __name__ == "__main__": | |
78 main() |