Mercurial > repos > earlhaminst > ete
comparison ete_gene_cnv.py @ 8:16e925bf567e draft
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit ed32f2e6d8174873cefcbe141084f857f84b0586"
author | earlhaminst |
---|---|
date | Thu, 31 Oct 2019 07:48:59 -0400 |
parents | |
children | ed74587a13c8 |
comparison
equal
deleted
inserted
replaced
7:6a5282f71f82 | 8:16e925bf567e |
---|---|
1 from __future__ import print_function | |
2 | |
3 import argparse | |
4 import collections | |
5 | |
6 from ete3 import PhyloTree | |
7 | |
8 | |
9 def printTSV(myDict, colList=None): | |
10 """ Pretty print a list of dictionaries (myDict) as a dynamically sized table. | |
11 If column names (colList) aren't specified, they will show in random order. | |
12 Author: Thierry Husson - Use it as you want but don't blame me. | |
13 """ | |
14 if not colList: | |
15 colList = list(myDict[0].keys() if myDict else []) | |
16 | |
17 myList = [colList] | |
18 | |
19 for item in myDict: | |
20 myList.append([str(item[col] if item[col] is not None else '') for col in colList]) | |
21 | |
22 for item in myList: | |
23 print(*item, sep="\t") | |
24 | |
25 | |
26 def main(): | |
27 parser = argparse.ArgumentParser(description='Gene Copy Number Finder') | |
28 parser.add_argument('--genetree', required=True, help='GeneTree in nhx format') | |
29 parser.add_argument('--speciesorder', required=True, help='Comma-separated species list') | |
30 args = parser.parse_args() | |
31 | |
32 species_list = args.speciesorder.split(",") | |
33 species_list = [_.strip() for _ in species_list] | |
34 table = [] | |
35 | |
36 with open(args.genetree, "r") as f: | |
37 # reads multiple gene tree line by line gene tree | |
38 for line in f: | |
39 # Remove empty NHX features that can be produced by TreeBest but break ete3 | |
40 line = line.replace('[&&NHX]', '') | |
41 | |
42 # reads single gene tree | |
43 genetree = PhyloTree(line) | |
44 leaves = genetree.get_leaf_names() | |
45 | |
46 leaves_parts = [_.split("_") for _ in leaves] | |
47 for i, leaf_parts in enumerate(leaves_parts): | |
48 if len(leaf_parts) != 2: | |
49 raise Exception("Leaf node '%s' is not in gene_species format" % leaves[i]) | |
50 | |
51 leaves_species = [_[1] for _ in leaves_parts] | |
52 species_counter = collections.Counter(leaves_species) | |
53 | |
54 # Assign to ref_species the first element of species_list which | |
55 # appears in a leaf node | |
56 for ref_species in species_list: | |
57 if ref_species in species_counter: | |
58 break | |
59 else: | |
60 raise Exception("None of the specified species was found in the GeneTree '%s'" % line) | |
61 | |
62 # Find the gene of the (first) leaf node for the ref_species | |
63 for leaf_parts in leaves_parts: | |
64 if leaf_parts[1] == ref_species: | |
65 species_counter['gene'] = leaf_parts[0] | |
66 break | |
67 | |
68 table.append(species_counter) | |
69 | |
70 colList = ["gene"] + species_list | |
71 printTSV(table, colList) | |
72 | |
73 | |
74 if __name__ == "__main__": | |
75 main() |