annotate ete_genetree_splitter.py @ 14:d40b9a7debe5 draft default tip

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit 71b9c1035f713be174bfcf5ecb20804495f39258
author earlhaminst
date Thu, 07 Mar 2024 19:39:30 +0000
parents dc32007a6b36
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
077021c45b96 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff changeset
1 from __future__ import print_function
077021c45b96 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff changeset
2
077021c45b96 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff changeset
3 import optparse
12
dc32007a6b36 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents: 9
diff changeset
4 import os
dc32007a6b36 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents: 9
diff changeset
5 import sys
3
077021c45b96 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff changeset
6
077021c45b96 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff changeset
7 from ete3 import PhyloTree
077021c45b96 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff changeset
8
077021c45b96 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff changeset
9
077021c45b96 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff changeset
def _parse_species_list(value):
    """Turn a comma-separated option value into a list of species names.

    ``None`` and the empty string both give an empty list; surrounding
    whitespace and empty items (e.g. from a trailing comma) are dropped.
    """
    if not value:
        return []
    return [name.strip() for name in value.split(",") if name.strip()]


def _write_tree(tree, cluster_id, out_dir, output_format):
    """Write *tree* to ``<cluster_id>_genetree.nhx``, inside *out_dir* if set."""
    outfile = '{}_genetree.nhx'.format(cluster_id)
    if out_dir:
        outfile = os.path.join(out_dir, outfile)
    with open(outfile, 'w') as f:
        f.write(tree.write(format=output_format))


def _split_tree_by_species(tree, ingroup, outgroup):
    """Yield the subtrees obtained by splitting *tree* on species duplication.

    A node is split into its children while both the outgroup check (skipped
    when *outgroup* is empty) and the ingroup check succeed; any node failing
    a check is yielded as-is. Subtrees are yielded depth-first, children in
    their stored order.
    """
    outgroup_ok = not outgroup or check_outgroup(tree, outgroup)
    # ``tree.children`` guards against descending into a childless node and
    # lets nodes with any number of children be split.
    if outgroup_ok and check_ingroup(tree, ingroup) and tree.children:
        for child in tree.children:
            yield from _split_tree_by_species(child, ingroup, outgroup)
    else:
        yield tree


def main():
    """Split a gene tree into subtrees and write each one to its own file.

    Command-line options are read from ``sys.argv``. The gene tree is
    optionally reconciled with a species tree (``--gainlose``) and then split
    with the algorithm chosen by ``--split``:

    * ``dups``    -- ``PhyloTree.split_by_dups()``
    * ``treeko``  -- ``PhyloTree.get_speciation_trees()``
    * ``species`` -- recursive split driven by ``--ingroup`` / ``--outgroup``

    Each resulting subtree is written to ``<n>_genetree.nhx`` (n starting at
    1) in ``--dir``, or in the current directory when ``--dir`` is not given.
    An empty gene tree file makes the program exit without writing anything.
    Invalid option combinations are reported through ``parser.error`` (exit
    status 2).
    """
    usage = "usage: %prog --genetree <genetree-file> --speciestree <speciestree-file> [options]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--genetree', help='GeneTree in nhx format')
    parser.add_option('--speciestree', help='Species Tree in nhx format')
    parser.add_option('--ingroup', help="Comma-separated list of ingroup species (required when --split is 'species')")
    parser.add_option('--outgroup', help="Comma-separated list of outgroup species (optional, used when --split is 'species')")
    parser.add_option('--species_format', type='int', default=8, help='Species Tree input format (0-9)')
    parser.add_option('--gene_node', type='int', default=0, help='Gene node format 0=gene_species, 1=species_gene')
    parser.add_option('--gainlose', action='store_true', default=False, help='Find out gene gain/lose')
    parser.add_option('--split', type='choice', choices=['dups', 'treeko', 'species'], dest="split", default='dups', help='Choose GeneTree splitting algorithms')
    parser.add_option('--output_format', type='int', default=9, help='GeneTree output format (0-9)')
    parser.add_option('-d', '--dir', type='string', default="", help="Absolute or relative path to output directory. If directory does not exist it will be created")

    options, args = parser.parse_args()

    # Validate the option combination before touching the filesystem.
    if options.genetree is None:
        parser.error("--genetree option must be specified, GeneTree in nhx format")
    if options.gainlose and options.speciestree is None:
        parser.error("--speciestree option must be specified, species tree in nhx format")
    if options.split == "species" and options.ingroup is None:
        parser.error("--ingroup option must be specified when --split is 'species'")

    if options.dir:
        os.makedirs(options.dir, exist_ok=True)

    # An empty input file is not an error: exit quietly with no output.
    if os.stat(options.genetree).st_size == 0:
        sys.exit()

    with open(options.genetree, 'r') as f:
        contents = f.read()

    # Remove empty NHX features that can be produced by TreeBest but break ete3
    contents = contents.replace('[&&NHX]', '')

    # reads single gene tree
    genetree = PhyloTree(contents)

    # sets species naming function
    # NOTE(review): nothing is installed for gene_node == 1 (species_gene),
    # so PhyloTree's default naming function is used in that case - confirm
    # that this is intended.
    if options.gene_node == 0:
        genetree.set_species_naming_function(parse_sp_name)

    # reconcile species tree with gene tree to help find out gene gain/loss
    if options.gainlose:
        # reads species tree
        speciestree = PhyloTree(options.speciestree, format=options.species_format)

        # Remove the '*' that species trees configured for TreeBest carry
        # on their species names
        for leaf in speciestree:
            leaf.name = leaf.name.strip('*')

        genetree, _events = genetree.reconcile(speciestree)

    if options.split == "dups":
        # split_by_dups() returns all subtrees obtained by splitting the
        # tree at its duplication nodes
        subtrees = genetree.split_by_dups()
    elif options.split == "treeko":
        # splits tree using the TreeKO algorithm.
        _ntrees, _ndups, subtrees = genetree.get_speciation_trees()
    else:
        # options.split == "species" (the only remaining choice)
        ingroup = _parse_species_list(options.ingroup)
        outgroup = _parse_species_list(options.outgroup)
        subtrees = _split_tree_by_species(genetree, ingroup, outgroup)

    for cluster_id, subtree in enumerate(subtrees, start=1):
        _write_tree(subtree, cluster_id, options.dir, options.output_format)
dc32007a6b36 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents: 9
diff changeset
106
dc32007a6b36 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents: 9
diff changeset
107
dc32007a6b36 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents: 9
diff changeset
def check_outgroup(tree, outgroup):
    """Tell whether enough outgroup species are duplicated in *tree*.

    An outgroup species counts as duplicated when it occurs more than once
    among the species returned by ``get_species(tree)``. The result is True
    when the number of duplicated outgroup species is at least half the
    length of *outgroup*.
    """
    leaf_species = get_species(tree)
    duplicated = sum(1 for name in outgroup if leaf_species.count(name) > 1)
    return duplicated >= len(outgroup) / 2
dc32007a6b36 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents: 9
diff changeset
118
dc32007a6b36 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents: 9
diff changeset
119
dc32007a6b36 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents: 9
diff changeset
def check_ingroup(tree, ingroup):
    """Tell whether enough ingroup species are duplicated in *tree*.

    An ingroup species counts as duplicated when it occurs more than once
    among the species returned by ``get_species(tree)``. The result is True
    when at least 80% of the species listed in *ingroup* are duplicated, and
    False for an empty *ingroup*.
    """
    if not ingroup:
        return False

    species = get_species(tree)
    count = sum(1 for ing in ingroup if species.count(ing) > 1)

    # Fraction of ingroup species that are duplicated. The previous form,
    # len(ingroup) / count, is always >= 1 and so made the 0.8 threshold
    # meaningless.
    return count / len(ingroup) >= 0.8
3
077021c45b96 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff changeset
130
077021c45b96 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff changeset
131
077021c45b96 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff changeset
def parse_sp_name(node_name):
    """Return the text after the last underscore of *node_name*.

    A name without any underscore is returned unchanged.
    """
    _head, _sep, species = node_name.rpartition("_")
    return species
dc32007a6b36 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents: 9
diff changeset
134
dc32007a6b36 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents: 9
diff changeset
135
dc32007a6b36 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents: 9
diff changeset
def get_species(node):
    """Return the species part of every leaf name below *node*.

    Leaf names come from ``node.get_leaf_names()``. Genetree nodes are
    required to be in gene_species format, so names without an underscore
    are skipped and the species is the text after the last underscore. One
    entry is returned per matching leaf, in leaf order, duplicates included.
    """
    return [
        leaf_name.split("_")[-1]
        for leaf_name in node.get_leaf_names()
        if "_" in leaf_name
    ]
3
077021c45b96 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff changeset
144
077021c45b96 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff changeset
145
077021c45b96 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff changeset
# Run the command-line tool only when this file is executed as a script.
if __name__ == "__main__":
    sys.exit(main())