comparison ete_genetree_splitter.py @ 3:077021c45b96 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
author earlhaminst
date Mon, 12 Mar 2018 12:51:48 -0400
parents
children 6a5282f71f82
comparison
equal deleted inserted replaced
2:03c10736e497 3:077021c45b96
1 from __future__ import print_function
2
3 import optparse
4
5 from ete3 import PhyloTree
6
7
def main():
    """Split a gene tree into subtrees at its duplication nodes.

    Parses the command-line options and loads the gene tree given by
    ``--genetree``.  When ``--gainlose`` is set, the gene tree is first
    reconciled with the species tree given by ``--speciestree``.  Every
    subtree produced by ``split_by_dups()`` is then written to
    ``<n>_genetree.nhx`` in the current working directory, with ``n``
    counting from 1.

    Exits through ``parser.error`` when ``--genetree`` is missing, or when
    ``--gainlose`` is requested without ``--speciestree``.
    """
    usage = "usage: %prog --genetree <genetree-file> --speciestree <speciestree-file> [options]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--genetree', help='GeneTree in nhx format')
    parser.add_option('--speciestree', help='Species Tree in nhx format')
    parser.add_option('--species_format', type='int', default=8, help='Species Tree input format (0-9)')
    parser.add_option('--gene_node', type='int', default=0, help='Gene node format 0=gene_species, 1=species_gene')
    parser.add_option('--gainlose', action='store_true', default=False, help='Find out gene gain/lose')
    parser.add_option('--output_format', type='int', default=9, help='GeneTree output format (0-9)')
    # Positional arguments are not used by this tool.
    options, _ = parser.parse_args()

    # Validate every required option up front, before any tree is parsed,
    # so that a usage mistake is reported immediately as a usage error.
    if options.genetree is None:
        parser.error("--genetree option must be specified, GeneTree in nhx format")
    if options.gainlose and options.speciestree is None:
        parser.error("--speciestree option must be specified, species tree in nhx format")

    # Read the single gene tree.
    genetree = PhyloTree(options.genetree)

    # Node names in gene_species form need a custom species naming function;
    # for any other --gene_node value PhyloTree's own naming is left in place.
    if options.gene_node == 0:
        genetree.set_species_naming_function(parse_sp_name)

    # Reconcile the species tree with the gene tree to help find gene gain/loss.
    if options.gainlose:
        speciestree = PhyloTree(options.speciestree, format=options.species_format)

        # Remove '*' from species names; it comes from species trees
        # configured for TreeBest.
        for leaf in speciestree:
            leaf.name = leaf.name.strip('*')

        # reconcile() yields a (tree, events) pair; only the tree is needed.
        genetree, _ = genetree.reconcile(speciestree)

    # Split the tree at its duplication nodes and write each resulting
    # subtree to its own numbered file.
    for cluster_id, node in enumerate(genetree.split_by_dups(), 1):
        outfile = str(cluster_id) + '_genetree.nhx'
        with open(outfile, 'w') as f:
            f.write(node.write(format=options.output_format))
49
50
def parse_sp_name(node_name):
    """Return the species part of a ``gene_species`` node name.

    The name is split on underscores and the second field is returned.

    Raises:
        IndexError: if ``node_name`` contains no underscore.
    """
    # Only the second field is needed, so two splits are enough; anything
    # after a second underscore ends up in a third field that is ignored.
    fields = node_name.split("_", 2)
    return fields[1]
53
54
# Run the splitter only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()