Mercurial > repos > earlhaminst > ete
comparison ete_genetree_splitter.py @ 3:077021c45b96 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
author | earlhaminst |
---|---|
date | Mon, 12 Mar 2018 12:51:48 -0400 |
parents | |
children | 6a5282f71f82 |
comparison
equal
deleted
inserted
replaced
2:03c10736e497 | 3:077021c45b96 |
---|---|
1 from __future__ import print_function | |
2 | |
3 import optparse | |
4 | |
5 from ete3 import PhyloTree | |
6 | |
7 | |
def main():
    """Split a gene tree into subtrees at its duplication nodes.

    Parses the command line, loads the gene tree given by ``--genetree``,
    optionally reconciles it with the species tree given by ``--speciestree``
    (only when ``--gainlose`` is set), and writes every subtree returned by
    ``split_by_dups()`` to ``<n>_genetree.nhx`` (relative path), numbering
    the files from 1.

    Exits through ``parser.error`` when ``--genetree`` is missing, or when
    ``--gainlose`` is set without ``--speciestree``.
    """
    opt_parser = optparse.OptionParser(
        usage="usage: %prog --genetree <genetree-file> --speciestree <speciestree-file> [options]"
    )
    opt_parser.add_option('--genetree', help='GeneTree in nhx format')
    opt_parser.add_option('--speciestree', help='Species Tree in nhx format')
    opt_parser.add_option('--species_format', type='int', default=8, help='Species Tree input format (0-9)')
    opt_parser.add_option('--gene_node', type='int', default=0, help='Gene node format 0=gene_species, 1=species_gene')
    opt_parser.add_option('--gainlose', action='store_true', default=False, help='Find out gene gain/lose')
    opt_parser.add_option('--output_format', type='int', default=9, help='GeneTree output format (0-9)')
    opts, _positional = opt_parser.parse_args()

    if options_missing(opts.genetree):
        opt_parser.error("--genetree option must be specified, GeneTree in nhx format")

    # Load the single gene tree that will be split.
    gene_tree = PhyloTree(opts.genetree)

    # Only the gene_species layout (0) installs a custom species naming
    # function; for any other value the tree keeps its default one.
    if opts.gene_node == 0:
        gene_tree.set_species_naming_function(parse_sp_name)

    # Reconcile the gene tree with the species tree to help find gene
    # gain/lose events.
    if opts.gainlose:
        if options_missing(opts.speciestree):
            opt_parser.error("--speciestree option must be specified, species tree in nhx format")

        species_tree = PhyloTree(opts.speciestree, format=opts.species_format)

        # Species trees configured for TreeBest carry '*' markers around
        # species names; strip them before reconciling.
        for species_leaf in species_tree:
            species_leaf.name = species_leaf.name.strip('*')

        # The reconciled tree replaces the gene tree; the events list is
        # not used any further.
        gene_tree, _events = gene_tree.reconcile(species_tree)

    # split_by_dups() yields the subtrees obtained by cutting the tree at
    # its duplication nodes; each one goes to its own numbered file.
    for cluster_id, subtree in enumerate(gene_tree.split_by_dups(), start=1):
        out_name = '%d_genetree.nhx' % cluster_id
        with open(out_name, 'w') as handle:
            handle.write(subtree.write(format=opts.output_format))


def options_missing(value):
    """Return True when an option was not supplied on the command line."""
    return value is None
49 | |
50 | |
def parse_sp_name(node_name):
    """Return the species part of a ``gene_species`` style node name.

    The name is split on every underscore and the second field is
    returned, so ``"gene1_human"`` gives ``"human"``. A name without any
    underscore raises ``IndexError``.
    """
    fields = node_name.split("_")
    species = fields[1]
    return species
53 | |
54 | |
# Run the splitter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()