Mercurial > repos > dannon > metaphlan
annotate metaphlan_to_phyloxml.py @ 1:016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
| author | Dannon Baker <dannonbaker@me.com> | 
|---|---|
| date | Tue, 03 Apr 2012 16:30:27 -0600 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 
1
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
2 | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
3 """ | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
4 Read metaphlan output summarizing taxonomic distribution and write it in PhyloXML format | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
5 | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
6 usage: %prog metaphlan.txt phylo.xml | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
7 """ | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
8 | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
9 import sys | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
10 | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
11 # Metaphlan output looks like: | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
12 # k__Bacteria 99.07618 | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
13 # k__Archaea 0.92382 | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
14 # k__Bacteria|p__Proteobacteria 82.50732 | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
15 # k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria 81.64905 | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
16 | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
# Maps the three-character prefix that starts each clade name in the input
# (e.g. the 'k__' in 'k__Bacteria') to the rank name written to the output.
rank_map = {
    'k__': 'kingdom',
    'p__': 'phylum',
    'c__': 'class',
    'o__': 'order',
    'f__': 'family',
    'g__': 'genus',
    's__': 'species',
}
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
18 | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
class Node( object ):
    """
    Node in a taxonomy tree.

    Attributes:
      rank     -- rank name taken from ``rank_map`` (None on a bare root)
      name     -- clade name with its rank prefix removed (None on a bare root)
      value    -- abundance attached by ``add`` (None until one is attached)
      children -- dict mapping a child's name to its Node
    """

    def __init__( self, rank=None, name=None ):
        self.rank = rank
        self.name = name
        self.value = None
        self.children = dict()

    @staticmethod
    def from_metaphlan_file( file ):
        """
        Build a tree from metaphlan output and return its (unnamed) root.

        `file` is any iterable of text lines.  Every data line must hold
        exactly two whitespace-separated fields: a '|'-separated clade path
        and its abundance.  Blank lines and lines whose first field starts
        with '#' are skipped.

        Raises ValueError for a data line with a different field count and
        KeyError (from `add`) for a rank prefix missing from ``rank_map``.
        """
        root = Node()
        for line_number, line in enumerate( file, 1 ):
            fields = line.split()
            # Tolerate blank lines and '#' header/comment lines instead of
            # failing on them with an opaque unpacking error.
            if not fields or fields[0].startswith( "#" ):
                continue
            if len( fields ) != 2:
                raise ValueError( "line %d: expected '<taxa> <abundance>', got %r" % ( line_number, line ) )
            taxa, abundance = fields
            root.add( taxa.split( "|" ), abundance )
        return root

    def add( self, parts, value ):
        """
        Parts is a sequence of prefixed node names (e.g. 'k__Bacteria').
        Walk down from this node, creating missing nodes along the way, and
        attach the value to the node reached by the last part.  An empty
        `parts` attaches the value to this node.

        `parts` is only read, never modified.  Raises KeyError when the
        first three characters of a part are not a key of ``rank_map``.
        """
        node = self
        for part in parts:
            rank = rank_map[ part[:3] ]
            name = part[3:]
            child = node.children.get( name )
            if child is None:
                child = node.children[name] = Node( rank, name )
            node = child
        node.value = value

    def __str__( self ):
        """
        Return the subtree as nested parentheses: '(child,child):name' for a
        node with children, just the name for a leaf.  A node without a name
        (the root) contributes an empty name.
        """
        # The root's name is None; concatenating it would raise TypeError.
        label = self.name or ""
        if self.children:
            return "(" + ",".join( str( child ) for child in self.children.values() ) + "):" + label
        return label

    def to_phyloxml( self, out ):
        """
        Write this node and, recursively, its children to `out` as nested
        <clade> elements, one element per line.

        `out` only needs a ``write`` method accepting text.  Name, rank and
        abundance are XML-escaped before being written.
        """
        from xml.sax.saxutils import escape

        out.write( "<clade>\n" )
        if self.name:
            name = escape( "%s" % ( self.name, ) )
            rank = escape( "%s" % ( self.rank, ) )
            out.write( "<name>%s</name>\n" % name )
            out.write( "<taxonomy><scientific_name>%s</scientific_name><rank>%s</rank></taxonomy>\n" % ( name, rank ) )
        # Compare against None so that a numeric abundance of 0 is still written.
        if self.value is not None:
            abundance = escape( "%s" % ( self.value, ) )
            out.write( "<property datatype='xsd:float' ref='metaphlan:abundance' applies_to='node'>%s</property>\n" % abundance )
            # An alternative encoding, <confidence type='abundance'>, was left disabled.
        for child in self.children.values():
            child.to_phyloxml( out )
        out.write( "</clade>\n" )
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
67 | 
| 
 
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
 
Dannon Baker <dannonbaker@me.com> 
parents:  
diff
changeset
 | 
def main( argv=None ):
    """
    Convert the metaphlan file named by argv[1] into the PhyloXML file named
    by argv[2].  `argv` defaults to ``sys.argv``; arguments beyond the first
    two are ignored.

    Writes a usage line to stderr and exits with status 2 when fewer than
    two file arguments are given.
    """
    if argv is None:
        argv = sys.argv
    if len( argv ) < 3:
        prog = argv[0] if argv else "metaphlan_to_phyloxml.py"
        sys.stderr.write( "usage: %s metaphlan.txt phylo.xml\n" % prog )
        sys.exit( 2 )

    # Read the whole input before opening the output, so an unreadable or
    # malformed input does not leave a truncated output file behind.
    with open( argv[1] ) as handle:
        tree = Node.from_metaphlan_file( handle )

    # The with-blocks close both files even if writing fails part-way.
    with open( argv[2], 'w' ) as out:
        out.write( '<phyloxml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.phyloxml.org" xsi:schemaLocation="http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd">\n' )
        out.write( '<phylogeny rooted="true">\n' )
        tree.to_phyloxml( out )
        out.write( '</phylogeny>\n' )
        out.write( '</phyloxml>\n' )


if __name__ == "__main__":
    main()
