Mercurial > repos > dannon > metaphlan
comparison metaphlan_to_phyloxml.py @ 1:016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
author | Dannon Baker <dannonbaker@me.com> |
---|---|
date | Tue, 03 Apr 2012 16:30:27 -0600 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:4f3fe44edb3f | 1:016f6375aadc |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 Read metaphaln output summarizing taxonomic distribution and format in PhyloXML format | |
5 | |
6 usage: %prog metaphlan.txt phylo.xml | |
7 """ | |
8 | |
9 import sys | |
10 | |
11 # Metaphlan output looks like: | |
12 # k__Bacteria 99.07618 | |
13 # k__Archaea 0.92382 | |
14 # k__Bacteria|p__Proteobacteria 82.50732 | |
15 # k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria 81.64905 | |
16 | |
17 rank_map = { 'k__': 'kingdom', 'p__': 'phylum', 'c__': 'class', 'o__': 'order', 'f__': 'family', 'g__': 'genus', 's__': 'species' } | |
18 | |
19 class Node( object ): | |
20 """Node in a taxonomy""" | |
21 def __init__( self, rank=None, name=None ): | |
22 self.rank = rank | |
23 self.name = name | |
24 self.value = None | |
25 self.children = dict() | |
26 @staticmethod | |
27 def from_metaphlan_file( file ): | |
28 """ | |
29 Build tree from metaphlan output | |
30 """ | |
31 root = Node() | |
32 for line in file: | |
33 taxa, abundance = line.split() | |
34 parts = taxa.split( "|" ) | |
35 root.add( parts, abundance ) | |
36 return root | |
37 def add( self, parts, value ): | |
38 """ | |
39 Parts is a list of node names, recursively add nodes until we reach | |
40 the last part, and then attach the value to that node. | |
41 """ | |
42 if len( parts ) == 0: | |
43 self.value = value | |
44 else: | |
45 next_part = parts.pop(0) | |
46 rank = rank_map[ next_part[:3] ] | |
47 name = next_part[3:] | |
48 if name not in self.children: | |
49 self.children[name] = Node( rank, name ) | |
50 self.children[name].add( parts, value ) | |
51 def __str__( self ): | |
52 if self.children: | |
53 return "(" + ",".join( str( child ) for child in self.children.itervalues() ) + "):" + self.name | |
54 else: | |
55 return self.name | |
56 def to_phyloxml( self, out ): | |
57 print >>out, "<clade>" | |
58 if self.name: | |
59 print >>out, "<name>%s</name>" % self.name | |
60 print >>out, "<taxonomy><scientific_name>%s</scientific_name><rank>%s</rank></taxonomy>" % ( self.name, self.rank ) | |
61 if self.value: | |
62 print >>out, "<property datatype='xsd:float' ref='metaphlan:abundance' applies_to='node'>%s</property>" % self.value | |
63 ## print >>out, "<confidence type='abundance'>%s</confidence>" % self.value | |
64 for child in self.children.itervalues(): | |
65 child.to_phyloxml( out ) | |
66 print >>out, "</clade>" | |
67 | |
68 out = open( sys.argv[2], 'w' ) | |
69 | |
70 print >>out, '<phyloxml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.phyloxml.org" xsi:schemaLocation="http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd">' | |
71 print >>out, '<phylogeny rooted="true">' | |
72 | |
73 Node.from_metaphlan_file( open( sys.argv[1] ) ).to_phyloxml( out ) | |
74 | |
75 print >>out, '</phylogeny>' | |
76 print >>out, '</phyloxml>' |