Mercurial > repos > dannon > metaphlan
annotate metaphlan_to_phyloxml.py @ 6:e951f9d38339 default tip
Added metaphlan2krona
author | Dannon Baker <dannonbaker@me.com> |
---|---|
date | Tue, 08 Apr 2014 14:16:46 -0400 |
parents | 016f6375aadc |
children |
rev | line source |
---|---|
1
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
2 |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
3 """ |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
4 Read MetaPhlAn output summarizing taxonomic distribution and convert it to PhyloXML format |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
5 |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
6 usage: %prog metaphlan.txt phylo.xml |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
7 """ |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
8 |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
9 import sys |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
10 |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
11 # Metaphlan output looks like: |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
12 # k__Bacteria 99.07618 |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
13 # k__Archaea 0.92382 |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
14 # k__Bacteria|p__Proteobacteria 82.50732 |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
15 # k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria 81.64905 |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
16 |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
# Maps the three-character clade prefix used in MetaPhlAn labels to a rank name.
rank_map = {
    'k__': 'kingdom',
    'p__': 'phylum',
    'c__': 'class',
    'o__': 'order',
    'f__': 'family',
    'g__': 'genus',
    's__': 'species',
}


class Node( object ):
    """
    Node in a taxonomy.

    Attributes:
        rank: rank name for this node (None for the synthetic root)
        name: taxon name with the rank prefix removed (None for the root)
        value: abundance attached to this node, stored as read (a string
            when parsed from a file); None when no row named this node
        children: dict mapping child taxon name -> child Node
    """

    def __init__( self, rank=None, name=None ):
        self.rank = rank
        self.name = name
        self.value = None
        self.children = dict()

    @staticmethod
    def from_metaphlan_file( file ):
        """
        Build tree from metaphlan output.

        `file` is any iterable of text lines, each holding a '|'-separated
        clade path and an abundance separated by whitespace. Blank lines and
        lines starting with '#' are skipped. Returns the unnamed root Node.

        Raises ValueError for a line that does not have exactly two fields.
        """
        root = Node()
        for lineno, line in enumerate( file, 1 ):
            fields = line.split()
            # Blank lines and '#' comment/header rows carry no clade data.
            if not fields or fields[0].startswith( "#" ):
                continue
            if len( fields ) != 2:
                raise ValueError( "line %d: expected '<taxa> <abundance>', got %r" % ( lineno, line ) )
            taxa, abundance = fields
            root.add( taxa.split( "|" ), abundance )
        return root

    def add( self, parts, value ):
        """
        Parts is a list of prefixed node names (e.g. 'k__Bacteria'); walk down
        from this node, creating missing nodes on the way, and attach the
        value to the node reached by the last part. An empty list attaches
        the value to this node. The caller's list is not modified.

        Raises KeyError for a label that does not start with '<letter>__'.
        """
        node = self
        for part in parts:
            prefix, name = part[:3], part[3:]
            if prefix in rank_map:
                rank = rank_map[prefix]
            elif len( prefix ) == 3 and prefix.endswith( "__" ):
                # Prefix has the right shape but is not in rank_map (for
                # example 't__'); keep the node instead of aborting.
                # NOTE(review): 'unknown' is believed to be an accepted
                # PhyloXML rank token -- confirm against the schema.
                rank = "unknown"
            else:
                raise KeyError( "unrecognized taxon label: %r" % part )
            child = node.children.get( name )
            if child is None:
                child = node.children[name] = Node( rank, name )
            node = child
        node.value = value

    def __str__( self ):
        """
        Parenthesised text rendering of the subtree: '(child,child):name' for
        a node with children, just 'name' for a leaf. A node without a name
        (the root) contributes an empty label.
        """
        # NOTE(review): if this is meant to be Newick, an inner-node label
        # normally follows ')' without ':'; left as originally written.
        label = self.name or ""
        if self.children:
            return "(" + ",".join( str( child ) for child in self.children.values() ) + "):" + label
        return label

    def to_phyloxml( self, out ):
        """
        Write this node and its descendants to `out` (any object with a
        write() method) as nested PhyloXML <clade> elements. Name and
        taxonomy are emitted only for named nodes, the abundance property
        only when a value was attached.
        """
        from xml.sax.saxutils import escape

        out.write( "<clade>\n" )
        if self.name:
            # Names come straight from the input file, so escape XML
            # metacharacters before embedding them.
            name = escape( self.name )
            out.write( "<name>%s</name>\n" % name )
            out.write( "<taxonomy><scientific_name>%s</scientific_name><rank>%s</rank></taxonomy>\n" % ( name, self.rank ) )
        if self.value is not None:
            out.write( "<property datatype='xsd:float' ref='metaphlan:abundance' applies_to='node'>%s</property>\n" % escape( str( self.value ) ) )
        for child in self.children.values():
            child.to_phyloxml( out )
        out.write( "</clade>\n" )
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
67 |
016f6375aadc
Initial commit of metaphlan_to_phyloxml converter.
Dannon Baker <dannonbaker@me.com>
parents:
diff
changeset
|
def main( argv ):
    """
    Convert the metaphlan table named by argv[1] into a PhyloXML document
    written to argv[2]. Exits with a usage message when the argument count
    is wrong.
    """
    if len( argv ) != 3:
        sys.exit( "usage: %s metaphlan.txt phylo.xml" % argv[0] )

    # Parse the input completely before opening the output, so a malformed
    # input file does not leave a truncated XML document behind.
    with open( argv[1] ) as infile:
        tree = Node.from_metaphlan_file( infile )

    # The with-block guarantees the output is flushed and closed.
    with open( argv[2], 'w' ) as out:
        out.write( '<phyloxml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.phyloxml.org" xsi:schemaLocation="http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd">\n' )
        out.write( '<phylogeny rooted="true">\n' )
        tree.to_phyloxml( out )
        out.write( '</phylogeny>\n' )
        out.write( '</phyloxml>\n' )


# Only run the conversion when executed as a script, not on import.
if __name__ == "__main__":
    main( sys.argv )