diff home/ubuntu/lefse_to_export/qiime2lefse.py @ 1:db64b6287cd6 draft

Modified datatypes
author george-weingart
date Wed, 20 Aug 2014 16:56:51 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/home/ubuntu/lefse_to_export/qiime2lefse.py	Wed Aug 20 16:56:51 2014 -0400
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+
+import sys
+
+def read_params(args):
+    """Parse the command-line options of the Qiime-to-LEfSe converter.
+
+    args is the full argument vector, program name first (e.g. sys.argv);
+    when it is empty or None, argparse reads sys.argv itself.  Returns a
+    dict keyed by option name: 'in', 'md', 'out', 'c', 's' and 'u'.
+    """
+    import argparse as ap
+
+    p = ap.ArgumentParser(
+        description="Convert a Qiime OTU table to the LEfSe input format")
+
+    # const makes a bare --in / --out mean the standard stream, not None.
+    p.add_argument('--in', metavar='INPUT_FILE', type=str,
+                   nargs='?', const=sys.stdin, default=sys.stdin,
+                   help="the Qiime OTU table file [ stdin if not present ]")
+    p.add_argument('--md', metavar='METADATA_FILE', type=str,
+                   nargs='?', default=None,
+                   help="the Qiime metadata file "
+                        "[ only OTU table without metadata if not present ]")
+    p.add_argument('--out', metavar='OUTPUT_FILE', type=str,
+                   nargs='?', const=sys.stdout, default=sys.stdout,
+                   help="the output file [stdout if not present]")
+
+    p.add_argument('-c', metavar="class attribute", type=str,
+                   help="the attribute to use as class")
+    p.add_argument('-s', metavar="subclass attribute", type=str,
+                   help="the attribute to use as subclass")
+    p.add_argument('-u', metavar="subject attribute", type=str,
+                   help="the attribute to use as subject")
+
+    return vars(p.parse_args(args[1:] if args else None))
+
+
+
+def qiime2lefse(  fin, fmd, fout, all_md, sel_md ):
+    """Convert a Qiime OTU table, optionally with metadata, to LEfSe input.
+
+    fin/fout: file names, or sys.stdin/sys.stdout (which are left open).
+    fmd: tab-separated metadata file name, or None.  all_md: emit every
+    metadata column in header order, else the non-empty names in sel_md.
+    """
+    inpf = fin if fin is sys.stdin else open(fin)
+    try:
+        rows = [l.strip().split('\t') for l in inpf.readlines()[1:] if l.strip()]
+    finally:
+        if inpf is not sys.stdin:
+            inpf.close()
+    lines = [list(col) for col in zip(*rows)]  # transpose: one list per column
+    # Append the OTU id (first column) to each lineage (last column).
+    lines[-1] = [tax if tax == 'Consensus Lineage' else tax + "|" + otu
+                 for otu, tax in zip(lines[0], lines[-1])]
+    data = dict((l[0], l[1:]) for l in lines[1:])
+
+    md, mdf = {}, []
+    if fmd:
+        with open(fmd) as inpf:
+            mdlines = [l.strip().split('\t') for l in inpf if l.strip()]
+        mdf = mdlines[0][1:]
+        md = dict((l[0], dict(zip(mdf, l[1:]))) for l in mdlines[1:])
+    selected_md = list(mdf) if all_md else [s for s in sel_md if s]
+
+    out_m = [selected_md + [d.replace(";", "|").replace("\"", "")
+                            for d in data['Consensus Lineage']]]
+    out_m += [[md[l[0]][kmd] for kmd in selected_md] + l[1:]
+              for l in lines[1:] if l[0] != 'Consensus Lineage']
+    outf = fout if fout is sys.stdout else open(fout, "w")
+    try:
+        outf.writelines("\t".join(l) + "\n" for l in zip(*out_m))
+    finally:
+        if outf is not sys.stdout:
+            outf.close()
+
+if __name__ == '__main__':
+    # Script entry point: parse the command line, then run the conversion.
+    opts = read_params(sys.argv)
+    chosen = [opts[flag] for flag in ('c', 's', 'u')]
+    # all_md is true only when none of -c/-s/-u was supplied.
+    qiime2lefse(fin=opts['in'], fmd=opts['md'], fout=opts['out'],
+                all_md=not any(chosen),
+                sel_md=chosen)