Mercurial > repos > george-weingart > lefse
view home/ubuntu/lefse_to_export/lefse2circlader.py @ 2:a31c10fe09c8 draft default tip
Fixed bug due to numerical approximation after normalization affecting root-level clades (e.g. "Bacteria" or "Archaea")
author | george-weingart |
---|---|
date | Tue, 07 Jul 2015 13:52:29 -0400 |
parents | db64b6287cd6 |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Convert LEfSe output to Circlader input.

Every tab-separated input row that has more than two fields yields one
tab-separated output row made of: the first field, an optional label derived
from the first field, the third field, and the third field suffixed with
"_col".
"""

from __future__ import with_statement

import argparse
import os
import sys


def read_params(args):
    """Parse the command line and return the options as a plain dict.

    Args:
        args: Argument vector in ``sys.argv`` form (program name first,
            options after it).  ``None`` lets argparse fall back to
            ``sys.argv`` on its own.

    Returns:
        A dict with the keys ``'inp_f'`` (input path or None), ``'out_f'``
        (output path or None) and ``'l'`` (int, default 0).
    """
    parser = argparse.ArgumentParser(
        description='Convert LEfSe output to '
                    'Circlader input')
    parser.add_argument(
        'inp_f', metavar='INPUT_FILE', nargs='?', default=None, type=str,
        help="the input file [stdin if not present]")
    parser.add_argument(
        'out_f', metavar='OUTPUT_FILE', nargs='?', default=None, type=str,
        help="the output file [stdout if not present]")
    parser.add_argument(
        '-l', metavar='levels with label', default=0, type=int,
        help="maximum number of '.' separators a feature name may contain "
             "and still receive a label [default 0]")

    # Honour the vector we were handed instead of silently re-reading the
    # global sys.argv; element 0 is the program name, which argparse must
    # not see as a positional argument.
    argv = None if args is None else list(args)[1:]
    return vars(parser.parse_args(argv))


def _circlader_row(fields, max_label_depth):
    """Build one output row from the split fields of one input row."""
    name, group = fields[0], fields[2]
    # Names nested deeper than the requested level get an empty label;
    # the others are labelled with their last dot-separated component.
    if name.count('.') > max_label_depth:
        label = ""
    else:
        label = name.split('.')[-1]
    return [name, label, group, group + "_col"]


def lefse2circlader(par):
    """Read the input table and write the converted table.

    Args:
        par: Mapping with the keys ``'inp_f'`` and ``'out_f'`` (a path, or a
            falsy value to use stdin / stdout respectively) and ``'l'`` (the
            largest number of '.' characters a name may contain and still be
            labelled).

    Rows with two fields or fewer (blank lines included) are skipped.
    """
    inp_path, out_path = par['inp_f'], par['out_f']

    inpf = open(inp_path) if inp_path else sys.stdin
    try:
        rows = [line.strip().split('\t') for line in inpf]
    finally:
        # Only close what was opened here: closing sys.stdin would break
        # every later read in the calling process.
        if inpf is not sys.stdin:
            inpf.close()

    circ = [_circlader_row(row, par['l']) for row in rows if len(row) > 2]

    out_file = open(out_path, 'w') if out_path else sys.stdout
    try:
        out_file.writelines("\t".join(row) + "\n" for row in circ)
    finally:
        # Same reasoning as for stdin: leave the process-wide stdout open.
        if out_file is not sys.stdout:
            out_file.close()


if __name__ == '__main__':
    params = read_params(sys.argv)
    lefse2circlader(params)