Mercurial > repos > jjohnson > pandas_pivot_table
view pandas_pivot_table.py @ 1:c02f59711eb6 draft
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/pandas_pivot_table/ commit af9d36fa3efb1cf136a69e7ed1a5f06261f9b0d6-dirty"
author | jjohnson |
---|---|
date | Wed, 16 Dec 2020 16:13:51 +0000 |
parents | 621144f8dbe9 |
children | 6f05390deffa |
line wrap: on
line source
#!/usr/bin/env python
"""Pivot a tab-separated file with pandas ``DataFrame.pivot_table``.

The input is read with ``pandas.read_table``; the index/column/value fields
and the aggregate function(s) come from the command line, and the pivoted
table is written as tab-separated text.
"""

import argparse
import json
import sys
from json.decoder import JSONDecodeError

import pandas as pd


def get_value_type(val):
    """Coerce a command-line value to ``int``, then ``float``, else keep it.

    Returns ``None`` when *val* is ``None`` or an empty string; a numeric
    zero is kept rather than being treated as missing.
    """
    if not val and val != 0:
        return None
    for cast in (int, float):
        try:
            return cast(val)
        except ValueError:
            continue
    return val


def get_column(name, dfcols):
    """Resolve *name* to one of the labels in the list *dfcols*.

    *name* is returned unchanged when it is already a label.  Otherwise it
    is parsed as an integer and used as a Python list index into *dfcols*
    (0-based; negative values count from the end).

    On failure an error is printed to stderr and the process exits with
    status 1.
    """
    if name in dfcols:
        return name
    try:
        return dfcols[int(name)]
    except (ValueError, TypeError, IndexError):
        print('%s not a column in %s' % (name, dfcols), file=sys.stderr)
        sys.exit(1)


def get_columns(val, dfcols):
    """Split the comma-separated *val* and resolve each field via get_column."""
    return [get_column(field.strip(), dfcols) for field in val.split(',')]


def get_agg_func(func_str, dfcols):
    """Parse the JSON text *func_str* into a ``pivot_table`` ``aggfunc``.

    A JSON object has its keys resolved to column labels with get_column;
    any other JSON value (string, list, ...) is returned as parsed.

    If *func_str* is not valid JSON an error is printed to stderr and the
    process exits with status 1.
    """
    try:
        spec = json.loads(func_str)
    except JSONDecodeError as de:
        print('"%s" is not a json string: ' % func_str, de.msg,
              file=sys.stderr)
        sys.exit(1)
    if isinstance(spec, dict):
        return {get_column(key, dfcols): func for key, func in spec.items()}
    return spec


def flatten_column_label(label):
    """Join a tuple column label into one string, innermost level first.

    Every level is passed through ``str`` because pivoted labels contain the
    raw values of the pivot columns, which may be numbers; ``str.join``
    rejects non-string items.  Non-tuple labels are returned unchanged.
    """
    if isinstance(label, tuple):
        return '_'.join(str(part) for part in reversed(label))
    return label


def read_input(handle, skiprows=0, header=None, prefix=None):
    """Read the tab-separated *handle* into a DataFrame.

    * *prefix* set: the file is read without a header row and the columns
      are named ``<prefix>0``, ``<prefix>1``, ...  ``DataFrame.add_prefix``
      is used instead of the ``prefix=`` reader argument, which newer pandas
      releases no longer accept.
    * *header* set: an integer string selects that row as the header row;
      any other text is taken as comma-separated column names for a file
      without a header row.
      NOTE(review): the names interpretation follows the option's help text
      ("Input column names") - confirm against the calling tool wrapper.
    * neither: pandas infers the header from the first row read.
    """
    if prefix:
        frame = pd.read_table(handle, skiprows=skiprows, header=None)
        return frame.add_prefix(prefix)
    if header:
        try:
            header_row = int(header)
        except ValueError:
            names = [name.strip() for name in header.split(',')]
            return pd.read_table(handle, skiprows=skiprows, header=None,
                                 names=names)
        return pd.read_table(handle, skiprows=skiprows, header=header_row)
    return pd.read_table(handle, skiprows=skiprows)


def __main__():
    """Command-line entry point: parse options, pivot, write the result."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-i', '--input',
        type=argparse.FileType('r'),
        required=True,
        help='Tabular input file to pivot'
    )
    parser.add_argument(
        '-o', '--output',
        type=argparse.FileType('w'),
        required=True,
        help='Output file'
    )
    parser.add_argument(
        '-S', '--skiprows',
        type=int,
        default=0,
        help='Number of leading input rows to skip'
    )
    parser.add_argument(
        '-H', '--header',
        default=None,
        help='Header row number, or comma-separated input column names'
    )
    parser.add_argument(
        '-P', '--prefix',
        default=None,
        help='Prefix for input column names'
    )
    # The four pivot options are used unconditionally below, so argparse is
    # told they are mandatory instead of failing later on a None value.
    parser.add_argument(
        '-I', '--index',
        required=True,
        help='index columns'
    )
    parser.add_argument(
        '-C', '--columns',
        required=True,
        help='columns values which are returned as columns'
    )
    parser.add_argument(
        '-V', '--values',
        required=True,
        help='values'
    )
    parser.add_argument(
        '-F', '--aggfunc',
        required=True,
        help='aggregate functions on the values'
    )
    parser.add_argument(
        '-N', '--fill_value',
        default=None,
        help='fill value for missing values'
    )
    args = parser.parse_args()

    # argparse opened both files; make sure they are flushed and closed.
    with args.input as src, args.output as dst:
        df = read_input(src, skiprows=args.skiprows, header=args.header,
                        prefix=args.prefix)
        df_columns = df.columns.tolist()
        index = get_columns(args.index, df_columns)
        columns = get_columns(args.columns, df_columns)
        values = get_columns(args.values, df_columns)
        fill_value = get_value_type(args.fill_value)
        # Keys of a dict aggfunc are resolved against the selected value
        # columns, so a numeric key indexes into --values, not the input.
        aggfunc = get_agg_func(args.aggfunc, values)
        pdf = df.pivot_table(index=index, columns=columns, values=values,
                             aggfunc=aggfunc, fill_value=fill_value)
        pdf_cols = [flatten_column_label(label)
                    for label in pdf.columns.tolist()]
        pdf.to_csv(dst, sep='\t', float_format='%0.6f', header=pdf_cols)


if __name__ == "__main__":
    __main__()