Mercurial > repos > jjohnson > pandas_pivot_table
comparison pandas_pivot_table.py @ 0:621144f8dbe9 draft
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/pandas_pivot_table/ commit 80684939b0bf75abb5cc70a9878054c1f734b651-dirty"
| author | jjohnson |
|---|---|
| date | Wed, 02 Dec 2020 22:59:57 +0000 |
| parents | |
| children | c02f59711eb6 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:621144f8dbe9 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import argparse | |
| 4 import json | |
| 5 import re | |
| 6 import pandas as pd | |
| 7 import sys | |
| 8 from json.decoder import JSONDecodeError | |
| 9 | |
| 10 | |
| 11 def __main__(): | |
| 12 p = argparse.ArgumentParser() | |
| 13 p.add_argument( | |
| 14 '-i', '--input', | |
| 15 type=argparse.FileType('r'), | |
| 16 required=True, | |
| 17 help='Tabular input file to pivot' | |
| 18 ) | |
| 19 p.add_argument( | |
| 20 '-o', '--output', | |
| 21 type=argparse.FileType('w'), | |
| 22 required=True, | |
| 23 help='Output file' | |
| 24 ) | |
| 25 p.add_argument( | |
| 26 '-S', '--skiprows', | |
| 27 type=int, | |
| 28 default=0, | |
| 29 help='Input column names' | |
| 30 ) | |
| 31 p.add_argument( | |
| 32 '-H', '--header', | |
| 33 default=None, | |
| 34 help='Input column names' | |
| 35 ) | |
| 36 p.add_argument( | |
| 37 '-P', '--prefix', | |
| 38 default=None, | |
| 39 help='Prefix for input column names' | |
| 40 ) | |
| 41 p.add_argument( | |
| 42 '-I', '--index', | |
| 43 help='index columns' | |
| 44 ) | |
| 45 p.add_argument( | |
| 46 '-C', '--columns', | |
| 47 help='columns values which are returned as columns' | |
| 48 ) | |
| 49 p.add_argument( | |
| 50 '-V', '--values', | |
| 51 help='values' | |
| 52 ) | |
| 53 p.add_argument( | |
| 54 '-F', '--aggfunc', | |
| 55 help='aggregate functions on the values' | |
| 56 ) | |
| 57 p.add_argument( | |
| 58 '-N', '--fill_value', | |
| 59 default=None, | |
| 60 help='fill value for missing values' | |
| 61 ) | |
| 62 args = p.parse_args() | |
| 63 | |
| 64 def getValueType(val): | |
| 65 if val or 0. == val: | |
| 66 try: | |
| 67 return int(val) | |
| 68 except ValueError: | |
| 69 try: | |
| 70 return float(val) | |
| 71 except ValueError: | |
| 72 return val | |
| 73 return None | |
| 74 | |
| 75 def getColumn(name, dfcols): | |
| 76 if name in dfcols: | |
| 77 return name | |
| 78 else: | |
| 79 try: | |
| 80 i = int(name) | |
| 81 return dfcols[i] | |
| 82 except: | |
| 83 print('%s not a column in %s' % (name, dfcols), file=sys.stderr) | |
| 84 exit(1) | |
| 85 | |
| 86 def getColumns(val, dfcols): | |
| 87 fields = [v.strip() for v in val.split(',')] | |
| 88 cols = [] | |
| 89 for name in fields: | |
| 90 cols.append(getColumn(name, dfcols)) | |
| 91 return cols | |
| 92 | |
| 93 def getAggFunc(funcStr, dfcols): | |
| 94 af = funcStr | |
| 95 try: | |
| 96 af = json.loads(funcStr) | |
| 97 except JSONDecodeError as de: | |
| 98 print('"%s" is not a json string: ' % funcStr, de.msg, file=sys.stderr) | |
| 99 exit(1) | |
| 100 if isinstance(af, dict): | |
| 101 aggfunc = {getColumn(k, dfcols) : v for k,v in af.items()} | |
| 102 elif isinstance(af, list): | |
| 103 aggfunc = af | |
| 104 else: | |
| 105 aggfunc = af | |
| 106 return aggfunc | |
| 107 | |
| 108 if args.prefix: | |
| 109 df = pd.read_table(args.input, skiprows=args.skiprows, header=None, prefix=args.prefix) | |
| 110 elif args.header: | |
| 111 df = pd.read_table(args.input, skiprows=args.skiprows, header=args.header) | |
| 112 else: | |
| 113 df = pd.read_table(args.input, skiprows=args.skiprows) | |
| 114 df_columns = df.columns.tolist() | |
| 115 index = getColumns(args.index, df_columns) | |
| 116 columns = getColumns(args.columns, df_columns) | |
| 117 values = getColumns(args.values, df_columns) | |
| 118 fill_value = getValueType(args.fill_value) | |
| 119 aggfunc = getAggFunc(args.aggfunc, values) | |
| 120 pdf = df.pivot_table(index=index, columns=columns, | |
| 121 values=values, aggfunc=aggfunc, | |
| 122 fill_value=fill_value) | |
| 123 pdf_cols = ['_'.join(reversed(p)) if isinstance(p, tuple) else p for p in pdf.columns.tolist()] | |
| 124 pdf.to_csv(args.output, sep='\t', float_format='%0.6f', header=pdf_cols) | |
| 125 | |
| 126 | |
| 127 if __name__ == "__main__": | |
| 128 __main__() |
