Mercurial > repos > jjohnson > pandas_pivot_table
comparison pandas_pivot_table.py @ 0:621144f8dbe9 draft
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/pandas_pivot_table/ commit 80684939b0bf75abb5cc70a9878054c1f734b651-dirty"
author | jjohnson |
---|---|
date | Wed, 02 Dec 2020 22:59:57 +0000 |
parents | |
children | c02f59711eb6 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:621144f8dbe9 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import json | |
5 import re | |
6 import pandas as pd | |
7 import sys | |
8 from json.decoder import JSONDecodeError | |
9 | |
10 | |
11 def __main__(): | |
12 p = argparse.ArgumentParser() | |
13 p.add_argument( | |
14 '-i', '--input', | |
15 type=argparse.FileType('r'), | |
16 required=True, | |
17 help='Tabular input file to pivot' | |
18 ) | |
19 p.add_argument( | |
20 '-o', '--output', | |
21 type=argparse.FileType('w'), | |
22 required=True, | |
23 help='Output file' | |
24 ) | |
25 p.add_argument( | |
26 '-S', '--skiprows', | |
27 type=int, | |
28 default=0, | |
29 help='Input column names' | |
30 ) | |
31 p.add_argument( | |
32 '-H', '--header', | |
33 default=None, | |
34 help='Input column names' | |
35 ) | |
36 p.add_argument( | |
37 '-P', '--prefix', | |
38 default=None, | |
39 help='Prefix for input column names' | |
40 ) | |
41 p.add_argument( | |
42 '-I', '--index', | |
43 help='index columns' | |
44 ) | |
45 p.add_argument( | |
46 '-C', '--columns', | |
47 help='columns values which are returned as columns' | |
48 ) | |
49 p.add_argument( | |
50 '-V', '--values', | |
51 help='values' | |
52 ) | |
53 p.add_argument( | |
54 '-F', '--aggfunc', | |
55 help='aggregate functions on the values' | |
56 ) | |
57 p.add_argument( | |
58 '-N', '--fill_value', | |
59 default=None, | |
60 help='fill value for missing values' | |
61 ) | |
62 args = p.parse_args() | |
63 | |
64 def getValueType(val): | |
65 if val or 0. == val: | |
66 try: | |
67 return int(val) | |
68 except ValueError: | |
69 try: | |
70 return float(val) | |
71 except ValueError: | |
72 return val | |
73 return None | |
74 | |
75 def getColumn(name, dfcols): | |
76 if name in dfcols: | |
77 return name | |
78 else: | |
79 try: | |
80 i = int(name) | |
81 return dfcols[i] | |
82 except: | |
83 print('%s not a column in %s' % (name, dfcols), file=sys.stderr) | |
84 exit(1) | |
85 | |
86 def getColumns(val, dfcols): | |
87 fields = [v.strip() for v in val.split(',')] | |
88 cols = [] | |
89 for name in fields: | |
90 cols.append(getColumn(name, dfcols)) | |
91 return cols | |
92 | |
93 def getAggFunc(funcStr, dfcols): | |
94 af = funcStr | |
95 try: | |
96 af = json.loads(funcStr) | |
97 except JSONDecodeError as de: | |
98 print('"%s" is not a json string: ' % funcStr, de.msg, file=sys.stderr) | |
99 exit(1) | |
100 if isinstance(af, dict): | |
101 aggfunc = {getColumn(k, dfcols) : v for k,v in af.items()} | |
102 elif isinstance(af, list): | |
103 aggfunc = af | |
104 else: | |
105 aggfunc = af | |
106 return aggfunc | |
107 | |
108 if args.prefix: | |
109 df = pd.read_table(args.input, skiprows=args.skiprows, header=None, prefix=args.prefix) | |
110 elif args.header: | |
111 df = pd.read_table(args.input, skiprows=args.skiprows, header=args.header) | |
112 else: | |
113 df = pd.read_table(args.input, skiprows=args.skiprows) | |
114 df_columns = df.columns.tolist() | |
115 index = getColumns(args.index, df_columns) | |
116 columns = getColumns(args.columns, df_columns) | |
117 values = getColumns(args.values, df_columns) | |
118 fill_value = getValueType(args.fill_value) | |
119 aggfunc = getAggFunc(args.aggfunc, values) | |
120 pdf = df.pivot_table(index=index, columns=columns, | |
121 values=values, aggfunc=aggfunc, | |
122 fill_value=fill_value) | |
123 pdf_cols = ['_'.join(reversed(p)) if isinstance(p, tuple) else p for p in pdf.columns.tolist()] | |
124 pdf.to_csv(args.output, sep='\t', float_format='%0.6f', header=pdf_cols) | |
125 | |
126 | |
127 if __name__ == "__main__": | |
128 __main__() |