Mercurial > repos > jjohnson > pandas_pivot_table
comparison pandas_pivot_table.py @ 1:c02f59711eb6 draft
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/pandas_pivot_table/ commit af9d36fa3efb1cf136a69e7ed1a5f06261f9b0d6-dirty"
author | jjohnson |
---|---|
date | Wed, 16 Dec 2020 16:13:51 +0000 |
parents | 621144f8dbe9 |
children | 6f05390deffa |
comparison
equal
deleted
inserted
replaced
0:621144f8dbe9 | 1:c02f59711eb6 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 import argparse | 3 import argparse |
4 import json | 4 import json |
5 import re | |
6 import pandas as pd | 5 import pandas as pd |
7 import sys | 6 import sys |
8 from json.decoder import JSONDecodeError | 7 from json.decoder import JSONDecodeError |
9 | 8 |
10 | 9 |
77 return name | 76 return name |
78 else: | 77 else: |
79 try: | 78 try: |
80 i = int(name) | 79 i = int(name) |
81 return dfcols[i] | 80 return dfcols[i] |
82 except: | 81 except Exception: |
83 print('%s not a column in %s' % (name, dfcols), file=sys.stderr) | 82 print('%s not a column in %s' % (name, dfcols), |
83 file=sys.stderr) | |
84 exit(1) | 84 exit(1) |
85 | 85 |
86 def getColumns(val, dfcols): | 86 def getColumns(val, dfcols): |
87 fields = [v.strip() for v in val.split(',')] | 87 fields = [v.strip() for v in val.split(',')] |
88 cols = [] | 88 cols = [] |
93 def getAggFunc(funcStr, dfcols): | 93 def getAggFunc(funcStr, dfcols): |
94 af = funcStr | 94 af = funcStr |
95 try: | 95 try: |
96 af = json.loads(funcStr) | 96 af = json.loads(funcStr) |
97 except JSONDecodeError as de: | 97 except JSONDecodeError as de: |
98 print('"%s" is not a json string: ' % funcStr, de.msg, file=sys.stderr) | 98 print('"%s" is not a json string: ' % funcStr, de.msg, |
99 file=sys.stderr) | |
99 exit(1) | 100 exit(1) |
100 if isinstance(af, dict): | 101 if isinstance(af, dict): |
101 aggfunc = {getColumn(k, dfcols) : v for k,v in af.items()} | 102 aggfunc = {getColumn(k, dfcols): v for k, v in af.items()} |
102 elif isinstance(af, list): | 103 elif isinstance(af, list): |
103 aggfunc = af | 104 aggfunc = af |
104 else: | 105 else: |
105 aggfunc = af | 106 aggfunc = af |
106 return aggfunc | 107 return aggfunc |
107 | 108 |
108 if args.prefix: | 109 if args.prefix: |
109 df = pd.read_table(args.input, skiprows=args.skiprows, header=None, prefix=args.prefix) | 110 df = pd.read_table(args.input, |
111 skiprows=args.skiprows, | |
112 header=None, | |
113 prefix=args.prefix) | |
110 elif args.header: | 114 elif args.header: |
111 df = pd.read_table(args.input, skiprows=args.skiprows, header=args.header) | 115 df = pd.read_table(args.input, |
116 skiprows=args.skiprows, | |
117 header=args.header) | |
112 else: | 118 else: |
113 df = pd.read_table(args.input, skiprows=args.skiprows) | 119 df = pd.read_table(args.input, skiprows=args.skiprows) |
114 df_columns = df.columns.tolist() | 120 df_columns = df.columns.tolist() |
115 index = getColumns(args.index, df_columns) | 121 index = getColumns(args.index, df_columns) |
116 columns = getColumns(args.columns, df_columns) | 122 columns = getColumns(args.columns, df_columns) |
118 fill_value = getValueType(args.fill_value) | 124 fill_value = getValueType(args.fill_value) |
119 aggfunc = getAggFunc(args.aggfunc, values) | 125 aggfunc = getAggFunc(args.aggfunc, values) |
120 pdf = df.pivot_table(index=index, columns=columns, | 126 pdf = df.pivot_table(index=index, columns=columns, |
121 values=values, aggfunc=aggfunc, | 127 values=values, aggfunc=aggfunc, |
122 fill_value=fill_value) | 128 fill_value=fill_value) |
123 pdf_cols = ['_'.join(reversed(p)) if isinstance(p, tuple) else p for p in pdf.columns.tolist()] | 129 pdf_cols = ['_'.join(reversed(p)) if isinstance(p, tuple) else p |
130 for p in pdf.columns.tolist()] | |
124 pdf.to_csv(args.output, sep='\t', float_format='%0.6f', header=pdf_cols) | 131 pdf.to_csv(args.output, sep='\t', float_format='%0.6f', header=pdf_cols) |
125 | 132 |
126 | 133 |
127 if __name__ == "__main__": | 134 if __name__ == "__main__": |
128 __main__() | 135 __main__() |