comparison pandas_pivot_table.py @ 1:c02f59711eb6 draft

"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/pandas_pivot_table/ commit af9d36fa3efb1cf136a69e7ed1a5f06261f9b0d6-dirty"
author jjohnson
date Wed, 16 Dec 2020 16:13:51 +0000
parents 621144f8dbe9
children 6f05390deffa
comparison
equal deleted inserted replaced
0:621144f8dbe9 1:c02f59711eb6
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 import argparse 3 import argparse
4 import json 4 import json
5 import re
6 import pandas as pd 5 import pandas as pd
7 import sys 6 import sys
8 from json.decoder import JSONDecodeError 7 from json.decoder import JSONDecodeError
9 8
10 9
77 return name 76 return name
78 else: 77 else:
79 try: 78 try:
80 i = int(name) 79 i = int(name)
81 return dfcols[i] 80 return dfcols[i]
82 except: 81 except Exception:
83 print('%s not a column in %s' % (name, dfcols), file=sys.stderr) 82 print('%s not a column in %s' % (name, dfcols),
83 file=sys.stderr)
84 exit(1) 84 exit(1)
85 85
86 def getColumns(val, dfcols): 86 def getColumns(val, dfcols):
87 fields = [v.strip() for v in val.split(',')] 87 fields = [v.strip() for v in val.split(',')]
88 cols = [] 88 cols = []
93 def getAggFunc(funcStr, dfcols): 93 def getAggFunc(funcStr, dfcols):
94 af = funcStr 94 af = funcStr
95 try: 95 try:
96 af = json.loads(funcStr) 96 af = json.loads(funcStr)
97 except JSONDecodeError as de: 97 except JSONDecodeError as de:
98 print('"%s" is not a json string: ' % funcStr, de.msg, file=sys.stderr) 98 print('"%s" is not a json string: ' % funcStr, de.msg,
99 file=sys.stderr)
99 exit(1) 100 exit(1)
100 if isinstance(af, dict): 101 if isinstance(af, dict):
101 aggfunc = {getColumn(k, dfcols) : v for k,v in af.items()} 102 aggfunc = {getColumn(k, dfcols): v for k, v in af.items()}
102 elif isinstance(af, list): 103 elif isinstance(af, list):
103 aggfunc = af 104 aggfunc = af
104 else: 105 else:
105 aggfunc = af 106 aggfunc = af
106 return aggfunc 107 return aggfunc
107 108
108 if args.prefix: 109 if args.prefix:
109 df = pd.read_table(args.input, skiprows=args.skiprows, header=None, prefix=args.prefix) 110 df = pd.read_table(args.input,
111 skiprows=args.skiprows,
112 header=None,
113 prefix=args.prefix)
110 elif args.header: 114 elif args.header:
111 df = pd.read_table(args.input, skiprows=args.skiprows, header=args.header) 115 df = pd.read_table(args.input,
116 skiprows=args.skiprows,
117 header=args.header)
112 else: 118 else:
113 df = pd.read_table(args.input, skiprows=args.skiprows) 119 df = pd.read_table(args.input, skiprows=args.skiprows)
114 df_columns = df.columns.tolist() 120 df_columns = df.columns.tolist()
115 index = getColumns(args.index, df_columns) 121 index = getColumns(args.index, df_columns)
116 columns = getColumns(args.columns, df_columns) 122 columns = getColumns(args.columns, df_columns)
118 fill_value = getValueType(args.fill_value) 124 fill_value = getValueType(args.fill_value)
119 aggfunc = getAggFunc(args.aggfunc, values) 125 aggfunc = getAggFunc(args.aggfunc, values)
120 pdf = df.pivot_table(index=index, columns=columns, 126 pdf = df.pivot_table(index=index, columns=columns,
121 values=values, aggfunc=aggfunc, 127 values=values, aggfunc=aggfunc,
122 fill_value=fill_value) 128 fill_value=fill_value)
123 pdf_cols = ['_'.join(reversed(p)) if isinstance(p, tuple) else p for p in pdf.columns.tolist()] 129 pdf_cols = ['_'.join(reversed(p)) if isinstance(p, tuple) else p
130 for p in pdf.columns.tolist()]
124 pdf.to_csv(args.output, sep='\t', float_format='%0.6f', header=pdf_cols) 131 pdf.to_csv(args.output, sep='\t', float_format='%0.6f', header=pdf_cols)
125 132
126 133
127 if __name__ == "__main__": 134 if __name__ == "__main__":
128 __main__() 135 __main__()