Mercurial > repos > devteam > column_maker
comparison column_maker.py @ 6:13b6f0007d9e draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/column_maker commit 35c79ccee143e8d178ebd24f6317888de3ca0187"
author | devteam |
---|---|
date | Mon, 25 Jan 2021 10:00:12 +0000 |
parents | 9cd341095afd |
children | 427903d47026 |
comparison
equal
deleted
inserted
replaced
5:9cd341095afd | 6:13b6f0007d9e |
---|---|
7 """ | 7 """ |
8 | 8 |
9 import argparse | 9 import argparse |
10 import json | 10 import json |
11 import re | 11 import re |
12 import sys | |
12 | 13 |
13 parser = argparse.ArgumentParser() | 14 parser = argparse.ArgumentParser() |
14 parser.add_argument('input', type=argparse.FileType('r'), help="input file") | 15 parser.add_argument('input', type=argparse.FileType('r'), help="input file") |
15 parser.add_argument('output', type=argparse.FileType('wt'), help="output file") | 16 parser.add_argument('output', type=argparse.FileType('wt'), help="output file") |
16 parser.add_argument('cond', nargs='?', type=str, help="expression") | 17 parser.add_argument('cond', nargs='?', type=str, help="expression") |
17 parser.add_argument('round', nargs='?', type=str, choices=['yes', 'no'], | |
18 help="round result") | |
19 parser.add_argument('columns', nargs='?', type=int, help="number of columns") | 18 parser.add_argument('columns', nargs='?', type=int, help="number of columns") |
20 parser.add_argument('column_types', nargs='?', type=str, help="comma separated list of column types") | 19 parser.add_argument('column_types', nargs='?', type=str, help="comma separated list of column types") |
21 parser.add_argument('avoid_scientific_notation', nargs='?', type=str, choices=['yes', 'no'], | 20 parser.add_argument('--round', action="store_true", |
21 help="round result") | |
22 parser.add_argument('--avoid_scientific_notation', action="store_true", | |
22 help="avoid scientific notation") | 23 help="avoid scientific notation") |
24 parser.add_argument('--header_new_column_name', default=None, type=str, | |
25 help="First line of input is a header line with column " | |
26 "names and this should become the name of the new " | |
27 "column") | |
23 parser.add_argument('--load_json', default=None, type=argparse.FileType('r'), | 28 parser.add_argument('--load_json', default=None, type=argparse.FileType('r'), |
24 help="overwrite parsed arguments from json file") | 29 help="overwrite parsed arguments from json file") |
25 args = parser.parse_args() | 30 args = parser.parse_args() |
26 | 31 |
27 argparse_dict = vars(args) | 32 argparse_dict = vars(args) |
31 | 36 |
32 fh = argparse_dict['input'] | 37 fh = argparse_dict['input'] |
33 out = argparse_dict['output'] | 38 out = argparse_dict['output'] |
34 expr = argparse_dict['cond'] | 39 expr = argparse_dict['cond'] |
35 round_result = argparse_dict['round'] | 40 round_result = argparse_dict['round'] |
41 avoid_scientific_notation = argparse_dict['avoid_scientific_notation'] | |
42 | |
43 if argparse_dict['header_new_column_name'] is not None: | |
44 header_line = fh.readline().strip('\n') | |
45 out.write( | |
46 '{0}\t{1}\n'.format( | |
47 header_line, argparse_dict['header_new_column_name'] | |
48 ) | |
49 ) | |
36 try: | 50 try: |
37 in_columns = int(argparse_dict['columns']) | 51 in_columns = int(argparse_dict['columns']) |
52 if in_columns < 2: | |
53 # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method. | |
54 raise ValueError | |
38 except Exception: | 55 except Exception: |
39 exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") | 56 if not fh.readline(): |
40 if in_columns < 2: | 57 # empty file content is ok and should produce empty output |
41 # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method. | 58 out.close() |
42 exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") | 59 sys.exit() |
60 sys.exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") | |
43 try: | 61 try: |
44 in_column_types = argparse_dict['column_types'].split(',') | 62 in_column_types = argparse_dict['column_types'].split(',') |
45 except Exception: | 63 except Exception: |
46 exit("Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") | 64 sys.exit("Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") |
47 if len(in_column_types) != in_columns: | 65 if len(in_column_types) != in_columns: |
48 exit("The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") | 66 sys.exit("The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") |
49 avoid_scientific_notation = argparse_dict['avoid_scientific_notation'] | |
50 | |
51 # Unescape if input has been escaped | |
52 mapped_str = { | |
53 '__lt__': '<', | |
54 '__le__': '<=', | |
55 '__eq__': '==', | |
56 '__ne__': '!=', | |
57 '__gt__': '>', | |
58 '__ge__': '>=', | |
59 '__sq__': '\'', | |
60 '__dq__': '"', | |
61 } | |
62 for key, value in mapped_str.items(): | |
63 expr = expr.replace(key, value) | |
64 | 67 |
65 operators = 'is|not|or|and' | 68 operators = 'is|not|or|and' |
66 builtin_and_math_functions = 'abs|all|any|bin|chr|cmp|complex|divmod|float|bool|hex|int|len|long|max|min|oct|ord|pow|range|reversed|round|sorted|str|sum|type|unichr|unicode|log|log10|exp|sqrt|ceil|floor' | 69 builtin_and_math_functions = 'abs|all|any|bin|chr|cmp|complex|divmod|float|bool|hex|int|len|long|max|min|oct|ord|pow|range|reversed|round|sorted|str|sum|type|unichr|unicode|log|log10|exp|sqrt|ceil|floor' |
67 string_and_list_methods = [name for name in dir('') + dir([]) if not name.startswith('_')] | 70 string_and_list_methods = [name for name in dir('') + dir([]) if not name.startswith('_')] |
68 whitelist = r"^([c0-9\+\-\*\/\(\)\.\'\"><=,:! ]|%s|%s|%s)*$" % (operators, builtin_and_math_functions, '|'.join(string_and_list_methods)) | 71 whitelist = r"^([c0-9\+\-\*\/\(\)\.\'\"><=,:! ]|%s|%s|%s)*$" % (operators, builtin_and_math_functions, '|'.join(string_and_list_methods)) |
69 if not re.compile(whitelist).match(expr): | 72 if not re.compile(whitelist).match(expr): |
70 exit("Invalid expression") | 73 sys.exit("Invalid expression") |
71 if avoid_scientific_notation == "yes": | 74 if avoid_scientific_notation: |
72 expr = "format_float_positional(%s)" % expr | 75 expr = "format_float_positional(%s)" % expr |
73 | 76 |
74 # Prepare the column variable names and wrappers for column data types | 77 # Prepare the column variable names and wrappers for column data types |
75 cols, type_casts = [], [] | 78 cols, type_casts = [], [] |
76 for col in range(1, in_columns + 1): | 79 for col in range(1, in_columns + 1): |
77 col_name = "c%d" % col | 80 col_name = "c%d" % col |
78 cols.append(col_name) | 81 cols.append(col_name) |
79 col_type = in_column_types[col - 1].strip() | 82 col_type = in_column_types[col - 1].strip() |
80 if round_result == 'no' and col_type == 'int': | 83 if not round_result and col_type == 'int': |
81 col_type = 'float' | 84 col_type = 'float' |
82 type_cast = "%s(%s)" % (col_type, col_name) | 85 type_cast = "%s(%s)" % (col_type, col_name) |
83 type_casts.append(type_cast) | 86 type_casts.append(type_cast) |
84 | 87 |
85 col_str = ', '.join(cols) # 'c1, c2, c3, c4' | 88 col_str = ', '.join(cols) # 'c1, c2, c3, c4' |
116 continue | 119 continue |
117 try: | 120 try: |
118 %s | 121 %s |
119 %s | 122 %s |
120 new_val = %s | 123 new_val = %s |
121 if round_result == "yes": | 124 if round_result: |
122 new_val = int(round(new_val)) | 125 new_val = int(round(new_val)) |
123 new_line = line + '\\t' + str(new_val) + "\\n" | 126 new_line = line + '\\t' + str(new_val) + "\\n" |
124 out.write(new_line) | 127 out.write(new_line) |
125 lines_kept += 1 | 128 lines_kept += 1 |
126 except Exception: | 129 except Exception: |
136 exec(code) | 139 exec(code) |
137 except Exception as e: | 140 except Exception as e: |
138 out.close() | 141 out.close() |
139 if str(e).startswith('invalid syntax'): | 142 if str(e).startswith('invalid syntax'): |
140 valid_expr = False | 143 valid_expr = False |
141 exit('Expression "%s" likely invalid. See tool tips, syntax and examples.' % expr) | 144 sys.exit('Expression "%s" likely invalid. See tool tips, syntax and examples.' % expr) |
142 else: | 145 else: |
143 exit(str(e)) | 146 sys.exit(str(e)) |
144 | 147 |
145 if valid_expr: | 148 if valid_expr: |
146 out.close() | 149 out.close() |
147 valid_lines = total_lines - skipped_lines | 150 valid_lines = total_lines - skipped_lines |
148 print('Creating column %d with expression %s' % (in_columns + 1, expr)) | 151 print('Creating column %d with expression %s' % (in_columns + 1, expr)) |