Mercurial > repos > devteam > column_maker
comparison column_maker.py @ 6:13b6f0007d9e draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/column_maker commit 35c79ccee143e8d178ebd24f6317888de3ca0187"
| author | devteam |
|---|---|
| date | Mon, 25 Jan 2021 10:00:12 +0000 |
| parents | 9cd341095afd |
| children | 427903d47026 |
comparison
equal
deleted
inserted
replaced
| 5:9cd341095afd | 6:13b6f0007d9e |
|---|---|
| 7 """ | 7 """ |
| 8 | 8 |
| 9 import argparse | 9 import argparse |
| 10 import json | 10 import json |
| 11 import re | 11 import re |
| | 12 import sys |
| 12 | 13 |
| 13 parser = argparse.ArgumentParser() | 14 parser = argparse.ArgumentParser() |
| 14 parser.add_argument('input', type=argparse.FileType('r'), help="input file") | 15 parser.add_argument('input', type=argparse.FileType('r'), help="input file") |
| 15 parser.add_argument('output', type=argparse.FileType('wt'), help="output file") | 16 parser.add_argument('output', type=argparse.FileType('wt'), help="output file") |
| 16 parser.add_argument('cond', nargs='?', type=str, help="expression") | 17 parser.add_argument('cond', nargs='?', type=str, help="expression") |
| 17 parser.add_argument('round', nargs='?', type=str, choices=['yes', 'no'], | |
| 18 help="round result") | |
| 19 parser.add_argument('columns', nargs='?', type=int, help="number of columns") | 18 parser.add_argument('columns', nargs='?', type=int, help="number of columns") |
| 20 parser.add_argument('column_types', nargs='?', type=str, help="comma separated list of column types") | 19 parser.add_argument('column_types', nargs='?', type=str, help="comma separated list of column types") |
| 21 parser.add_argument('avoid_scientific_notation', nargs='?', type=str, choices=['yes', 'no'], | 20 parser.add_argument('--round', action="store_true", |
| | 21 help="round result") |
| | 22 parser.add_argument('--avoid_scientific_notation', action="store_true", |
| 22 help="avoid scientific notation") | 23 help="avoid scientific notation") |
| | 24 parser.add_argument('--header_new_column_name', default=None, type=str, |
| | 25 help="First line of input is a header line with column " |
| | 26 "names and this should become the name of the new " |
| | 27 "column") |
| 23 parser.add_argument('--load_json', default=None, type=argparse.FileType('r'), | 28 parser.add_argument('--load_json', default=None, type=argparse.FileType('r'), |
| 24 help="overwrite parsed arguments from json file") | 29 help="overwrite parsed arguments from json file") |
| 25 args = parser.parse_args() | 30 args = parser.parse_args() |
| 26 | 31 |
| 27 argparse_dict = vars(args) | 32 argparse_dict = vars(args) |
| 31 | 36 |
| 32 fh = argparse_dict['input'] | 37 fh = argparse_dict['input'] |
| 33 out = argparse_dict['output'] | 38 out = argparse_dict['output'] |
| 34 expr = argparse_dict['cond'] | 39 expr = argparse_dict['cond'] |
| 35 round_result = argparse_dict['round'] | 40 round_result = argparse_dict['round'] |
| | 41 avoid_scientific_notation = argparse_dict['avoid_scientific_notation'] |
| | 42 |
| | 43 if argparse_dict['header_new_column_name'] is not None: |
| | 44 header_line = fh.readline().strip('\n') |
| | 45 out.write( |
| | 46 '{0}\t{1}\n'.format( |
| | 47 header_line, argparse_dict['header_new_column_name'] |
| | 48 ) |
| | 49 ) |
| 36 try: | 50 try: |
| 37 in_columns = int(argparse_dict['columns']) | 51 in_columns = int(argparse_dict['columns']) |
| | 52 if in_columns < 2: |
| | 53 # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method. |
| | 54 raise ValueError |
| 38 except Exception: | 55 except Exception: |
| 39 exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") | 56 if not fh.readline(): |
| 40 if in_columns < 2: | 57 # empty file content is ok and should produce empty output |
| 41 # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method. | 58 out.close() |
| 42 exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") | 59 sys.exit() |
| | 60 sys.exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") |
| 43 try: | 61 try: |
| 44 in_column_types = argparse_dict['column_types'].split(',') | 62 in_column_types = argparse_dict['column_types'].split(',') |
| 45 except Exception: | 63 except Exception: |
| 46 exit("Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") | 64 sys.exit("Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") |
| 47 if len(in_column_types) != in_columns: | 65 if len(in_column_types) != in_columns: |
| 48 exit("The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") | 66 sys.exit("The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") |
| 49 avoid_scientific_notation = argparse_dict['avoid_scientific_notation'] | |
| 50 | |
| 51 # Unescape if input has been escaped | |
| 52 mapped_str = { | |
| 53 '__lt__': '<', | |
| 54 '__le__': '<=', | |
| 55 '__eq__': '==', | |
| 56 '__ne__': '!=', | |
| 57 '__gt__': '>', | |
| 58 '__ge__': '>=', | |
| 59 '__sq__': '\'', | |
| 60 '__dq__': '"', | |
| 61 } | |
| 62 for key, value in mapped_str.items(): | |
| 63 expr = expr.replace(key, value) | |
| 64 | 67 |
| 65 operators = 'is|not|or|and' | 68 operators = 'is|not|or|and' |
| 66 builtin_and_math_functions = 'abs|all|any|bin|chr|cmp|complex|divmod|float|bool|hex|int|len|long|max|min|oct|ord|pow|range|reversed|round|sorted|str|sum|type|unichr|unicode|log|log10|exp|sqrt|ceil|floor' | 69 builtin_and_math_functions = 'abs|all|any|bin|chr|cmp|complex|divmod|float|bool|hex|int|len|long|max|min|oct|ord|pow|range|reversed|round|sorted|str|sum|type|unichr|unicode|log|log10|exp|sqrt|ceil|floor' |
| 67 string_and_list_methods = [name for name in dir('') + dir([]) if not name.startswith('_')] | 70 string_and_list_methods = [name for name in dir('') + dir([]) if not name.startswith('_')] |
| 68 whitelist = r"^([c0-9\+\-\*\/\(\)\.\'\"><=,:! ]|%s|%s|%s)*$" % (operators, builtin_and_math_functions, '|'.join(string_and_list_methods)) | 71 whitelist = r"^([c0-9\+\-\*\/\(\)\.\'\"><=,:! ]|%s|%s|%s)*$" % (operators, builtin_and_math_functions, '|'.join(string_and_list_methods)) |
| 69 if not re.compile(whitelist).match(expr): | 72 if not re.compile(whitelist).match(expr): |
| 70 exit("Invalid expression") | 73 sys.exit("Invalid expression") |
| 71 if avoid_scientific_notation == "yes": | 74 if avoid_scientific_notation: |
| 72 expr = "format_float_positional(%s)" % expr | 75 expr = "format_float_positional(%s)" % expr |
| 73 | 76 |
| 74 # Prepare the column variable names and wrappers for column data types | 77 # Prepare the column variable names and wrappers for column data types |
| 75 cols, type_casts = [], [] | 78 cols, type_casts = [], [] |
| 76 for col in range(1, in_columns + 1): | 79 for col in range(1, in_columns + 1): |
| 77 col_name = "c%d" % col | 80 col_name = "c%d" % col |
| 78 cols.append(col_name) | 81 cols.append(col_name) |
| 79 col_type = in_column_types[col - 1].strip() | 82 col_type = in_column_types[col - 1].strip() |
| 80 if round_result == 'no' and col_type == 'int': | 83 if not round_result and col_type == 'int': |
| 81 col_type = 'float' | 84 col_type = 'float' |
| 82 type_cast = "%s(%s)" % (col_type, col_name) | 85 type_cast = "%s(%s)" % (col_type, col_name) |
| 83 type_casts.append(type_cast) | 86 type_casts.append(type_cast) |
| 84 | 87 |
| 85 col_str = ', '.join(cols) # 'c1, c2, c3, c4' | 88 col_str = ', '.join(cols) # 'c1, c2, c3, c4' |
| 116 continue | 119 continue |
| 117 try: | 120 try: |
| 118 %s | 121 %s |
| 119 %s | 122 %s |
| 120 new_val = %s | 123 new_val = %s |
| 121 if round_result == "yes": | 124 if round_result: |
| 122 new_val = int(round(new_val)) | 125 new_val = int(round(new_val)) |
| 123 new_line = line + '\\t' + str(new_val) + "\\n" | 126 new_line = line + '\\t' + str(new_val) + "\\n" |
| 124 out.write(new_line) | 127 out.write(new_line) |
| 125 lines_kept += 1 | 128 lines_kept += 1 |
| 126 except Exception: | 129 except Exception: |
| 136 exec(code) | 139 exec(code) |
| 137 except Exception as e: | 140 except Exception as e: |
| 138 out.close() | 141 out.close() |
| 139 if str(e).startswith('invalid syntax'): | 142 if str(e).startswith('invalid syntax'): |
| 140 valid_expr = False | 143 valid_expr = False |
| 141 exit('Expression "%s" likely invalid. See tool tips, syntax and examples.' % expr) | 144 sys.exit('Expression "%s" likely invalid. See tool tips, syntax and examples.' % expr) |
| 142 else: | 145 else: |
| 143 exit(str(e)) | 146 sys.exit(str(e)) |
| 144 | 147 |
| 145 if valid_expr: | 148 if valid_expr: |
| 146 out.close() | 149 out.close() |
| 147 valid_lines = total_lines - skipped_lines | 150 valid_lines = total_lines - skipped_lines |
| 148 print('Creating column %d with expression %s' % (in_columns + 1, expr)) | 151 print('Creating column %d with expression %s' % (in_columns + 1, expr)) |
