Mercurial > repos > iuc > table_compute
diff scripts/table_compute.py @ 1:dddadbbac949 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/table_compute commit 6820ec9431a22576f3716c40feeb27f0b8cf5e83"
author: iuc
date: Fri, 30 Aug 2019 05:28:18 -0400
parents: 1b0f96ed73f2
children: 02c3e335a695
line wrap: on
line diff
--- a/scripts/table_compute.py Sat Aug 17 16:25:37 2019 -0400 +++ b/scripts/table_compute.py Fri Aug 30 05:28:18 2019 -0400 @@ -3,7 +3,8 @@ Table Compute tool - a wrapper around pandas with parameter input validation. """ -__version__ = "0.8" + +__version__ = "0.9.1" import csv import math @@ -11,15 +12,17 @@ import numpy as np import pandas as pd -import userconfig as uc from safety import Safety -# This should be generated in the same directory -# Version command should not need to copy the config if len(argv) == 2 and argv[1] == "--version": print(__version__) exit(-1) +# The import below should be generated in the same directory as +# the table_compute.py script. +# It is placed here so that the --version switch does not fail +import userconfig as uc # noqa: I100,I202 + class Utils: @staticmethod @@ -37,12 +40,74 @@ "Returns a valid two value DataFrame or Series operator" return getattr(type(pd_obj), "__" + op_name + "__") + @staticmethod + def readcsv(filedict, narm): + data = pd.read_csv( + filedict["file"], + header=filedict["header"], + index_col=filedict["row_names"], + keep_default_na=narm, + nrows=filedict["nrows"], + skipfooter=filedict["skipfooter"], + skip_blank_lines=filedict["skip_blank_lines"], + sep='\t' + ) + # Fix whitespace issues in index or column names + data.columns = [col.strip() if type(col) is str else col + for col in data.columns] + data.index = [row.strip() if type(row) is str else row + for row in data.index] + return(data) -# Math is imported but not directly used because users -# may specify a "math.<function>" when inserting a custom -# function. To remove linting errors, which break CI testing -# we will just use an arbitrary math statement here. -__ = math.log + @staticmethod + def rangemaker(tab): + # e.g. 
"1:3,2:-2" specifies "1,2,3,2,1,0,-1,-2" to give [0,1,2,1,0,-1,-2] + # Positive indices are decremented by 1 to reference 0-base numbering + # Negative indices are unaltered, so that -1 refers to the last column + out = [] + err_mess = None + for ranges in tab.split(","): + nums = ranges.split(":") + if len(nums) == 1: + numb = int(nums[0]) + # Positive numbers get decremented. + # i.e. column "3" refers to index 2 + # column "-1" still refers to index -1 + if numb != 0: + out.append(numb if (numb < 0) else (numb - 1)) + else: + err_mess = "Please do not use 0 as an index" + elif len(nums) == 2: + left, right = map(int, nums) + if 0 in (left, right): + err_mess = "Please do not use 0 as an index" + elif left < right: + if left > 0: # and right > 0 too + # 1:3 to 0,1,2 + out.extend(range(left - 1, right)) + elif right < 0: # and left < 0 too + # -3:-1 to -3,-2,-1 + out.extend(range(left, right + 1)) + elif left < 0 and right > 0: + # -2:2 to -2,-1,0,1 + out.extend(range(left, 0)) + out.extend(range(0, right)) + elif right < left: + if right > 0: # and left > 0 + # 3:1 to 2,1,0 + out.extend(range(left - 1, right - 2, -1)) + elif left < 0: # and right < 0 + # -1:-3 to -1,-2,-3 + out.extend(range(left, right - 1, -1)) + elif right < 0 and left > 0: + # 2:-2 to 1,0,-1,-2 + out.extend(range(left - 1, right - 1, -1)) + else: + err_mess = "%s should not be equal or contain a zero" % nums + if err_mess: + print(err_mess) + return(None) + return(out) # Set decimal precision @@ -55,19 +120,7 @@ if user_mode == "single": # Read in TSV file - data = pd.read_csv( - uc.Data["tables"][0]["reader_file"], - header=uc.Data["tables"][0]["reader_header"], - index_col=uc.Data["tables"][0]["reader_row_col"], - keep_default_na=uc.Default["narm"], - sep='\t' - ) - # Fix whitespace issues in index or column names - data.columns = [col.strip() if type(col) is str else col - for col in data.columns] - data.index = [row.strip() if type(row) is str else row - for row in data.index] - + data = 
Utils.readcsv(uc.Data["tables"][0], uc.Default["narm"]) user_mode_single = params["user_mode_single"] if user_mode_single == "precision": @@ -79,9 +132,13 @@ rows_specified = params["select_rows_wanted"] # Select all indexes if empty array of values - if not cols_specified: + if cols_specified: + cols_specified = Utils.rangemaker(cols_specified) + else: cols_specified = range(len(data.columns)) - if not rows_specified: + if rows_specified: + rows_specified = Utils.rangemaker(rows_specified) + else: rows_specified = range(len(data)) # do not use duplicate indexes @@ -161,16 +218,44 @@ elif user_mode_single == "element": # lt, gt, ge, etc. operation = params["element_op"] + bool_mat = None if operation is not None: - op = Utils.getTwoValuePandaOp(operation, data) - value = params["element_value"] - try: - # Could be numeric - value = float(value) - except ValueError: - pass - # generate filter matrix of True/False values - bool_mat = op(data, value) + if operation == "rowcol": + # Select all indexes if empty array of values + if "element_cols" in params: + cols_specified = Utils.rangemaker(params["element_cols"]) + else: + cols_specified = range(len(data.columns)) + if "element_rows" in params: + rows_specified = Utils.rangemaker(params["element_rows"]) + else: + rows_specified = range(len(data)) + + # Inclusive selection: + # - True: Giving a row or column will match all elements in that row or column + # - False: Giving a row or column will match only elements in both those rows and columns + inclusive = params["element_inclusive"] + + # Create a bool matrix (initialised to False) with selected + # rows and columns set to True + bool_mat = data.copy() + bool_mat[:] = False + if inclusive: + bool_mat.iloc[rows_specified, :] = True + bool_mat.iloc[:, cols_specified] = True + else: + bool_mat.iloc[rows_specified, cols_specified] = True + + else: + op = Utils.getTwoValuePandaOp(operation, data) + value = params["element_value"] + try: + # Could be numeric + value = 
float(value) + except ValueError: + pass + # generate filter matrix of True/False values + bool_mat = op(data, value) else: # implement no filtering through a filter matrix filled with # True values. @@ -265,13 +350,7 @@ # Read and populate tables for x, t_sect in enumerate(table_sections): - tmp = pd.read_csv( - t_sect["file"], - header=t_sect["header"], - index_col=t_sect["row_names"], - keep_default_na=uc.Default["narm"], - sep="\t" - ) + tmp = Utils.readcsv(t_sect, uc.Default["narm"]) table.append(tmp) table_names.append("table" + str(x + 1)) table_names_real.append("table[" + str(x) + "]")