Mercurial > repos > recetox > table_pandas_arithmetics
comparison table_pandas_rename_columns_regex.py @ 0:e6d5fee8c7a6 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
| author | recetox |
|---|---|
| date | Wed, 29 Jan 2025 15:35:42 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e6d5fee8c7a6 |
|---|---|
| 1 import argparse | |
| 2 import logging | |
| 3 import re | |
| 4 from typing import List, Tuple | |
| 5 | |
| 6 | |
| 7 import pandas as pd | |
| 8 from utils import LoadDataAction, SplitColumnIndicesAction, StoreOutputAction | |
| 9 | |
| 10 | |
def rename_columns(
    df: pd.DataFrame, columns: List[int], regex_check: str, regex_replace: str
) -> pd.DataFrame:
    """
    Rename selected columns of a dataframe using a regex substitution.

    Columns are addressed strictly by position: only the columns whose
    indices are listed are renamed, each exactly once, and the new name is
    always derived from the column's original name. Columns that merely
    share a label with a selected column are left untouched.

    Parameters:
    df (pd.DataFrame): The input dataframe. Its column labels are updated in place.
    columns (List[int]): The 0-based indices of the columns to rename.
    regex_check (str): The regex pattern to search for in column names.
    regex_replace (str): The replacement template substituted for each match.

    Returns:
    pd.DataFrame: The same dataframe object, with renamed columns.

    Raises:
    IndexError: If an entry of `columns` is out of range for the dataframe.
    re.error: If `regex_check` or `regex_replace` is not a valid regex/template.
    """
    try:
        pattern = re.compile(regex_check)

        # Snapshot the labels so every substitution starts from the original
        # name, even when an index is listed more than once.
        original_names = list(df.columns)
        new_names = list(original_names)

        # Work by position instead of by label: a label-based rename would
        # also hit unselected columns with the same name and could rename a
        # column a second time when an earlier result collides with a later
        # selected label.
        for index in columns:
            new_names[index] = pattern.sub(regex_replace, original_names[index])

        # Only touch the column index when something actually changed, so an
        # untouched dataframe keeps its original index object.
        if new_names != original_names:
            df.columns = pd.Index(new_names, name=df.columns.name)
        return df
    except IndexError as e:
        logging.error(f"Invalid column index: {e}")
        raise
    except re.error as e:
        logging.error(f"Invalid regex pattern: {e}")
        raise
    except Exception as e:
        logging.error(f"Error renaming columns: {e}")
        raise
| 45 | |
| 46 | |
def main(
    input_dataset: pd.DataFrame,
    columns: List[int],
    regex_check: str,
    regex_replace: str,
    output_dataset: Tuple[callable, str],
) -> None:
    """
    Rename the selected columns of a dataset and write the result out.

    Parameters:
    input_dataset (pd.DataFrame): The input dataframe whose columns are renamed.
    columns (List[int]): The 0-based indices of the columns to rename.
    regex_check (str): The regex pattern to check for in column names.
    regex_replace (str): The regex pattern to replace with in column names.
    output_dataset (Tuple[callable, str]): A writer callable and the path it
    should write to; the writer is invoked as writer(dataframe, path).

    Any exception is logged and then re-raised to the caller.
    """
    try:
        # Unpack first so a malformed output specification fails before any
        # renaming work is done.
        writer, destination = output_dataset
        renamed = rename_columns(input_dataset, columns, regex_check, regex_replace)
        writer(renamed, destination)
    except Exception as e:
        logging.error(f"Error in main function: {e}")
        raise
| 70 | |
| 71 | |
if __name__ == "__main__":
    # Command-line entry point: configure logging, parse the arguments and
    # hand the parsed values to main().
    logging.basicConfig(level=logging.INFO)

    cli = argparse.ArgumentParser(
        description="Apply regex-based transformations on multiple dataframe columns."
    )

    # The dataset options each take two values (path and file extension) and
    # are processed by custom argparse actions imported from utils.
    # NOTE(review): presumably LoadDataAction yields a loaded dataframe and
    # StoreOutputAction a (writer, path) pair, as main() expects - confirm in utils.
    cli.add_argument(
        "--input_dataset",
        required=True,
        nargs=2,
        action=LoadDataAction,
        help="Path to the input dataset and its file extension (csv, tsv, parquet)",
    )
    # NOTE(review): the help text says 1-based while rename_columns documents
    # 0-based indices; presumably SplitColumnIndicesAction converts - confirm.
    cli.add_argument(
        "--columns",
        required=True,
        action=SplitColumnIndicesAction,
        help="Comma-separated list of 1-based indices of the columns to apply the transformation on",
    )
    cli.add_argument(
        "--regex_check",
        required=True,
        type=str,
        help="Regex pattern to check for in column names",
    )
    cli.add_argument(
        "--regex_replace",
        required=True,
        type=str,
        help="Regex pattern to replace with in column names",
    )
    cli.add_argument(
        "--output_dataset",
        required=True,
        nargs=2,
        action=StoreOutputAction,
        help="Path to the output dataset and its file extension (csv, tsv, parquet)",
    )

    options = cli.parse_args()
    main(
        input_dataset=options.input_dataset,
        columns=options.columns,
        regex_check=options.regex_check,
        regex_replace=options.regex_replace,
        output_dataset=options.output_dataset,
    )
