Mercurial > repos > recetox > table_pandas_rename_column
view table_pandas_transform.py @ 0:3f54cd56a65e draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
author | recetox |
---|---|
date | Wed, 29 Jan 2025 15:35:31 +0000 |
parents | |
children |
line wrap: on
line source
import argparse
import logging
from typing import Callable, List, Tuple

import numpy as np
import pandas as pd

from utils import LoadDataAction, SplitColumnIndicesAction, StoreOutputAction

# Available transformations, keyed by the name accepted by --transformation.
# Note that "log" and "ln" are the same function: both map to np.log, the
# natural logarithm. Use "log10" for the base-10 logarithm.
TRANSFORMATIONS = {
    "log": np.log,
    "log10": np.log10,
    "ln": np.log,
    "sqrt": np.sqrt,
    "exp": np.exp,
    "abs": np.abs,
    "floor": np.floor,
    "ceil": np.ceil,
}


def _resolve_column_position(column_index: int, n_columns: int) -> int:
    """Return the non-negative position for *column_index*, or raise IndexError."""
    if not -n_columns <= column_index < n_columns:
        raise IndexError(
            f"index {column_index} is out of bounds for a table "
            f"with {n_columns} columns"
        )
    # Normalise negative indices (counted from the end) to a plain position.
    return column_index % n_columns


def apply_transformation(
    df: pd.DataFrame, columns: List[int], transformation: str
) -> pd.DataFrame:
    """
    Apply the specified transformation to the given columns of the dataframe.

    Columns are addressed strictly by position, so a table containing
    duplicate column names only has the selected columns transformed.
    The dataframe is modified in place and the same object is returned.

    Parameters:
    df (pd.DataFrame): The input dataframe.
    columns (List[int]): The 0-based indices of the columns to transform.
        Negative indices count from the last column.
    transformation (str): The transformation to apply; a key of
        TRANSFORMATIONS.

    Returns:
    pd.DataFrame: The dataframe with the transformation applied.

    Raises:
    KeyError: If the transformation name is not in TRANSFORMATIONS.
    IndexError: If a column index is out of bounds. All indices are checked
        before any column is modified.
    Exception: Any error raised by the transformation itself is logged and
        re-raised unchanged.
    """
    try:
        transform_func = TRANSFORMATIONS[transformation]
    except KeyError as e:
        logging.error("Invalid transformation: %s", e)
        raise

    n_columns = len(df.columns)
    try:
        # Validate everything up front so a bad index cannot leave the
        # dataframe half-transformed.
        positions = [
            _resolve_column_position(column_index, n_columns)
            for column_index in columns
        ]
    except IndexError as e:
        logging.error("Invalid column index: %s", e)
        raise

    try:
        for position in positions:
            # Read and write by position: label-based access (df[name]) would
            # touch every column sharing that name. isetitem replaces the
            # whole column, so the dtype may change (e.g. int -> float).
            df.isetitem(position, transform_func(df.iloc[:, position]))
    except Exception as e:
        logging.error("Error applying transformation: %s", e)
        raise

    return df


def main(
    input_dataset: pd.DataFrame,
    columns: List[int],
    transformation: str,
    output_dataset: Tuple[Callable[[pd.DataFrame, str], None], str],
) -> None:
    """
    Apply the transformation to the input dataset and write the result.

    Parameters:
    input_dataset (pd.DataFrame): The input dataset.
    columns (List[int]): The 0-based indices of the columns to transform.
    transformation (str): The transformation to apply.
    output_dataset (Tuple[Callable[[pd.DataFrame, str], None], str]): A pair
        of (writer function, output file path); the writer is called as
        ``writer(df, path)``.

    Raises:
    Exception: Any error from the transformation or the writer is logged and
        re-raised.
    """
    try:
        df = apply_transformation(input_dataset, columns, transformation)
        write_func, file_path = output_dataset
        write_func(df, file_path)
    except Exception as e:
        logging.error("Error in main function: %s", e)
        raise


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        description="Apply mathematical transformations to dataframe columns."
    )
    # NOTE(review): the custom actions come from utils, which is not visible
    # here. Presumably LoadDataAction yields a loaded dataframe,
    # SplitColumnIndicesAction converts the 1-based CLI indices to the 0-based
    # list main() expects, and StoreOutputAction yields a (writer, path)
    # tuple -- confirm against utils.
    parser.add_argument(
        "--input_dataset",
        nargs=2,
        action=LoadDataAction,
        required=True,
        help="Path to the input dataset and its file extension (csv, tsv, parquet)",
    )
    parser.add_argument(
        "--columns",
        action=SplitColumnIndicesAction,
        required=True,
        help="Comma-separated list of 1-based indices of the columns to apply the transformation on",
    )
    parser.add_argument(
        "--transformation",
        type=str,
        choices=list(TRANSFORMATIONS),
        required=True,
        help="Transformation to apply",
    )
    parser.add_argument(
        "--output_dataset",
        nargs=2,
        action=StoreOutputAction,
        required=True,
        help="Path to the output dataset and its file extension (csv, tsv, parquet)",
    )

    args = parser.parse_args()
    main(args.input_dataset, args.columns, args.transformation, args.output_dataset)