Mercurial > repos > recetox > table_pandas_rename_column
annotate table_pandas_rename_column.py @ 0:3f54cd56a65e draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
author | recetox |
---|---|
date | Wed, 29 Jan 2025 15:35:31 +0000 |
parents | |
children |
rev | line source |
---|---|
0
3f54cd56a65e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
recetox
parents:
diff
changeset
|
1 import argparse |
3f54cd56a65e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
recetox
parents:
diff
changeset
|
2 import logging |
3f54cd56a65e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
recetox
parents:
diff
changeset
|
3 from typing import Tuple |
3f54cd56a65e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
recetox
parents:
diff
changeset
|
4 |
3f54cd56a65e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
recetox
parents:
diff
changeset
|
5 import pandas as pd |
3f54cd56a65e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
recetox
parents:
diff
changeset
|
6 from utils import KeyValuePairsAction, LoadDataAction, StoreOutputAction |
3f54cd56a65e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
recetox
parents:
diff
changeset
|
7 |
3f54cd56a65e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
recetox
parents:
diff
changeset
|
8 |
3f54cd56a65e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
recetox
parents:
diff
changeset
|
def rename_columns(df: pd.DataFrame, rename_dict: dict):
    """
    Rename columns in the dataframe based on the provided dictionary.

    Columns are addressed by their 1-based position. Every index is checked
    against the number of columns before it is used, so 0 and negative
    indices are rejected instead of silently wrapping around to columns
    counted from the end of the table.

    Parameters:
    df (pd.DataFrame): The input dataframe.
    rename_dict (dict): A dictionary with 1-based column index as key and new column name as value.

    Returns:
    pd.DataFrame: The dataframe with renamed columns.

    Raises:
    IndexError: If an index is smaller than 1 or larger than the number of columns.
    """
    try:
        n_columns = len(df.columns)
        rename_map = {}
        for key, value in rename_dict.items():
            # Without this check Python's negative indexing would turn index 0
            # into -1 and rename the last column by accident.
            if not 1 <= key <= n_columns:
                raise IndexError(
                    f"column index {key} is out of range (expected 1 to {n_columns})"
                )
            # Convert 1-based index to column name
            rename_map[df.columns[key - 1]] = value
        return df.rename(columns=rename_map)
    except IndexError as e:
        logging.error(f"Invalid column index: {e}")
        raise
    except Exception as e:
        # Any other failure (e.g. a non-numeric key) is logged and propagated unchanged.
        logging.error(f"Error renaming columns: {e}")
        raise
3f54cd56a65e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
recetox
parents:
diff
changeset
|
31 |
3f54cd56a65e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
recetox
parents:
diff
changeset
|
32 |
3f54cd56a65e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
recetox
parents:
diff
changeset
|
def main(input_dataset: pd.DataFrame, rename_dict: dict, output_dataset: Tuple[callable, str]):
    """
    Rename the columns of the input dataset and hand the result to the output writer.

    Parameters:
    input_dataset (pd.DataFrame): The input dataset.
    rename_dict (dict): A dictionary with 1-based column index as key and new column name as value.
    output_dataset (tuple): A pair of the function that stores the output dataset and the path to store it at.

    Any exception raised along the way is logged and then re-raised unchanged.
    """
    try:
        # Unpack first so a malformed output specification fails before any renaming work.
        writer, destination = output_dataset
        renamed = rename_columns(input_dataset, rename_dict)
        writer(renamed, destination)
    except Exception as e:
        logging.error(f"Error in main function: {e}")
        raise
3f54cd56a65e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
recetox
parents:
diff
changeset
|
48 |
3f54cd56a65e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
recetox
parents:
diff
changeset
|
49 |
3f54cd56a65e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
recetox
parents:
diff
changeset
|
if __name__ == "__main__":
    # Script entry point: set up logging, parse the command line, run the tool.
    logging.basicConfig(level=logging.INFO)

    # One entry per command-line option: (flag, nargs, argparse action, help text).
    # Every option is registered as required; the actions come from the local utils module.
    option_specs = (
        (
            "--input_dataset",
            2,
            LoadDataAction,
            "Path to the input dataset and its file extension (csv, tsv, parquet)",
        ),
        (
            "--rename",
            "+",
            KeyValuePairsAction,
            "List of key=value pairs with 1-based column index as key and new column name as value",
        ),
        (
            "--output_dataset",
            2,
            StoreOutputAction,
            "Path to the output dataset and its file extension (csv, tsv, parquet)",
        ),
    )

    cli = argparse.ArgumentParser(description="Rename columns in a dataframe.")
    for flag, arity, action_cls, help_text in option_specs:
        cli.add_argument(
            flag,
            nargs=arity,
            action=action_cls,
            required=True,
            help=help_text,
        )

    options = cli.parse_args()
    main(options.input_dataset, options.rename, options.output_dataset)