rename_annotated_feature: rename_annotated

comparison rename_annotated_feature.py @ 0:268fcec93d9c draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rename_annotated_feature commit 7948bcdd36cec524d201712dc20c438973b4cc28

author	recetox
date	Tue, 21 May 2024 07:44:25 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:268fcec93d9c
+import argparse
+from collections import defaultdict
+from typing import Tuple
+import pandas as pd
def parse_arguments() -> argparse.Namespace:
    """Build the command-line interface and parse the process arguments.

    Returns:
        argparse.Namespace: Parsed values exposed as the attributes
        ``annotations_table_path``, ``abundance_table_path``, ``mode`` and
        ``output_path``.
    """
    cli = argparse.ArgumentParser(description='Rename annotated feature.')

    # Each entry is (flag, keyword arguments for ``add_argument``); the order
    # here is the order in which the options appear in ``--help``.
    option_specs = (
        (
            '--annotations_table_path',
            {
                'type': str,
                'required': True,
                'help': 'Path to the annotations table file.',
            },
        ),
        (
            '--abundance_table_path',
            {
                'type': str,
                'required': True,
                'help': 'Path to the abundance table file.',
            },
        ),
        (
            '--mode',
            {
                'type': str,
                'choices': ['single', 'multiple'],
                'default': 'single',
                'help': 'Mode to use for renaming. Can be "single" or "multiple".',
            },
        ),
        (
            '--output_path',
            {
                'type': str,
                'default': 'output.csv',
                'help': 'Path to the output CSV file.',
            },
        ),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)

    parsed = cli.parse_args()
    return parsed
def load_tables(annotations_table_path: str, abundance_table_path: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Read the annotations and abundance tables and tidy their headers.

    Both files are read with ``pandas.read_table``; afterwards leading and
    trailing whitespace is removed from every column name.

    Args:
        annotations_table_path (str): Path to the annotations table file.
        abundance_table_path (str): Path to the abundance table file.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame]: The annotations table followed by
        the abundance table.
    """
    loaded = []
    for table_path in (annotations_table_path, abundance_table_path):
        frame = pd.read_table(table_path)
        # Padded header cells would otherwise fail to match by name later on.
        frame.columns = frame.columns.str.strip()
        loaded.append(frame)

    annotations_table, abundance_table = loaded
    return annotations_table, abundance_table
def rename_single(annotations_table: pd.DataFrame, abundance_table: pd.DataFrame) -> None:
    """Rename abundance columns to the best-scoring reference of each query.

    For every distinct value in the ``query`` column of ``annotations_table``
    the row with the highest score is selected, and the abundance column named
    after that query is renamed to that row's ``reference`` value. The score is
    taken from the last column of ``annotations_table``. Abundance columns
    without a matching query keep their name.

    Annotation rows with a missing score or a missing reference are ignored,
    so a query whose matches are all unusable is simply left unrenamed instead
    of raising or producing a NaN column name. An annotations table with no
    usable rows leaves ``abundance_table`` unchanged.

    Args:
        annotations_table (pd.DataFrame): DataFrame of annotations; must have
            ``query`` and ``reference`` columns, with scores in its last column.
        abundance_table (pd.DataFrame): DataFrame of abundance data; modified
            in place.
    """
    scores_col = annotations_table.columns[-1]

    # idxmax cannot pick a winner from missing scores, and a missing reference
    # is not a usable column name, so drop both kinds of rows up front.
    usable = annotations_table[scores_col].notna() & annotations_table["reference"].notna()
    candidates = annotations_table[usable]
    if candidates.empty:
        return

    best_row_labels = candidates.groupby("query")[scores_col].idxmax()
    best_matches = candidates.loc[best_row_labels]

    mapping = dict(zip(best_matches["query"], best_matches["reference"]))
    abundance_table.rename(columns=mapping, inplace=True)
def rename_multiple(annotations_table: pd.DataFrame, abundance_table: pd.DataFrame) -> None:
    """Rename abundance columns to the joined list of all matching references.

    Every ``reference`` listed for a ``query`` in ``annotations_table`` is
    collected in table order and joined with ``", "``; the abundance column
    named after that query is renamed to the joined string. Abundance columns
    without a matching query keep their name.

    All renames are applied in a single pass against the original column
    names, so a reference that happens to equal another query name is never
    renamed a second time. References are converted to ``str`` before joining,
    and rows with a missing query or reference are skipped.

    Args:
        annotations_table (pd.DataFrame): DataFrame of annotations; must have
            ``query`` and ``reference`` columns.
        abundance_table (pd.DataFrame): DataFrame of abundance data; modified
            in place.
    """
    matches = annotations_table.dropna(subset=["query", "reference"])

    references_by_query = defaultdict(list)
    for query, ref in zip(matches["query"], matches["reference"]):
        # str() keeps numeric reference identifiers from breaking the join.
        references_by_query[query].append(str(ref))

    mapping = {
        query: ', '.join(references)
        for query, references in references_by_query.items()
    }
    # One rename call: keys absent from the columns are ignored, and the
    # freshly assigned names cannot be matched again by a later query.
    abundance_table.rename(columns=mapping, inplace=True)
def main() -> None:
    """Run the tool: parse arguments, load both tables, rename, write output.

    The renaming strategy is chosen by ``--mode``: ``"single"`` uses
    ``rename_single``, any other accepted value uses ``rename_multiple``.
    The renamed abundance table is written tab-separated, without the index,
    to the path given by ``--output_path``.
    """
    args = parse_arguments()
    annotations_table, abundance_table = load_tables(
        args.annotations_table_path,
        args.abundance_table_path,
    )

    # Both strategies share a signature and mutate the abundance table in place.
    rename_columns = rename_single if args.mode == "single" else rename_multiple
    rename_columns(annotations_table, abundance_table)

    abundance_table.to_csv(args.output_path, sep="\t", index=False)


if __name__ == "__main__":
    main()

Mercurial > repos > recetox > rename_annotated_feature

comparison rename_annotated_feature.py @ 0:268fcec93d9c draft default tip