Mercurial > repos > recetox > rename_annotated_feature
comparison rename_annotated_feature.py @ 0:268fcec93d9c draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rename_annotated_feature commit 7948bcdd36cec524d201712dc20c438973b4cc28
| author | recetox |
|---|---|
| date | Tue, 21 May 2024 07:44:25 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:268fcec93d9c |
|---|---|
| 1 import argparse | |
| 2 from collections import defaultdict | |
| 3 from typing import Tuple | |
| 4 | |
| 5 import pandas as pd | |
| 6 | |
| 7 | |
def parse_arguments() -> argparse.Namespace:
    """Build the command-line interface and parse the process arguments.

    Returns:
        argparse.Namespace: Parsed values exposed as the attributes
            ``annotations_table_path``, ``abundance_table_path``, ``mode``
            and ``output_path``.
    """
    cli = argparse.ArgumentParser(description='Rename annotated feature.')

    # The two input tables are mandatory.
    cli.add_argument(
        '--annotations_table_path',
        required=True,
        type=str,
        help='Path to the annotations table file.',
    )
    cli.add_argument(
        '--abundance_table_path',
        required=True,
        type=str,
        help='Path to the abundance table file.',
    )

    # Optional switches fall back to single-match renaming and a fixed
    # output file name.
    cli.add_argument(
        '--mode',
        default='single',
        choices=['single', 'multiple'],
        type=str,
        help='Mode to use for renaming. Can be "single" or "multiple".',
    )
    cli.add_argument(
        '--output_path',
        default='output.csv',
        type=str,
        help='Path to the output CSV file.',
    )

    return cli.parse_args()
| 20 | |
| 21 | |
def load_tables(annotations_table_path: str, abundance_table_path: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Read the annotations and abundance tables and tidy their headers.

    Both files are read with ``pd.read_table``; surrounding whitespace is
    then stripped from every column name.

    Args:
        annotations_table_path (str): Path to the annotations table file.
        abundance_table_path (str): Path to the abundance table file.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame]: The annotations table followed by
            the abundance table.
    """
    frames = []
    for path in (annotations_table_path, abundance_table_path):
        frame = pd.read_table(path)
        # Header cells may carry stray padding; normalise them so later
        # column lookups by name succeed.
        frame.columns = frame.columns.str.strip()
        frames.append(frame)

    annotations, abundance = frames
    return annotations, abundance
| 39 | |
| 40 | |
def rename_single(annotations_table: pd.DataFrame, abundance_table: pd.DataFrame) -> None:
    """Rename abundance columns to the best-scoring reference of each query.

    The last column of ``annotations_table`` is taken as the score column.
    For every distinct value in its ``query`` column, the row with the highest
    score is selected and the abundance column named after the query is
    renamed to that row's ``reference``. Queries that do not occur among the
    abundance columns are ignored. ``abundance_table`` is modified in place;
    ``annotations_table`` is left untouched.

    Args:
        annotations_table (pd.DataFrame): Annotations with ``query`` and
            ``reference`` columns and the score as the last column.
        abundance_table (pd.DataFrame): Abundance data whose columns are
            renamed in place.
    """
    scores_col = annotations_table.columns[-1]

    # Rows without a score cannot be ranked: a query whose scores are all
    # missing would make idxmax yield NaN (or raise), aborting the whole
    # renaming. Such rows are dropped, so those queries keep their name.
    # The fresh positional index guarantees that the labels returned by
    # idxmax identify exactly one row each, even if the caller's index
    # contains duplicates.
    scored = annotations_table.dropna(subset=[scores_col]).reset_index(drop=True)

    best_idxs = scored.groupby("query")[scores_col].idxmax()
    best_rows = scored.loc[best_idxs]

    mapping = dict(zip(best_rows["query"], best_rows["reference"]))
    abundance_table.rename(columns=mapping, inplace=True)
| 57 | |
| 58 | |
def rename_multiple(annotations_table: pd.DataFrame, abundance_table: pd.DataFrame) -> None:
    """Rename abundance columns to the joined references of each query.

    All ``reference`` values that share a ``query`` are collected in row order
    and joined with ``', '``; the abundance column named after the query is
    renamed to that joined string. Queries that do not occur among the
    abundance columns are ignored. ``abundance_table`` is modified in place.

    Args:
        annotations_table (pd.DataFrame): Annotations with ``query`` and
            ``reference`` columns.
        abundance_table (pd.DataFrame): Abundance data whose columns are
            renamed in place.
    """
    refs_by_query = defaultdict(list)
    for query, ref in zip(annotations_table["query"], annotations_table["reference"]):
        # str() lets non-string references (e.g. numeric identifiers) be
        # joined instead of raising TypeError in str.join.
        refs_by_query[query].append(str(ref))

    mapping = {query: ', '.join(refs) for query, refs in refs_by_query.items()}

    # Apply every rename in one pass. Renaming query by query would let a
    # freshly assigned name that happens to equal another query be renamed
    # again; a single call maps each original column name exactly once and
    # silently skips queries that are not present as columns.
    abundance_table.rename(columns=mapping, inplace=True)
| 77 | |
| 78 | |
def main() -> None:
    """Run the tool: parse arguments, load tables, rename columns, save output.

    The abundance table is renamed in place by the strategy chosen through
    ``--mode`` and then written tab-separated, without the index, to the
    requested output path.
    """
    args = parse_arguments()
    annotations_table, abundance_table = load_tables(
        args.annotations_table_path,
        args.abundance_table_path,
    )

    # Any mode other than "single" uses the multiple-match strategy.
    rename = rename_single if args.mode == "single" else rename_multiple
    rename(annotations_table, abundance_table)

    abundance_table.to_csv(args.output_path, sep="\t", index=False)


if __name__ == "__main__":
    main()
