Mercurial > repos > recetox > rename_annotated_feature
comparison rename_annotated_feature.py @ 0:268fcec93d9c draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rename_annotated_feature commit 7948bcdd36cec524d201712dc20c438973b4cc28
author | recetox |
---|---|
date | Tue, 21 May 2024 07:44:25 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:268fcec93d9c |
---|---|
1 import argparse | |
2 from collections import defaultdict | |
3 from typing import Tuple | |
4 | |
5 import pandas as pd | |
6 | |
7 | |
def parse_arguments() -> argparse.Namespace:
    """Build the command-line interface and parse the process arguments.

    Both table paths are mandatory; the renaming mode and the output path
    fall back to 'single' and 'output.csv' respectively.

    Returns:
        argparse.Namespace: Parsed options, one attribute per flag.
    """
    cli = argparse.ArgumentParser(description='Rename annotated feature.')

    # One (flag, keyword-arguments) entry per supported option.
    option_specs = (
        ('--annotations_table_path', {
            'type': str,
            'required': True,
            'help': 'Path to the annotations table file.',
        }),
        ('--abundance_table_path', {
            'type': str,
            'required': True,
            'help': 'Path to the abundance table file.',
        }),
        ('--mode', {
            'type': str,
            'choices': ['single', 'multiple'],
            'default': 'single',
            'help': 'Mode to use for renaming. Can be "single" or "multiple".',
        }),
        ('--output_path', {
            'type': str,
            'default': 'output.csv',
            'help': 'Path to the output CSV file.',
        }),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    return cli.parse_args()
20 | |
21 | |
def load_tables(annotations_table_path: str, abundance_table_path: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Read the annotations and abundance tables and tidy their headers.

    Each file is parsed with ``pd.read_table`` and surrounding whitespace is
    stripped from every column name.

    Args:
        annotations_table_path (str): Path to the annotations table file.
        abundance_table_path (str): Path to the abundance table file.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame]: The annotations table followed by the abundance table.
    """
    # Parse both files first, then normalise the headers of each frame.
    frames = [pd.read_table(path) for path in (annotations_table_path, abundance_table_path)]
    for frame in frames:
        frame.columns = frame.columns.str.strip()

    annotations_table, abundance_table = frames
    return annotations_table, abundance_table
39 | |
40 | |
def rename_single(annotations_table: pd.DataFrame, abundance_table: pd.DataFrame) -> None:
    """Renames columns in abundance table based on single best match in annotations table.

    The last column of the annotations table is used as the score. For every
    distinct value in the "query" column, the row with the highest score is
    selected and the abundance column named after that query is renamed to the
    row's "reference" value. The abundance table is modified in place.

    Rows without a score are ignored, so a query that has no scored match at
    all keeps its original column name instead of aborting the whole run.

    Args:
        annotations_table (pd.DataFrame): DataFrame of annotations with "query" and "reference" columns.
        abundance_table (pd.DataFrame): DataFrame of abundance data; renamed in place.
    """
    scores_col = annotations_table.columns[-1]

    # A group whose scores are all missing has no maximum; idxmax would yield
    # NaN (or raise, depending on the pandas version) and break the .loc lookup.
    scored = annotations_table.dropna(subset=[scores_col])
    if scored.empty:
        return

    best_idxs = scored.groupby("query")[scores_col].idxmax()
    best_matches = scored.loc[best_idxs]

    mapping = dict(zip(best_matches["query"], best_matches["reference"]))
    abundance_table.rename(columns=mapping, inplace=True)
57 | |
58 | |
def rename_multiple(annotations_table: pd.DataFrame, abundance_table: pd.DataFrame) -> None:
    """Renames columns in abundance table based on multiple matches in annotations table.

    All "reference" values that share a "query" are joined with ", " (in table
    order) and the abundance column named after that query is renamed to the
    joined string. The abundance table is modified in place.

    Args:
        annotations_table (pd.DataFrame): DataFrame of annotations with "query" and "reference" columns.
        abundance_table (pd.DataFrame): DataFrame of abundance data; renamed in place.
    """
    # Rows lacking a query or a reference cannot contribute to a column name.
    matches = annotations_table.dropna(subset=["query", "reference"])

    refs_by_query = defaultdict(list)
    for query, ref in zip(matches["query"], matches["reference"]):
        # str() keeps the join below working when reference ids are numeric.
        refs_by_query[query].append(str(ref))

    # Build the complete mapping first and rename once: renaming column by
    # column would re-rename a column whose new name equals a later query.
    mapping = {query: ', '.join(refs) for query, refs in refs_by_query.items()}
    abundance_table.rename(columns=mapping, inplace=True)
77 | |
78 | |
def main() -> None:
    """Run the tool end to end: parse options, load tables, rename, write output.

    The abundance table is renamed in place by the selected strategy and then
    written to the requested output path as tab-separated text without the
    index column.
    """
    cli_args = parse_arguments()
    annotations, abundance = load_tables(cli_args.annotations_table_path, cli_args.abundance_table_path)

    # Any mode other than "single" falls through to multi-match renaming.
    renamer = rename_single if cli_args.mode == "single" else rename_multiple
    renamer(annotations, abundance)

    abundance.to_csv(cli_args.output_path, sep="\t", index=False)
91 | |
92 | |
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()