comparison rename_annotated_feature.py @ 0:268fcec93d9c draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rename_annotated_feature commit 7948bcdd36cec524d201712dc20c438973b4cc28
author recetox
date Tue, 21 May 2024 07:44:25 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:268fcec93d9c
1 import argparse
2 from collections import defaultdict
3 from typing import Tuple
4
5 import pandas as pd
6
7
def parse_arguments() -> argparse.Namespace:
    """Build the command-line interface and parse the process arguments.

    Both table paths are mandatory; ``--mode`` is restricted to ``single`` or
    ``multiple`` and ``--output_path`` falls back to ``output.csv``.

    Returns:
        argparse.Namespace: Parsed options, one attribute per flag.
    """
    # Declared as data so that every option is visible at a glance; the order
    # here is the order in which the options are registered on the parser.
    option_specs = (
        ('--annotations_table_path', {
            'type': str,
            'required': True,
            'help': 'Path to the annotations table file.',
        }),
        ('--abundance_table_path', {
            'type': str,
            'required': True,
            'help': 'Path to the abundance table file.',
        }),
        ('--mode', {
            'type': str,
            'choices': ['single', 'multiple'],
            'default': 'single',
            'help': 'Mode to use for renaming. Can be "single" or "multiple".',
        }),
        ('--output_path', {
            'type': str,
            'default': 'output.csv',
            'help': 'Path to the output CSV file.',
        }),
    )

    cli = argparse.ArgumentParser(description='Rename annotated feature.')
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
20
21
def load_tables(annotations_table_path: str, abundance_table_path: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Read the annotations and abundance tables and tidy their headers.

    Both files are read with ``pd.read_table`` and surrounding whitespace is
    stripped from every column name.

    Args:
        annotations_table_path (str): Path to the annotations table file.
        abundance_table_path (str): Path to the abundance table file.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame]: The annotations table followed by
        the abundance table.
    """
    # Read both files first, then clean the headers, annotations before abundance.
    frames = tuple(
        pd.read_table(path)
        for path in (annotations_table_path, abundance_table_path)
    )
    for frame in frames:
        frame.columns = frame.columns.str.strip()

    annotations, abundance = frames
    return annotations, abundance
39
40
def rename_single(annotations_table: pd.DataFrame, abundance_table: pd.DataFrame) -> None:
    """Renames columns in abundance table based on single best match in annotations table.

    The last column of the annotations table is used as the score. For every
    distinct ``query`` the row with the highest score is selected, and the
    abundance column named after that query is renamed in place to the row's
    ``reference``. Rows without a score are ignored, so a query whose matches
    all lack a score keeps its original column name.

    Args:
        annotations_table (pd.DataFrame): DataFrame of annotations with
            ``query`` and ``reference`` columns; the last column holds the scores.
        abundance_table (pd.DataFrame): DataFrame of abundance data, modified in place.
    """
    scores_col = annotations_table.columns[-1]

    # An unscored row can never be the best match. Dropping such rows up front
    # also keeps idxmax from producing a missing label (or raising) for a
    # query whose scores are all missing, which would break the .loc lookup.
    scored = annotations_table.dropna(subset=[scores_col])
    if scored.empty:
        return

    best_idxs = scored.groupby("query")[scores_col].idxmax()
    best_matches = scored.loc[best_idxs]

    mapping = dict(zip(best_matches["query"], best_matches["reference"]))
    abundance_table.rename(columns=mapping, inplace=True)
57
58
def rename_multiple(annotations_table: pd.DataFrame, abundance_table: pd.DataFrame) -> None:
    """Renames columns in abundance table based on multiple matches in annotations table.

    Every ``query`` is mapped to the ``', '``-joined list of all its
    ``reference`` values, in table order, and the abundance column named after
    that query is renamed in place. Missing references are skipped and
    non-string references are converted to text before joining.

    Args:
        annotations_table (pd.DataFrame): DataFrame of annotations with
            ``query`` and ``reference`` columns.
        abundance_table (pd.DataFrame): DataFrame of abundance data, modified in place.
    """
    refs_by_query = defaultdict(list)
    for query, ref in zip(annotations_table["query"], annotations_table["reference"]):
        if pd.isna(ref):
            continue
        # str.join only accepts strings, so numeric references are stringified.
        refs_by_query[query].append(str(ref))

    mapping = {query: ', '.join(refs) for query, refs in refs_by_query.items()}

    # Rename once with the full mapping: renaming query by query would let a
    # freshly assigned name be renamed again when it equals another query.
    # Keys that are not columns of the abundance table are ignored by rename.
    abundance_table.rename(columns=mapping, inplace=True)
77
78
def main() -> None:
    """Run the tool: parse options, load both tables, rename columns, write the result.

    The abundance table is renamed in place by the renamer selected through
    ``--mode`` and then written tab-separated, without the index, to the
    requested output path.
    """
    cli_args = parse_arguments()
    tables = load_tables(cli_args.annotations_table_path, cli_args.abundance_table_path)
    annotations, abundance = tables

    # Any mode other than "single" is handled by the multi-match renamer.
    renamer = rename_single if cli_args.mode == "single" else rename_multiple
    renamer(annotations, abundance)

    abundance.to_csv(cli_args.output_path, sep="\t", index=False)
91
92
# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()