diff rem_complex.py @ 3:567327a97ad2 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 4b8a43b863ff8a0ff1d5a08e516068853adf358d
author recetox
date Tue, 16 Apr 2024 11:30:27 +0000 (12 months ago)
parents a0e07a0bc047
children
line wrap: on
line diff
--- a/rem_complex.py	Fri Dec 01 11:19:32 2023 +0000
+++ b/rem_complex.py	Tue Apr 16 11:30:27 2024 +0000
@@ -9,22 +9,23 @@
     parser = argparse.ArgumentParser()
     parser.add_argument('-iformat', '--input_format', help='Input file format')
     parser.add_argument('-i', '--input_filename', type=str, required=True, help='Input file name')
-    parser.add_argument('-o', '--output_filename', type=str, required=True, help='Outout file name')
+    parser.add_argument('-o', '--output_filename', type=str, required=True, help='Output file name')
     args = parser.parse_args()
     return args
 
 
-def filter_csv_molecules(file_name: str, output_file_name: str) -> None:
-    """Removes molecules with '.' in SMILES string from csv file.
+def filter_csv_tsv_molecules(file_name: str, output_file_name: str, sep: str) -> None:
+    """Removes molecules with '.' in SMILES string from csv or tsv file.
 
     Args:
-        file_name (str): Path to csv file that contains metadata.
-        output_file_name (str): Path to destination file, in csv format.
+        file_name (str): Path to csv or tsv file that contains metadata.
+        output_file_name (str): Path to destination file, tsv format.
+        sep (str): Separator used in the file (',' for csv, '\t' for tsv).
     """
-    df = pd.read_csv(file_name)
+    df = pd.read_csv(file_name, sep=sep)
     mask = df['smiles'].str.contains(".", na=False, regex=False)
     mask = mask.apply(lambda x: not x)
-    df[mask].to_csv(output_file_name, index=False)
+    df[mask].to_csv(output_file_name, index=False, sep='\t')
 
 
 def filter_other_format_molecules(file_name: str, output_file_name: str, input_format: str) -> None:
@@ -47,12 +48,13 @@
     """Removes molecular complexes depending on the input format.
 
     Args:
-        file_name (str): Path to csv, smi or inchi files
-        output_file_name (str): Path to destination files, in csv. smi or inchi formats.
-        input_format (str): Input file formats.
+        file_name (str): Path to csv, tsv, smi, or inchi files.
+        output_file_name (str): Path to destination files, in corresponding formats.
+        input_format (str): Input file format.
     """
-    if input_format == 'csv':
-        filter_csv_molecules(file_name, output_file_name)
+    if input_format in ['csv', 'tsv']:
+        sep = ',' if input_format == 'csv' else '\t'
+        filter_csv_tsv_molecules(file_name, output_file_name, sep)
     else:
         filter_other_format_molecules(file_name, output_file_name, input_format)