Mercurial > repos > recetox > rem_complex
diff rem_complex.py @ 3:567327a97ad2 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 4b8a43b863ff8a0ff1d5a08e516068853adf358d
author | recetox |
---|---|
date | Tue, 16 Apr 2024 11:30:27 +0000 (12 months ago) |
parents | a0e07a0bc047 |
children |
line wrap: on
line diff
--- a/rem_complex.py Fri Dec 01 11:19:32 2023 +0000
+++ b/rem_complex.py Tue Apr 16 11:30:27 2024 +0000
@@ -9,22 +9,23 @@
     parser = argparse.ArgumentParser()
     parser.add_argument('-iformat', '--input_format', help='Input file format')
     parser.add_argument('-i', '--input_filename', type=str, required=True, help='Input file name')
-    parser.add_argument('-o', '--output_filename', type=str, required=True, help='Outout file name')
+    parser.add_argument('-o', '--output_filename', type=str, required=True, help='Output file name')
     args = parser.parse_args()
     return args
 
 
-def filter_csv_molecules(file_name: str, output_file_name: str) -> None:
-    """Removes molecules with '.' in SMILES string from csv file.
+def filter_csv_tsv_molecules(file_name: str, output_file_name: str, sep: str) -> None:
+    """Removes molecules with '.' in SMILES string from csv or tsv file.
 
     Args:
-        file_name (str): Path to csv file that contains metadata.
-        output_file_name (str): Path to destination file, in csv format.
+        file_name (str): Path to csv or tsv file that contains metadata.
+        output_file_name (str): Path to destination file, tsv format.
+        sep (str): Separator used in the file (',' for csv, '\t' for tsv).
     """
-    df = pd.read_csv(file_name)
+    df = pd.read_csv(file_name, sep=sep)
     mask = df['smiles'].str.contains(".", na=False, regex=False)
     mask = mask.apply(lambda x: not x)
-    df[mask].to_csv(output_file_name, index=False)
+    df[mask].to_csv(output_file_name, index=False, sep='\t')
 
 
 def filter_other_format_molecules(file_name: str, output_file_name: str, input_format: str) -> None:
@@ -47,12 +48,13 @@
     """Removes molecular complexes depending on the input format.
 
     Args:
-        file_name (str): Path to csv, smi or inchi files
-        output_file_name (str): Path to destination files, in csv. smi or inchi formats.
-        input_format (str): Input file formats.
+        file_name (str): Path to csv, tsv, smi, or inchi files.
+        output_file_name (str): Path to destination files, in corresponding formats.
+        input_format (str): Input file format.
     """
-    if input_format == 'csv':
-        filter_csv_molecules(file_name, output_file_name)
+    if input_format in ['csv', 'tsv']:
+        sep = ',' if input_format == 'csv' else '\t'
+        filter_csv_tsv_molecules(file_name, output_file_name, sep)
     else:
         filter_other_format_molecules(file_name, output_file_name, input_format)
 