Mercurial > repos > recetox > rem_complex
comparison rem_complex.py @ 3:567327a97ad2 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 4b8a43b863ff8a0ff1d5a08e516068853adf358d
author | recetox |
---|---|
date | Tue, 16 Apr 2024 11:30:27 +0000 |
parents | a0e07a0bc047 |
children |
comparison
equal
deleted
inserted
replaced
2:1dc9f5a3163d | 3:567327a97ad2 |
---|---|
7 | 7 |
8 def parse_arguments() -> argparse.Namespace: | 8 def parse_arguments() -> argparse.Namespace: |
9 parser = argparse.ArgumentParser() | 9 parser = argparse.ArgumentParser() |
10 parser.add_argument('-iformat', '--input_format', help='Input file format') | 10 parser.add_argument('-iformat', '--input_format', help='Input file format') |
11 parser.add_argument('-i', '--input_filename', type=str, required=True, help='Input file name') | 11 parser.add_argument('-i', '--input_filename', type=str, required=True, help='Input file name') |
12 parser.add_argument('-o', '--output_filename', type=str, required=True, help='Outout file name') | 12 parser.add_argument('-o', '--output_filename', type=str, required=True, help='Output file name') |
13 args = parser.parse_args() | 13 args = parser.parse_args() |
14 return args | 14 return args |
15 | 15 |
16 | 16 |
17 def filter_csv_molecules(file_name: str, output_file_name: str) -> None: | 17 def filter_csv_tsv_molecules(file_name: str, output_file_name: str, sep: str) -> None: |
18 """Removes molecules with '.' in SMILES string from csv file. | 18 """Removes molecules with '.' in SMILES string from csv or tsv file. |
19 | 19 |
20 Args: | 20 Args: |
21 file_name (str): Path to csv file that contains metadata. | 21 file_name (str): Path to csv or tsv file that contains metadata. |
22 output_file_name (str): Path to destination file, in csv format. | 22 output_file_name (str): Path to destination file, tsv format. |
23 sep (str): Separator used in the file (',' for csv, '\t' for tsv). | |
23 """ | 24 """ |
24 df = pd.read_csv(file_name) | 25 df = pd.read_csv(file_name, sep=sep) |
25 mask = df['smiles'].str.contains(".", na=False, regex=False) | 26 mask = df['smiles'].str.contains(".", na=False, regex=False) |
26 mask = mask.apply(lambda x: not x) | 27 mask = mask.apply(lambda x: not x) |
27 df[mask].to_csv(output_file_name, index=False) | 28 df[mask].to_csv(output_file_name, index=False, sep='\t') |
28 | 29 |
29 | 30 |
30 def filter_other_format_molecules(file_name: str, output_file_name: str, input_format: str) -> None: | 31 def filter_other_format_molecules(file_name: str, output_file_name: str, input_format: str) -> None: |
31 """Removes molecules with '.' in SMILES string from smi or inchi files. | 32 """Removes molecules with '.' in SMILES string from smi or inchi files. |
32 | 33 |
45 | 46 |
46 def filter_complex_molecules(file_name: str, output_file_name: str, input_format: str) -> None: | 47 def filter_complex_molecules(file_name: str, output_file_name: str, input_format: str) -> None: |
47 """Removes molecular complexes depending on the input format. | 48 """Removes molecular complexes depending on the input format. |
48 | 49 |
49 Args: | 50 Args: |
50 file_name (str): Path to csv, smi or inchi files | 51 file_name (str): Path to csv, tsv, smi, or inchi files. |
51 output_file_name (str): Path to destination files, in csv. smi or inchi formats. | 52 output_file_name (str): Path to destination files, in corresponding formats. |
52 input_format (str): Input file formats. | 53 input_format (str): Input file format. |
53 """ | 54 """ |
54 if input_format == 'csv': | 55 if input_format in ['csv', 'tsv']: |
55 filter_csv_molecules(file_name, output_file_name) | 56 sep = ',' if input_format == 'csv' else '\t' |
57 filter_csv_tsv_molecules(file_name, output_file_name, sep) | |
56 else: | 58 else: |
57 filter_other_format_molecules(file_name, output_file_name, input_format) | 59 filter_other_format_molecules(file_name, output_file_name, input_format) |
58 | 60 |
59 | 61 |
60 if __name__ == "__main__": | 62 if __name__ == "__main__": |