Mercurial > repos > recetox > rem_complex
annotate rem_complex.py @ 0:a0e07a0bc047 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
author | recetox |
---|---|
date | Mon, 27 Nov 2023 09:04:04 +0000 |
parents | |
children | 567327a97ad2 |
rev | line source |
---|---|
0
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
1 import argparse |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
2 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
3 import pandas as pd |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
4 from openbabel import openbabel, pybel |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
5 openbabel.obErrorLog.SetOutputLevel(1) # 0: suppress warnings; 1: warnings |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
6 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
7 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
def parse_arguments(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the tool.

    Args:
        argv (list of str, optional): Argument strings to parse. When left as
            None, argparse reads them from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: Parsed options with the attributes
            ``input_format``, ``input_filename`` and ``output_filename``.
    """
    parser = argparse.ArgumentParser()
    # The format selects the reader used downstream, so a missing value is
    # rejected here with a usage message instead of failing later on.
    parser.add_argument('-iformat', '--input_format', type=str, required=True, help='Input file format')
    parser.add_argument('-i', '--input_filename', type=str, required=True, help='Input file name')
    parser.add_argument('-o', '--output_filename', type=str, required=True, help='Output file name')
    return parser.parse_args(argv)
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
15 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
16 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
def filter_csv_molecules(file_name: str, output_file_name: str) -> None:
    """Copy a csv file, leaving out the rows whose SMILES string contains '.'.

    Rows with a missing SMILES value are kept.

    Args:
        file_name (str): Path to csv file that contains metadata; it must
            have a 'smiles' column.
        output_file_name (str): Path to destination file, in csv format.

    Raises:
        KeyError: If the input file has no 'smiles' column.
    """
    # Read the SMILES column as text so that the `.str` accessor also works
    # when every value is missing or the values happen to look numeric.
    df = pd.read_csv(file_name, dtype={'smiles': str})
    # Literal (non-regex) search; na=False treats missing values as "no dot".
    has_dot = df['smiles'].str.contains(".", na=False, regex=False)
    df[~has_dot].to_csv(output_file_name, index=False)
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
28 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
29 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
def _has_single_component(mol) -> bool:
    """Return True when the SMILES of an Open Babel molecule contains no '.'."""
    # Open Babel's SMILES writer emits "<SMILES>\t<title>\n"; only the SMILES
    # field is inspected so that a dot in the title cannot drop a molecule.
    smiles = mol.write('smi').partition('\t')[0]
    return "." not in smiles


def filter_other_format_molecules(file_name: str, output_file_name: str, input_format: str) -> None:
    """Removes molecules with '.' in SMILES string from smi or inchi files.

    Each molecule is converted to SMILES for the check and, if kept, written
    back in the format it was read in.

    Args:
        file_name (str): Path to smi or inchi files.
        output_file_name (str): Path to destination files, in smi or inchi formats.
        input_format (str): Input file format, used for both reading and writing.
    """
    # The input is consumed completely before the output is opened, so that
    # a read failure does not leave a partially written output file behind.
    kept_molecules = [
        mol for mol in pybel.readfile(input_format, file_name)
        if _has_single_component(mol)
    ]

    with open(output_file_name, 'w') as f:
        f.writelines(mol.write(input_format) for mol in kept_molecules)
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
44 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
45 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
def filter_complex_molecules(file_name: str, output_file_name: str, input_format: str) -> None:
    """Remove molecular complexes using the filter that matches the input format.

    Args:
        file_name (str): Path to the csv, smi or inchi input file.
        output_file_name (str): Path to the destination file, written in the
            same format as the input.
        input_format (str): Input file format; 'csv' selects the csv filter,
            any other value is passed on to the Open Babel based filter.
    """
    if input_format != 'csv':
        filter_other_format_molecules(file_name, output_file_name, input_format)
        return

    filter_csv_molecules(file_name, output_file_name)
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
58 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
59 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
if __name__ == "__main__":
    # Script entry point: read the command-line options and run the filter.
    cli_options = parse_arguments()
    filter_complex_molecules(
        cli_options.input_filename,
        cli_options.output_filename,
        cli_options.input_format,
    )