annotate filter_compounds.py @ 0:72b687d21f65 draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
author recetox
date Tue, 22 Mar 2022 16:06:54 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
72b687d21f65 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
1 import argparse
72b687d21f65 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
2 import re
72b687d21f65 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
3
72b687d21f65 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
4 from openbabel import openbabel, pybel
72b687d21f65 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
5 openbabel.obErrorLog.StopLogging()
72b687d21f65 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
6
72b687d21f65 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
7
72b687d21f65 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
def parse_command_line(argv=None):
    """Parse the command-line options of the compound filter.

    Args:
        argv: Optional list of argument strings to parse.  When ``None``
            (the default) argparse falls back to the process arguments,
            which is what the script entry point relies on.

    Returns:
        argparse.Namespace with the attributes ``input`` and ``output``
        (file names) and the boolean flags ``met`` and ``anorg``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', required=True, help='Input file name')
    parser.add_argument('-o', '--output', required=True, help='Output file name')
    # Both filters are opt-in switches; leaving them out yields False.
    parser.add_argument('-m', '--met', required=False, action='store_true', help='Remove organometallic compounds')
    parser.add_argument('-a', '--anorg', required=False, action='store_true', help='Remove anorganic compounds')
    return parser.parse_args(argv)
72b687d21f65 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
15
72b687d21f65 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
16
72b687d21f65 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
def _formula_from_smiles(smiles):
    """Return the formula layer of the InChI generated for *smiles*.

    A blank SMILES, or an InChI string that has no '/'-separated layer
    after its prefix, yields an empty string instead of raising
    ``IndexError``.  Errors raised by pybel itself are not caught here.
    """
    if not smiles.strip():
        return ''
    layers = pybel.readstring('smi', smiles).write('inchi').split('/')
    # The first '/'-separated field is the InChI prefix; the formula is
    # expected in the field right after it.
    return layers[1] if len(layers) > 1 else ''


def filter_compounds(args, pattern):
    """Copy ``args.input`` to ``args.output``, dropping filtered compounds.

    Two input layouts are handled line by line:

    * indexed table (first tab-separated field is numeric): the row is
      kept as is when the compound passes, otherwise it is replaced by
      ``<index>\\t`` with an empty SMILES so the row numbering survives;
    * plain list of SMILES: the line is kept when the compound passes
      and omitted otherwise.

    A compound passes when every regular expression in *pattern* is found
    in its formula; an empty *pattern* therefore lets every line through.

    Args:
        args: Namespace providing ``input`` and ``output`` file names.
        pattern: Iterable of regular-expression strings, all of which
            must match the formula.
    """
    print(pattern)
    # Compile once up front rather than once per input line.
    compiled = [re.compile(expression) for expression in pattern]

    def passes(formula):
        return all(regex.search(formula) for regex in compiled)

    with open(args.input, "r") as infile, open(args.output, "w") as outfile:
        for line in infile:
            values = line.split('\t', 1)

            # check if input is list of SMILES or indexed table of SMILES
            if values[0].isnumeric():
                # An index without a SMILES column is treated like a blank SMILES.
                smiles = values[1] if len(values) > 1 else ''
                if passes(_formula_from_smiles(smiles)):
                    outfile.write(line)
                else:
                    outfile.write(f'{values[0]}\t\n')
            elif passes(_formula_from_smiles(values[0])):
                outfile.write(line)
72b687d21f65 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
37
72b687d21f65 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
38
72b687d21f65 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
def __main__():
    """
    Filter organometallics and/or anorganic compounds.

    Builds the list of regular expressions matching the selected filters
    and hands it to filter_compounds.  When no filter is selected the list
    stays empty, so every input line is written to the output unchanged.
    """
    args = parse_command_line()

    # Must remain a list: filter_compounds iterates over it and treats each
    # element as one complete regular expression.
    sel_pattern = []
    if not args.met and not args.anorg:
        print("No filtering selected - user did not specify what to filter out.")

    # select patterns for filtering
    if args.met:
        sel_pattern.append(r'^(?:C|N|O|P|F|S|I|B|Si|Se|Cl|Br|Li|Na|H|K|[0-9]|\.)+$')
    if args.anorg:
        sel_pattern.append(r'[C][^abd-z]')

    filter_compounds(args, sel_pattern)


if __name__ == "__main__":
    __main__()