Mercurial > repos > recetox > filter_compounds
annotate filter_compounds.py @ 0:72b687d21f65 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
author | recetox |
---|---|
date | Tue, 22 Mar 2022 16:06:54 +0000 |
parents | |
children |
rev | line source |
---|---|
0
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
1 import argparse |
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
2 import re |
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
3 |
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
4 from openbabel import openbabel, pybel |
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
5 openbabel.obErrorLog.StopLogging() |
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
6 |
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
7 |
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
def parse_command_line(argv=None):
    """Parse the command-line options of the compound filter.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what the script entry point relies on.

    Returns:
        argparse.Namespace with the attributes ``input`` and ``output``
        (file names, both required) and the boolean switches ``met`` and
        ``anorg`` (both ``False`` unless the flag is given).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', required=True, help='Input file name')
    parser.add_argument('-o', '--output', required=True, help='Output file name')
    parser.add_argument('-m', '--met', required=False, action='store_true',
                        help='Remove organometallic compounds')
    parser.add_argument('-a', '--anorg', required=False, action='store_true',
                        help='Remove anorganic compounds')
    return parser.parse_args(argv)
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
15 |
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
16 |
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
def _molecular_formula(smiles):
    """Return the formula field of the InChI generated for *smiles*.

    The SMILES string is converted with pybel and the second '/'-separated
    field of the resulting InChI string is returned (for an InChI such as
    ``InChI=1S/C2H6O/...`` that field is ``C2H6O``).

    An empty string is returned when *smiles* is blank or when the InChI
    output has no second field, so callers never hit an ``IndexError``.
    Errors raised by ``pybel.readstring`` itself are deliberately not caught.
    """
    if not smiles.strip():
        return ''
    layers = pybel.readstring('smi', smiles).write('inchi').split('/')
    return layers[1] if len(layers) > 1 else ''


def _matches_all(compiled_patterns, formula):
    """Return True when every compiled pattern is found in *formula*.

    With no patterns at all the result is True, i.e. nothing is filtered.
    """
    return all(rx.search(formula) for rx in compiled_patterns)


def filter_compounds(args, pattern):
    """Copy compounds from ``args.input`` to ``args.output``, filtering them.

    Each input line is either a bare SMILES string or a tab-separated
    ``index<TAB>SMILES`` row (detected by a numeric first column). A
    compound is kept when its formula matches every regular expression in
    *pattern*:

    * indexed rows are always written; a rejected compound keeps its index
      but gets an empty SMILES column (``index<TAB>``),
    * bare SMILES lines are written unchanged when accepted and dropped
      otherwise.

    Args:
        args: Object with ``input`` and ``output`` attributes (file names).
        pattern: List (or other iterable) of regular-expression strings.
            It must not be a bare string, which would be iterated
            character by character. An empty list keeps every line.
    """
    print(pattern)
    # Compile once up front instead of rebuilding each regex for every line.
    compiled = [re.compile(rf'{x}') for x in pattern]

    with open(args.input, "r") as infile, open(args.output, "w") as outfile:
        for line in infile:
            values = line.split('\t', 1)
            index = values[0].strip()

            # check if input is list of SMILES or indexed table of SMILES
            if index.isnumeric():
                # An index without a SMILES column counts as an empty SMILES.
                smiles = values[1] if len(values) > 1 else ''

                # write original line if the compound passes all filters,
                # otherwise keep the index and blank out the SMILES
                if _matches_all(compiled, _molecular_formula(smiles)):
                    outfile.write(line)
                else:
                    outfile.write(f'{index}\t\n')
            elif _matches_all(compiled, _molecular_formula(values[0])):
                outfile.write(line)
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
37 |
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
38 |
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
def __main__():
    """
    Filter organometallics and/or anorganic compounds.

    Parses the command line, collects one regular expression per selected
    filter and hands them to ``filter_compounds``. When no filter is
    selected the pattern list stays empty, so every input line is accepted.
    """
    args = parse_command_line()

    # select patterns for filtering; the list must stay a list, because
    # filter_compounds iterates over it pattern by pattern
    sel_pattern = []
    if args.met:
        # formula may consist only of these element symbols, digits and dots
        sel_pattern.append(r'^(?:C|N|O|P|F|S|I|B|Si|Se|Cl|Br|Li|Na|H|K|[0-9]|\.)+$')
    if args.anorg:
        # formula must contain a 'C' that is not followed by a, b or d-z
        sel_pattern.append(r'[C][^abd-z]')

    # check if user selected something to filter out, if not output file == input file
    if not sel_pattern:
        print("No filtering selected - user did not specify what to filter out.")

    filter_compounds(args, sel_pattern)
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
57 |
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
58 |
72b687d21f65
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
# Run the filter only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()