Mercurial > repos > recetox > biotransformer
comparison wrapper_biotransformer.py @ 0:0b86600b715e draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 9b8e9941cdf0689518021bc0aa4b7196b28d25d7
author | recetox |
---|---|
date | Tue, 06 Jun 2023 11:23:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0b86600b715e |
---|---|
1 import re | |
2 import subprocess | |
3 import sys | |
4 import tempfile | |
5 | |
6 import pandas | |
7 from openbabel import openbabel, pybel | |
8 openbabel.obErrorLog.StopLogging() | |
9 | |
10 | |
def InchiToSmiles(df):
    '''Translate the InChI strings of a table into SMILES strings.

    Parameters
    ----------
    df : mapping-like (e.g. pandas.DataFrame)
        Object whose ``"InChI"`` entry iterates over InChI strings.

    Returns
    -------
    list of str
        One SMILES string per InChI, in the same order.
    '''
    # pybel's "smi" writer terminates its output with a tab (title separator)
    # and a newline. Strip them so the values are bare SMILES strings and do
    # not inject tabs/newlines into the tab-separated result files.
    return [
        pybel.readstring("inchi", inchi).write("smi").strip()
        for inchi in df['InChI']
    ]
18 | |
19 | |
executable = ["biotransformer"]

# Column subsets used to decide when two result rows are duplicates.
DEDUP_COLUMNS_6 = ["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"]
DEDUP_COLUMNS_3 = ["Molecular formula", "Major Isotope Mass", "ALogP"]

# Any input SMILES containing a dot is treated as a mixture and skipped.
MIXTURE_PATTERN = re.compile(r'\.')


def _pop_option(args, flag):
    '''Remove ``flag`` and the value following it from ``args``.

    Returns the value. Exits with an explanatory message when the flag or
    its value is missing, instead of failing with a bare ``ValueError`` or
    ``IndexError``.
    '''
    if flag not in args:
        sys.exit("ERROR: missing required option " + flag)
    position = args.index(flag)
    if position + 1 >= len(args):
        sys.exit("ERROR: option " + flag + " requires a value")
    value = args.pop(position + 1)
    del args[position]
    return value


def _combine_results(frames):
    '''Concatenate per-compound result frames and add the target SMILES.

    Every frame is expected to already carry the "SMILES query" column in
    first position. When ``frames`` is empty (no compound produced any
    output) a header-only table is returned so the output files can still
    be written.
    '''
    if frames:
        combined = pandas.concat(frames)
    else:
        combined = pandas.DataFrame({"SMILES query": []})
    # Without an "InChI" column there is nothing to translate.
    targets = InchiToSmiles(combined) if "InChI" in combined.columns else []
    combined.insert(1, "SMILES target", targets)
    return combined


# The wrapper consumes its own file options; everything left in ``argv`` is
# forwarded unchanged to the executable.
argv = sys.argv[1:]
icsv = _pop_option(argv, "-icsv")
ocsv = _pop_option(argv, "-ocsv")
ocsv_dup = _pop_option(argv, "-ocsvDup")
ocsv_dup2 = _pop_option(argv, "-ocsvDup2")

# Input: header-less CSV with exactly one column holding the query SMILES.
in_df = pandas.read_csv(icsv, header=None)

all_frames = []  # all results
dedup6_frames = []  # filtered results based on 6 columns
dedup3_frames = []  # filtered results based on 3 columns

for _, (smiles,) in in_df.iterrows():
    print("Working on compound: " + smiles)
    if MIXTURE_PATTERN.search(smiles):
        print("ERROR: Input compound cannot be a mixture.")
        continue

    # The executable writes its CSV into a temporary file which is read back
    # before the file is removed on leaving the ``with`` block.
    with tempfile.NamedTemporaryFile() as out:
        subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name])
        try:
            bio_out = pandas.read_csv(out.name)
        except pandas.errors.EmptyDataError:
            # Nothing was written for this compound; skip it.
            continue

    # Tag every row with its query compound before filtering, so the
    # de-duplicated views inherit the column and stay aligned with it.
    bio_out.insert(0, "SMILES query", smiles)
    all_frames.append(bio_out)
    dedup6_frames.append(bio_out.drop_duplicates(subset=DEDUP_COLUMNS_6))
    dedup3_frames.append(bio_out.drop_duplicates(subset=DEDUP_COLUMNS_3))

# Concatenate once per output instead of once per compound.
out_df1 = _combine_results(all_frames)
out_df1.to_csv(ocsv, sep='\t')

out_df2 = _combine_results(dedup6_frames)
out_df2.to_csv(ocsv_dup, sep='\t')

out_df3 = _combine_results(dedup3_frames)
out_df3.to_csv(ocsv_dup2, sep='\t')