comparison query.py @ 3:95b4196b4ded draft

planemo upload for repository https://github.com/brsynth/galaxytools commit 5e85823d729e9e09adf66ccfb7c47701077dccff-dirty
author tduigou
date Mon, 15 Sep 2025 12:59:58 +0000
parents 58bbbff358d6
children 815748762646
comparison
equal deleted inserted replaced
2:58bbbff358d6 3:95b4196b4ded
1 import argparse 1 import argparse
2 import json 2 import json
3 import logging 3 import logging
4 import sys 4 import sys
5 from typing import Dict, List 5 from typing import Dict, Tuple
6 6
7 import requests 7 import requests
8 8
9 BASE_URL = "https://retrorules.org/api/v0.7" 9 BASE_URL = "https://retrorules.org/api"
10 10
11 11
12 def from_ec_number(ec_number: str, min_diameter: int = None) -> List: 12 def from_templates(
13 url = f"{BASE_URL}/ecnumber" 13 smarts_str: str,
14 params = [("input", ec_number)] 14 template_ids_str: str,
15 if min_diameter: 15 reaction_ids_str: str,
16 params.append(("minDiameter", str(min_diameter))) 16 datasets_str: str,
17 chemical_domain_str: str,
18 ec_number_str: str,
19 min_radius_int: int,
20 valid_str: str,
21 dedup_str: str,
22 limit_int: int,
23 offset_int: int,
24 ) -> Tuple:
25 url = f"{BASE_URL}/templates"
26 params = []
27 if smarts_str:
28 params.append(("q", smarts_str))
29 if template_ids_str:
30 params.append(("template_ids", ",".join(template_ids_str)))
31 if reaction_ids_str:
32 params.append(("reaction_ids", ",".join(reaction_ids_str)))
33 if datasets_str and datasets_str != "any":
34 params.append(("datasets", datasets_str))
35 if chemical_domain_str and chemical_domain_str != "any":
36 params.append(("chemical_domain", chemical_domain_str))
37 if ec_number_str:
38 params.append(("ec", ec_number_str))
39 if min_radius_int is not None:
40 params.append(("min_radius", str(min_radius_int)))
41 if valid_str and valid_str != "any":
42 params.append(("valid", valid_str))
43 if dedup_str and dedup_str != "any":
44 params.append(("dedup", dedup_str))
45 if limit_int:
46 params.append(("limit", str(limit_int)))
47 if offset_int:
48 params.append(("offset", str(offset_int)))
17 return url, params 49 return url, params
18 50
19 51 def from_templates_summary(template_id_str: str) -> Tuple:
20 def from_substrate(substrate: str, min_diameter: int = None) -> List: 52 url = f"{BASE_URL}/templates/{template_id_str}/summary"
21 url = f"{BASE_URL}/substrate" 53 params = {}
22 params = [("input", substrate)]
23 if min_diameter:
24 params.append(("minDiameter", str(min_diameter)))
25 return url, params 54 return url, params
26 55
27 56 def from_templates_sources(template_id_str: str) -> Tuple:
28 def from_reaction_id( 57 url = f"{BASE_URL}/templates/{template_id_str}/sources"
29 reaction_id: str, repository: str, min_diameter: int = None 58 params = {}
30 ) -> List:
31 url = f"{BASE_URL}/reactionid"
32 params = [("input", reaction_id)]
33 params.append(("repo", repository))
34 if min_diameter:
35 params.append(("minDiameter", str(min_diameter)))
36 return url, params 59 return url, params
37 60
38 61 def from_templates_count(
39 def from_inchi(inchi: str, min_diameter: int = None) -> List: 62 smarts_str: str,
40 url = f"{BASE_URL}/inchi" 63 template_ids_str: str,
41 params = [("input", inchi)] 64 reaction_ids_str: str,
42 if min_diameter: 65 datasets_str: str,
43 params.append(("minDiameter", str(min_diameter))) 66 chemical_domain_str: str,
67 ec_number_str: str,
68 min_radius_int: int,
69 valid_str: str,
70 dedup_str: str,
71 ) -> Tuple:
72 url = f"{BASE_URL}/templates_count"
73 params = []
74 if smarts_str:
75 params.append(("q", smarts_str))
76 if template_ids_str:
77 params.append(("template_ids", ",".join(template_ids_str)))
78 if reaction_ids_str:
79 params.append(("reaction_ids", ",".join(reaction_ids_str)))
80 if datasets_str and datasets_str != "any":
81 params.append(("datasets", datasets_str))
82 if chemical_domain_str and chemical_domain_str != "any":
83 params.append(("chemical_domain", chemical_domain_str))
84 if ec_number_str:
85 params.append(("ec", ec_number_str))
86 if min_radius_int is not None:
87 params.append(("min_radius", str(min_radius_int)))
88 if valid_str and valid_str != "any":
89 params.append(("valid", valid_str))
90 if dedup_str and dedup_str != "any":
91 params.append(("dedup", dedup_str))
44 return url, params 92 return url, params
45 93
46 94 def from_templates_export(
47 def from_repository(repository: str, min_diameter: int = None) -> List: 95 generation_token_str: str,
48 url = f"{BASE_URL}/repo" 96 smarts_str: str,
49 params = [("input", repository)] 97 template_ids_str: str,
50 if min_diameter: 98 reaction_ids_str: str,
51 params.append(("minDiameter", str(min_diameter))) 99 datasets_str: str,
100 chemical_domain_str: str,
101 ec_number_str: str,
102 min_radius_int: int,
103 valid_str: str,
104 dedup_str: str,
105 ) -> Tuple:
106 url = f"{BASE_URL}/templates_export"
107 params = []
108 if generation_token_str:
109 params.append(("gen_token", generation_token_str))
110 if smarts_str:
111 params.append(("q", smarts_str))
112 if template_ids_str:
113 params.append(("template_ids", ",".join(template_ids_str)))
114 if reaction_ids_str:
115 params.append(("reaction_ids", ",".join(reaction_ids_str)))
116 if datasets_str and datasets_str != "any":
117 params.append(("datasets", datasets_str))
118 if chemical_domain_str and chemical_domain_str != "any":
119 params.append(("chemical_domain", chemical_domain_str))
120 if ec_number_str:
121 params.append(("ec", ec_number_str))
122 if min_radius_int is not None:
123 params.append(("min_radius", str(min_radius_int)))
124 if valid_str and valid_str != "any":
125 params.append(("valid", valid_str))
126 if dedup_str and dedup_str != "any":
127 params.append(("dedup", dedup_str))
52 return url, params 128 return url, params
53 129
54 130 def query(url: str, params: Dict):
55 def from_smarts_id(smarts_ids: List[str], min_diameter: int = None) -> List:
56 url = f"{BASE_URL}/smartsid"
57 params = [("input", smart_id) for smart_id in smarts_ids]
58 if min_diameter:
59 params.append(("minDiameter", str(min_diameter)))
60 return url, params
61
62
63 def query(url: str, params: Dict) -> Dict:
64 response = requests.get(url, params=params) 131 response = requests.get(url, params=params)
65 response.raise_for_status() 132 response.raise_for_status()
66 return response.json() 133 return response
67
68 134
69 def write_json(path: str, data: Dict): 135 def write_json(path: str, data: Dict):
70 with open(path, "w") as fd: 136 with open(path, "w") as fd:
71 json.dump(data, fd, indent=4) 137 json.dump(data, fd, indent=4)
72 138
139 def write_tab(path: str, data: str):
140 with open(path, "w") as fd:
141 fd.write(data)
73 142
74 def main(): 143 def main():
75 parser = argparse.ArgumentParser( 144 parser = argparse.ArgumentParser(
76 description="Query RetroRules API via command-line endpoints." 145 description="Query RetroRules API via command-line endpoints."
77 ) 146 )
78 subparsers = parser.add_subparsers(dest="command") 147 subparsers = parser.add_subparsers(dest="command")
79 148
80 # Subcommand: EC number 149 # Subcommand: templates
81 parser_ecn = subparsers.add_parser("ec-number", help="From EC number") 150 parser_tem = subparsers.add_parser("templates", help="From templates")
82 parser_ecn.add_argument( 151 parser_tem.add_argument("--input-smarts-str", help="Exact SMARTS")
83 "--input-ec-number-str", required=True, help="EC number such as 1.1.1.1" 152 parser_tem.add_argument(
84 ) 153 "--input-template-ids-str",
85 parser_ecn.add_argument("--input-min-diameter-int", type=int, help="Min diameter") 154 nargs="*",
86 parser_ecn.add_argument( 155 help="Space separated list of template IDs",
87 "--output-data-json", required=True, help="Output results, JSON format" 156 )
88 ) 157 parser_tem.add_argument(
89 158 "--input-reaction-ids-str",
90 # Subcommand: Substrate 159 nargs="*",
91 parser_sub = subparsers.add_parser("substrate", help="From substrate") 160 help="Space separated list of reaction IDs",
92 parser_sub.add_argument( 161 )
93 "--input-substrate-str", required=True, help="Substrate label" 162 parser_tem.add_argument(
94 ) 163 "--input-datasets-str",
95 parser_sub.add_argument("--input-min-diameter-int", type=int, help="Min diameter") 164 default="any",
96 parser_sub.add_argument( 165 choices=["any", "metanetx", "rhea", "uspto"],
97 "--output-data-json", required=True, help="Output results, JSON format" 166 help="Select a specific database",
98 ) 167 )
99 168 parser_tem.add_argument(
100 # Subcommand: Reaction ID 169 "--input-chemical-domain-str",
101 parser_rea = subparsers.add_parser("reaction-id", help="From Reaction ID") 170 default="any",
102 parser_rea.add_argument( 171 choices=["any", "biochem", "orgchem"],
103 "--input-reaction-id-str", required=True, help="Reaction ID" 172 )
104 ) 173 parser_tem.add_argument(
105 parser_rea.add_argument("--input-repository-str", required=True, help="Repository") 174 "--input-ec-number-str",
106 parser_rea.add_argument("--input-min-diameter-int", type=int, help="Min diameter") 175 help="EC number to filter templates",
107 parser_rea.add_argument( 176 )
108 "--output-data-json", required=True, help="Output results, JSON format" 177 parser_tem.add_argument(
109 ) 178 "--input-min-radius-int",
110 179 type=int,
111 # Subcommand: InChI 180 help="Single radius of the template",
112 parser_inc = subparsers.add_parser("inchi", help="From InChI") 181 )
113 parser_inc.add_argument("--input-inchi-str", required=True, help="InChI") 182 parser_tem.add_argument(
114 parser_inc.add_argument("--input-min-diameter-int", type=int, help="Min diameter") 183 "--input-valid-str",
115 parser_inc.add_argument( 184 default="true",
116 "--output-data-json", required=True, help="Output results, JSON format" 185 choices=["any", "true", "false"],
117 ) 186 help="By default only valid templates are returned",
118 187 )
119 # Subcommand: Repository 188 parser_tem.add_argument(
120 parser_rep = subparsers.add_parser("repository", help="From Repository") 189 "--input-dedup-str",
121 parser_rep.add_argument("--input-repository-str", required=True, help="InChI") 190 default="true",
122 parser_rep.add_argument("--input-min-diameter-int", type=int, help="Min diameter") 191 choices=["true", "false"],
123 parser_rep.add_argument( 192 help="By default deduplicated templates are returned",
124 "--output-data-json", required=True, help="Output results, JSON format" 193 )
125 ) 194 parser_tem.add_argument(
126 195 "--input-limit-int",
127 # Subcommand: Smarts ID 196 type=int,
128 parser_sma = subparsers.add_parser("smarts-id", help="From Smarts ID") 197 help="Limit number of returned templates",
129 parser_sma.add_argument( 198 )
130 "--input-smarts-id-str", nargs="+", required=True, help="Smarts ID" 199 parser_tem.add_argument(
131 ) 200 "--input-offset-int",
132 parser_sma.add_argument("--input-min-diameter-int", type=int, help="Min diameter") 201 type=int,
133 parser_sma.add_argument( 202 help="Offset the list of returned templates",
134 "--output-data-json", required=True, help="Output results, JSON format" 203 )
204 parser_tem.add_argument(
205 "--output-data-json",
206 required=True,
207 help="Path to output JSON file",
208 )
209
210 # Subcommand: templates-summary
211 parser_tem_sum = subparsers.add_parser("templates-summary", help="From templates-summary")
212 parser_tem_sum.add_argument("--input-template-id-str", required=True, help="Template ID")
213 parser_tem_sum.add_argument(
214 "--output-data-json",
215 required=True,
216 help="Path to output JSON file",
217 )
218
219 # Subcommand: templates-sources
220 parser_tem_sou = subparsers.add_parser("templates-sources", help="From templates-sources")
221 parser_tem_sou.add_argument("--input-template-id-str", required=True, help="Template ID")
222 parser_tem_sou.add_argument(
223 "--output-data-json",
224 required=True,
225 help="Path to output JSON file",
226 )
227
228 # Subcommand: templates-count
229 parser_cou = subparsers.add_parser("templates-count", help="From templates-count")
230 parser_cou.add_argument("--input-smarts-str", help="Exact SMARTS")
231 parser_cou.add_argument(
232 "--input-template-ids-str",
233 nargs="*",
234 help="Space separated list of template IDs",
235 )
236 parser_cou.add_argument(
237 "--input-reaction-ids-str",
238 nargs="*",
239 help="Space separated list of reaction IDs",
240 )
241 parser_cou.add_argument(
242 "--input-datasets-str",
243 default="any",
244 choices=["any", "metanetx", "rhea", "uspto"],
245 help="Select a specific database",
246 )
247 parser_cou.add_argument(
248 "--input-chemical-domain-str",
249 default="any",
250 choices=["any", "biochem", "orgchem"],
251 )
252 parser_cou.add_argument(
253 "--input-ec-number-str",
254 help="EC number to filter templates",
255 )
256 parser_cou.add_argument(
257 "--input-min-radius-int",
258 type=int,
259 help="Single radius of the template",
260 )
261 parser_cou.add_argument(
262 "--input-valid-str",
263 default="true",
264 choices=["any", "true", "false"],
265 help="By default only valid templates are returned",
266 )
267 parser_cou.add_argument(
268 "--input-dedup-str",
269 default="true",
270 choices=["true", "false"],
271 help="By default deduplicated templates are returned",
272 )
273 parser_cou.add_argument(
274 "--output-data-json",
275 required=True,
276 help="Path to output JSON file",
277 )
278
279 # Subcommand: templates-export
280 parser_exp = subparsers.add_parser("templates-export", help="From templates-export")
281 parser_exp.add_argument(
282 "--input-generation-token-str",
283 help="Generation token from RetroRules web interface",
284 )
285 parser_exp.add_argument("--input-smarts-str", help="Exact SMARTS")
286 parser_exp.add_argument(
287 "--input-template-ids-str",
288 nargs="*",
289 help="Space separated list of template IDs",
290 )
291 parser_exp.add_argument(
292 "--input-reaction-ids-str",
293 nargs="*",
294 help="Space separated list of reaction IDs",
295 )
296 parser_exp.add_argument(
297 "--input-datasets-str",
298 default="any",
299 choices=["any", "metanetx", "rhea", "uspto"],
300 help="Select a specific database",
301 )
302 parser_exp.add_argument(
303 "--input-chemical-domain-str",
304 default="any",
305 choices=["any", "biochem", "orgchem"],
306 )
307 parser_exp.add_argument(
308 "--input-ec-number-str",
309 help="EC number to filter templates",
310 )
311 parser_exp.add_argument(
312 "--input-min-radius-int",
313 type=int,
314 help="Single radius of the template",
315 )
316 parser_exp.add_argument(
317 "--input-valid-str",
318 default="true",
319 choices=["any", "true", "false"],
320 help="By default only valid templates are returned",
321 )
322 parser_exp.add_argument(
323 "--input-dedup-str",
324 default="true",
325 choices=["true", "false"],
326 help="By default deduplicated templates are returned",
327 )
328 parser_exp.add_argument(
329 "--output-data-json",
330 help="Path to output JSON file",
331 )
332 parser_exp.add_argument(
333 "--output-data-csv",
334 help="Path to output CSV file",
335 )
336 parser_exp.add_argument(
337 "--output-data-tsv",
338 help="Path to output TSV file",
135 ) 339 )
136 340
137 logging.info("Query RetroRules - start") 341 logging.info("Query RetroRules - start")
138 args = parser.parse_args() 342 args = parser.parse_args()
139 343
140 try: 344 try:
141 logging.info("Build arguments") 345 logging.info("Build arguments")
142 url, params = "", {} 346 url, params = "", {}
143 if args.command == "ec-number": 347 if args.command == "templates":
144 url, params = from_ec_number( 348 url, params = from_templates(
145 ec_number=args.input_ec_number_str, 349 smarts_str=args.input_smarts_str,
146 min_diameter=args.input_min_diameter_int, 350 template_ids_str=args.input_template_ids_str,
351 reaction_ids_str=args.input_reaction_ids_str,
352 datasets_str=args.input_datasets_str,
353 chemical_domain_str=args.input_chemical_domain_str,
354 ec_number_str=args.input_ec_number_str,
355 min_radius_int=args.input_min_radius_int,
356 valid_str=args.input_valid_str,
357 dedup_str=args.input_dedup_str,
358 limit_int=args.input_limit_int,
359 offset_int=args.input_offset_int,
147 ) 360 )
148 elif args.command == "substrate": 361 elif args.command == "templates-summary":
149 url, params = from_substrate( 362 url, _ = from_templates_summary(
150 substrate=args.input_substrate_str, 363 template_id_str=args.input_template_id_str,
151 min_diameter=args.input_min_diameter_int,
152 ) 364 )
153 elif args.command == "reaction-id": 365 elif args.command == "templates-sources":
154 url, params = from_reaction_id( 366 url, _ = from_templates_sources(
155 reaction_id=args.input_reaction_id_str, 367 template_id_str=args.input_template_id_str,
156 repository=args.input_repository_str,
157 min_diameter=args.input_min_diameter_int,
158 ) 368 )
159 elif args.command == "inchi": 369 elif args.command == "templates-count":
160 url, params = from_inchi( 370 url, params = from_templates_count(
161 inchi=args.input_inchi_str, 371 smarts_str=args.input_smarts_str,
162 min_diameter=args.input_min_diameter_int, 372 template_ids_str=args.input_template_ids_str,
373 reaction_ids_str=args.input_reaction_ids_str,
374 datasets_str=args.input_datasets_str,
375 chemical_domain_str=args.input_chemical_domain_str,
376 ec_number_str=args.input_ec_number_str,
377 min_radius_int=args.input_min_radius_int,
378 valid_str=args.input_valid_str,
379 dedup_str=args.input_dedup_str,
163 ) 380 )
164 elif args.command == "repository": 381 elif args.command == "templates-export":
165 url, params = from_repository( 382 url, params = from_templates_export(
166 repository=args.input_repository_str, 383 generation_token_str=args.input_generation_token_str,
167 min_diameter=args.input_min_diameter_int, 384 smarts_str=args.input_smarts_str,
168 ) 385 template_ids_str=args.input_template_ids_str,
169 elif args.command == "smarts-id": 386 reaction_ids_str=args.input_reaction_ids_str,
170 url, params = from_smarts_id( 387 datasets_str=args.input_datasets_str,
171 smarts_ids=args.input_smarts_id_str, 388 chemical_domain_str=args.input_chemical_domain_str,
172 min_diameter=args.input_min_diameter_int, 389 ec_number_str=args.input_ec_number_str,
390 min_radius_int=args.input_min_radius_int,
391 valid_str=args.input_valid_str,
392 dedup_str=args.input_dedup_str,
173 ) 393 )
174 else: 394 else:
175 parser.print_help() 395 parser.print_help()
176 sys.exit(1) 396 sys.exit(1)
177 397
178 logging.info("Query API") 398 logging.info("Query API")
179 data = query(url=url, params=params) 399 response = query(url=url, params=params)
180 400
181 logging.info("Write data") 401 logging.info("Write data")
182 write_json(path=args.output_data_json, data=data) 402 if "output_data_json" in vars(args) and args.output_data_json:
403 write_json(path=args.output_data_json, data=response.json())
404 if "output_data_csv" in vars(args) and args.output_data_csv:
405 write_tab(path=args.output_data_csv, data=response.text)
406 if "output_data_tsv" in vars(args) and args.output_data_tsv:
407 write_tab(path=args.output_data_tsv, data=response.text)
183 except requests.HTTPError as e: 408 except requests.HTTPError as e:
184 logging.error(f"HTTP error: {e.response.status_code} - {e.response.text}") 409 logging.error(f"HTTP error: {e.response.status_code} - {e.response.text}")
185 sys.exit(1) 410 sys.exit(1)
186 except Exception as e: 411 except Exception as e:
187 logging.error(f"Error: {e}") 412 logging.error(f"Error: {e}")