Mercurial > repos > tduigou > retrorules
diff query.py @ 3:95b4196b4ded draft
planemo upload for repository https://github.com/brsynth/galaxytools commit 5e85823d729e9e09adf66ccfb7c47701077dccff-dirty
| author | tduigou |
|---|---|
| date | Mon, 15 Sep 2025 12:59:58 +0000 |
| parents | 58bbbff358d6 |
| children | 815748762646 |
line wrap: on
line diff
--- a/query.py Mon Sep 15 09:42:19 2025 +0000 +++ b/query.py Mon Sep 15 12:59:58 2025 +0000 @@ -2,74 +2,143 @@ import json import logging import sys -from typing import Dict, List +from typing import Dict, Tuple import requests -BASE_URL = "https://retrorules.org/api/v0.7" - - -def from_ec_number(ec_number: str, min_diameter: int = None) -> List: - url = f"{BASE_URL}/ecnumber" - params = [("input", ec_number)] - if min_diameter: - params.append(("minDiameter", str(min_diameter))) - return url, params - - -def from_substrate(substrate: str, min_diameter: int = None) -> List: - url = f"{BASE_URL}/substrate" - params = [("input", substrate)] - if min_diameter: - params.append(("minDiameter", str(min_diameter))) - return url, params - - -def from_reaction_id( - reaction_id: str, repository: str, min_diameter: int = None -) -> List: - url = f"{BASE_URL}/reactionid" - params = [("input", reaction_id)] - params.append(("repo", repository)) - if min_diameter: - params.append(("minDiameter", str(min_diameter))) - return url, params +BASE_URL = "https://retrorules.org/api" -def from_inchi(inchi: str, min_diameter: int = None) -> List: - url = f"{BASE_URL}/inchi" - params = [("input", inchi)] - if min_diameter: - params.append(("minDiameter", str(min_diameter))) +def from_templates( + smarts_str: str, + template_ids_str: str, + reaction_ids_str: str, + datasets_str: str, + chemical_domain_str: str, + ec_number_str: str, + min_radius_int: int, + valid_str: str, + dedup_str: str, + limit_int: int, + offset_int: int, + ) -> Tuple: + url = f"{BASE_URL}/templates" + params = [] + if smarts_str: + params.append(("q", smarts_str)) + if template_ids_str: + params.append(("template_ids", ",".join(template_ids_str))) + if reaction_ids_str: + params.append(("reaction_ids", ",".join(reaction_ids_str))) + if datasets_str and datasets_str != "any": + params.append(("datasets", datasets_str)) + if chemical_domain_str and chemical_domain_str != "any": + params.append(("chemical_domain", chemical_domain_str)) + if ec_number_str: + params.append(("ec", ec_number_str)) + if min_radius_int is not None: + params.append(("min_radius", str(min_radius_int))) + if valid_str and valid_str != "any": + params.append(("valid", valid_str)) + if dedup_str and dedup_str != "any": + params.append(("dedup", dedup_str)) + if limit_int: + params.append(("limit", str(limit_int))) + if offset_int: + params.append(("offset", str(offset_int))) return url, params +def from_templates_summary(template_id_str: str) -> Tuple: + url = f"{BASE_URL}/templates/{template_id_str}/summary" + params = {} + return url, params -def from_repository(repository: str, min_diameter: int = None) -> List: - url = f"{BASE_URL}/repo" - params = [("input", repository)] - if min_diameter: - params.append(("minDiameter", str(min_diameter))) +def from_templates_sources(template_id_str: str) -> Tuple: + url = f"{BASE_URL}/templates/{template_id_str}/sources" + params = {} return url, params - -def from_smarts_id(smarts_ids: List[str], min_diameter: int = None) -> List: - url = f"{BASE_URL}/smartsid" - params = [("input", smart_id) for smart_id in smarts_ids] - if min_diameter: - params.append(("minDiameter", str(min_diameter))) +def from_templates_count( + smarts_str: str, + template_ids_str: str, + reaction_ids_str: str, + datasets_str: str, + chemical_domain_str: str, + ec_number_str: str, + min_radius_int: int, + valid_str: str, + dedup_str: str, + ) -> Tuple: + url = f"{BASE_URL}/templates_count" + params = [] + if smarts_str: + params.append(("q", smarts_str)) + if template_ids_str: + params.append(("template_ids", ",".join(template_ids_str))) + if reaction_ids_str: + params.append(("reaction_ids", ",".join(reaction_ids_str))) + if datasets_str and datasets_str != "any": + params.append(("datasets", datasets_str)) + if chemical_domain_str and chemical_domain_str != "any": + params.append(("chemical_domain", chemical_domain_str)) + if ec_number_str: + params.append(("ec", ec_number_str)) + if min_radius_int is not None: + params.append(("min_radius", str(min_radius_int))) + if valid_str and valid_str != "any": + params.append(("valid", valid_str)) + if dedup_str and dedup_str != "any": + params.append(("dedup", dedup_str)) return url, params +def from_templates_export( + generation_token_str: str, + smarts_str: str, + template_ids_str: str, + reaction_ids_str: str, + datasets_str: str, + chemical_domain_str: str, + ec_number_str: str, + min_radius_int: int, + valid_str: str, + dedup_str: str, + ) -> Tuple: + url = f"{BASE_URL}/templates_export" + params = [] + if generation_token_str: + params.append(("gen_token", generation_token_str)) + if smarts_str: + params.append(("q", smarts_str)) + if template_ids_str: + params.append(("template_ids", ",".join(template_ids_str))) + if reaction_ids_str: + params.append(("reaction_ids", ",".join(reaction_ids_str))) + if datasets_str and datasets_str != "any": + params.append(("datasets", datasets_str)) + if chemical_domain_str and chemical_domain_str != "any": + params.append(("chemical_domain", chemical_domain_str)) + if ec_number_str: + params.append(("ec", ec_number_str)) + if min_radius_int is not None: + params.append(("min_radius", str(min_radius_int))) + if valid_str and valid_str != "any": + params.append(("valid", valid_str)) + if dedup_str and dedup_str != "any": + params.append(("dedup", dedup_str)) + return url, params -def query(url: str, params: Dict) -> Dict: +def query(url: str, params: Dict): response = requests.get(url, params=params) response.raise_for_status() - return response.json() - + return response def write_json(path: str, data: Dict): with open(path, "w") as fd: json.dump(data, fd, indent=4) +def write_tab(path: str, data: str): + with open(path, "w") as fd: + fd.write(data) def main(): parser = argparse.ArgumentParser( @@ -77,61 +146,196 @@ ) subparsers = parser.add_subparsers(dest="command") - # Subcommand: EC number - parser_ecn = subparsers.add_parser("ec-number", help="From EC number") - parser_ecn.add_argument( - "--input-ec-number-str", required=True, help="EC number such as 1.1.1.1" + # Subcommand: templates + parser_tem = subparsers.add_parser("templates", help="From templates") + parser_tem.add_argument("--input-smarts-str", help="Exact SMARTS") + parser_tem.add_argument( + "--input-template-ids-str", + nargs="*", + help="Space separated list of template IDs", + ) + parser_tem.add_argument( + "--input-reaction-ids-str", + nargs="*", + help="Space separated list of reaction IDs", + ) + parser_tem.add_argument( + "--input-datasets-str", + default="any", + choices=["any", "metanetx", "rhea", "uspto"], + help="Select a specific database", + ) + parser_tem.add_argument( + "--input-chemical-domain-str", + default="any", + choices=["any", "biochem", "orgchem"], + ) + parser_tem.add_argument( + "--input-ec-number-str", + help="EC number to filter templates", ) - parser_ecn.add_argument("--input-min-diameter-int", type=int, help="Min diameter") - parser_ecn.add_argument( - "--output-data-json", required=True, help="Output results, JSON format" + parser_tem.add_argument( + "--input-min-radius-int", + type=int, + help="Single radius of the template", + ) + parser_tem.add_argument( + "--input-valid-str", + default="true", + choices=["any", "true", "false"], + help="By default only valid templates are returned", + ) + parser_tem.add_argument( + "--input-dedup-str", + default="true", + choices=["true", "false"], + help="By default deduplicated templates are returned", + ) + parser_tem.add_argument( + "--input-limit-int", + type=int, + help="Limit number of returned templates", + ) + parser_tem.add_argument( + "--input-offset-int", + type=int, + help="Offset the list of returned templates", + ) + parser_tem.add_argument( + "--output-data-json", + required=True, + help="Path to output JSON file", ) - # Subcommand: Substrate - parser_sub = subparsers.add_parser("substrate", help="From substrate") - parser_sub.add_argument( - "--input-substrate-str", required=True, help="Substrate label" + # Subcommand: templates-summary + parser_tem_sum = subparsers.add_parser("templates-summary", help="From templates-summary") + parser_tem_sum.add_argument("--input-template-id-str", required=True, help="Template ID") + parser_tem_sum.add_argument( + "--output-data-json", + required=True, + help="Path to output JSON file", ) - parser_sub.add_argument("--input-min-diameter-int", type=int, help="Min diameter") - parser_sub.add_argument( - "--output-data-json", required=True, help="Output results, JSON format" + + # Subcommand: templates-sources + parser_tem_sou = subparsers.add_parser("templates-sources", help="From templates-sources") + parser_tem_sou.add_argument("--input-template-id-str", required=True, help="Template ID") + parser_tem_sou.add_argument( + "--output-data-json", + required=True, + help="Path to output JSON file", ) - # Subcommand: Reaction ID - parser_rea = subparsers.add_parser("reaction-id", help="From Reaction ID") - parser_rea.add_argument( - "--input-reaction-id-str", required=True, help="Reaction ID" + # Subcommand: templates-count + parser_cou = subparsers.add_parser("templates-count", help="From templates-count") + parser_cou.add_argument("--input-smarts-str", help="Exact SMARTS") + parser_cou.add_argument( + "--input-template-ids-str", + nargs="*", + help="Space separated list of template IDs", + ) + parser_cou.add_argument( + "--input-reaction-ids-str", + nargs="*", + help="Space separated list of reaction IDs", + ) + parser_cou.add_argument( + "--input-datasets-str", + default="any", + choices=["any", "metanetx", "rhea", "uspto"], + help="Select a specific database", + ) + parser_cou.add_argument( + "--input-chemical-domain-str", + default="any", + choices=["any", "biochem", "orgchem"], ) - parser_rea.add_argument("--input-repository-str", required=True, help="Repository") - parser_rea.add_argument("--input-min-diameter-int", type=int, help="Min diameter") - parser_rea.add_argument( - "--output-data-json", required=True, help="Output results, JSON format" + parser_cou.add_argument( + "--input-ec-number-str", + help="EC number to filter templates", + ) + parser_cou.add_argument( + "--input-min-radius-int", + type=int, + help="Single radius of the template", + ) + parser_cou.add_argument( + "--input-valid-str", + default="true", + choices=["any", "true", "false"], + help="By default only valid templates are returned", + ) + parser_cou.add_argument( + "--input-dedup-str", + default="true", + choices=["true", "false"], + help="By default deduplicated templates are returned", + ) + parser_cou.add_argument( + "--output-data-json", + required=True, + help="Path to output JSON file", ) - # Subcommand: InChI - parser_inc = subparsers.add_parser("inchi", help="From InChI") - parser_inc.add_argument("--input-inchi-str", required=True, help="InChI") - parser_inc.add_argument("--input-min-diameter-int", type=int, help="Min diameter") - parser_inc.add_argument( - "--output-data-json", required=True, help="Output results, JSON format" + # Subcommand: templates-export + parser_exp = subparsers.add_parser("templates-export", help="From templates-export") + parser_exp.add_argument( + "--input-generation-token-str", + help="Generation token from RetroRules web interface", + ) + parser_exp.add_argument("--input-smarts-str", help="Exact SMARTS") + parser_exp.add_argument( + "--input-template-ids-str", + nargs="*", + help="Space separated list of template IDs", + ) + parser_exp.add_argument( + "--input-reaction-ids-str", + nargs="*", + help="Space separated list of reaction IDs", + ) + parser_exp.add_argument( + "--input-datasets-str", + default="any", + choices=["any", "metanetx", "rhea", "uspto"], + help="Select a specific database", + ) + parser_exp.add_argument( + "--input-chemical-domain-str", + default="any", + choices=["any", "biochem", "orgchem"], ) - - # Subcommand: Repository - parser_rep = subparsers.add_parser("repository", help="From Repository") - parser_rep.add_argument("--input-repository-str", required=True, help="InChI") - parser_rep.add_argument("--input-min-diameter-int", type=int, help="Min diameter") - parser_rep.add_argument( - "--output-data-json", required=True, help="Output results, JSON format" + parser_exp.add_argument( + "--input-ec-number-str", + help="EC number to filter templates", + ) + parser_exp.add_argument( + "--input-min-radius-int", + type=int, + help="Single radius of the template", + ) + parser_exp.add_argument( + "--input-valid-str", + default="true", + choices=["any", "true", "false"], + help="By default only valid templates are returned", ) - - # Subcommand: Smarts ID - parser_sma = subparsers.add_parser("smarts-id", help="From Smarts ID") - parser_sma.add_argument( - "--input-smarts-id-str", nargs="+", required=True, help="Smarts ID" + parser_exp.add_argument( + "--input-dedup-str", + default="true", + choices=["true", "false"], + help="By default deduplicated templates are returned", ) - parser_sma.add_argument("--input-min-diameter-int", type=int, help="Min diameter") - parser_sma.add_argument( - "--output-data-json", required=True, help="Output results, JSON format" + parser_exp.add_argument( + "--output-data-json", + help="Path to output JSON file", + ) + parser_exp.add_argument( + "--output-data-csv", + help="Path to output CSV file", + ) + parser_exp.add_argument( + "--output-data-tsv", + help="Path to output TSV file", ) logging.info("Query RetroRules - start") @@ -140,46 +344,67 @@ try: logging.info("Build arguments") url, params = "", {} - if args.command == "ec-number": - url, params = from_ec_number( - ec_number=args.input_ec_number_str, - min_diameter=args.input_min_diameter_int, + if args.command == "templates": + url, params = from_templates( + smarts_str=args.input_smarts_str, + template_ids_str=args.input_template_ids_str, + reaction_ids_str=args.input_reaction_ids_str, + datasets_str=args.input_datasets_str, + chemical_domain_str=args.input_chemical_domain_str, + ec_number_str=args.input_ec_number_str, + min_radius_int=args.input_min_radius_int, + valid_str=args.input_valid_str, + dedup_str=args.input_dedup_str, + limit_int=args.input_limit_int, + offset_int=args.input_offset_int, ) - elif args.command == "substrate": - url, params = from_substrate( - substrate=args.input_substrate_str, - min_diameter=args.input_min_diameter_int, + elif args.command == "templates-summary": + url, _ = from_templates_summary( + template_id_str=args.input_template_id_str, + ) + elif args.command == "templates-sources": + url, _ = from_templates_sources( + template_id_str=args.input_template_id_str, ) - elif args.command == "reaction-id": - url, params = from_reaction_id( - reaction_id=args.input_reaction_id_str, - repository=args.input_repository_str, - min_diameter=args.input_min_diameter_int, + elif args.command == "templates-count": + url, params = from_templates_count( + smarts_str=args.input_smarts_str, + template_ids_str=args.input_template_ids_str, + reaction_ids_str=args.input_reaction_ids_str, + datasets_str=args.input_datasets_str, + chemical_domain_str=args.input_chemical_domain_str, + ec_number_str=args.input_ec_number_str, + min_radius_int=args.input_min_radius_int, + valid_str=args.input_valid_str, + dedup_str=args.input_dedup_str, ) - elif args.command == "inchi": - url, params = from_inchi( - inchi=args.input_inchi_str, - min_diameter=args.input_min_diameter_int, - ) - elif args.command == "repository": - url, params = from_repository( - repository=args.input_repository_str, - min_diameter=args.input_min_diameter_int, - ) - elif args.command == "smarts-id": - url, params = from_smarts_id( - smarts_ids=args.input_smarts_id_str, - min_diameter=args.input_min_diameter_int, + elif args.command == "templates-export": + url, params = from_templates_export( + generation_token_str=args.input_generation_token_str, + smarts_str=args.input_smarts_str, + template_ids_str=args.input_template_ids_str, + reaction_ids_str=args.input_reaction_ids_str, + datasets_str=args.input_datasets_str, + chemical_domain_str=args.input_chemical_domain_str, + ec_number_str=args.input_ec_number_str, + min_radius_int=args.input_min_radius_int, + valid_str=args.input_valid_str, + dedup_str=args.input_dedup_str, ) else: parser.print_help() sys.exit(1) logging.info("Query API") - data = query(url=url, params=params) + response = query(url=url, params=params) logging.info("Write data") - write_json(path=args.output_data_json, data=data) + if "output_data_json" in vars(args) and args.output_data_json: + write_json(path=args.output_data_json, data=response.json()) + if "output_data_csv" in vars(args) and args.output_data_csv: + write_tab(path=args.output_data_csv, data=response.text) + if "output_data_tsv" in vars(args) and args.output_data_tsv: + write_tab(path=args.output_data_tsv, data=response.text) except requests.HTTPError as e: logging.error(f"HTTP error: {e.response.status_code} - {e.response.text}") sys.exit(1)
