Mercurial > repos > astroteam > astronomical_archives
diff astronomical_archives.py @ 2:7398ea3d9ac4 draft default tip
planemo upload for repository https://github.com/esg-epfl-apc/tools-astro/tree/main/tools/ commit b77ceb5085f378a8bef9b202b80e3ca7ef1e9a8e
author | astroteam |
---|---|
date | Tue, 26 Mar 2024 10:03:55 +0000 |
parents | 667fc28d803c |
children |
line wrap: on
line diff
--- a/astronomical_archives.py Tue Oct 24 06:38:22 2023 +0000 +++ b/astronomical_archives.py Tue Mar 26 10:03:55 2024 +0000 @@ -2,6 +2,7 @@ import functools import json import os +import re import signal import sys import urllib @@ -17,6 +18,10 @@ MAX_ALLOWED_ENTRIES = 100 MAX_REGISTRIES_TO_SEARCH = 100 +ARCHIVES_TIMEOUT_BYPASS = [ + "https://datalab.noirlab.edu/tap" +] + class TimeoutException(Exception): pass @@ -217,28 +222,34 @@ self.tables = [] - for table in self.archive_service.tables: - archive_table = { - 'name': table.name, - 'type': table.type, - 'fields': None - } - - fields = [] - - for table_field in table.columns: - field = { - 'name': table_field.name, - 'description': table_field.description, - 'unit': table_field.unit, - 'datatype': table_field.datatype.content + try: + for table in self.archive_service.tables: + archive_table = { + 'name': table.name, + 'type': table.type, + 'fields': None } - fields.append(field) + fields = [] + + for table_field in table.columns: + field = { + 'name': table_field.name, + 'description': table_field.description, + 'unit': table_field.unit, + 'datatype': table_field.datatype.content + } - archive_table['fields'] = fields + fields.append(field) + + archive_table['fields'] = fields - self.tables.append(archive_table) + self.tables.append(archive_table) + + # Exception is raised when a table schema is missing + # Missing table will be omitted so no action needed + except DALServiceError: + pass def _is_query_valid(self, query) -> bool: is_valid = True @@ -507,6 +518,20 @@ self._archives.append( TapArchive(access_url=self._service_access_url)) + elif self._archive_type == 'custom': + self._service_access_url = \ + self._json_parameters['archive_selection']['access_url'] + + if Utils.is_valid_url(self._service_access_url): + self._archives.append( + TapArchive(access_url=self._service_access_url)) + else: + error_message = "archive access url is not a valid url" + Logger.create_action_log( + Logger.ACTION_ERROR, + Logger.ACTION_TYPE_ARCHIVE_CONNECTION, + error_message) + else: keyword = \ self._json_parameters['archive_selection']['keyword'] @@ -752,6 +777,11 @@ for archive in self._archives: try: + + if archive.access_url in ARCHIVES_TIMEOUT_BYPASS: + archive.get_resources = \ + timeout(40)(TapArchive.get_resources.__get__(archive)) # noqa: E501 + _file_url, error_message = archive.get_resources( self._adql_query, self._number_of_files, @@ -1250,9 +1280,9 @@ with open(output, "w") as file_output: for url in urls: try: - file_output.write(url[access_url] + ',') + file_output.write(str(url[access_url]) + ',') except Exception: - error_message = "url field not found for url" + error_message = f"url field {access_url} not found for url" Logger.create_action_log( Logger.ACTION_ERROR, Logger.ACTION_TYPE_WRITE_URL, @@ -1305,6 +1335,11 @@ resource_keys.append(key) return resource_keys + @staticmethod + def is_valid_url(url: str) -> bool: + regex_url = re.compile(r'^https?://(?:[A-Za-z0-9-]+\.)+[A-Za-z]{2,6}(?::\d+)?(?:/[^\s]*)?$') # noqa: E501 + return re.match(regex_url, url) is not None + class Logger: _logs = []