diff astronomical_archives.py @ 2:7398ea3d9ac4 draft default tip

planemo upload for repository https://github.com/esg-epfl-apc/tools-astro/tree/main/tools/ commit b77ceb5085f378a8bef9b202b80e3ca7ef1e9a8e
author astroteam
date Tue, 26 Mar 2024 10:03:55 +0000
parents 667fc28d803c
children
line wrap: on
line diff
--- a/astronomical_archives.py	Tue Oct 24 06:38:22 2023 +0000
+++ b/astronomical_archives.py	Tue Mar 26 10:03:55 2024 +0000
@@ -2,6 +2,7 @@
 import functools
 import json
 import os
+import re
 import signal
 import sys
 import urllib
@@ -17,6 +18,10 @@
 MAX_ALLOWED_ENTRIES = 100
 MAX_REGISTRIES_TO_SEARCH = 100
 
+ARCHIVES_TIMEOUT_BYPASS = [  # URLs whose get_resources uses timeout(40)
+    "https://datalab.noirlab.edu/tap"
+]
+
 
 class TimeoutException(Exception):
     pass
@@ -217,28 +222,34 @@
 
         self.tables = []
 
-        for table in self.archive_service.tables:
-            archive_table = {
-                'name': table.name,
-                'type': table.type,
-                'fields': None
-            }
-
-            fields = []
-
-            for table_field in table.columns:
-                field = {
-                    'name': table_field.name,
-                    'description': table_field.description,
-                    'unit': table_field.unit,
-                    'datatype': table_field.datatype.content
+        try:
+            for table in self.archive_service.tables:
+                archive_table = {
+                    'name': table.name,
+                    'type': table.type,
+                    'fields': None
                 }
 
-                fields.append(field)
+                fields = []
+
+                for table_field in table.columns:
+                    field = {
+                        'name': table_field.name,
+                        'description': table_field.description,
+                        'unit': table_field.unit,
+                        'datatype': table_field.datatype.content
+                    }
 
-            archive_table['fields'] = fields
+                    fields.append(field)
+
+                archive_table['fields'] = fields
 
-            self.tables.append(archive_table)
+                self.tables.append(archive_table)
+
+        # A DALServiceError (missing table schema) stops the loop early:
+        # tables read so far are kept, the rest are skipped; no action needed
+        except DALServiceError:
+            pass
 
     def _is_query_valid(self, query) -> bool:
         is_valid = True
@@ -507,6 +518,20 @@
             self._archives.append(
                 TapArchive(access_url=self._service_access_url))
 
+        elif self._archive_type == 'custom':
+            self._service_access_url = \
+                self._json_parameters['archive_selection']['access_url']
+
+            if Utils.is_valid_url(self._service_access_url):
+                self._archives.append(
+                    TapArchive(access_url=self._service_access_url))
+            else:
+                error_message = "archive access url is not a valid url"
+                Logger.create_action_log(
+                    Logger.ACTION_ERROR,
+                    Logger.ACTION_TYPE_ARCHIVE_CONNECTION,
+                    error_message)
+
         else:
             keyword = \
                 self._json_parameters['archive_selection']['keyword']
@@ -752,6 +777,11 @@
 
             for archive in self._archives:
                 try:
+
+                    if archive.access_url in ARCHIVES_TIMEOUT_BYPASS:
+                        archive.get_resources = \
+                            timeout(40)(TapArchive.get_resources.__get__(archive))  # noqa: E501
+
                     _file_url, error_message = archive.get_resources(
                         self._adql_query,
                         self._number_of_files,
@@ -1250,9 +1280,9 @@
         with open(output, "w") as file_output:
             for url in urls:
                 try:
-                    file_output.write(url[access_url] + ',')
+                    file_output.write(str(url[access_url]) + ',')
                 except Exception:
-                    error_message = "url field not found for url"
+                    error_message = f"url field {access_url} not found for url"
                     Logger.create_action_log(
                         Logger.ACTION_ERROR,
                         Logger.ACTION_TYPE_WRITE_URL,
@@ -1305,6 +1335,11 @@
                     resource_keys.append(key)
         return resource_keys
 
+    @staticmethod
+    def is_valid_url(url: str) -> bool:
+        """Return True only if all of *url* is an http(s) URL with a dotted host name; non-str input is False."""  # noqa: E501
+        return isinstance(url, str) and re.fullmatch(r'https?://(?:[A-Za-z0-9-]+\.)+[A-Za-z]{2,6}(?::\d+)?(?:/[^\s]*)?', url) is not None  # noqa: E501
+
 
 class Logger:
     _logs = []