diff astronomical_archives.py @ 0:0ddfc343f9f9 draft

planemo upload for repository https://github.com/esg-epfl-apc/tools-astro/tree/main/tools/ commit d68858614f92df46c58724928d918e989d916db0
author astroteam
date Mon, 04 Sep 2023 14:20:34 +0000
parents
children 667fc28d803c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/astronomical_archives.py	Mon Sep 04 14:20:34 2023 +0000
@@ -0,0 +1,1153 @@
+import json
+import os
+import sys
+import urllib.parse
+from urllib import request
+
+import pyvo
+from pyvo import DALAccessError, DALQueryError, DALServiceError
+from pyvo import registry
+
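+# Upper bounds applied by this tool: at most MAX_ALLOWED_ENTRIES result
+# rows are requested per query and at most MAX_REGISTRIES_TO_SEARCH
+# registry records are turned into archives.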
+MAX_ALLOWED_ENTRIES = 100
+MAX_REGISTRIES_TO_SEARCH = 100
+
+
+class Service:
+    # https://pyvo.readthedocs.io/en/latest/api/pyvo.registry.Servicetype.html
+
+    services = {
+        'TAP': 'tap',
+        'SIA': 'sia',
+        'SIA2': 'sia2',
+        'SPECTRUM': 'spectrum',
+        'SCS': 'scs',
+        'LINE': 'line'
+    }
+
+    supported_services = {
+        'TAP': 'tap'
+    }
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def is_service_supported(service_type) -> bool:
+        is_supported = True
+
+        if service_type not in Service.services.keys():
+            is_supported = False
+        elif service_type not in Service.supported_services.keys():
+            is_supported = False
+
+        return is_supported
+
+
+class Waveband:
+    # https://pyvo.readthedocs.io/en/latest/api/pyvo.registry.Waveband.html
+    # https://www.ivoa.net/rdf/messenger/2020-08-26/messenger.html
+
+    wavebands = {
+        'Extreme UV': 'EUV',
+        'Gamma ray': 'Gamma-ray',
+        'Infrared': 'Infrared',
+        'Millimeter': 'Millimeter',
+        'Neutrino': 'Neutrino',
+        'Optical': 'Optical',
+        'Photon': 'Photon',
+        'Radio': 'Radio',
+        'Ultra violet': 'UV',
+        'X-ray': 'X-ray'
+    }
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def is_waveband_supported(waveband) -> bool:
+        is_supported = True
+
+        if waveband not in Waveband.wavebands.keys():
+            is_supported = False
+
+        return is_supported
+
+
+class TapArchive:
+    # https://www.ivoa.net/documents/ObsCore/20170509/REC-ObsCore-v1.1-20170509
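+    # Wraps a single TAP service: connects to it, introspects its
+    # tables and runs ADQL queries against it.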
+
+    service_type = Service.services['TAP']
+
+    def __init__(self,
+                 id=1,
+                 title="Unknown title",
+                 name="Unknown name",
+                 access_url=""):
+
+        self.id = id
+        self.title = title
+        self.name = name
+        self.access_url = access_url
+        self.initialized = False
+        self.archive_service = None
+        self.tables = None
+
+    def get_resources(self,
+                      query,
+                      number_of_results,
+                      url_field='access_url'):
+
+        resource_list_hydrated = []
+
+        error_message = None
+
+        if self.initialized:
+
+            try:
+                raw_resource_list = self.archive_service.search(query)
+
+                for i, resource in enumerate(raw_resource_list):
+                    if i < number_of_results:
+                        resource_list_hydrated.append(
+                            self._get_resource_object(resource))
+                    else:
+                        break
+
+            except DALQueryError:
+                if self.has_obscore_table():
+                    error_message = "Error in query -> " + query
+                    Logger.create_action_log(
+                        Logger.ACTION_ERROR,
+                        Logger.ACTION_TYPE_DOWNLOAD,
+                        error_message)
+                else:
+                    error_message = "No obscore table in the archive"
+                    Logger.create_action_log(
+                        Logger.ACTION_ERROR,
+                        Logger.ACTION_TYPE_DOWNLOAD,
+                        error_message)
+
+            except DALServiceError:
+                error_message = "Error communicating with the service"
+                Logger.create_action_log(
+                    Logger.ACTION_ERROR,
+                    Logger.ACTION_TYPE_DOWNLOAD,
+                    error_message)
+
+            except Exception:
+                error_message = "Unknow error while querying the service"
+                Logger.create_action_log(
+                    Logger.ACTION_ERROR,
+                    Logger.ACTION_TYPE_DOWNLOAD,
+                    error_message)
+
+        return resource_list_hydrated, error_message
+
+    def _get_resource_object(self, resource):
+        resource_hydrated = {}
+
+        for key, value in resource.items():
+            resource_hydrated[key] = value
+
+        return resource_hydrated
+
+    def initialize(self):
+        error_message = None
+
+        try:
+            self._get_service()
+
+            if self.archive_service:
+                self._set_archive_tables()
+                self.initialized = True
+
+        except DALAccessError:
+            error_message = \
+                "A connection to the service could not be established"
+            Logger.create_action_log(
+                Logger.ACTION_ERROR,
+                Logger.ACTION_TYPE_ARCHIVE_CONNECTION,
+                error_message)
+
+        except Exception:
+            error_message = "Unknow error while initializing TAP service"
+            Logger.create_action_log(
+                Logger.ACTION_ERROR,
+                Logger.ACTION_TYPE_ARCHIVE_CONNECTION,
+                error_message)
+
+        return self.initialized, error_message
+
+    def _get_service(self):
+        if self.access_url:
+            self.archive_service = pyvo.dal.TAPService(self.access_url)
+
+    def _set_archive_tables(self):
+
+        self.tables = []
+
+        for table in self.archive_service.tables:
+            archive_table = {
+                'name': table.name,
+                'type': table.type,
+                'fields': None
+            }
+
+            fields = []
+
+            for table_field in table.columns:
+                field = {
+                    'name': table_field.name,
+                    'description': table_field.description,
+                    'unit': table_field.unit,
+                    'datatype': table_field.datatype.content
+                }
+
+                fields.append(field)
+
+            archive_table['fields'] = fields
+
+            self.tables.append(archive_table)
+
+    def _is_query_valid(self, query) -> bool:
+        is_valid = True
+
+        attribute_from = 'from'
+        attribute_where = 'where'
+
+        idx_from = query.index(attribute_from)
+        idx_where = query.index(attribute_where)
+
+        table_name = ''
+
+        for idx in range(idx_from + len('from') + 1, idx_where):
+            table_name = table_name + query[idx]
+
+        if not next(
+                (item for item in self.tables if
+                 item["name"] == table_name),
+                False):
+
+            is_valid = False
+
+        return is_valid
+
+    def has_obscore_table(self) -> bool:
+        has_obscore_table = self._has_table("ivoa.obscore")
+
+        return has_obscore_table
+
+    def _has_table(self, table_name) -> bool:
+        _has_table = any(
+            item["name"] == table_name for item in self.tables)
+
+        return _has_table
+
+    def get_archive_name(self, archive_type):
+        try:
+            if archive_type == 'registry':
+                name = str(self.title)
+            else:
+                name = self.access_url
+        except Exception:
+            name = 'Unknown archive title'
+
+        return name
+
+
+class RegistrySearchParameters:
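+    # Maps the user-facing search terms (keyword, waveband, service
+    # type) onto the keyword arguments understood by registry.search.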
+
+    def __init__(self, keyword=None, waveband=None, service_type=None):
+        self.keyword = keyword
+        self.waveband = waveband
+        self.service_type = service_type
+
+    def get_parameters(self):
+
+        parameters = {
+            'keywords': '',
+            'waveband': '',
+            'service_type': ''
+        }
+
+        if self.keyword:
+            parameters['keywords'] = self.keyword
+
+        if Waveband.is_waveband_supported(self.waveband):
+            parameters['waveband'] = \
+                Waveband.wavebands[self.waveband]
+
+        if Service.is_service_supported(self.service_type):
+            parameters['service_type'] = \
+                Service.services[self.service_type]
+        else:
+            parameters['service_type'] = Service.services['TAP']
+
+        return parameters
+
+
+class Registry:
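+    # Searches the IVOA registry through pyvo.registry.search and
+    # converts the matching records into TapArchive objects.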
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def search_registries(rsp: RegistrySearchParameters,
+                          number_of_registries):
+
+        parameters = rsp.get_parameters()
+
+        keywords = parameters['keywords']
+        waveband = parameters['waveband']
+        service_type = parameters['service_type']
+
+        if not waveband:
+            registry_list = registry.search(
+                keywords=keywords,
+                servicetype=service_type)
+        else:
+            registry_list = registry.search(
+                keywords=keywords,
+                waveband=waveband,
+                servicetype=service_type)
+
+        if registry_list:
+            registry_list = Registry._get_registries_from_list(
+                registry_list,
+                number_of_registries)
+
+        return registry_list
+
+    @staticmethod
+    def _get_registries_from_list(registry_list, number_of_registries):
+
+        archive_list = []
+
+        for i, ivoa_registry in enumerate(registry_list):
+            if i < number_of_registries:
+                archive = TapArchive(ivoa_registry.standard_id,
+                                     ivoa_registry.res_title,
+                                     ivoa_registry.short_name,
+                                     ivoa_registry.access_url)
+
+                archive_list.append(archive)
+
+        return archive_list
+
+
+class TapQuery:
+
+    def __init__(self, query):
+        self.raw_query = query
+
+    def get_query(self):
+        return urllib.parse.unquote(self.raw_query).replace("+", " ")
+
+
+class BaseADQLQuery:
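+    # Shared helpers for assembling ADQL ORDER BY and WHERE clauses
+    # from a dictionary of column/value pairs.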
+
+    def __init__(self):
+        pass
+
+    def _get_order_by_clause(self, order_type):
+        order_by_clause = 'ORDER BY ' + order_type
+
+        return order_by_clause
+
+    def _get_where_clause(self, parameters):
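+        # Empty values are skipped; e.g. (illustrative values)
+        # {'obs_collection': 'CALIFA', 'target_name': ''} yields
+        # "WHERE obs_collection = 'CALIFA' ".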
+        where_clause = ''
+        is_first_statement = True
+
+        for key, value in parameters.items():
+
+            if value != '':
+                statement = str(key) + ' = ' + '\'' + str(value) + '\' '
+
+                if is_first_statement:
+                    is_first_statement = False
+                    where_clause += 'WHERE '
+                else:
+                    statement = 'AND ' + statement
+
+                where_clause += statement
+
+        return where_clause
+
+
+class ToolRunner:
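+    # Orchestrates a run: reads the JSON parameter file, resolves the
+    # target archives, builds the ADQL query and writes the requested
+    # outputs.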
+
+    def __init__(self,
+                 run_parameters,
+                 output,
+                 output_csv,
+                 output_html,
+                 output_basic_html,
+                 output_error):
+
+        self._raw_parameters_path = run_parameters
+        with open(run_parameters, "r") as parameters_file:
+            self._json_parameters = json.load(parameters_file)
+        self._archive_type = ''
+        self._query_type = ''
+        self._archives = []
+        self._adql_query = ''
+        self._services_access_url = ''
+        self._url_field = 'access_url'
+        self._number_of_files = ''
+        self._is_initialised = False
+
+        self._csv_file = False
+        self._image_file = False
+        self._html_file = False
+        self._basic_html_file = False
+
+        self._output = output
+        self._output_csv = output_csv
+        self._output_html = output_html
+        self._output_basic_html = output_basic_html
+        self._output_error = output_error
+
+        self._set_run_main_parameters()
+        self._is_initialised, error_message = self._set_archive()
+
+        if self._is_initialised and error_message is None:
+            self._set_query()
+            self._set_output()
+
+    def _set_run_main_parameters(self):
+
+        qs = "query_section"
+        qsl = "query_selection"
+
+        self._archive_type = \
+            self._json_parameters['archive_selection']['archive_type']
+        self._query_type = \
+            self._json_parameters[qs][qsl]['query_type']
+
+    def _set_archive(self):
+
+        error_message = None
+
+        if self._archive_type == 'archive':
+            self._service_access_url =\
+                self._json_parameters['archive_selection']['archive']
+
+            self._archives.append(
+                TapArchive(access_url=self._service_access_url))
+
+        else:
+            keyword = \
+                self._json_parameters['archive_selection']['keyword']
+            waveband = \
+                self._json_parameters['archive_selection']['wavebands']
+            service_type = \
+                self._json_parameters['archive_selection']['service_type']
+
+            rsp = RegistrySearchParameters(
+                keyword=keyword,
+                waveband=waveband,
+                service_type=service_type)
+
+            archive_list = Registry.search_registries(
+                rsp,
+                MAX_REGISTRIES_TO_SEARCH)
+
+            if len(archive_list) >= 1:
+                self._archives = archive_list
+            else:
+                error_message = "no archive matching search parameters"
+                Logger.create_action_log(
+                    Logger.ACTION_ERROR,
+                    Logger.ACTION_TYPE_ARCHIVE_CONNECTION,
+                    error_message)
+
+        if error_message is None:
+
+            self._archives[:] = \
+                [archive for archive in self._archives if
+                 archive.initialize()[0]]
+
+            if len(self._archives) >= 1:
+                return True, None
+            else:
+                return False, \
+                    "no archive matching search" \
+                    " parameters could be initialized"
+
+        else:
+            return False, error_message
+
+    def _set_query(self):
+
+        qs = 'query_section'
+        qsl = 'query_selection'
+
+        if self._query_type == 'obscore_query':
+
+            dataproduct_type = \
+                self._json_parameters[qs][qsl]['dataproduct_type']
+            obs_collection = \
+                self._json_parameters[qs][qsl]['obs_collection']
+            obs_title = \
+                self._json_parameters[qs][qsl]['obs_title']
+            obs_id = \
+                self._json_parameters[qs][qsl]['obs_id']
+            facility_name = \
+                self._json_parameters[qs][qsl]['facility_name']
+            instrument_name = \
+                self._json_parameters[qs][qsl]['instrument_name']
+            em_min = \
+                self._json_parameters[qs][qsl]['em_min']
+            em_max = \
+                self._json_parameters[qs][qsl]['em_max']
+            target_name = \
+                self._json_parameters[qs][qsl]['target_name']
+            obs_publisher_id = \
+                self._json_parameters[qs][qsl]['obs_publisher_id']
+            s_fov = \
+                self._json_parameters[qs][qsl]['s_fov']
+            calibration_level = \
+                self._json_parameters[qs][qsl]['calibration_level']
+            t_min = \
+                self._json_parameters[qs][qsl]['t_min']
+            t_max = \
+                self._json_parameters[qs][qsl]['t_max']
+            order_by = \
+                self._json_parameters[qs][qsl]['order_by']
+
+            obscore_query_object = ADQLObscoreQuery(dataproduct_type,
+                                                    obs_collection,
+                                                    obs_title,
+                                                    obs_id,
+                                                    facility_name,
+                                                    instrument_name,
+                                                    em_min,
+                                                    em_max,
+                                                    target_name,
+                                                    obs_publisher_id,
+                                                    s_fov,
+                                                    calibration_level,
+                                                    t_min,
+                                                    t_max,
+                                                    order_by)
+
+            self._adql_query = obscore_query_object.get_query()
+
+        elif self._query_type == 'raw_query':
+
+            wc = 'where_clause'
+
+            tap_table = \
+                self._json_parameters[qs][qsl]['table']
+
+            where_field = \
+                self._json_parameters[qs][qsl][wc]['where_field']
+            where_condition = \
+                self._json_parameters[qs][qsl][wc]['where_condition']
+
+            self._url_field = \
+                self._json_parameters[qs][qsl]['url_field']
+
+            self._adql_query = \
+                ADQLTapQuery().get_query(
+                    tap_table,
+                    where_field,
+                    where_condition)
+        else:
+            self._adql_query = ADQLObscoreQuery.base_query
+
+    def _set_output(self):
+        self._number_of_files = \
+            int(
+                self._json_parameters['output_section']['number_of_files']
+            )
+
+        if self._number_of_files < 1:
+            self._number_of_files = 1
+        elif self._number_of_files > MAX_ALLOWED_ENTRIES:
+            self._number_of_files = MAX_ALLOWED_ENTRIES
+
+        output_selection = \
+            self._json_parameters['output_section']['output_selection']
+
+        if output_selection is not None:
+            if 'c' in output_selection:
+                self._csv_file = True
+            if 'i' in output_selection:
+                self._image_file = True
+            if 'h' in output_selection:
+                self._html_file = True
+            if 'b' in output_selection:
+                self._basic_html_file = True
+
+    def _validate_json_parameters(self, json_parameters):
+        with open(json_parameters, "r") as parameters_file:
+            self._json_parameters = json.load(parameters_file)
+
+    def run(self):
+        if self._is_initialised:
+            error_message = None
+            file_url = []
+
+            archive_name = self._archives[0].get_archive_name(
+                self._archive_type)
+
+            for archive in self._archives:
+                _file_url, error_message = archive.get_resources(
+                    self._adql_query,
+                    self._number_of_files,
+                    self._url_field)
+
+                file_url.extend(_file_url)
+
+                if len(file_url) >= int(self._number_of_files):
+                    file_url = file_url[:int(self._number_of_files)]
+                    break
+
+            if file_url:
+
+                if self._csv_file:
+                    FileHandler.write_urls_to_output(
+                        file_url,
+                        self._output_csv,
+                        self._url_field)
+
+                if self._image_file:
+
+                    try:
+                        fits_file = FileHandler.download_file_from_url(
+                            file_url[0][self._url_field])
+
+                        FileHandler.write_file_to_output(
+                            fits_file,
+                            self._output, "wb")
+
+                        log_message = "from url " +\
+                                      file_url[0][self._url_field]
+
+                        Logger.create_action_log(
+                            Logger.ACTION_SUCCESS,
+                            Logger.ACTION_TYPE_DOWNLOAD,
+                            log_message)
+
+                    except Exception:
+                        error_message = "from url " + \
+                                        file_url[0][self._url_field]
+
+                        Logger.create_action_log(
+                            Logger.ACTION_ERROR,
+                            Logger.ACTION_TYPE_DOWNLOAD,
+                            error_message)
+
+                    for i, url in enumerate(file_url[1:], start=1):
+                        try:
+                            fits_file = \
+                                FileHandler.download_file_from_url(
+                                    url[self._url_field])
+
+                            FileHandler.write_file_to_subdir(
+                                fits_file,
+                                FileHandler.get_file_name_from_url(
+                                    url[self._url_field]))
+
+                            log_message = "from url " + \
+                                          url[self._url_field]
+
+                            Logger.create_action_log(
+                                Logger.ACTION_SUCCESS,
+                                Logger.ACTION_TYPE_DOWNLOAD,
+                                log_message)
+
+                        except Exception:
+                            error_message = "from url " + \
+                                            url[self._url_field]
+
+                            Logger.create_action_log(
+                                Logger.ACTION_ERROR,
+                                Logger.ACTION_TYPE_DOWNLOAD,
+                                error_message)
+
+                if self._html_file:
+                    html_file = OutputHandler.generate_html_output(
+                        file_url,
+                        archive_name,
+                        self._adql_query)
+
+                    FileHandler.write_file_to_output(html_file,
+                                                     self._output_html)
+
+                if self._basic_html_file:
+                    html_file = \
+                        OutputHandler.generate_basic_html_output(
+                            file_url,
+                            archive_name,
+                            self._adql_query)
+
+                    FileHandler.write_file_to_output(
+                        html_file,
+                        self._output_basic_html)
+
+                summary_file = Logger.create_log_file(archive_name,
+                                                      self._adql_query)
+                summary_file += "\n Tool run executed with success"
+
+                FileHandler.write_file_to_output(summary_file,
+                                                 self._output_error)
+
+            else:
+
+                summary_file = Logger.create_log_file(archive_name,
+                                                      self._adql_query)
+
+                if error_message is None:
+                    summary_file += \
+                        "\n No resources matching parameters found"
+                else:
+                    summary_file += error_message
+
+                FileHandler.write_file_to_output(summary_file,
+                                                 self._output_error)
+
+        else:
+            summary_file = Logger.create_log_file("Archive",
+                                                  self._adql_query)
+
+            summary_file += "Unable to initialize archive"
+
+            FileHandler.write_file_to_output(summary_file,
+                                             self._output_error)
+
+
+class ADQLObscoreQuery(BaseADQLQuery):
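+    # Builds an ADQL query against the ObsCore table (ivoa.obscore)
+    # from a set of optional ObsCore column constraints.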
+    order_by_field = {
+        'size': 'access_estsize',
+        'collection': 'obs_collection',
+        'object': 'target_name'
+    }
+
+    base_query = 'SELECT TOP ' + \
+                 str(MAX_ALLOWED_ENTRIES) + \
+                 ' * FROM ivoa.obscore '
+
+    def __init__(self,
+                 dataproduct_type,
+                 obs_collection,
+                 obs_title,
+                 obs_id,
+                 facility_name,
+                 instrument_name,
+                 em_min,
+                 em_max,
+                 target_name,
+                 obs_publisher_id,
+                 s_fov,
+                 calibration_level,
+                 t_min,
+                 t_max,
+                 order_by):
+
+        super().__init__()
+
+        if calibration_level == 'none':
+            calibration_level = ''
+
+        if order_by == 'none':
+            order_by = ''
+
+        if t_min == 'None' or t_min is None:
+            t_min = ''
+
+        if t_max == 'None' or t_max is None:
+            t_max = ''
+
+        if em_min == 'None' or em_min is None:
+            em_min = ''
+
+        if em_max == 'None' or em_max is None:
+            em_max = ''
+
+        if dataproduct_type == 'none' or dataproduct_type is None:
+            dataproduct_type = ''
+
+        self.parameters = {
+            'dataproduct_type': dataproduct_type,
+            'obs_collection': obs_collection,
+            'obs_title': obs_title,
+            'obs_id': obs_id,
+            'facility_name': facility_name,
+            'instrument_name': instrument_name,
+            'em_min': em_min,
+            'em_max': em_max,
+            'target_name': target_name,
+            'obs_publisher_id': obs_publisher_id,
+            's_fov': s_fov,
+            'calibration_level': calibration_level,
+            't_min': t_min,
+            't_max': t_max
+        }
+
+        self.order_by = order_by
+
+    def get_query(self):
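+        # e.g. (illustrative values) dataproduct_type='image',
+        # order_by='collection', everything else empty:
+        # "SELECT TOP 100 * FROM ivoa.obscore "
+        # "WHERE dataproduct_type = 'image' ORDER BY obs_collection"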
+        return ADQLObscoreQuery.base_query + \
+            self.get_where_statement() + \
+            self.get_order_by_statement()
+
+    def get_order_by_statement(self):
+        if self.order_by != '':
+            return self._get_order_by_clause(self.order_by)
+        else:
+            return ''
+
+    def _get_order_by_clause(self, order_type):
+
+        obscore_order_type = ADQLObscoreQuery.order_by_field[order_type]
+
+        return super()._get_order_by_clause(obscore_order_type)
+
+    def get_where_statement(self):
+        return self._get_where_clause(self.parameters)
+
+    def _get_where_clause(self, parameters):
+        return super()._get_where_clause(parameters)
+
+
+class ADQLTapQuery(BaseADQLQuery):
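+    # Builds a free-form ADQL query against a chosen table with an
+    # optional single equality condition.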
+    base_query = 'SELECT TOP ' + str(MAX_ALLOWED_ENTRIES) + ' * FROM '
+
+    def __init__(self):
+        super().__init__()
+
+    def get_order_by_clause(self, order_type):
+        return super()._get_order_by_clause(order_type)
+
+    def get_query(self, table, where_field, where_condition):
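+        # e.g. (illustrative values)
+        # get_query('ivoa.obscore', 'obs_collection', 'CALIFA') ->
+        # "SELECT TOP 100 * FROM ivoa.obscore "
+        # "WHERE obs_collection = 'CALIFA'"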
+        if where_field != '' and where_condition != '':
+            return ADQLTapQuery.base_query + \
+                str(table) + \
+                ' WHERE ' + \
+                str(where_field) + ' = ' + '\'' + \
+                str(where_condition) + '\''
+        else:
+            return ADQLTapQuery.base_query + str(table)
+
+
+class HTMLReport:
+    _html_report_base_header = ''
+    _html_report_base_body = ''
+    _html_report_base_footer = ''
+    _html_report_base_script = ''
+
+    def __init__(self):
+        pass
+
+
+class OutputHandler:
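+    # Renders the resources returned by a query as an HTML preview
+    # table, either styled or as bare markup.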
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def generate_html_output(urls_data, archive_name, adql_query):
+        return OutputHandler.html_header + \
+            OutputHandler.generate_html_content(
+                urls_data,
+                archive_name,
+                adql_query,
+                div_attr='class="title"',
+                table_attr='class="fl-table"')
+
+    @staticmethod
+    def generate_basic_html_output(urls_data,
+                                   archive_name,
+                                   adql_query):
+        return OutputHandler.generate_html_content(urls_data,
+                                                   archive_name,
+                                                   adql_query)
+
+    @staticmethod
+    def generate_html_content(urls_data, archive_name, adql_query,
+                              div_attr="", table_attr="border='1'"):
+        html_file = \
+            f"""
+                    <div {div_attr}>
+                        <h2>Resources Preview archive:
+                            <span>
+                                {archive_name}
+                            </span>
+                        </h2>
+                        <span>ADQL query : {adql_query}</span>
+                    </div>"""
+
+        html_file += f'<table {table_attr}><thead><tr>'
+
+        resource_keys = Utils.collect_resource_keys(urls_data)
+
+        for key in resource_keys:
+            html_file += '<th>' + str(key) + '</th>'
+
+        html_file += '<th>preview</th>'
+        html_file += '</tr></thead><tbody>'
+
+        for resource in urls_data:
+            html_file += '<tr>'
+
+            for key in resource_keys:
+                html_file += f'<td>{resource.get(key, "")}</td>'
+
+            html_file += '<td>'
+            for preview_key in \
+                    ['preview', 'preview_url', 'postcard_url']:
+                if preview_key in resource:
+                    html_file += (
+                        '<details><summary>Preview</summary>'
+                        f'<img src="{resource[preview_key]}"/>'
+                        '</details>'
+                    )
+            html_file += '</td>'
+            html_file += '</tr>'
+
+        html_file += '</tbody></table>'
+        return html_file
+
+    html_header = """ <head><style>
+
+                    details {
+                        padding: 10px;
+                    }
+
+                    .table-wrapper {
+                        margin: 10px 70px 70px;
+                        box-shadow: 0px 35px 50px rgba( 0, 0, 0, 0.2 );
+                    }
+
+                    .fl-table {
+                        border-radius: 5px;
+                        font-size: 12px;
+                        font-weight: normal;
+                        border: none;
+                        border-collapse: collapse;
+                        width: 100%;
+                        max-width: 100%;
+                        white-space: nowrap;
+                        background-color: white;
+                    }
+
+                    .fl-table td, .fl-table th {
+                        text-align: center;
+                        padding: 8px;
+                    }
+
+                    .fl-table td {
+                        border: 1px solid #999999;
+                        font-size: 15px;
+                    }
+
+                    .fl-table thead th {
+                        color: #ffffff;
+                        background: #4FC3A1;
+                        border: 1px solid #999999;
+                    }
+
+
+                    .fl-table thead th:nth-child(odd) {
+                        color: #ffffff;
+                        background: #324960;
+                    }
+
+                    .fl-table tr:nth-child(even) {
+                        background: #F8F8F8;
+                    }
+
+                    .title h2 {
+                      text-align: center;
+                      font-size: 22px;
+                      font-weight: 700; color:#202020;
+                      text-transform: uppercase;
+                      word-spacing: 1px; letter-spacing:2px;
+                      margin-bottom: 50px;
+                    }
+
+                    .title h2 span {
+                      padding-top: 40px;
+                      text-transform: none;
+                      font-size:.80em;
+                      font-weight: bold;
+                      font-family: "Playfair Display","Bookman",serif;
+                      color:#999;
+                      letter-spacing:-0.005em;
+                      word-spacing:1px;
+                      letter-spacing:none;
+                    }
+
+                    .title h1:before {
+                      background-color: #dfdfdf;
+                    }
+
+                </style></head>"""
+
+
+class FileHandler:
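+    # Static helpers for downloading result files and writing the
+    # various tool outputs to disk.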
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def download_file_from_url(file_url):
+        with request.urlopen(file_url) as response:
+            fits_file = response.read()
+
+        return fits_file
+
+    @staticmethod
+    def write_file_to_output(file, output, write_type="w"):
+        with open(output, write_type) as file_output:
+            file_output.write(file)
+
+    @staticmethod
+    def write_urls_to_output(urls: list, output, access_url="access_url"):
+        with open(output, "w") as file_output:
+            for url in urls:
+                try:
+                    file_output.write(url[access_url] + ',')
+                except Exception:
+                    error_message = "url field not found for url"
+                    Logger.create_action_log(
+                        Logger.ACTION_ERROR,
+                        Logger.ACTION_TYPE_WRITE_URL,
+                        error_message)
+
+    @staticmethod
+    def write_file_to_subdir(file, index):
+        fits_dir = os.path.join(os.getcwd(), 'fits')
+
+        os.makedirs(fits_dir, exist_ok=True)
+
+        upload_path = os.path.join(fits_dir, str(index) + '.fits')
+
+        with open(upload_path, "wb") as file_output:
+            file_output.write(file)
+
+    @staticmethod
+    def get_file_name_from_url(url, index=None):
+        url_parts = url.split('/')
+
+        file_name = ''
+
+        try:
+            if (url_parts[-1]) != '':
+                file_name = url_parts[-1]
+            elif len(url_parts) > 1:
+                file_name = url_parts[-2]
+        except Exception:
+            file_name = 'archive file '
+
+        return file_name
+
+
+class Utils:
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def collect_resource_keys(urls_data: list) -> list:
+        """
+        Collect all the keys from the resources,
+        keeping the order in the order of key appearance in the resources
+        """
+
+        resource_keys = []
+        for resource in urls_data:
+            for key in resource.keys():
+                if key not in resource_keys:
+                    resource_keys.append(key)
+        return resource_keys
+
+
+class Logger:
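+    # Accumulates per-action log lines in memory and renders them into
+    # the run summary written to the error output.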
+    _logs = []
+
+    ACTION_SUCCESS = 1
+    ACTION_ERROR = 2
+
+    ACTION_TYPE = 1
+    INFO_TYPE = 2
+
+    ACTION_TYPE_DOWNLOAD = 1
+    ACTION_TYPE_ARCHIVE_CONNECTION = 2
+    ACTION_TYPE_WRITE_URL = 3
+    ACTION_TYPE_WRITE_FILE = 4
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def create_action_log(outcome, action, message) -> bool:
+
+        is_log_created = False
+        log = ""
+
+        if action == Logger.ACTION_TYPE_DOWNLOAD:
+            if outcome == Logger.ACTION_SUCCESS:
+                log += "Success downloading file : " + message
+            else:
+                log += "Error downloading file : " + message
+
+            is_log_created = True
+        elif action == Logger.ACTION_TYPE_ARCHIVE_CONNECTION:
+            if outcome == Logger.ACTION_SUCCESS:
+                log += "Success connecting to archive : " + message
+            else:
+                log += "Error connecting to archive : " + message
+
+            is_log_created = True
+        elif action == Logger.ACTION_TYPE_WRITE_URL:
+            if outcome == Logger.ACTION_SUCCESS:
+                log += "Success writing url to file : " + message
+            else:
+                log += "Error writing to file : " + message
+
+            is_log_created = True
+
+        if is_log_created:
+            Logger._insert_log(Logger.ACTION_TYPE, log)
+
+        return is_log_created
+
+    @staticmethod
+    def create_info_log(message):
+        pass
+
+    @staticmethod
+    def _insert_log(type, log):
+        Logger._logs.append(log)
+
+    @staticmethod
+    def create_log_file(archive_name, query):
+        log_file = ""
+
+        log_file += "Run summary for archive : " + archive_name + "\n"
+        log_file += "With query : " + query + "\n"
+
+        for log in Logger._logs:
+            log_file += log + "\n"
+
+        return log_file
+
+
+if __name__ == "__main__":
+    output = sys.argv[1]
+    output_csv = sys.argv[2]
+    output_html = sys.argv[3]
+    output_basic_html = sys.argv[4]
+    output_error = sys.argv[5]
+
+    inputs = sys.argv[6]
+
+    tool_runner = ToolRunner(inputs,
+                             output,
+                             output_csv,
+                             output_html,
+                             output_basic_html,
+                             output_error)
+
+    tool_runner.run()