diff env/lib/python3.7/site-packages/boto/cloudsearch/document.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
line wrap: on
line diff
--- a/env/lib/python3.7/site-packages/boto/cloudsearch/document.py	Thu May 14 16:47:39 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,271 +0,0 @@
-# Copyright (c) 2012 Mitch Garnaat http://garnaat.org/
-# Copyright (c) 2012 Amazon.com, Inc. or its affiliates.
-# All Rights Reserved
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish, dis-
-# tribute, sublicense, and/or sell copies of the Software, and to permit
-# persons to whom the Software is furnished to do so, subject to the fol-
-# lowing conditions:
-#
-# The above copyright notice and this permission notice shall be included
-# in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
-# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
-# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-#
-
-import boto.exception
-from boto.compat import json
-import requests
-import boto
-
-class SearchServiceException(Exception):
-    pass
-
-
-class CommitMismatchError(Exception):
-    pass
-
-class EncodingError(Exception):
-    """
-    Content sent for Cloud Search indexing was incorrectly encoded.
-
-    This usually happens when a document is marked as unicode but non-unicode
-    characters are present.
-    """
-    pass
-
-class ContentTooLongError(Exception):
-    """
-    Content sent for Cloud Search indexing was too long
-
-    This will usually happen when documents queued for indexing add up to more
-    than the limit allowed per upload batch (5MB)
-
-    """
-    pass
-
-class DocumentServiceConnection(object):
-    """
-    A CloudSearch document service.
-
-    The DocumentServiceConection is used to add, remove and update documents in
-    CloudSearch. Commands are uploaded to CloudSearch in SDF (Search Document Format).
-
-    To generate an appropriate SDF, use :func:`add` to add or update documents,
-    as well as :func:`delete` to remove documents.
-
-    Once the set of documents is ready to be index, use :func:`commit` to send the
-    commands to CloudSearch.
-
-    If there are a lot of documents to index, it may be preferable to split the
-    generation of SDF data and the actual uploading into CloudSearch. Retrieve
-    the current SDF with :func:`get_sdf`. If this file is the uploaded into S3,
-    it can be retrieved back afterwards for upload into CloudSearch using
-    :func:`add_sdf_from_s3`.
-
-    The SDF is not cleared after a :func:`commit`. If you wish to continue
-    using the DocumentServiceConnection for another batch upload of commands,
-    you will need to :func:`clear_sdf` first to stop the previous batch of
-    commands from being uploaded again.
-
-    """
-
-    def __init__(self, domain=None, endpoint=None):
-        self.domain = domain
-        self.endpoint = endpoint
-        if not self.endpoint:
-            self.endpoint = domain.doc_service_endpoint
-        self.documents_batch = []
-        self._sdf = None
-
-    def add(self, _id, version, fields, lang='en'):
-        """
-        Add a document to be processed by the DocumentService
-
-        The document will not actually be added until :func:`commit` is called
-
-        :type _id: string
-        :param _id: A unique ID used to refer to this document.
-
-        :type version: int
-        :param version: Version of the document being indexed. If a file is
-            being reindexed, the version should be higher than the existing one
-            in CloudSearch.
-
-        :type fields: dict
-        :param fields: A dictionary of key-value pairs to be uploaded .
-
-        :type lang: string
-        :param lang: The language code the data is in. Only 'en' is currently
-            supported
-        """
-
-        d = {'type': 'add', 'id': _id, 'version': version, 'lang': lang,
-            'fields': fields}
-        self.documents_batch.append(d)
-
-    def delete(self, _id, version):
-        """
-        Schedule a document to be removed from the CloudSearch service
-
-        The document will not actually be scheduled for removal until :func:`commit` is called
-
-        :type _id: string
-        :param _id: The unique ID of this document.
-
-        :type version: int
-        :param version: Version of the document to remove. The delete will only
-            occur if this version number is higher than the version currently
-            in the index.
-        """
-
-        d = {'type': 'delete', 'id': _id, 'version': version}
-        self.documents_batch.append(d)
-
-    def get_sdf(self):
-        """
-        Generate the working set of documents in Search Data Format (SDF)
-
-        :rtype: string
-        :returns: JSON-formatted string of the documents in SDF
-        """
-
-        return self._sdf if self._sdf else json.dumps(self.documents_batch)
-
-    def clear_sdf(self):
-        """
-        Clear the working documents from this DocumentServiceConnection
-
-        This should be used after :func:`commit` if the connection will be reused
-        for another set of documents.
-        """
-
-        self._sdf = None
-        self.documents_batch = []
-
-    def add_sdf_from_s3(self, key_obj):
-        """
-        Load an SDF from S3
-
-        Using this method will result in documents added through
-        :func:`add` and :func:`delete` being ignored.
-
-        :type key_obj: :class:`boto.s3.key.Key`
-        :param key_obj: An S3 key which contains an SDF
-        """
-        #@todo:: (lucas) would be nice if this could just take an s3://uri..."
-
-        self._sdf = key_obj.get_contents_as_string()
-
-    def commit(self):
-        """
-        Actually send an SDF to CloudSearch for processing
-
-        If an SDF file has been explicitly loaded it will be used. Otherwise,
-        documents added through :func:`add` and :func:`delete` will be used.
-
-        :rtype: :class:`CommitResponse`
-        :returns: A summary of documents added and deleted
-        """
-
-        sdf = self.get_sdf()
-
-        if ': null' in sdf:
-            boto.log.error('null value in sdf detected.  This will probably raise '
-                '500 error.')
-            index = sdf.index(': null')
-            boto.log.error(sdf[index - 100:index + 100])
-
-        url = "http://%s/2011-02-01/documents/batch" % (self.endpoint)
-
-        # Keep-alive is automatic in a post-1.0 requests world.
-        session = requests.Session()
-        adapter = requests.adapters.HTTPAdapter(
-            pool_connections=20,
-            pool_maxsize=50,
-            max_retries=5
-        )
-        session.mount('http://', adapter)
-        session.mount('https://', adapter)
-        r = session.post(url, data=sdf, headers={'Content-Type': 'application/json'})
-
-        return CommitResponse(r, self, sdf)
-
-
-class CommitResponse(object):
-    """Wrapper for response to Cloudsearch document batch commit.
-
-    :type response: :class:`requests.models.Response`
-    :param response: Response from Cloudsearch /documents/batch API
-
-    :type doc_service: :class:`boto.cloudsearch.document.DocumentServiceConnection`
-    :param doc_service: Object containing the documents posted and methods to
-        retry
-
-    :raises: :class:`boto.exception.BotoServerError`
-    :raises: :class:`boto.cloudsearch.document.SearchServiceException`
-    :raises: :class:`boto.cloudsearch.document.EncodingError`
-    :raises: :class:`boto.cloudsearch.document.ContentTooLongError`
-    """
-    def __init__(self, response, doc_service, sdf):
-        self.response = response
-        self.doc_service = doc_service
-        self.sdf = sdf
-
-        _body = response.content.decode('utf-8')
-
-        try:
-            self.content = json.loads(_body)
-        except:
-            boto.log.error('Error indexing documents.\nResponse Content:\n{0}\n\n'
-                'SDF:\n{1}'.format(_body, self.sdf))
-            raise boto.exception.BotoServerError(self.response.status_code, '',
-                body=_body)
-
-        self.status = self.content['status']
-        if self.status == 'error':
-            self.errors = [e.get('message') for e in self.content.get('errors',
-                [])]
-            for e in self.errors:
-                if "Illegal Unicode character" in e:
-                    raise EncodingError("Illegal Unicode character in document")
-                elif e == "The Content-Length is too long":
-                    raise ContentTooLongError("Content was too long")
-            if 'adds' not in self.content or 'deletes' not in self.content:
-                raise SearchServiceException("Error indexing documents"
-                    " => %s" % self.content.get('message', ''))
-        else:
-            self.errors = []
-
-        self.adds = self.content['adds']
-        self.deletes = self.content['deletes']
-        self._check_num_ops('add', self.adds)
-        self._check_num_ops('delete', self.deletes)
-
-    def _check_num_ops(self, type_, response_num):
-        """Raise exception if number of ops in response doesn't match commit
-
-        :type type_: str
-        :param type_: Type of commit operation: 'add' or 'delete'
-
-        :type response_num: int
-        :param response_num: Number of adds or deletes in the response.
-
-        :raises: :class:`boto.cloudsearch.document.CommitMismatchError`
-        """
-        commit_num = len([d for d in self.doc_service.documents_batch
-            if d['type'] == type_])
-
-        if response_num != commit_num:
-            raise CommitMismatchError(
-                'Incorrect number of {0}s returned. Commit: {1} Response: {2}'\
-                .format(type_, commit_num, response_num))