springsuite: planemo/lib/python3.7/site-packages/boto/cloudsearch2/search.py comparison

comparison planemo/lib/python3.7/site-packages/boto/cloudsearch2/search.py @ 0:d30785e31577 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"

author	guerler
date	Fri, 31 Jul 2020 00:18:57 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:d30785e31577
+# Copyright (c) 2014 Amazon.com, Inc. or its affiliates.
+# All Rights Reserved
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish, dis-
+# tribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the fol-
+# lowing conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+from math import ceil
+from boto.compat import json, map, six
+import requests
+from boto.cloudsearchdomain.layer1 import CloudSearchDomainConnection
# Names of the query parsers that can be passed as the ``parser`` argument
# of a search (see the ``parser`` parameter of ``SearchConnection.search``).
SIMPLE = 'simple'
STRUCTURED = 'structured'
LUCENE = 'lucene'
DISMAX = 'dismax'
class SearchServiceException(Exception):
    """Raised when a CloudSearch search request fails or returns an error."""
class SearchResults(object):
    """One page of documents returned by a CloudSearch search request.

    The object is built from the decoded response dictionary, extended with
    two extra entries: ``query`` (the query object that produced the page)
    and ``search_service`` (a callable that runs a query and returns a new
    result page). Iterating yields the documents of this page and ``len()``
    returns how many documents the page holds.
    """

    def __init__(self, **attrs):
        """Unpack a decoded search response.

        Required keys are ``status`` (with ``rid`` and ``time-ms``),
        ``hits`` (with ``found``, ``hit`` and ``start``), ``query`` and
        ``search_service``. ``facets`` is optional; only facets carrying a
        ``buckets`` list are kept, each flattened to ``{value: count}``.

        :raises KeyError: if a required key is missing
        """
        status = attrs['status']
        self.rid = status['rid']
        self.time_ms = status['time-ms']

        hits = attrs['hits']
        self.hits = hits['found']
        self.docs = hits['hit']
        self.start = hits['start']

        self.query = attrs['query']
        self.search_service = attrs['search_service']

        self.facets = {}
        for facet, values in attrs.get('facets', {}).items():
            if 'buckets' in values:
                self.facets[facet] = dict(
                    (bucket['value'], bucket['count'])
                    for bucket in values['buckets'])

        # Integer ceiling division: unlike ceil(a / b) this does not floor
        # first under Python 2 integer division and always yields an int.
        self.num_pages_needed = -(-self.hits // self.query.real_size)

    def __len__(self):
        """Return the number of documents on this page."""
        return len(self.docs)

    def __iter__(self):
        """Iterate over the documents on this page."""
        return iter(self.docs)

    def next_page(self):
        """Call Cloudsearch to get the next page of search results

        The shared query object is advanced in place (``start`` and
        ``page``) before it is handed to the search service.

        :rtype: :class:`boto.cloudsearch2.search.SearchResults`
        :return: the following page of search results
        :raises StopIteration: when the next offset would lie at or beyond
            the total number of hits, i.e. there is no further page
        """
        next_start = self.query.start + self.query.real_size
        # Stop before issuing a request that could only return an empty
        # page; the query is left untouched in that case.
        if next_start >= self.hits:
            raise StopIteration

        self.query.start = next_start
        self.query.page += 1
        return self.search_service(self.query)
class Query(object):
    """Search criteria and paging state for a single CloudSearch query."""

    # Largest page size requested in one call; also used when size is 0.
    RESULTS_PER_PAGE = 500

    def __init__(self, q=None, parser=None, fq=None, expr=None,
                 return_fields=None, size=10, start=0, sort=None,
                 facet=None, highlight=None, partial=None, options=None):
        """Store the search criteria; missing collections become empty ones."""
        self.q = q
        self.parser = parser
        self.fq = fq
        self.expr = expr or {}
        self.sort = sort or []
        self.return_fields = return_fields or []
        self.start = start
        self.facet = facet or {}
        self.highlight = highlight or {}
        self.partial = partial
        self.options = options
        self.page = 0
        self.update_size(size)

    def update_size(self, new_size):
        """Record the requested size and derive the per-request page size.

        ``real_size`` falls back to ``RESULTS_PER_PAGE`` when the requested
        size exceeds that limit or is 0.
        """
        self.size = new_size
        limit = Query.RESULTS_PER_PAGE
        if self.size > limit or self.size == 0:
            self.real_size = limit
        else:
            self.real_size = self.size

    def to_params(self):
        """Transform search parameters from instance properties to a dictionary

        :rtype: dict
        :return: search parameters
        """
        query_args = {'start': self.start, 'size': self.real_size}

        # Plain scalar criteria are only sent when they are set.
        for key, value in (('q', self.q),
                           ('q.parser', self.parser),
                           ('fq', self.fq)):
            if value:
                query_args[key] = value

        if self.expr:
            query_args.update(
                ('expr.%s' % name, value)
                for name, value in six.iteritems(self.expr))

        if self.facet:
            for name, spec in six.iteritems(self.facet):
                # Non-string facet options are serialised to JSON.
                encoded = spec if isinstance(spec, six.string_types) else json.dumps(spec)
                query_args['facet.%s' % name] = encoded

        if self.highlight:
            query_args.update(
                ('highlight.%s' % name, value)
                for name, value in six.iteritems(self.highlight))

        if self.options:
            query_args['q.options'] = self.options

        if self.return_fields:
            query_args['return'] = ','.join(self.return_fields)

        # partial is a tri-state: None means "do not send it at all".
        if self.partial is not None:
            query_args['partial'] = self.partial

        if self.sort:
            query_args['sort'] = ','.join(self.sort)

        return query_args

    def to_domain_connection_params(self):
        """
        Transform search parameters from instance properties to a dictionary
        that CloudSearchDomainConnection can accept

        :rtype: dict
        :return: search parameters
        """
        query_args = {'start': self.start, 'size': self.real_size}

        for key, value in (('q', self.q),
                           ('query_parser', self.parser),
                           ('filter_query', self.fq)):
            if value:
                query_args[key] = value

        # Expressions, facets and highlights are each passed as one nested
        # dictionary whose keys keep their prefixed form.
        if self.expr:
            query_args['expr'] = dict(
                ('expr.%s' % name, value)
                for name, value in six.iteritems(self.expr))

        if self.facet:
            facet_args = {}
            for name, spec in six.iteritems(self.facet):
                encoded = spec if isinstance(spec, six.string_types) else json.dumps(spec)
                facet_args['facet.%s' % name] = encoded
            query_args['facet'] = facet_args

        if self.highlight:
            query_args['highlight'] = dict(
                ('highlight.%s' % name, value)
                for name, value in six.iteritems(self.highlight))

        if self.options:
            query_args['query_options'] = self.options

        if self.return_fields:
            query_args['ret'] = ','.join(self.return_fields)

        if self.partial is not None:
            query_args['partial'] = self.partial

        if self.sort:
            query_args['sort'] = ','.join(self.sort)

        return query_args
class SearchConnection(object):
    """Callable client for the search endpoint of a CloudSearch domain.

    Calling the instance with a :class:`boto.cloudsearch2.search.Query`
    runs the search and returns a
    :class:`boto.cloudsearch2.search.SearchResults`. Requests are sent
    either unsigned through a ``requests`` session or, when the domain's
    layer1 connection has ``sign_request`` set, through a
    ``CloudSearchDomainConnection``.
    """

    def __init__(self, domain=None, endpoint=None):
        """Set up the HTTP session and, if needed, the signing connection.

        :param domain: Domain object; its ``search_service_endpoint`` is
            used when *endpoint* is not given, and its ``layer1``
            connection supplies proxy, signing and credential settings.
        :param endpoint: Host name of the search endpoint.
        """
        self.domain = domain
        self.endpoint = endpoint
        self.session = requests.Session()

        # Endpoint needs to be set before initializing CloudSearchDomainConnection
        if not endpoint:
            self.endpoint = domain.search_service_endpoint

        # Copy proxy settings from connection and check if request should be signed
        self.sign_request = False
        if self.domain and self.domain.layer1:
            layer1 = self.domain.layer1
            if layer1.use_proxy:
                self.session.proxies['http'] = layer1.get_proxy_url_with_auth()

            self.sign_request = getattr(layer1, 'sign_request', False)

            if self.sign_request:
                self.domain_connection = CloudSearchDomainConnection(
                    host=self.endpoint,
                    aws_access_key_id=layer1.aws_access_key_id,
                    aws_secret_access_key=layer1.aws_secret_access_key,
                    region=layer1.region,
                    provider=layer1.provider
                )

    def build_query(self, q=None, parser=None, fq=None, rank=None, return_fields=None,
                    size=10, start=0, facet=None, highlight=None, sort=None,
                    partial=None, options=None):
        """Build a Query from search arguments without sending it.

        The arguments are those of :meth:`search`; *rank* is passed to the
        query as its ``expr`` argument.

        :rtype: :class:`boto.cloudsearch2.search.Query`
        :return: the query object
        """
        return Query(q=q, parser=parser, fq=fq, expr=rank, return_fields=return_fields,
                     size=size, start=start, facet=facet, highlight=highlight,
                     sort=sort, partial=partial, options=options)

    def search(self, q=None, parser=None, fq=None, rank=None, return_fields=None,
               size=10, start=0, facet=None, highlight=None, sort=None, partial=None,
               options=None):
        """
        Send a query to CloudSearch

        Each search query should use at least the q argument to specify
        the search parameter. The other options are used to specify the
        criteria of the search.

        :type q: string
        :param q: A string to search the default search fields for.

        :type parser: string
        :param parser: The parser to use. 'simple', 'structured', 'lucene', 'dismax'

        :type fq: string
        :param fq: The filter query to use.

        :type rank: dict
        :param rank: Expressions for the query, keyed by expression name.

        :type sort: List of strings
        :param sort: A list of fields or rank expressions used to order the
            search results. Order is handled by adding 'desc' or 'asc' after the field name.
            ``['year desc', 'author asc']``

        :type return_fields: List of strings
        :param return_fields: A list of fields which should be returned by the
            search. If this field is not specified, only IDs will be returned.
            ``['headline']``

        :type size: int
        :param size: Number of search results to specify

        :type start: int
        :param start: Offset of the first search result to return (can be used
            for paging)

        :type facet: dict
        :param facet: Dictionary of fields for which facets should be returned
            The facet value is string of JSON options
            ``{'year': '{sort:"bucket", size:3}', 'genres': '{buckets:["Action","Adventure","Sci-Fi"]}'}``

        :type highlight: dict
        :param highlight: Dictionary of fields for which highlights should be returned
            The highlight value is string of JSON options
            ``{'genres': '{format:'text',max_phrases:2,pre_tag:'<b>',post_tag:'</b>'}'}``

        :type partial: bool
        :param partial: Should partial results from a partitioned service be returned if
            one or more index partitions are unreachable.

        :type options: str
        :param options: Options for the query parser specified in *parser*.
            Specified as a string in JSON format.
            ``{fields: ['title^5', 'description']}``

        :rtype: :class:`boto.cloudsearch2.search.SearchResults`
        :return: Returns the results of this search

        The following examples all assume we have indexed a set of documents
        with fields: *author*, *date*, *headline*

        A simple search will look for documents whose default text search
        fields will contain the search word exactly:

        >>> search(q='Tim') # Return documents with the word Tim in them (but not Timothy)

        A simple search with more keywords will return documents whose default
        text search fields contain the search strings together or separately.

        >>> search(q='Tim apple') # Will match "tim" and "apple"

        More complex searches require the boolean search operator.

        Wildcard searches can be used to search for any words that start with
        the search string.

        >>> search(q="'Tim*'") # Return documents with words like Tim or Timothy)

        Search terms can also be combined. Allowed operators are "and", "or",
        "not", "field", "optional", "token", "phrase", or "filter"

        >>> search(q="(and 'Tim' (field author 'John Smith'))", parser='structured')

        Facets allow you to show classification information about the search
        results. For example, you can retrieve the authors who have written
        about Tim with a max of 3

        >>> search(q='Tim', facet={'Author': '{sort:"bucket", size:3}'})
        """
        query = self.build_query(q=q, parser=parser, fq=fq, rank=rank,
                                 return_fields=return_fields,
                                 size=size, start=start, facet=facet,
                                 highlight=highlight, sort=sort,
                                 partial=partial, options=options)
        return self(query)

    def _search_with_auth(self, params):
        """Run the search through the signing domain connection.

        ``q`` is removed from *params* and passed positionally; the
        remaining entries are passed as keyword arguments.
        """
        return self.domain_connection.search(params.pop("q", ""), **params)

    def _search_without_auth(self, params, api_version):
        """Run an unsigned HTTP GET and return its body text and status code."""
        url = "http://%s/%s/search" % (self.endpoint, api_version)
        resp = self.session.get(url, params=params)
        return {'body': resp.content.decode('utf-8'), 'status_code': resp.status_code}

    def __call__(self, query):
        """Make a call to CloudSearch

        :type query: :class:`boto.cloudsearch2.search.Query`
        :param query: A group of search criteria

        :rtype: :class:`boto.cloudsearch2.search.SearchResults`
        :return: search results

        :raises SearchServiceException: if the response is not JSON (with a
            dedicated message for HTTP 403), or if the decoded response
            reports an error
        """
        api_version = '2013-01-01'
        if self.domain and self.domain.layer1:
            api_version = self.domain.layer1.APIVersion

        if self.sign_request:
            params = query.to_domain_connection_params()
            # Hand over a copy: _search_with_auth pops 'q', and the full
            # parameter set is still needed for error reporting below.
            data = self._search_with_auth(dict(params))
        else:
            params = query.to_params()
            r = self._search_without_auth(params, api_version)

            _body = r['body']
            _status_code = r['status_code']

            try:
                data = json.loads(_body)
            except ValueError:
                if _status_code == 403:
                    msg = ''
                    import re
                    g = re.search('<html><body><h1>403 Forbidden</h1>([^<]+)<', _body)
                    # Append the detail text only when the HTML error page
                    # has the expected shape.
                    if g is not None:
                        msg = ': %s' % (g.groups()[0].strip())
                    raise SearchServiceException('Authentication error from Amazon%s' % msg)
                raise SearchServiceException("Got non-json response from Amazon. %s" % _body, query)

        if 'messages' in data and 'error' in data:
            for m in data['messages']:
                if m['severity'] == 'fatal':
                    raise SearchServiceException("Error processing search %s "
                                                 "=> %s" % (params, m['message']), query)
        elif 'error' in data:
            raise SearchServiceException("Unknown error processing search %s"
                                         % json.dumps(data), query)

        data['query'] = query
        data['search_service'] = self

        return SearchResults(**data)

    def get_all_paged(self, query, per_page):
        """Get a generator to iterate over all pages of search results

        The query is modified in place: its size is set to *per_page* and
        its ``start`` advances by one page after each yielded result. At
        least one request is always made; paging stops once the next
        offset reaches the total hit count reported by the last response.

        :type query: :class:`boto.cloudsearch2.search.Query`
        :param query: A group of search criteria

        :type per_page: int
        :param per_page: Number of docs in each :class:`boto.cloudsearch2.search.SearchResults` object.

        :rtype: generator
        :return: Generator containing :class:`boto.cloudsearch2.search.SearchResults`
        """
        query.update_size(per_page)
        while True:
            results = self(query)
            yield results
            query.start += query.real_size
            # Nothing is left beyond this offset, so avoid a request that
            # could only return an empty page.
            if query.start >= results.hits:
                break

    def get_all_hits(self, query):
        """Get a generator to iterate over all search results

        Transparently handles the results paging from Cloudsearch
        search results so even if you have many thousands of results
        you can iterate over all results in a reasonably efficient
        manner.

        The query's ``start`` is advanced in place after each page. At
        least one request is always made; paging stops once the next
        offset reaches the total hit count reported by the last response.

        :type query: :class:`boto.cloudsearch2.search.Query`
        :param query: A group of search criteria

        :rtype: generator
        :return: All docs matching query
        """
        while True:
            results = self(query)
            for doc in results:
                yield doc
            query.start += query.real_size
            if query.start >= results.hits:
                break

    def get_num_hits(self, query):
        """Return the total number of hits for query

        The query's size is set to 1 in place so that only a minimal page
        is fetched.

        :type query: :class:`boto.cloudsearch2.search.Query`
        :param query: a group of search criteria

        :rtype: int
        :return: Total number of hits for query
        """
        query.update_size(1)
        return self(query).hits

Mercurial > repos > guerler > springsuite

comparison planemo/lib/python3.7/site-packages/boto/cloudsearch2/search.py @ 0:d30785e31577 draft