Mercurial > repos > shellac > guppy_basecaller
diff env/lib/python3.7/site-packages/boto/cloudsearch2/search.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author | shellac |
---|---|
date | Mon, 01 Jun 2020 08:59:25 -0400 |
parents | 79f47841a781 |
children |
line wrap: on
line diff
--- a/env/lib/python3.7/site-packages/boto/cloudsearch2/search.py Thu May 14 16:47:39 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,452 +0,0 @@ -# Copyright (c) 2014 Amazon.com, Inc. or its affiliates. -# All Rights Reserved -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, dis- -# tribute, sublicense, and/or sell copies of the Software, and to permit -# persons to whom the Software is furnished to do so, subject to the fol- -# lowing conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- -# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. -# -from math import ceil -from boto.compat import json, map, six -import requests -from boto.cloudsearchdomain.layer1 import CloudSearchDomainConnection - -SIMPLE = 'simple' -STRUCTURED = 'structured' -LUCENE = 'lucene' -DISMAX = 'dismax' - - -class SearchServiceException(Exception): - pass - - -class SearchResults(object): - def __init__(self, **attrs): - self.rid = attrs['status']['rid'] - self.time_ms = attrs['status']['time-ms'] - self.hits = attrs['hits']['found'] - self.docs = attrs['hits']['hit'] - self.start = attrs['hits']['start'] - self.query = attrs['query'] - self.search_service = attrs['search_service'] - - self.facets = {} - if 'facets' in attrs: - for (facet, values) in attrs['facets'].items(): - if 'buckets' in values: - self.facets[facet] = dict((k, v) for (k, v) in map(lambda x: (x['value'], x['count']), values.get('buckets', []))) - - self.num_pages_needed = ceil(self.hits / self.query.real_size) - - def __len__(self): - return len(self.docs) - - def __iter__(self): - return iter(self.docs) - - def next_page(self): - """Call Cloudsearch to get the next page of search results - - :rtype: :class:`boto.cloudsearch2.search.SearchResults` - :return: the following page of search results - """ - if self.query.page <= self.num_pages_needed: - self.query.start += self.query.real_size - self.query.page += 1 - return self.search_service(self.query) - else: - raise StopIteration - - -class Query(object): - - RESULTS_PER_PAGE = 500 - - def __init__(self, q=None, parser=None, fq=None, expr=None, - return_fields=None, size=10, start=0, sort=None, - facet=None, highlight=None, partial=None, options=None): - - self.q = q - self.parser = parser - self.fq = fq - self.expr = expr or {} - self.sort = sort or [] - self.return_fields = return_fields or [] - self.start = start - self.facet = facet or {} - self.highlight = highlight or {} - self.partial = partial - self.options = options - self.page = 0 - self.update_size(size) - - def update_size(self, new_size): - self.size = new_size - self.real_size = Query.RESULTS_PER_PAGE if (self.size > - Query.RESULTS_PER_PAGE or self.size == 0) else self.size - - def to_params(self): - """Transform search parameters from instance properties to a dictionary - - :rtype: dict - :return: search parameters - """ - params = {'start': self.start, 'size': self.real_size} - - if self.q: - params['q'] = self.q - - if self.parser: - params['q.parser'] = self.parser - - if self.fq: - params['fq'] = self.fq - - if self.expr: - for k, v in six.iteritems(self.expr): - params['expr.%s' % k] = v - - if self.facet: - for k, v in six.iteritems(self.facet): - if not isinstance(v, six.string_types): - v = json.dumps(v) - params['facet.%s' % k] = v - - if self.highlight: - for k, v in six.iteritems(self.highlight): - params['highlight.%s' % k] = v - - if self.options: - params['q.options'] = self.options - - if self.return_fields: - params['return'] = ','.join(self.return_fields) - - if self.partial is not None: - params['partial'] = self.partial - - if self.sort: - params['sort'] = ','.join(self.sort) - - return params - - def to_domain_connection_params(self): - """ - Transform search parameters from instance properties to a dictionary - that CloudSearchDomainConnection can accept - - :rtype: dict - :return: search parameters - """ - params = {'start': self.start, 'size': self.real_size} - - if self.q: - params['q'] = self.q - - if self.parser: - params['query_parser'] = self.parser - - if self.fq: - params['filter_query'] = self.fq - - if self.expr: - expr = {} - for k, v in six.iteritems(self.expr): - expr['expr.%s' % k] = v - - params['expr'] = expr - - if self.facet: - facet = {} - for k, v in six.iteritems(self.facet): - if not isinstance(v, six.string_types): - v = json.dumps(v) - facet['facet.%s' % k] = v - - params['facet'] = facet - - if self.highlight: - highlight = {} - for k, v in six.iteritems(self.highlight): - highlight['highlight.%s' % k] = v - - params['highlight'] = highlight - - if self.options: - params['query_options'] = self.options - - if self.return_fields: - params['ret'] = ','.join(self.return_fields) - - if self.partial is not None: - params['partial'] = self.partial - - if self.sort: - params['sort'] = ','.join(self.sort) - - return params - - -class SearchConnection(object): - - def __init__(self, domain=None, endpoint=None): - self.domain = domain - self.endpoint = endpoint - self.session = requests.Session() - - # Endpoint needs to be set before initializing CloudSearchDomainConnection - if not endpoint: - self.endpoint = domain.search_service_endpoint - - # Copy proxy settings from connection and check if request should be signed - self.sign_request = False - if self.domain and self.domain.layer1: - if self.domain.layer1.use_proxy: - self.session.proxies['http'] = self.domain.layer1.get_proxy_url_with_auth() - - self.sign_request = getattr(self.domain.layer1, 'sign_request', False) - - if self.sign_request: - layer1 = self.domain.layer1 - self.domain_connection = CloudSearchDomainConnection( - host=self.endpoint, - aws_access_key_id=layer1.aws_access_key_id, - aws_secret_access_key=layer1.aws_secret_access_key, - region=layer1.region, - provider=layer1.provider - ) - - def build_query(self, q=None, parser=None, fq=None, rank=None, return_fields=None, - size=10, start=0, facet=None, highlight=None, sort=None, - partial=None, options=None): - return Query(q=q, parser=parser, fq=fq, expr=rank, return_fields=return_fields, - size=size, start=start, facet=facet, highlight=highlight, - sort=sort, partial=partial, options=options) - - def search(self, q=None, parser=None, fq=None, rank=None, return_fields=None, - size=10, start=0, facet=None, highlight=None, sort=None, partial=None, - options=None): - """ - Send a query to CloudSearch - - Each search query should use at least the q or bq argument to specify - the search parameter. The other options are used to specify the - criteria of the search. - - :type q: string - :param q: A string to search the default search fields for. - - :type parser: string - :param parser: The parser to use. 'simple', 'structured', 'lucene', 'dismax' - - :type fq: string - :param fq: The filter query to use. - - :type sort: List of strings - :param sort: A list of fields or rank expressions used to order the - search results. Order is handled by adding 'desc' or 'asc' after the field name. - ``['year desc', 'author asc']`` - - :type return_fields: List of strings - :param return_fields: A list of fields which should be returned by the - search. If this field is not specified, only IDs will be returned. - ``['headline']`` - - :type size: int - :param size: Number of search results to specify - - :type start: int - :param start: Offset of the first search result to return (can be used - for paging) - - :type facet: dict - :param facet: Dictionary of fields for which facets should be returned - The facet value is string of JSON options - ``{'year': '{sort:"bucket", size:3}', 'genres': '{buckets:["Action","Adventure","Sci-Fi"]}'}`` - - :type highlight: dict - :param highlight: Dictionary of fields for which highlights should be returned - The facet value is string of JSON options - ``{'genres': '{format:'text',max_phrases:2,pre_tag:'<b>',post_tag:'</b>'}'}`` - - :type partial: bool - :param partial: Should partial results from a partioned service be returned if - one or more index partitions are unreachable. - - :type options: str - :param options: Options for the query parser specified in *parser*. - Specified as a string in JSON format. - ``{fields: ['title^5', 'description']}`` - - :rtype: :class:`boto.cloudsearch2.search.SearchResults` - :return: Returns the results of this search - - The following examples all assume we have indexed a set of documents - with fields: *author*, *date*, *headline* - - A simple search will look for documents whose default text search - fields will contain the search word exactly: - - >>> search(q='Tim') # Return documents with the word Tim in them (but not Timothy) - - A simple search with more keywords will return documents whose default - text search fields contain the search strings together or separately. - - >>> search(q='Tim apple') # Will match "tim" and "apple" - - More complex searches require the boolean search operator. - - Wildcard searches can be used to search for any words that start with - the search string. - - >>> search(q="'Tim*'") # Return documents with words like Tim or Timothy) - - Search terms can also be combined. Allowed operators are "and", "or", - "not", "field", "optional", "token", "phrase", or "filter" - - >>> search(q="(and 'Tim' (field author 'John Smith'))", parser='structured') - - Facets allow you to show classification information about the search - results. For example, you can retrieve the authors who have written - about Tim with a max of 3 - - >>> search(q='Tim', facet={'Author': '{sort:"bucket", size:3}'}) - """ - - query = self.build_query(q=q, parser=parser, fq=fq, rank=rank, - return_fields=return_fields, - size=size, start=start, facet=facet, - highlight=highlight, sort=sort, - partial=partial, options=options) - return self(query) - - def _search_with_auth(self, params): - return self.domain_connection.search(params.pop("q", ""), **params) - - def _search_without_auth(self, params, api_version): - url = "http://%s/%s/search" % (self.endpoint, api_version) - resp = self.session.get(url, params=params) - - return {'body': resp.content.decode('utf-8'), 'status_code': resp.status_code} - - def __call__(self, query): - """Make a call to CloudSearch - - :type query: :class:`boto.cloudsearch2.search.Query` - :param query: A group of search criteria - - :rtype: :class:`boto.cloudsearch2.search.SearchResults` - :return: search results - """ - api_version = '2013-01-01' - if self.domain and self.domain.layer1: - api_version = self.domain.layer1.APIVersion - - if self.sign_request: - data = self._search_with_auth(query.to_domain_connection_params()) - else: - r = self._search_without_auth(query.to_params(), api_version) - - _body = r['body'] - _status_code = r['status_code'] - - try: - data = json.loads(_body) - except ValueError: - if _status_code == 403: - msg = '' - import re - g = re.search('<html><body><h1>403 Forbidden</h1>([^<]+)<', _body) - try: - msg = ': %s' % (g.groups()[0].strip()) - except AttributeError: - pass - raise SearchServiceException('Authentication error from Amazon%s' % msg) - raise SearchServiceException("Got non-json response from Amazon. %s" % _body, query) - - if 'messages' in data and 'error' in data: - for m in data['messages']: - if m['severity'] == 'fatal': - raise SearchServiceException("Error processing search %s " - "=> %s" % (params, m['message']), query) - elif 'error' in data: - raise SearchServiceException("Unknown error processing search %s" - % json.dumps(data), query) - - data['query'] = query - data['search_service'] = self - - return SearchResults(**data) - - def get_all_paged(self, query, per_page): - """Get a generator to iterate over all pages of search results - - :type query: :class:`boto.cloudsearch2.search.Query` - :param query: A group of search criteria - - :type per_page: int - :param per_page: Number of docs in each :class:`boto.cloudsearch2.search.SearchResults` object. - - :rtype: generator - :return: Generator containing :class:`boto.cloudsearch2.search.SearchResults` - """ - query.update_size(per_page) - page = 0 - num_pages_needed = 0 - while page <= num_pages_needed: - results = self(query) - num_pages_needed = results.num_pages_needed - yield results - query.start += query.real_size - page += 1 - - def get_all_hits(self, query): - """Get a generator to iterate over all search results - - Transparently handles the results paging from Cloudsearch - search results so even if you have many thousands of results - you can iterate over all results in a reasonably efficient - manner. - - :type query: :class:`boto.cloudsearch2.search.Query` - :param query: A group of search criteria - - :rtype: generator - :return: All docs matching query - """ - page = 0 - num_pages_needed = 0 - while page <= num_pages_needed: - results = self(query) - num_pages_needed = results.num_pages_needed - for doc in results: - yield doc - query.start += query.real_size - page += 1 - - def get_num_hits(self, query): - """Return the total number of hits for query - - :type query: :class:`boto.cloudsearch2.search.Query` - :param query: a group of search criteria - - :rtype: int - :return: Total number of hits for query - """ - query.update_size(1) - return self(query).hits