Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/boto/cloudsearch2/search.py @ 2:6af9afd405e9 draft
"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
| author | shellac |
|---|---|
| date | Thu, 14 May 2020 14:56:58 -0400 |
| parents | 26e78fe6e8c4 |
| children |
comparison
equal
deleted
inserted
replaced
| 1:75ca89e9b81c | 2:6af9afd405e9 |
|---|---|
| 1 # Copyright (c) 2014 Amazon.com, Inc. or its affiliates. | |
| 2 # All Rights Reserved | |
| 3 # | |
| 4 # Permission is hereby granted, free of charge, to any person obtaining a | |
| 5 # copy of this software and associated documentation files (the | |
| 6 # "Software"), to deal in the Software without restriction, including | |
| 7 # without limitation the rights to use, copy, modify, merge, publish, dis- | |
| 8 # tribute, sublicense, and/or sell copies of the Software, and to permit | |
| 9 # persons to whom the Software is furnished to do so, subject to the fol- | |
| 10 # lowing conditions: | |
| 11 # | |
| 12 # The above copyright notice and this permission notice shall be included | |
| 13 # in all copies or substantial portions of the Software. | |
| 14 # | |
| 15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |
| 16 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | |
| 17 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | |
| 18 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
| 19 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
| 21 # IN THE SOFTWARE. | |
| 22 # | |
| 23 from math import ceil | |
| 24 from boto.compat import json, map, six | |
| 25 import requests | |
| 26 from boto.cloudsearchdomain.layer1 import CloudSearchDomainConnection | |
| 27 | |
# Query parser names that can be passed as the ``parser`` argument of
# ``Query`` / ``SearchConnection.search``.
SIMPLE, STRUCTURED = 'simple', 'structured'
LUCENE, DISMAX = 'lucene', 'dismax'
| 32 | |
| 33 | |
class SearchServiceException(Exception):
    """Raised when a search request fails or the service reports an error."""
| 36 | |
| 37 | |
class SearchResults(object):
    """One page of results decoded from a search response.

    The constructor receives the decoded response as keyword arguments and
    reads the following entries:

    * ``status`` -- mapping with ``rid`` and ``time-ms``
    * ``hits`` -- mapping with ``found`` (total number of matches), ``hit``
      (the documents on this page) and ``start`` (offset of this page)
    * ``facets`` -- optional mapping of facet name to a mapping that may
      contain a ``buckets`` list of ``{'value': ..., 'count': ...}`` items
    * ``query`` -- the query object that produced this page; it must expose
      ``start``, ``real_size`` and ``page``
    * ``search_service`` -- callable that takes the query and returns the
      next :class:`SearchResults`
    """

    def __init__(self, **attrs):
        self.rid = attrs['status']['rid']
        self.time_ms = attrs['status']['time-ms']
        self.hits = attrs['hits']['found']
        self.docs = attrs['hits']['hit']
        self.start = attrs['hits']['start']
        self.query = attrs['query']
        self.search_service = attrs['search_service']

        # Flatten each facet's bucket list into {value: count}; facets
        # without a 'buckets' entry are left out.
        self.facets = {}
        if 'facets' in attrs:
            for (facet, values) in attrs['facets'].items():
                if 'buckets' in values:
                    self.facets[facet] = dict(
                        (bucket['value'], bucket['count'])
                        for bucket in values['buckets'])

        self.num_pages_needed = ceil(self.hits / self.query.real_size)

    def __len__(self):
        """Return the number of documents on this page."""
        return len(self.docs)

    def __iter__(self):
        """Iterate over the documents on this page."""
        return iter(self.docs)

    def next_page(self):
        """Call Cloudsearch to get the next page of search results

        The shared query object is advanced in place (``start`` moves
        forward by ``real_size`` and ``page`` is incremented) before it is
        handed back to the search service.

        :rtype: :class:`boto.cloudsearch2.search.SearchResults`
        :return: the following page of search results
        :raises StopIteration: when the current page already reaches the
            last matching document
        """
        # Decide from the offsets rather than from a page counter: there is
        # a following page only if some hits lie beyond the current window.
        # Comparing the page counter against num_pages_needed allowed
        # requests for pages past the final hit.
        if self.query.start + self.query.real_size >= self.hits:
            raise StopIteration

        self.query.start += self.query.real_size
        self.query.page += 1
        return self.search_service(self.query)
| 74 | |
| 75 | |
class Query(object):
    """Holds the criteria of one search request and serialises them."""

    # Upper bound for the page size sent to the service in one request
    RESULTS_PER_PAGE = 500

    def __init__(self, q=None, parser=None, fq=None, expr=None,
                 return_fields=None, size=10, start=0, sort=None,
                 facet=None, highlight=None, partial=None, options=None):
        # Plain values are stored as given
        self.q = q
        self.parser = parser
        self.fq = fq
        self.partial = partial
        self.options = options
        self.start = start
        self.page = 0

        # Collections fall back to a fresh empty container
        self.expr = expr or {}
        self.facet = facet or {}
        self.highlight = highlight or {}
        self.sort = sort or []
        self.return_fields = return_fields or []

        self.update_size(size)

    def update_size(self, new_size):
        """Record the requested size and derive the per-request page size.

        ``real_size`` becomes ``RESULTS_PER_PAGE`` when *new_size* exceeds
        that limit or is ``0``; otherwise it equals *new_size*.
        """
        self.size = new_size
        if new_size > Query.RESULTS_PER_PAGE or new_size == 0:
            self.real_size = Query.RESULTS_PER_PAGE
        else:
            self.real_size = new_size

    def to_params(self):
        """Build the flat parameter dictionary for an unsigned request

        :rtype: dict
        :return: search parameters
        """
        params = {'start': self.start, 'size': self.real_size}

        simple_fields = (
            ('q', self.q),
            ('q.parser', self.parser),
            ('fq', self.fq),
        )
        for key, value in simple_fields:
            if value:
                params[key] = value

        if self.expr:
            for name, expression in six.iteritems(self.expr):
                params['expr.%s' % name] = expression

        if self.facet:
            for name, spec in six.iteritems(self.facet):
                # Non-string facet options are serialised to JSON
                if isinstance(spec, six.string_types):
                    params['facet.%s' % name] = spec
                else:
                    params['facet.%s' % name] = json.dumps(spec)

        if self.highlight:
            for name, spec in six.iteritems(self.highlight):
                params['highlight.%s' % name] = spec

        if self.options:
            params['q.options'] = self.options

        if self.return_fields:
            params['return'] = ','.join(self.return_fields)

        if self.partial is not None:
            params['partial'] = self.partial

        if self.sort:
            params['sort'] = ','.join(self.sort)

        return params

    def to_domain_connection_params(self):
        """
        Build the parameter dictionary in the keyword form that
        CloudSearchDomainConnection can accept

        :rtype: dict
        :return: search parameters
        """
        params = {'start': self.start, 'size': self.real_size}

        simple_fields = (
            ('q', self.q),
            ('query_parser', self.parser),
            ('filter_query', self.fq),
        )
        for key, value in simple_fields:
            if value:
                params[key] = value

        if self.expr:
            expressions = {}
            for name, expression in six.iteritems(self.expr):
                expressions['expr.%s' % name] = expression
            params['expr'] = expressions

        if self.facet:
            facets = {}
            for name, spec in six.iteritems(self.facet):
                # Non-string facet options are serialised to JSON
                if isinstance(spec, six.string_types):
                    facets['facet.%s' % name] = spec
                else:
                    facets['facet.%s' % name] = json.dumps(spec)
            params['facet'] = facets

        if self.highlight:
            highlights = {}
            for name, spec in six.iteritems(self.highlight):
                highlights['highlight.%s' % name] = spec
            params['highlight'] = highlights

        if self.options:
            params['query_options'] = self.options

        if self.return_fields:
            params['ret'] = ','.join(self.return_fields)

        if self.partial is not None:
            params['partial'] = self.partial

        if self.sort:
            params['sort'] = ','.join(self.sort)

        return params
| 203 | |
| 204 | |
class SearchConnection(object):
    """Sends :class:`Query` objects to a search endpoint.

    Requests are either plain HTTP GETs through a ``requests`` session or,
    when the domain's ``layer1`` has a truthy ``sign_request`` attribute,
    calls through a ``CloudSearchDomainConnection``.
    """

    def __init__(self, domain=None, endpoint=None):
        """
        :param domain: object exposing ``search_service_endpoint`` and
            ``layer1``; used for the endpoint (when *endpoint* is not
            given), proxy settings, credentials and API version.
            NOTE(review): presumably a boto.cloudsearch2 domain object --
            confirm against callers.

        :type endpoint: string
        :param endpoint: host name of the search endpoint; takes precedence
            over the domain's endpoint when given
        """
        self.domain = domain
        self.endpoint = endpoint
        self.session = requests.Session()

        # Endpoint needs to be set before initializing CloudSearchDomainConnection
        if not endpoint:
            self.endpoint = domain.search_service_endpoint

        # Copy proxy settings from connection and check if request should be signed
        self.sign_request = False
        if self.domain and self.domain.layer1:
            if self.domain.layer1.use_proxy:
                self.session.proxies['http'] = self.domain.layer1.get_proxy_url_with_auth()

            self.sign_request = getattr(self.domain.layer1, 'sign_request', False)

            if self.sign_request:
                layer1 = self.domain.layer1
                self.domain_connection = CloudSearchDomainConnection(
                    host=self.endpoint,
                    aws_access_key_id=layer1.aws_access_key_id,
                    aws_secret_access_key=layer1.aws_secret_access_key,
                    region=layer1.region,
                    provider=layer1.provider
                )

    def build_query(self, q=None, parser=None, fq=None, rank=None, return_fields=None,
                    size=10, start=0, facet=None, highlight=None, sort=None,
                    partial=None, options=None):
        """Create a :class:`Query` from search arguments.

        The arguments are the same as for :meth:`search`; *rank* is passed
        to :class:`Query` as its ``expr`` argument.

        :rtype: :class:`boto.cloudsearch2.search.Query`
        :return: the query object, not yet sent to the service
        """
        return Query(q=q, parser=parser, fq=fq, expr=rank, return_fields=return_fields,
                     size=size, start=start, facet=facet, highlight=highlight,
                     sort=sort, partial=partial, options=options)

    def search(self, q=None, parser=None, fq=None, rank=None, return_fields=None,
               size=10, start=0, facet=None, highlight=None, sort=None, partial=None,
               options=None):
        """
        Send a query to CloudSearch

        Each search query should use at least the q argument to specify
        the search parameter. The other options are used to specify the
        criteria of the search.

        :type q: string
        :param q: A string to search the default search fields for.

        :type parser: string
        :param parser: The parser to use. 'simple', 'structured', 'lucene', 'dismax'

        :type fq: string
        :param fq: The filter query to use.

        :type rank: dict
        :param rank: Dictionary of named expressions; each entry is sent as
            an ``expr.<name>`` parameter.

        :type sort: List of strings
        :param sort: A list of fields or rank expressions used to order the
            search results. Order is handled by adding 'desc' or 'asc' after the field name.
            ``['year desc', 'author asc']``

        :type return_fields: List of strings
        :param return_fields: A list of fields which should be returned by the
            search. If this field is not specified, only IDs will be returned.
            ``['headline']``

        :type size: int
        :param size: Number of search results to specify

        :type start: int
        :param start: Offset of the first search result to return (can be used
            for paging)

        :type facet: dict
        :param facet: Dictionary of fields for which facets should be returned
            The facet value is string of JSON options
            ``{'year': '{sort:"bucket", size:3}', 'genres': '{buckets:["Action","Adventure","Sci-Fi"]}'}``

        :type highlight: dict
        :param highlight: Dictionary of fields for which highlights should be returned
            The highlight value is string of JSON options
            ``{'genres': '{format:'text',max_phrases:2,pre_tag:'<b>',post_tag:'</b>'}'}``

        :type partial: bool
        :param partial: Should partial results from a partitioned service be returned if
            one or more index partitions are unreachable.

        :type options: str
        :param options: Options for the query parser specified in *parser*.
            Specified as a string in JSON format.
            ``{fields: ['title^5', 'description']}``

        :rtype: :class:`boto.cloudsearch2.search.SearchResults`
        :return: Returns the results of this search

        The following examples all assume we have indexed a set of documents
        with fields: *author*, *date*, *headline*

        A simple search will look for documents whose default text search
        fields will contain the search word exactly:

        >>> search(q='Tim') # Return documents with the word Tim in them (but not Timothy)

        A simple search with more keywords will return documents whose default
        text search fields contain the search strings together or separately.

        >>> search(q='Tim apple') # Will match "tim" and "apple"

        More complex searches require the boolean search operator.

        Wildcard searches can be used to search for any words that start with
        the search string.

        >>> search(q="'Tim*'") # Return documents with words like Tim or Timothy)

        Search terms can also be combined. Allowed operators are "and", "or",
        "not", "field", "optional", "token", "phrase", or "filter"

        >>> search(q="(and 'Tim' (field author 'John Smith'))", parser='structured')

        Facets allow you to show classification information about the search
        results. For example, you can retrieve the authors who have written
        about Tim with a max of 3

        >>> search(q='Tim', facet={'Author': '{sort:"bucket", size:3}'})
        """

        query = self.build_query(q=q, parser=parser, fq=fq, rank=rank,
                                 return_fields=return_fields,
                                 size=size, start=start, facet=facet,
                                 highlight=highlight, sort=sort,
                                 partial=partial, options=options)
        return self(query)

    def _search_with_auth(self, params):
        """Run the search through the signed domain connection.

        ``q`` is removed from *params* (the dict is mutated) and passed
        positionally; the remaining entries are passed as keyword arguments.
        """
        return self.domain_connection.search(params.pop("q", ""), **params)

    def _search_without_auth(self, params, api_version):
        """Run the search as a plain HTTP GET.

        :return: dict with the decoded response ``body`` and ``status_code``
        """
        url = "http://%s/%s/search" % (self.endpoint, api_version)
        resp = self.session.get(url, params=params)

        return {'body': resp.content.decode('utf-8'), 'status_code': resp.status_code}

    def __call__(self, query):
        """Make a call to CloudSearch

        :type query: :class:`boto.cloudsearch2.search.Query`
        :param query: A group of search criteria

        :rtype: :class:`boto.cloudsearch2.search.SearchResults`
        :return: search results

        :raises SearchServiceException: when an unsigned request returns a
            body that is not JSON (with a dedicated message for HTTP 403),
            or when the decoded response contains an ``error`` entry
        """
        api_version = '2013-01-01'
        if self.domain and self.domain.layer1:
            api_version = self.domain.layer1.APIVersion

        if self.sign_request:
            params = query.to_domain_connection_params()
            # _search_with_auth pops 'q' from the dict it receives; give it
            # a copy so `params` stays complete for error reporting below.
            data = self._search_with_auth(dict(params))
        else:
            params = query.to_params()
            r = self._search_without_auth(params, api_version)

            _body = r['body']
            _status_code = r['status_code']

            try:
                data = json.loads(_body)
            except ValueError:
                if _status_code == 403:
                    msg = ''
                    import re
                    g = re.search('<html><body><h1>403 Forbidden</h1>([^<]+)<', _body)
                    try:
                        msg = ': %s' % (g.groups()[0].strip())
                    except AttributeError:
                        # No match: keep the generic message
                        pass
                    raise SearchServiceException('Authentication error from Amazon%s' % msg)
                raise SearchServiceException("Got non-json response from Amazon. %s" % _body, query)

        if 'messages' in data and 'error' in data:
            for m in data['messages']:
                if m['severity'] == 'fatal':
                    # `params` is bound in both branches above; it used to
                    # be undefined here, so a NameError hid the real error.
                    raise SearchServiceException("Error processing search %s "
                                                 "=> %s" % (params, m['message']), query)
        elif 'error' in data:
            raise SearchServiceException("Unknown error processing search %s"
                                         % json.dumps(data), query)

        data['query'] = query
        data['search_service'] = self

        return SearchResults(**data)

    def _iter_pages(self, query):
        """Yield result pages for *query*, advancing ``query.start`` in place.

        At least one request is always made. Paging stops once the advanced
        offset reaches the hit count reported by the latest response, so no
        request is issued for a page past the last hit.
        """
        while True:
            results = self(query)
            yield results
            query.start += query.real_size
            # The real_size guard prevents an endless loop if the offset
            # cannot move forward.
            if query.real_size <= 0 or query.start >= results.hits:
                return

    def get_all_paged(self, query, per_page):
        """Get a generator to iterate over all pages of search results

        :type query: :class:`boto.cloudsearch2.search.Query`
        :param query: A group of search criteria

        :type per_page: int
        :param per_page: Number of docs in each :class:`boto.cloudsearch2.search.SearchResults` object.

        :rtype: generator
        :return: Generator containing :class:`boto.cloudsearch2.search.SearchResults`
        """
        query.update_size(per_page)
        for results in self._iter_pages(query):
            yield results

    def get_all_hits(self, query):
        """Get a generator to iterate over all search results

        Transparently handles the results paging from Cloudsearch
        search results so even if you have many thousands of results
        you can iterate over all results in a reasonably efficient
        manner.

        :type query: :class:`boto.cloudsearch2.search.Query`
        :param query: A group of search criteria

        :rtype: generator
        :return: All docs matching query
        """
        for results in self._iter_pages(query):
            for doc in results:
                yield doc

    def get_num_hits(self, query):
        """Return the total number of hits for query

        The query's size is changed to 1 before the request is made.

        :type query: :class:`boto.cloudsearch2.search.Query`
        :param query: a group of search criteria

        :rtype: int
        :return: Total number of hits for query
        """
        query.update_size(1)
        return self(query).hits
