Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/boto/cloudsearch2/search.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:18:57 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d30785e31577 |
---|---|
1 # Copyright (c) 2014 Amazon.com, Inc. or its affiliates. | |
2 # All Rights Reserved | |
3 # | |
4 # Permission is hereby granted, free of charge, to any person obtaining a | |
5 # copy of this software and associated documentation files (the | |
6 # "Software"), to deal in the Software without restriction, including | |
7 # without limitation the rights to use, copy, modify, merge, publish, dis- | |
8 # tribute, sublicense, and/or sell copies of the Software, and to permit | |
9 # persons to whom the Software is furnished to do so, subject to the fol- | |
10 # lowing conditions: | |
11 # | |
12 # The above copyright notice and this permission notice shall be included | |
13 # in all copies or substantial portions of the Software. | |
14 # | |
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |
16 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | |
17 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | |
18 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
19 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
21 # IN THE SOFTWARE. | |
22 # | |
23 from math import ceil | |
24 from boto.compat import json, map, six | |
25 import requests | |
26 from boto.cloudsearchdomain.layer1 import CloudSearchDomainConnection | |
27 | |
# Names of the query parsers that can be passed as the ``parser`` argument
# of a search ('simple', 'structured', 'lucene' or 'dismax').
(SIMPLE, STRUCTURED, LUCENE, DISMAX) = (
    'simple', 'structured', 'lucene', 'dismax')
32 | |
33 | |
class SearchServiceException(Exception):
    """Raised when a CloudSearch search request fails.

    Used for authentication failures, non-JSON responses and error
    payloads returned by the search endpoint.
    """
36 | |
37 | |
class SearchResults(object):
    """One page of results returned by a CloudSearch search request.

    The instance is built from the decoded response dictionary, extended
    with two extra keys: ``query`` (the query object that produced the
    response) and ``search_service`` (a callable taking that query and
    returning the :class:`SearchResults` for it).

    Iterating over the instance yields the documents of this page and
    ``len()`` returns how many documents the page holds.
    """

    def __init__(self, **attrs):
        """Unpack the response dictionary into attributes.

        :param attrs: decoded response; must contain ``status`` (with
            ``rid`` and ``time-ms``), ``hits`` (with ``found``, ``hit`` and
            ``start``), ``query`` and ``search_service``. ``facets`` is
            optional.
        """
        status = attrs['status']
        hits = attrs['hits']

        self.rid = status['rid']
        self.time_ms = status['time-ms']
        self.hits = hits['found']
        self.docs = hits['hit']
        self.start = hits['start']
        self.query = attrs['query']
        self.search_service = attrs['search_service']

        # Map each facet name to {bucket value: bucket count}; facets
        # without a 'buckets' entry are skipped.
        self.facets = {}
        for facet, values in attrs.get('facets', {}).items():
            if 'buckets' in values:
                self.facets[facet] = dict(
                    (bucket['value'], bucket['count'])
                    for bucket in values['buckets'])

        # Force true division so the page count is rounded up on Python 2
        # as well, where ``int / int`` would floor before ``ceil`` runs.
        self.num_pages_needed = int(
            ceil(float(self.hits) / self.query.real_size))

    def __len__(self):
        """Return the number of documents in this page."""
        return len(self.docs)

    def __iter__(self):
        """Iterate over the documents in this page."""
        return iter(self.docs)

    def next_page(self):
        """Call Cloudsearch to get the next page of search results

        The shared query object is advanced in place (``start`` and
        ``page``) before the search service is called again.

        :rtype: :class:`boto.cloudsearch2.search.SearchResults`
        :return: the following page of search results
        :raises StopIteration: when no hits remain beyond the current page
        """
        next_start = self.query.start + self.query.real_size
        # Comparing offsets with the total hit count (rather than a
        # zero-based page counter with a page count) avoids requesting
        # pages that lie past the last hit.
        if next_start >= self.hits:
            raise StopIteration

        self.query.start = next_start
        self.query.page += 1
        return self.search_service(self.query)
74 | |
75 | |
class Query(object):
    """A set of CloudSearch search criteria plus paging state.

    ``size`` is the page size requested by the caller; ``real_size`` is the
    page size actually sent, capped at :attr:`RESULTS_PER_PAGE`.
    """

    # Largest page size sent in a single request; also used when the
    # requested size is 0.
    RESULTS_PER_PAGE = 500

    def __init__(self, q=None, parser=None, fq=None, expr=None,
                 return_fields=None, size=10, start=0, sort=None,
                 facet=None, highlight=None, partial=None, options=None):
        """Store the search criteria.

        ``expr``, ``facet`` and ``highlight`` default to empty dicts and
        ``sort`` and ``return_fields`` to empty lists when not given.
        """
        self.q = q
        self.parser = parser
        self.fq = fq
        self.expr = expr or {}
        self.sort = sort or []
        self.return_fields = return_fields or []
        self.start = start
        self.facet = facet or {}
        self.highlight = highlight or {}
        self.partial = partial
        self.options = options
        self.page = 0
        self.update_size(size)

    def update_size(self, new_size):
        """Set the requested page size and recompute ``real_size``.

        ``real_size`` becomes :attr:`RESULTS_PER_PAGE` when ``new_size`` is
        0 or exceeds that limit, otherwise ``new_size`` itself.
        """
        self.size = new_size
        if new_size > Query.RESULTS_PER_PAGE or new_size == 0:
            self.real_size = Query.RESULTS_PER_PAGE
        else:
            self.real_size = new_size

    @staticmethod
    def _prefixed(prefix, mapping, encode_json=False):
        """Return a copy of *mapping* with keys renamed to ``prefix.key``.

        When *encode_json* is true, values that are not already strings are
        serialised with ``json.dumps``.
        """
        result = {}
        for key, value in mapping.items():
            if encode_json and not isinstance(value, six.string_types):
                value = json.dumps(value)
            result['%s.%s' % (prefix, key)] = value
        return result

    def to_params(self):
        """Transform search parameters from instance properties to a dictionary

        Expression, facet and highlight entries are flattened into the
        top-level dictionary as ``expr.<name>``, ``facet.<name>`` and
        ``highlight.<name>``.

        :rtype: dict
        :return: search parameters
        """
        params = {'start': self.start, 'size': self.real_size}

        if self.q:
            params['q'] = self.q

        if self.parser:
            params['q.parser'] = self.parser

        if self.fq:
            params['fq'] = self.fq

        if self.expr:
            params.update(self._prefixed('expr', self.expr))

        if self.facet:
            params.update(
                self._prefixed('facet', self.facet, encode_json=True))

        if self.highlight:
            params.update(self._prefixed('highlight', self.highlight))

        if self.options:
            params['q.options'] = self.options

        if self.return_fields:
            params['return'] = ','.join(self.return_fields)

        # ``partial`` is sent whenever it was set explicitly, including
        # when it is False.
        if self.partial is not None:
            params['partial'] = self.partial

        if self.sort:
            params['sort'] = ','.join(self.sort)

        return params

    def to_domain_connection_params(self):
        """
        Transform search parameters from instance properties to a dictionary
        that CloudSearchDomainConnection can accept

        Unlike :meth:`to_params`, expression, facet and highlight entries
        are grouped into nested dictionaries under ``expr``, ``facet`` and
        ``highlight``, and the keyword-style names ``query_parser``,
        ``filter_query``, ``query_options`` and ``ret`` are used.

        :rtype: dict
        :return: search parameters
        """
        params = {'start': self.start, 'size': self.real_size}

        if self.q:
            params['q'] = self.q

        if self.parser:
            params['query_parser'] = self.parser

        if self.fq:
            params['filter_query'] = self.fq

        if self.expr:
            params['expr'] = self._prefixed('expr', self.expr)

        if self.facet:
            params['facet'] = self._prefixed(
                'facet', self.facet, encode_json=True)

        if self.highlight:
            params['highlight'] = self._prefixed(
                'highlight', self.highlight)

        if self.options:
            params['query_options'] = self.options

        if self.return_fields:
            params['ret'] = ','.join(self.return_fields)

        if self.partial is not None:
            params['partial'] = self.partial

        if self.sort:
            params['sort'] = ','.join(self.sort)

        return params
203 | |
204 | |
class SearchConnection(object):
    """Send search queries to a CloudSearch domain's search endpoint.

    Requests are sent unsigned over HTTP through a ``requests`` session,
    unless the domain's ``layer1`` connection has ``sign_request`` set, in
    which case they go through a :class:`CloudSearchDomainConnection`.
    Instances are callable: ``connection(query)`` runs a query and returns
    its :class:`SearchResults`.
    """

    def __init__(self, domain=None, endpoint=None):
        """Set up the HTTP session and, if required, the signing connection.

        :param domain: domain object; its ``search_service_endpoint`` is
            used when *endpoint* is not given, and its ``layer1``
            connection (if any) supplies proxy and signing settings.
        :param endpoint: host name of the search endpoint.
        """
        self.domain = domain
        self.endpoint = endpoint
        self.session = requests.Session()

        # Endpoint needs to be set before initializing CloudSearchDomainConnection
        if not endpoint:
            self.endpoint = domain.search_service_endpoint

        # Copy proxy settings from connection and check if request should be signed
        self.sign_request = False
        if self.domain and self.domain.layer1:
            if self.domain.layer1.use_proxy:
                self.session.proxies['http'] = self.domain.layer1.get_proxy_url_with_auth()

            self.sign_request = getattr(self.domain.layer1, 'sign_request', False)

            if self.sign_request:
                layer1 = self.domain.layer1
                self.domain_connection = CloudSearchDomainConnection(
                    host=self.endpoint,
                    aws_access_key_id=layer1.aws_access_key_id,
                    aws_secret_access_key=layer1.aws_secret_access_key,
                    region=layer1.region,
                    provider=layer1.provider
                )

    def build_query(self, q=None, parser=None, fq=None, rank=None, return_fields=None,
                    size=10, start=0, facet=None, highlight=None, sort=None,
                    partial=None, options=None):
        """Build a :class:`Query` from keyword arguments.

        Arguments have the same meaning as for :meth:`search`; ``rank`` is
        passed to :class:`Query` as its ``expr`` argument.

        :rtype: :class:`boto.cloudsearch2.search.Query`
        :return: the assembled query
        """
        return Query(q=q, parser=parser, fq=fq, expr=rank, return_fields=return_fields,
                     size=size, start=start, facet=facet, highlight=highlight,
                     sort=sort, partial=partial, options=options)

    def search(self, q=None, parser=None, fq=None, rank=None, return_fields=None,
               size=10, start=0, facet=None, highlight=None, sort=None, partial=None,
               options=None):
        """
        Send a query to CloudSearch

        Each search query should use at least the q argument to specify
        the search parameter. The other options are used to specify the
        criteria of the search.

        :type q: string
        :param q: A string to search the default search fields for.

        :type parser: string
        :param parser: The parser to use. 'simple', 'structured', 'lucene', 'dismax'

        :type fq: string
        :param fq: The filter query to use.

        :type rank: dict
        :param rank: Dictionary of named expressions; each entry is sent
            as an ``expr.<name>`` parameter.

        :type sort: List of strings
        :param sort: A list of fields or rank expressions used to order the
            search results. Order is handled by adding 'desc' or 'asc' after the field name.
            ``['year desc', 'author asc']``

        :type return_fields: List of strings
        :param return_fields: A list of fields which should be returned by the
            search. If this field is not specified, only IDs will be returned.
            ``['headline']``

        :type size: int
        :param size: Number of search results to return

        :type start: int
        :param start: Offset of the first search result to return (can be used
            for paging)

        :type facet: dict
        :param facet: Dictionary of fields for which facets should be returned
            The facet value is a string of JSON options
            ``{'year': '{sort:"bucket", size:3}', 'genres': '{buckets:["Action","Adventure","Sci-Fi"]}'}``

        :type highlight: dict
        :param highlight: Dictionary of fields for which highlights should be returned
            The highlight value is a string of JSON options
            ``{'genres': "{format:'text',max_phrases:2,pre_tag:'<b>',post_tag:'</b>'}"}``

        :type partial: bool
        :param partial: Should partial results from a partitioned service be returned if
            one or more index partitions are unreachable.

        :type options: str
        :param options: Options for the query parser specified in *parser*.
            Specified as a string in JSON format.
            ``{fields: ['title^5', 'description']}``

        :rtype: :class:`boto.cloudsearch2.search.SearchResults`
        :return: Returns the results of this search

        The following examples all assume we have indexed a set of documents
        with fields: *author*, *date*, *headline*

        A simple search will look for documents whose default text search
        fields will contain the search word exactly:

        >>> search(q='Tim') # Return documents with the word Tim in them (but not Timothy)

        A simple search with more keywords will return documents whose default
        text search fields contain the search strings together or separately.

        >>> search(q='Tim apple') # Will match "tim" and "apple"

        More complex searches require the boolean search operator.

        Wildcard searches can be used to search for any words that start with
        the search string.

        >>> search(q="'Tim*'") # Return documents with words like Tim or Timothy)

        Search terms can also be combined. Allowed operators are "and", "or",
        "not", "field", "optional", "token", "phrase", or "filter"

        >>> search(q="(and 'Tim' (field author 'John Smith'))", parser='structured')

        Facets allow you to show classification information about the search
        results. For example, you can retrieve the authors who have written
        about Tim with a max of 3

        >>> search(q='Tim', facet={'Author': '{sort:"bucket", size:3}'})
        """

        query = self.build_query(q=q, parser=parser, fq=fq, rank=rank,
                                 return_fields=return_fields,
                                 size=size, start=start, facet=facet,
                                 highlight=highlight, sort=sort,
                                 partial=partial, options=options)
        return self(query)

    def _search_with_auth(self, params):
        """Run a signed search through the domain connection.

        ``q`` is removed from *params* and passed positionally; the
        remaining entries are passed as keyword arguments.
        """
        return self.domain_connection.search(params.pop("q", ""), **params)

    def _search_without_auth(self, params, api_version):
        """Run an unsigned HTTP GET against the search endpoint.

        :rtype: dict
        :return: ``{'body': <response text>, 'status_code': <HTTP status>}``
        """
        url = "http://%s/%s/search" % (self.endpoint, api_version)
        resp = self.session.get(url, params=params)

        return {'body': resp.content.decode('utf-8'), 'status_code': resp.status_code}

    def _decode_response(self, body, status_code, query):
        """Decode a JSON response body, raising on anything that is not JSON.

        :raises SearchServiceException: for a non-JSON body; a 403 status
            is reported as an authentication error.
        """
        try:
            return json.loads(body)
        except ValueError:
            if status_code == 403:
                msg = ''
                import re
                g = re.search('<html><body><h1>403 Forbidden</h1>([^<]+)<', body)
                # Append the explanation from the error page when the page
                # has the expected shape.
                if g is not None:
                    msg = ': %s' % (g.groups()[0].strip())
                raise SearchServiceException('Authentication error from Amazon%s' % msg)
            raise SearchServiceException("Got non-json response from Amazon. %s" % body, query)

    def __call__(self, query):
        """Make a call to CloudSearch

        :type query: :class:`boto.cloudsearch2.search.Query`
        :param query: A group of search criteria

        :rtype: :class:`boto.cloudsearch2.search.SearchResults`
        :return: search results

        :raises SearchServiceException: on authentication failure, a
            non-JSON response, or an error reported in the response
        """
        api_version = '2013-01-01'
        if self.domain and self.domain.layer1:
            api_version = self.domain.layer1.APIVersion

        if self.sign_request:
            params = query.to_domain_connection_params()
            # _search_with_auth pops 'q' from the dict it receives; give it
            # a copy so the complete parameters remain available for error
            # reporting below.
            data = self._search_with_auth(dict(params))
        else:
            params = query.to_params()
            r = self._search_without_auth(params, api_version)
            data = self._decode_response(r['body'], r['status_code'], query)

        if 'messages' in data and 'error' in data:
            for m in data['messages']:
                if m['severity'] == 'fatal':
                    raise SearchServiceException("Error processing search %s "
                                                 "=> %s" % (params, m['message']), query)
        elif 'error' in data:
            raise SearchServiceException("Unknown error processing search %s"
                                         % json.dumps(data), query)

        data['query'] = query
        data['search_service'] = self

        return SearchResults(**data)

    def get_all_paged(self, query, per_page):
        """Get a generator to iterate over all pages of search results

        The query's ``start`` offset is advanced in place as pages are
        fetched. At least one page is always requested and yielded.

        :type query: :class:`boto.cloudsearch2.search.Query`
        :param query: A group of search criteria

        :type per_page: int
        :param per_page: Number of docs in each :class:`boto.cloudsearch2.search.SearchResults` object.

        :rtype: generator
        :return: Generator containing :class:`boto.cloudsearch2.search.SearchResults`
        """
        query.update_size(per_page)
        while True:
            results = self(query)
            yield results
            query.start += query.real_size
            # Stop once the next offset lies past the last hit, so no
            # empty trailing page is requested.
            if query.start >= results.hits:
                break

    def get_all_hits(self, query):
        """Get a generator to iterate over all search results

        Transparently handles the results paging from Cloudsearch
        search results so even if you have many thousands of results
        you can iterate over all results in a reasonably efficient
        manner.

        The query's ``start`` offset is advanced in place as pages are
        fetched.

        :type query: :class:`boto.cloudsearch2.search.Query`
        :param query: A group of search criteria

        :rtype: generator
        :return: All docs matching query
        """
        while True:
            results = self(query)
            for doc in results:
                yield doc
            query.start += query.real_size
            # Stop once the next offset lies past the last hit, so no
            # empty trailing page is requested.
            if query.start >= results.hits:
                break

    def get_num_hits(self, query):
        """Return the total number of hits for query

        The query's page size is set to 1 as a side effect.

        :type query: :class:`boto.cloudsearch2.search.Query`
        :param query: a group of search criteria

        :rtype: int
        :return: Total number of hits for query
        """
        query.update_size(1)
        return self(query).hits