comparison env/lib/python3.7/site-packages/boto/cloudsearch/search.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 # Copyright (c) 2012 Mitch Garnaat http://garnaat.org/
2 # Copyright (c) 2012 Amazon.com, Inc. or its affiliates.
3 # All Rights Reserved
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining a
6 # copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish, dis-
9 # tribute, sublicense, and/or sell copies of the Software, and to permit
10 # persons to whom the Software is furnished to do so, subject to the fol-
11 # lowing conditions:
12 #
13 # The above copyright notice and this permission notice shall be included
14 # in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 # IN THE SOFTWARE.
23 #
24 from math import ceil
25 from boto.compat import json, map, six
26 import requests
27
28
class SearchServiceException(Exception):
    """Error reported while querying the CloudSearch search endpoint.

    Raised by ``SearchConnection.__call__`` for authentication failures,
    non-JSON responses and error payloads returned by the service.
    """
31
32
class CommitMismatchError(Exception):
    """Exception type exported by this module.

    NOTE(review): nothing in this module raises it; presumably it is used
    by other parts of the cloudsearch package -- confirm before removing.
    """
35
36
class SearchResults(object):
    """One page of results returned by a CloudSearch query.

    Instances are built from the decoded JSON response, passed as keyword
    arguments, plus two extra keys added by the caller:

    * ``query`` -- the query object that produced this page; it must expose
      ``real_size``, ``start`` and ``page``.
    * ``search_service`` -- a callable that takes a query and returns a new
      :class:`SearchResults`.

    Iterating over an instance yields the documents of this page, and
    ``len()`` returns how many documents this page holds.
    """

    def __init__(self, **attrs):
        info = attrs['info']
        hits = attrs['hits']

        self.rid = info['rid']
        # self.doc_coverage_pct = info['doc-coverage-pct']
        self.cpu_time_ms = info['cpu-time-ms']
        self.time_ms = info['time-ms']
        self.hits = hits['found']
        self.docs = hits['hit']
        self.start = hits['start']
        self.rank = attrs['rank']
        self.match_expression = attrs['match-expr']
        self.query = attrs['query']
        self.search_service = attrs['search_service']

        # Map each facet name to {value: count}. Facets that come back
        # without a 'constraints' list are skipped.
        self.facets = {}
        for facet, values in attrs.get('facets', {}).items():
            if 'constraints' in values:
                self.facets[facet] = dict(
                    (entry['value'], entry['count'])
                    for entry in values['constraints'])

        # Integer ceiling division. ceil(a / b) silently floors under
        # Python 2, where "/" on two ints is already integer division.
        self.num_pages_needed = -(-self.hits // self.query.real_size)

    def __len__(self):
        """Return the number of documents on this page."""
        return len(self.docs)

    def __iter__(self):
        """Iterate over the documents on this page."""
        return iter(self.docs)

    def next_page(self):
        """Call Cloudsearch to get the next page of search results

        Advances the shared query object (``start`` and ``page``) before
        issuing the request.

        :rtype: :class:`boto.cloudsearch.search.SearchResults`
        :return: the following page of search results

        :raises StopIteration: when the current page is already the last one
        """
        # query.page is 0-based, so the last valid page index is
        # num_pages_needed - 1; only advance while a further page exists.
        if self.query.page + 1 < self.num_pages_needed:
            self.query.start += self.query.real_size
            self.query.page += 1
            return self.search_service(self.query)
        raise StopIteration
77
78
class Query(object):
    """Search criteria for a single CloudSearch request, plus paging state."""

    # Upper bound on the page size sent to the service in one request.
    RESULTS_PER_PAGE = 500

    def __init__(self, q=None, bq=None, rank=None,
                 return_fields=None, size=10,
                 start=0, facet=None, facet_constraints=None,
                 facet_sort=None, facet_top_n=None, t=None):
        # Plain text / boolean query strings.
        self.q = q
        self.bq = bq

        # List-valued options; a missing value becomes an empty list.
        self.rank = rank or []
        self.return_fields = return_fields or []
        self.facet = facet or []

        # Per-field options; a missing value becomes an empty dict.
        self.facet_constraints = facet_constraints or {}
        self.facet_sort = facet_sort or {}
        self.facet_top_n = facet_top_n or {}
        self.t = t or {}

        # Paging state.
        self.start = start
        self.page = 0
        self.update_size(size)

    def update_size(self, new_size):
        """Record the requested size and derive the per-request page size.

        ``real_size`` falls back to ``RESULTS_PER_PAGE`` when the requested
        size is 0 or larger than that limit.
        """
        limit = Query.RESULTS_PER_PAGE
        self.size = new_size
        if self.size > limit or self.size == 0:
            self.real_size = limit
        else:
            self.real_size = self.size

    def to_params(self):
        """Transform search parameters from instance properties to a dictionary

        :rtype: dict
        :return: search parameters
        """
        params = {'start': self.start, 'size': self.real_size}

        # Query strings are passed through unchanged when set.
        for key, text in (('q', self.q), ('bq', self.bq)):
            if text:
                params[key] = text

        # List-valued options are sent as comma-separated strings.
        joined = (
            ('rank', self.rank),
            ('return-fields', self.return_fields),
            ('facet', self.facet),
        )
        for key, items in joined:
            if items:
                params[key] = ','.join(items)

        # Per-field options expand to one parameter per field name.
        per_field = (
            ('facet-%s-constraints', self.facet_constraints),
            ('facet-%s-sort', self.facet_sort),
            ('facet-%s-top-n', self.facet_top_n),
            ('t-%s', self.t),
        )
        for template, mapping in per_field:
            if mapping:
                for field, value in six.iteritems(mapping):
                    params[template % field] = value

        return params
145
146
class SearchConnection(object):
    """Client for the search endpoint of a CloudSearch domain.

    Requests are sent over HTTP to ``/2011-02-01/search`` on the endpoint.
    Calling the instance with a query object runs that query.

    :param domain: object exposing ``search_service_endpoint``; only
        consulted when ``endpoint`` is not given
    :param endpoint: host name of the search endpoint
    """

    def __init__(self, domain=None, endpoint=None):
        self.domain = domain
        self.endpoint = endpoint
        if not endpoint:
            self.endpoint = domain.search_service_endpoint

    def build_query(self, q=None, bq=None, rank=None, return_fields=None,
                    size=10, start=0, facet=None, facet_constraints=None,
                    facet_sort=None, facet_top_n=None, t=None):
        """Build a :class:`Query` from the given criteria without running it.

        Takes the same arguments as :meth:`search`.
        """
        return Query(q=q, bq=bq, rank=rank, return_fields=return_fields,
                     size=size, start=start, facet=facet,
                     facet_constraints=facet_constraints,
                     facet_sort=facet_sort, facet_top_n=facet_top_n, t=t)

    def search(self, q=None, bq=None, rank=None, return_fields=None,
               size=10, start=0, facet=None, facet_constraints=None,
               facet_sort=None, facet_top_n=None, t=None):
        """
        Send a query to CloudSearch

        Each search query should use at least the q or bq argument to specify
        the search parameter. The other options are used to specify the
        criteria of the search.

        :type q: string
        :param q: A string to search the default search fields for.

        :type bq: string
        :param bq: A string to perform a Boolean search. This can be used to
            create advanced searches.

        :type rank: List of strings
        :param rank: A list of fields or rank expressions used to order the
            search results. A field can be reversed by using the - operator.
            ``['-year', 'author']``

        :type return_fields: List of strings
        :param return_fields: A list of fields which should be returned by the
            search. If this field is not specified, only IDs will be returned.
            ``['headline']``

        :type size: int
        :param size: Number of search results to return

        :type start: int
        :param start: Offset of the first search result to return (can be used
            for paging)

        :type facet: list
        :param facet: List of fields for which facets should be returned
            ``['colour', 'size']``

        :type facet_constraints: dict
        :param facet_constraints: Use to limit facets to specific values
            specified as comma-delimited strings in a Dictionary of facets
            ``{'colour': "'blue','white','red'", 'size': "big"}``

        :type facet_sort: dict
        :param facet_sort: Rules used to specify the order in which facet
            values should be returned. Allowed values are *alpha*, *count*,
            *max*, *sum*. Use *alpha* to sort alphabetical, and *count* to sort
            the facet by number of available result.
            ``{'color': 'alpha', 'size': 'count'}``

        :type facet_top_n: dict
        :param facet_top_n: Dictionary of facets and number of facets to
            return.
            ``{'colour': 2}``

        :type t: dict
        :param t: Specify ranges for specific fields
            ``{'year': '2000..2005'}``

        :rtype: :class:`boto.cloudsearch.search.SearchResults`
        :return: Returns the results of this search

        The following examples all assume we have indexed a set of documents
        with fields: *author*, *date*, *headline*

        A simple search will look for documents whose default text search
        fields will contain the search word exactly:

        >>> search(q='Tim') # Return documents with the word Tim in them (but not Timothy)

        A simple search with more keywords will return documents whose default
        text search fields contain the search strings together or separately.

        >>> search(q='Tim apple') # Will match "tim" and "apple"

        More complex searches require the boolean search operator.

        Wildcard searches can be used to search for any words that start with
        the search string.

        >>> search(bq="'Tim*'") # Return documents with words like Tim or Timothy

        Search terms can also be combined. Allowed operators are "and", "or",
        "not", "field", "optional", "token", "phrase", or "filter"

        >>> search(bq="(and 'Tim' (field author 'John Smith'))")

        Facets allow you to show classification information about the search
        results. For example, you can retrieve the authors who have written
        about Tim:

        >>> search(q='Tim', facet=['author'])

        With facet_constraints, facet_top_n and facet_sort more complicated
        constraints can be specified such as returning the top author out of
        John Smith and Mark Smith who have a document with the word Tim in it.

        >>> search(q='Tim',
        ...     facet=['author'],
        ...     facet_constraints={'author': "'John Smith','Mark Smith'"},
        ...     facet_top_n={'author': 1},
        ...     facet_sort={'author': 'count'})
        """

        query = self.build_query(q=q, bq=bq, rank=rank,
                                 return_fields=return_fields,
                                 size=size, start=start, facet=facet,
                                 facet_constraints=facet_constraints,
                                 facet_sort=facet_sort,
                                 facet_top_n=facet_top_n, t=t)
        return self(query)

    def __call__(self, query):
        """Make a call to CloudSearch

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A group of search criteria

        :rtype: :class:`boto.cloudsearch.search.SearchResults`
        :return: search results

        :raises SearchServiceException: on a 403 response, on a body that is
            not valid JSON, or when the decoded response reports an error
        """
        url = "http://%s/2011-02-01/search" % (self.endpoint)
        params = query.to_params()

        r = requests.get(url, params=params)
        body = r.content.decode('utf-8')
        try:
            data = json.loads(body)
        except ValueError:
            if r.status_code == 403:
                import re
                # The 403 page is HTML; pull out the text following the
                # heading, when there is any, to enrich the error message.
                msg = ''
                found = re.search(
                    '<html><body><h1>403 Forbidden</h1>([^<]+)<', body)
                if found is not None:
                    msg = ': %s' % (found.group(1).strip())
                raise SearchServiceException('Authentication error from Amazon%s' % msg)
            raise SearchServiceException("Got non-json response from Amazon. %s" % body, query)

        if 'messages' in data and 'error' in data:
            # Only a message of severity 'fatal' aborts the search here.
            for m in data['messages']:
                if m['severity'] == 'fatal':
                    raise SearchServiceException("Error processing search %s "
                        "=> %s" % (params, m['message']), query)
        elif 'error' in data:
            raise SearchServiceException("Unknown error processing search %s"
                % json.dumps(data), query)

        # SearchResults needs these to be able to fetch follow-up pages.
        data['query'] = query
        data['search_service'] = self

        return SearchResults(**data)

    def _iter_pages(self, query):
        """Yield successive result pages for ``query`` until all hits are seen.

        At least one request is always made. ``query.start`` is advanced by
        ``query.real_size`` after every page that has been yielded.
        """
        page = 0
        while True:
            results = self(query)
            yield results
            query.start += query.real_size
            page += 1
            # Integer ceiling division on the reported hit count. Stopping
            # at "page >= pages" avoids the trailing request for an empty
            # page that a "page <= pages" loop would make.
            pages_needed = -(-results.hits // query.real_size)
            if page >= pages_needed:
                break

    def get_all_paged(self, query, per_page):
        """Get a generator to iterate over all pages of search results

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A group of search criteria

        :type per_page: int
        :param per_page: Number of docs in each :class:`boto.cloudsearch.search.SearchResults` object.

        :rtype: generator
        :return: Generator containing :class:`boto.cloudsearch.search.SearchResults`
        """
        query.update_size(per_page)
        for results in self._iter_pages(query):
            yield results

    def get_all_hits(self, query):
        """Get a generator to iterate over all search results

        Transparently handles the results paging from Cloudsearch
        search results so even if you have many thousands of results
        you can iterate over all results in a reasonably efficient
        manner.

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A group of search criteria

        :rtype: generator
        :return: All docs matching query
        """
        for results in self._iter_pages(query):
            for doc in results:
                yield doc

    def get_num_hits(self, query):
        """Return the total number of hits for query

        The query is run with a size of 1 and its previous size is restored
        afterwards, so the caller's query object can be reused.

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: a group of search criteria

        :rtype: int
        :return: Total number of hits for query
        """
        previous_size = query.size
        query.update_size(1)
        try:
            return self(query).hits
        finally:
            query.update_size(previous_size)
375
376
377