Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/boto/cloudsearch/search.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author | shellac |
---|---|
date | Sat, 02 May 2020 07:14:21 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:26e78fe6e8c4 |
---|---|
1 # Copyright (c) 2012 Mitch Garnaat http://garnaat.org/ | |
2 # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. | |
3 # All Rights Reserved | |
4 # | |
5 # Permission is hereby granted, free of charge, to any person obtaining a | |
6 # copy of this software and associated documentation files (the | |
7 # "Software"), to deal in the Software without restriction, including | |
8 # without limitation the rights to use, copy, modify, merge, publish, dis- | |
9 # tribute, sublicense, and/or sell copies of the Software, and to permit | |
10 # persons to whom the Software is furnished to do so, subject to the fol- | |
11 # lowing conditions: | |
12 # | |
13 # The above copyright notice and this permission notice shall be included | |
14 # in all copies or substantial portions of the Software. | |
15 # | |
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |
17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | |
18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | |
19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
22 # IN THE SOFTWARE. | |
23 # | |
24 from math import ceil | |
25 from boto.compat import json, map, six | |
26 import requests | |
27 | |
28 | |
class SearchServiceException(Exception):
    """Error reported by, or while talking to, the CloudSearch search service.

    Raised by ``SearchConnection.__call__`` when the response body is not
    JSON (including 403 authentication failures) or when the decoded
    response carries an error.
    """
31 | |
32 | |
class CommitMismatchError(Exception):
    """Signal a commit mismatch.

    NOTE(review): nothing in this module raises it; presumably it is used
    by a sibling module -- confirm against the callers.
    """
35 | |
36 | |
class SearchResults(object):
    """One page of results decoded from a CloudSearch search response.

    The constructor receives the decoded JSON response as keyword
    arguments, plus two extra entries: ``query`` (the query object that
    produced this page) and ``search_service`` (a callable that accepts a
    query and returns the next ``SearchResults``).

    Iterating over the object, or calling ``len()`` on it, operates on the
    documents of this page only (``self.docs``).
    """

    def __init__(self, **attrs):
        """Copy the interesting parts of the response onto the instance.

        :raises KeyError: if a required key (``info``, ``hits``, ``rank``,
            ``match-expr``, ``query``, ``search_service``) is missing.
        """
        info = attrs['info']
        hits = attrs['hits']

        self.rid = info['rid']
        # 'doc-coverage-pct' is deliberately not read (disabled upstream):
        # self.doc_coverage_pct = attrs['info']['doc-coverage-pct']
        self.cpu_time_ms = info['cpu-time-ms']
        self.time_ms = info['time-ms']
        self.hits = hits['found']
        self.docs = hits['hit']
        self.start = hits['start']
        self.rank = attrs['rank']
        self.match_expression = attrs['match-expr']
        self.query = attrs['query']
        self.search_service = attrs['search_service']

        # Map each facet name to {value: count}; facets without a
        # 'constraints' list are skipped.
        self.facets = {}
        if 'facets' in attrs:
            for (facet, values) in attrs['facets'].items():
                if 'constraints' in values:
                    self.facets[facet] = dict(
                        (c['value'], c['count'])
                        for c in values['constraints'])

        # float() forces true division on Python 2 as well, so a partial
        # last page is counted; int() keeps the type consistent across
        # Python versions (math.ceil returns a float on Python 2).
        self.num_pages_needed = int(
            ceil(float(self.hits) / self.query.real_size))

    def __len__(self):
        """Return the number of documents on this page."""
        return len(self.docs)

    def __iter__(self):
        """Iterate over the documents on this page."""
        return iter(self.docs)

    def next_page(self):
        """Call Cloudsearch to get the next page of search results

        Advances ``query.start`` and ``query.page`` on the shared query
        object before issuing the request.

        :rtype: :class:`boto.cloudsearch.search.SearchResults`
        :return: the following page of search results
        :raises StopIteration: when the current page is the last one
        """
        # Pages are numbered from 0, so the last valid page index is
        # num_pages_needed - 1.
        if self.query.page + 1 < self.num_pages_needed:
            self.query.start += self.query.real_size
            self.query.page += 1
            return self.search_service(self.query)
        else:
            raise StopIteration
77 | |
78 | |
class Query(object):
    """Search criteria and paging state for a single CloudSearch request.

    ``size`` is the page size the caller asked for; ``real_size`` is the
    page size actually sent to the service (see :meth:`update_size`).
    ``page`` starts at 0.
    """

    # Upper bound on the page size sent in one request.
    RESULTS_PER_PAGE = 500

    def __init__(self, q=None, bq=None, rank=None,
                 return_fields=None, size=10,
                 start=0, facet=None, facet_constraints=None,
                 facet_sort=None, facet_top_n=None, t=None):
        """Store the criteria, replacing falsy collections with empty ones."""
        # Paging state.
        self.start = start
        self.page = 0

        # Free-text and boolean query strings.
        self.q = q
        self.bq = bq

        # List-valued options.
        self.rank = rank or []
        self.return_fields = return_fields or []
        self.facet = facet or []

        # Dict-valued options, keyed by field name.
        self.facet_constraints = facet_constraints or {}
        self.facet_sort = facet_sort or {}
        self.facet_top_n = facet_top_n or {}
        self.t = t or {}

        self.update_size(size)

    def update_size(self, new_size):
        """Set the requested page size and derive the effective one.

        A size of 0, or anything above ``RESULTS_PER_PAGE``, is sent to the
        service as ``RESULTS_PER_PAGE``.
        """
        self.size = new_size
        limit = Query.RESULTS_PER_PAGE
        if self.size > limit or self.size == 0:
            self.real_size = limit
        else:
            self.real_size = self.size

    def to_params(self):
        """Build the request parameters for this query.

        Only options that are set (truthy) are included, apart from
        ``start`` and ``size`` which are always present.

        :rtype: dict
        :return: search parameters
        """
        params = {'start': self.start, 'size': self.real_size}

        # Plain string options are passed through unchanged.
        for name, text in (('q', self.q), ('bq', self.bq)):
            if text:
                params[name] = text

        # List options are sent as comma-separated strings.
        for name, entries in (('rank', self.rank),
                              ('return-fields', self.return_fields),
                              ('facet', self.facet)):
            if entries:
                params[name] = ','.join(entries)

        # Dict options expand to one parameter per field name.
        for template, per_field in (
                ('facet-%s-constraints', self.facet_constraints),
                ('facet-%s-sort', self.facet_sort),
                ('facet-%s-top-n', self.facet_top_n),
                ('t-%s', self.t)):
            if per_field:
                for field, value in per_field.items():
                    params[template % field] = value

        return params
145 | |
146 | |
class SearchConnection(object):
    """Client for the search endpoint of a CloudSearch domain.

    Instances are callable: ``connection(query)`` sends the query and
    returns a :class:`boto.cloudsearch.search.SearchResults`.
    """

    def __init__(self, domain=None, endpoint=None):
        """Remember the domain and resolve the search endpoint.

        :param domain: domain object; its ``search_service_endpoint`` is
            used when ``endpoint`` is not given.
        :param endpoint: host name of the search service.
        """
        self.domain = domain
        self.endpoint = endpoint
        if not endpoint:
            self.endpoint = domain.search_service_endpoint

    def build_query(self, q=None, bq=None, rank=None, return_fields=None,
                    size=10, start=0, facet=None, facet_constraints=None,
                    facet_sort=None, facet_top_n=None, t=None):
        """Build a :class:`boto.cloudsearch.search.Query` without sending it.

        The parameters are the same as for :meth:`search`.
        """
        return Query(q=q, bq=bq, rank=rank, return_fields=return_fields,
                     size=size, start=start, facet=facet,
                     facet_constraints=facet_constraints,
                     facet_sort=facet_sort, facet_top_n=facet_top_n, t=t)

    def search(self, q=None, bq=None, rank=None, return_fields=None,
               size=10, start=0, facet=None, facet_constraints=None,
               facet_sort=None, facet_top_n=None, t=None):
        """
        Send a query to CloudSearch

        Each search query should use at least the q or bq argument to specify
        the search parameter. The other options are used to specify the
        criteria of the search.

        :type q: string
        :param q: A string to search the default search fields for.

        :type bq: string
        :param bq: A string to perform a Boolean search. This can be used to
            create advanced searches.

        :type rank: List of strings
        :param rank: A list of fields or rank expressions used to order the
            search results. A field can be reversed by using the - operator.
            ``['-year', 'author']``

        :type return_fields: List of strings
        :param return_fields: A list of fields which should be returned by the
            search. If this field is not specified, only IDs will be returned.
            ``['headline']``

        :type size: int
        :param size: Number of search results to specify

        :type start: int
        :param start: Offset of the first search result to return (can be used
            for paging)

        :type facet: list
        :param facet: List of fields for which facets should be returned
            ``['colour', 'size']``

        :type facet_constraints: dict
        :param facet_constraints: Use to limit facets to specific values
            specified as comma-delimited strings in a Dictionary of facets
            ``{'colour': "'blue','white','red'", 'size': "big"}``

        :type facet_sort: dict
        :param facet_sort: Rules used to specify the order in which facet
            values should be returned. Allowed values are *alpha*, *count*,
            *max*, *sum*. Use *alpha* to sort alphabetical, and *count* to sort
            the facet by number of available result.
            ``{'color': 'alpha', 'size': 'count'}``

        :type facet_top_n: dict
        :param facet_top_n: Dictionary of facets and number of facets to
            return.
            ``{'colour': 2}``

        :type t: dict
        :param t: Specify ranges for specific fields
            ``{'year': '2000..2005'}``

        :rtype: :class:`boto.cloudsearch.search.SearchResults`
        :return: Returns the results of this search

        The following examples all assume we have indexed a set of documents
        with fields: *author*, *date*, *headline*

        A simple search will look for documents whose default text search
        fields will contain the search word exactly:

        >>> search(q='Tim') # Return documents with the word Tim in them (but not Timothy)

        A simple search with more keywords will return documents whose default
        text search fields contain the search strings together or separately.

        >>> search(q='Tim apple') # Will match "tim" and "apple"

        More complex searches require the boolean search operator.

        Wildcard searches can be used to search for any words that start with
        the search string.

        >>> search(bq="'Tim*'") # Return documents with words like Tim or Timothy

        Search terms can also be combined. Allowed operators are "and", "or",
        "not", "field", "optional", "token", "phrase", or "filter"

        >>> search(bq="(and 'Tim' (field author 'John Smith'))")

        Facets allow you to show classification information about the search
        results. For example, you can retrieve the authors who have written
        about Tim:

        >>> search(q='Tim', facet=['Author'])

        With facet_constraints, facet_top_n and facet_sort more complicated
        constraints can be specified such as returning the top author out of
        John Smith and Mark Smith who have a document with the word Tim in it.

        >>> search(q='Tim',
        ...     facet=['author'],
        ...     facet_constraints={'author': "'John Smith','Mark Smith'"},
        ...     facet_top_n={'author': 1},
        ...     facet_sort={'author': 'count'})
        """

        query = self.build_query(q=q, bq=bq, rank=rank,
                                 return_fields=return_fields,
                                 size=size, start=start, facet=facet,
                                 facet_constraints=facet_constraints,
                                 facet_sort=facet_sort,
                                 facet_top_n=facet_top_n, t=t)
        return self(query)

    def __call__(self, query):
        """Make a call to CloudSearch

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A group of search criteria

        :rtype: :class:`boto.cloudsearch.search.SearchResults`
        :return: search results

        :raises SearchServiceException: if the response body is not JSON
            (a 403 is reported as an authentication error), or if the
            decoded response carries an error.
        """
        url = "http://%s/2011-02-01/search" % (self.endpoint)
        params = query.to_params()

        r = requests.get(url, params=params)
        body = r.content.decode('utf-8')
        try:
            data = json.loads(body)
        except ValueError:
            if r.status_code == 403:
                # 403 responses arrive as an HTML page; pull the
                # human-readable reason out of it when one is present.
                import re
                msg = ''
                g = re.search('<html><body><h1>403 Forbidden</h1>([^<]+)<', body)
                if g is not None:
                    msg = ': %s' % (g.groups()[0].strip())
                raise SearchServiceException('Authentication error from Amazon%s' % msg)
            raise SearchServiceException("Got non-json response from Amazon. %s" % body, query)

        if 'messages' in data and 'error' in data:
            # NOTE(review): if no message is 'fatal' the response falls
            # through and is handed to SearchResults below -- confirm that
            # this is intended for error responses.
            for m in data['messages']:
                if m['severity'] == 'fatal':
                    raise SearchServiceException("Error processing search %s "
                        "=> %s" % (params, m['message']), query)
        elif 'error' in data:
            raise SearchServiceException("Unknown error processing search %s"
                % json.dumps(data), query)

        # SearchResults needs these to be able to fetch following pages.
        data['query'] = query
        data['search_service'] = self

        return SearchResults(**data)

    def get_all_paged(self, query, per_page):
        """Get a generator to iterate over all pages of search results

        The query's page size is set to ``per_page`` and ``query.start`` is
        advanced by one page after each page is consumed. Iteration stops
        once the advanced offset reaches the hit count reported by the
        most recent response; at least one request is always made.

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A group of search criteria

        :type per_page: int
        :param per_page: Number of docs in each :class:`boto.cloudsearch.search.SearchResults` object.

        :rtype: generator
        :return: Generator containing :class:`boto.cloudsearch.search.SearchResults`
        """
        query.update_size(per_page)
        while True:
            results = self(query)
            yield results
            query.start += query.real_size
            # Stop at the last page instead of requesting an empty one.
            if query.start >= results.hits:
                break

    def get_all_hits(self, query):
        """Get a generator to iterate over all search results

        Transparently handles the results paging from Cloudsearch
        search results so even if you have many thousands of results
        you can iterate over all results in a reasonably efficient
        manner.

        ``query.start`` is advanced by one page after each page is
        consumed; iteration stops once the advanced offset reaches the hit
        count reported by the most recent response.

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A group of search criteria

        :rtype: generator
        :return: All docs matching query
        """
        while True:
            results = self(query)
            for doc in results:
                yield doc
            query.start += query.real_size
            # Stop at the last page instead of requesting an empty one.
            if query.start >= results.hits:
                break

    def get_num_hits(self, query):
        """Return the total number of hits for query

        Note that this sets the query's page size to 1 before sending it.

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: a group of search criteria

        :rtype: int
        :return: Total number of hits for query
        """
        query.update_size(1)
        return self(query).hits
375 | |
376 | |
377 |