Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/boto/cloudsearch/search.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 # Copyright (c) 2012 Mitch Garnaat http://garnaat.org/ | |
| 2 # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. | |
| 3 # All Rights Reserved | |
| 4 # | |
| 5 # Permission is hereby granted, free of charge, to any person obtaining a | |
| 6 # copy of this software and associated documentation files (the | |
| 7 # "Software"), to deal in the Software without restriction, including | |
| 8 # without limitation the rights to use, copy, modify, merge, publish, dis- | |
| 9 # tribute, sublicense, and/or sell copies of the Software, and to permit | |
| 10 # persons to whom the Software is furnished to do so, subject to the fol- | |
| 11 # lowing conditions: | |
| 12 # | |
| 13 # The above copyright notice and this permission notice shall be included | |
| 14 # in all copies or substantial portions of the Software. | |
| 15 # | |
| 16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |
| 17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | |
| 18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | |
| 19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
| 20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
| 22 # IN THE SOFTWARE. | |
| 23 # | |
| 24 from math import ceil | |
| 25 from boto.compat import json, map, six | |
| 26 import requests | |
| 27 | |
| 28 | |
class SearchServiceException(Exception):
    """Error reported while querying the CloudSearch search endpoint.

    Raised by ``SearchConnection.__call__`` when the response is not JSON
    (including 403 authentication failures) or when the JSON payload
    describes an error.
    """
| 31 | |
| 32 | |
class CommitMismatchError(Exception):
    """Exception type exported by this module.

    It is not raised by any code visible in this file.
    """
| 35 | |
| 36 | |
class SearchResults(object):
    """One page of results decoded from a CloudSearch search response.

    The constructor takes the decoded JSON response as keyword arguments,
    plus two extra entries: ``query`` (the query object that produced the
    page) and ``search_service`` (a callable that accepts the query and
    returns the next :class:`SearchResults`).

    Iterating over the object yields the documents of this page and
    ``len()`` gives the number of documents on this page.
    """

    def __init__(self, **attrs):
        info = attrs['info']
        hits = attrs['hits']

        self.rid = info['rid']
        # self.doc_coverage_pct = attrs['info']['doc-coverage-pct']
        self.cpu_time_ms = info['cpu-time-ms']
        self.time_ms = info['time-ms']
        self.hits = hits['found']
        self.docs = hits['hit']
        self.start = hits['start']
        self.rank = attrs['rank']
        self.match_expression = attrs['match-expr']
        self.query = attrs['query']
        self.search_service = attrs['search_service']

        # Map each facet name to {value: count}; facets without a
        # 'constraints' entry are skipped.
        self.facets = {}
        if 'facets' in attrs:
            for (facet, values) in attrs['facets'].items():
                if 'constraints' in values:
                    self.facets[facet] = dict(
                        (constraint['value'], constraint['count'])
                        for constraint in values['constraints'])

        # Force true division: under Python 2 ``hits / real_size`` would
        # floor before ceil() runs and drop a partially filled last page.
        self.num_pages_needed = int(
            ceil(float(self.hits) / self.query.real_size))

    def __len__(self):
        return len(self.docs)

    def __iter__(self):
        return iter(self.docs)

    def next_page(self):
        """Call Cloudsearch to get the next page of search results

        Advances ``start`` and ``page`` on the query object in place, then
        re-runs it through the search service.

        :rtype: :class:`boto.cloudsearch.search.SearchResults`
        :return: the following page of search results
        :raises StopIteration: if the current page is already the last one
        """
        # ``query.page`` is the zero-based index of the current page, so a
        # following page exists only while page + 1 < num_pages_needed.
        if self.query.page + 1 < self.num_pages_needed:
            self.query.start += self.query.real_size
            self.query.page += 1
            return self.search_service(self.query)
        else:
            raise StopIteration
| 77 | |
| 78 | |
class Query(object):
    """Container for the parameters of a single CloudSearch search request."""

    # Largest page size requested in one call; also used when ``size`` is 0.
    RESULTS_PER_PAGE = 500

    def __init__(self, q=None, bq=None, rank=None,
                 return_fields=None, size=10,
                 start=0, facet=None, facet_constraints=None,
                 facet_sort=None, facet_top_n=None, t=None):
        # Falsy list/dict arguments are replaced by fresh empty containers.
        self.q = q
        self.bq = bq
        self.rank = rank or []
        self.return_fields = return_fields or []
        self.start = start
        self.facet = facet or []
        self.facet_constraints = facet_constraints or {}
        self.facet_sort = facet_sort or {}
        self.facet_top_n = facet_top_n or {}
        self.t = t or {}
        self.page = 0
        self.update_size(size)

    def update_size(self, new_size):
        """Record the requested size and derive the per-request page size.

        ``real_size`` is ``RESULTS_PER_PAGE`` when the requested size exceeds
        that limit or is 0, and the requested size otherwise.
        """
        self.size = new_size
        limit = Query.RESULTS_PER_PAGE
        if self.size > limit or self.size == 0:
            self.real_size = limit
        else:
            self.real_size = self.size

    def to_params(self):
        """Transform search parameters from instance properties to a dictionary

        :rtype: dict
        :return: search parameters
        """
        params = {'start': self.start, 'size': self.real_size}

        # Plain string parameters are passed through when set.
        for key, text in (('q', self.q), ('bq', self.bq)):
            if text:
                params[key] = text

        # List parameters are sent as comma-separated strings.
        joined = (
            ('rank', self.rank),
            ('return-fields', self.return_fields),
            ('facet', self.facet),
        )
        for key, items in joined:
            if items:
                params[key] = ','.join(items)

        # Dict parameters expand to one request parameter per entry.
        expanded = (
            ('facet-%s-constraints', self.facet_constraints),
            ('facet-%s-sort', self.facet_sort),
            ('facet-%s-top-n', self.facet_top_n),
            ('t-%s', self.t),
        )
        for template, mapping in expanded:
            if mapping:
                for field, value in mapping.items():
                    params[template % field] = value
        return params
| 145 | |
| 146 | |
class SearchConnection(object):
    """Client for the search endpoint of a CloudSearch domain.

    Instances are callable: calling one with a query object sends the
    request and returns the decoded results.
    """

    def __init__(self, domain=None, endpoint=None):
        """Remember the domain and resolve the endpoint host to query.

        :param domain: Object exposing ``search_service_endpoint``; only
            consulted when ``endpoint`` is falsy.

        :type endpoint: string
        :param endpoint: Host name of the search endpoint. When falsy,
            ``domain.search_service_endpoint`` is used instead.
        """
        self.domain = domain
        self.endpoint = endpoint
        if not endpoint:
            self.endpoint = domain.search_service_endpoint

    def build_query(self, q=None, bq=None, rank=None, return_fields=None,
                    size=10, start=0, facet=None, facet_constraints=None,
                    facet_sort=None, facet_top_n=None, t=None):
        """Build a :class:`boto.cloudsearch.search.Query` without sending it.

        The arguments are the same as for :meth:`search`.

        :rtype: :class:`boto.cloudsearch.search.Query`
        :return: the query object
        """
        return Query(q=q, bq=bq, rank=rank, return_fields=return_fields,
                     size=size, start=start, facet=facet,
                     facet_constraints=facet_constraints,
                     facet_sort=facet_sort, facet_top_n=facet_top_n, t=t)

    def search(self, q=None, bq=None, rank=None, return_fields=None,
               size=10, start=0, facet=None, facet_constraints=None,
               facet_sort=None, facet_top_n=None, t=None):
        """
        Send a query to CloudSearch

        Each search query should use at least the q or bq argument to specify
        the search parameter. The other options are used to specify the
        criteria of the search.

        :type q: string
        :param q: A string to search the default search fields for.

        :type bq: string
        :param bq: A string to perform a Boolean search. This can be used to
            create advanced searches.

        :type rank: List of strings
        :param rank: A list of fields or rank expressions used to order the
            search results. A field can be reversed by using the - operator.
            ``['-year', 'author']``

        :type return_fields: List of strings
        :param return_fields: A list of fields which should be returned by the
            search. If this field is not specified, only IDs will be returned.
            ``['headline']``

        :type size: int
        :param size: Number of search results to specify

        :type start: int
        :param start: Offset of the first search result to return (can be used
            for paging)

        :type facet: list
        :param facet: List of fields for which facets should be returned
            ``['colour', 'size']``

        :type facet_constraints: dict
        :param facet_constraints: Use to limit facets to specific values
            specified as comma-delimited strings in a Dictionary of facets
            ``{'colour': "'blue','white','red'", 'size': "big"}``

        :type facet_sort: dict
        :param facet_sort: Rules used to specify the order in which facet
            values should be returned. Allowed values are *alpha*, *count*,
            *max*, *sum*. Use *alpha* to sort alphabetical, and *count* to sort
            the facet by number of available result.
            ``{'color': 'alpha', 'size': 'count'}``

        :type facet_top_n: dict
        :param facet_top_n: Dictionary of facets and number of facets to
            return.
            ``{'colour': 2}``

        :type t: dict
        :param t: Specify ranges for specific fields
            ``{'year': '2000..2005'}``

        :rtype: :class:`boto.cloudsearch.search.SearchResults`
        :return: Returns the results of this search

        The following examples all assume we have indexed a set of documents
        with fields: *author*, *date*, *headline*

        A simple search will look for documents whose default text search
        fields will contain the search word exactly:

        >>> search(q='Tim') # Return documents with the word Tim in them (but not Timothy)

        A simple search with more keywords will return documents whose default
        text search fields contain the search strings together or separately.

        >>> search(q='Tim apple') # Will match "tim" and "apple"

        More complex searches require the boolean search operator.

        Wildcard searches can be used to search for any words that start with
        the search string.

        >>> search(bq="'Tim*'") # Return documents with words like Tim or Timothy)

        Search terms can also be combined. Allowed operators are "and", "or",
        "not", "field", "optional", "token", "phrase", or "filter"

        >>> search(bq="(and 'Tim' (field author 'John Smith'))")

        Facets allow you to show classification information about the search
        results. For example, you can retrieve the authors who have written
        about Tim:

        >>> search(q='Tim', facet=['Author'])

        With facet_constraints, facet_top_n and facet_sort more complicated
        constraints can be specified such as returning the top author out of
        John Smith and Mark Smith who have a document with the word Tim in it.

        >>> search(q='Tim',
        ...     facet=['author'],
        ...     facet_constraints={'author': "'John Smith','Mark Smith'"},
        ...     facet_top_n={'author': 1},
        ...     facet_sort={'author': 'count'})
        """

        query = self.build_query(q=q, bq=bq, rank=rank,
                                 return_fields=return_fields,
                                 size=size, start=start, facet=facet,
                                 facet_constraints=facet_constraints,
                                 facet_sort=facet_sort,
                                 facet_top_n=facet_top_n, t=t)
        return self(query)

    def __call__(self, query):
        """Make a call to CloudSearch

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A group of search criteria

        :rtype: :class:`boto.cloudsearch.search.SearchResults`
        :return: search results

        :raises SearchServiceException: if the response body is not JSON
            (with a dedicated message for HTTP 403), or if the JSON payload
            carries an ``error`` entry.
        """
        url = "http://%s/2011-02-01/search" % (self.endpoint)
        params = query.to_params()

        r = requests.get(url, params=params)
        body = r.content.decode('utf-8')
        try:
            data = json.loads(body)
        except ValueError:
            if r.status_code == 403:
                import re
                # Pull the explanation text out of the HTML error page when
                # it has the expected shape; otherwise send no detail.
                msg = ''
                found = re.search(
                    '<html><body><h1>403 Forbidden</h1>([^<]+)<', body)
                if found is not None:
                    msg = ': %s' % (found.group(1).strip())
                raise SearchServiceException(
                    'Authentication error from Amazon%s' % msg)
            raise SearchServiceException(
                "Got non-json response from Amazon. %s" % body, query)

        if 'messages' in data and 'error' in data:
            # Only a message with 'fatal' severity aborts the search.
            for m in data['messages']:
                if m['severity'] == 'fatal':
                    raise SearchServiceException(
                        "Error processing search %s "
                        "=> %s" % (params, m['message']), query)
        elif 'error' in data:
            raise SearchServiceException(
                "Unknown error processing search %s"
                % json.dumps(data), query)

        # Attached so the results object can request further pages itself.
        data['query'] = query
        data['search_service'] = self

        return SearchResults(**data)

    def _iter_pages(self, query):
        """Yield each page of results for query, advancing its start offset.

        At least one request is always made. ``query.start`` is advanced by
        ``query.real_size`` after every page that has been consumed, so the
        caller's query object is modified in place.
        """
        page = 0
        while True:
            results = self(query)
            yield results
            query.start += query.real_size
            page += 1
            # ``page`` now counts the pages fetched; stop once it reaches the
            # total so no request is made past the last page.
            if page >= results.num_pages_needed:
                break

    def get_all_paged(self, query, per_page):
        """Get a generator to iterate over all pages of search results

        The query object is modified in place: its size is set to
        ``per_page`` and its start offset advances as pages are consumed.

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A group of search criteria

        :type per_page: int
        :param per_page: Number of docs in each :class:`boto.cloudsearch.search.SearchResults` object.

        :rtype: generator
        :return: Generator containing :class:`boto.cloudsearch.search.SearchResults`
        """
        query.update_size(per_page)
        for results in self._iter_pages(query):
            yield results

    def get_all_hits(self, query):
        """Get a generator to iterate over all search results

        Transparently handles the results paging from Cloudsearch
        search results so even if you have many thousands of results
        you can iterate over all results in a reasonably efficient
        manner.

        The query's start offset advances in place as pages are consumed.

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A group of search criteria

        :rtype: generator
        :return: All docs matching query
        """
        for results in self._iter_pages(query):
            for doc in results:
                yield doc

    def get_num_hits(self, query):
        """Return the total number of hits for query

        The query's size is set to 1 in place before the request is sent.

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: a group of search criteria

        :rtype: int
        :return: Total number of hits for query
        """
        query.update_size(1)
        return self(query).hits
| 375 | |
| 376 | |
| 377 |
