planemo/lib/python3.7/site-packages/boto/cloudsearch2/document.py @ 0:d30785e31577 (draft)
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"

author: guerler
date: Fri, 31 Jul 2020 00:18:57 -0400
parents: (none)
children: (none)
comparison: -1:000000000000 → 0:d30785e31577
# Copyright (c) 2012 Mitch Garnaat http://garnaat.org/
# Copyright (c) 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#

import boto.exception
from boto.compat import json
import requests
import boto
from boto.cloudsearchdomain.layer1 import CloudSearchDomainConnection


class SearchServiceException(Exception):
    pass


class CommitMismatchError(Exception):
    # Let's do some extra work and let the user handle errors on his/her own.

    errors = None


class EncodingError(Exception):
    """
    Content sent for Cloud Search indexing was incorrectly encoded.

    This usually happens when a document is marked as unicode but non-unicode
    characters are present.
    """
    pass


class ContentTooLongError(Exception):
    """
    Content sent for Cloud Search indexing was too long.

    This will usually happen when documents queued for indexing add up to more
    than the limit allowed per upload batch (5MB).
    """
    pass


class DocumentServiceConnection(object):
    """
    A CloudSearch document service.

    The DocumentServiceConnection is used to add, remove and update documents
    in CloudSearch. Commands are uploaded to CloudSearch in SDF (Search
    Document Format).

    To generate an appropriate SDF, use :func:`add` to add or update documents,
    as well as :func:`delete` to remove documents.

    Once the set of documents is ready to be indexed, use :func:`commit` to
    send the commands to CloudSearch.

    If there are a lot of documents to index, it may be preferable to split the
    generation of SDF data from the actual upload to CloudSearch. Retrieve the
    current SDF with :func:`get_sdf`. If this file is then uploaded to S3, it
    can be retrieved later and uploaded to CloudSearch using
    :func:`add_sdf_from_s3`.

    The SDF is not cleared after a :func:`commit`. If you wish to continue
    using the DocumentServiceConnection for another batch upload of commands,
    you will need to call :func:`clear_sdf` first to stop the previous batch of
    commands from being uploaded again.
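
    A minimal usage sketch (the endpoint below is a placeholder; a real
    :class:`boto.cloudsearch2.domain.Domain` can be passed as ``domain``
    instead)::

        conn = DocumentServiceConnection(
            endpoint='doc-demo.us-east-1.cloudsearch.amazonaws.com')
        conn.add('doc-1', {'title': 'Example Title', 'year': 2014})
        conn.delete('doc-2')
        response = conn.commit()   # returns a CommitResponse
        conn.clear_sdf()           # reset before building the next batch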

    """

    def __init__(self, domain=None, endpoint=None):
        self.domain = domain
        self.endpoint = endpoint
        if not self.endpoint:
            self.endpoint = domain.doc_service_endpoint
        self.documents_batch = []
        self._sdf = None

        # Copy proxy settings from connection and check if request should be signed
        self.proxy = {}
        self.sign_request = False
        if self.domain and self.domain.layer1:
            if self.domain.layer1.use_proxy:
                self.proxy = {'http': self.domain.layer1.get_proxy_url_with_auth()}

            self.sign_request = getattr(self.domain.layer1, 'sign_request', False)

            if self.sign_request:
                # Create a domain connection to send signed requests
                layer1 = self.domain.layer1
                self.domain_connection = CloudSearchDomainConnection(
                    host=self.endpoint,
                    aws_access_key_id=layer1.aws_access_key_id,
                    aws_secret_access_key=layer1.aws_secret_access_key,
                    region=layer1.region,
                    provider=layer1.provider
                )

    def add(self, _id, fields):
        """
        Add a document to be processed by the DocumentService

        The document will not actually be added until :func:`commit` is called

        :type _id: string
        :param _id: A unique ID used to refer to this document.

        :type fields: dict
        :param fields: A dictionary of key-value pairs to be uploaded.
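
        A hypothetical example, with illustrative ID and field names::

            conn.add('doc-1', {'title': 'Example Title',
                               'tags': ['sample', 'demo']})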
        """

        d = {'type': 'add', 'id': _id, 'fields': fields}
        self.documents_batch.append(d)

    def delete(self, _id):
        """
        Schedule a document to be removed from the CloudSearch service

        The document will not actually be scheduled for removal until
        :func:`commit` is called

        :type _id: string
        :param _id: The unique ID of this document.
        """

        d = {'type': 'delete', 'id': _id}
        self.documents_batch.append(d)

    def get_sdf(self):
        """
        Generate the working set of documents in Search Data Format (SDF)

        :rtype: string
        :returns: JSON-formatted string of the documents in SDF
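
        For example, after one :func:`add` and one :func:`delete` the SDF is
        roughly of the form (IDs and fields are illustrative)::

            [{"type": "add", "id": "doc-1", "fields": {"title": "Example Title"}},
             {"type": "delete", "id": "doc-2"}]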
        """

        return self._sdf if self._sdf else json.dumps(self.documents_batch)

    def clear_sdf(self):
        """
        Clear the working documents from this DocumentServiceConnection

        This should be used after :func:`commit` if the connection will be
        reused for another set of documents.
        """

        self._sdf = None
        self.documents_batch = []

    def add_sdf_from_s3(self, key_obj):
        """
        Load an SDF from S3

        Using this method will result in documents added through
        :func:`add` and :func:`delete` being ignored.

        :type key_obj: :class:`boto.s3.key.Key`
        :param key_obj: An S3 key which contains an SDF
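
        A sketch of loading a batch from S3 (bucket and key names are
        placeholders)::

            from boto.s3.connection import S3Connection
            key = S3Connection().get_bucket('my-bucket').get_key('batch.sdf')
            conn.add_sdf_from_s3(key)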
        """
        #@todo:: (lucas) would be nice if this could just take an s3://uri...

        self._sdf = key_obj.get_contents_as_string()

    def _commit_with_auth(self, sdf, api_version):
        return self.domain_connection.upload_documents(sdf, 'application/json')

    def _commit_without_auth(self, sdf, api_version):
        url = "http://%s/%s/documents/batch" % (self.endpoint, api_version)

        # Keep-alive is automatic in a post-1.0 requests world.
        session = requests.Session()
        session.proxies = self.proxy
        adapter = requests.adapters.HTTPAdapter(
            pool_connections=20,
            pool_maxsize=50,
            max_retries=5
        )
        session.mount('http://', adapter)
        session.mount('https://', adapter)

        resp = session.post(url, data=sdf, headers={'Content-Type': 'application/json'})
        return resp

    def commit(self):
        """
        Actually send an SDF to CloudSearch for processing

        If an SDF file has been explicitly loaded it will be used. Otherwise,
        documents added through :func:`add` and :func:`delete` will be used.

        :rtype: :class:`CommitResponse`
        :returns: A summary of documents added and deleted
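
        A typical call inspects the returned summary, for example::

            response = conn.commit()
            if response.errors:
                boto.log.error(response.errors)
            print('added %s, deleted %s' % (response.adds, response.deletes))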
        """

        sdf = self.get_sdf()

        if ': null' in sdf:
            boto.log.error('null value in sdf detected. This will probably '
                           'raise a 500 error.')
            index = sdf.index(': null')
            boto.log.error(sdf[index - 100:index + 100])

        api_version = '2013-01-01'
        if self.domain and self.domain.layer1:
            api_version = self.domain.layer1.APIVersion

        if self.sign_request:
            r = self._commit_with_auth(sdf, api_version)
        else:
            r = self._commit_without_auth(sdf, api_version)

        return CommitResponse(r, self, sdf, signed_request=self.sign_request)


class CommitResponse(object):
    """Wrapper for response to Cloudsearch document batch commit.

    :type response: :class:`requests.models.Response`
    :param response: Response from Cloudsearch /documents/batch API

    :type doc_service: :class:`boto.cloudsearch2.document.DocumentServiceConnection`
    :param doc_service: Object containing the documents posted and methods to
        retry

    :raises: :class:`boto.exception.BotoServerError`
    :raises: :class:`boto.cloudsearch2.document.SearchServiceException`
    :raises: :class:`boto.cloudsearch2.document.EncodingError`
    :raises: :class:`boto.cloudsearch2.document.ContentTooLongError`
    """
    def __init__(self, response, doc_service, sdf, signed_request=False):
        self.response = response
        self.doc_service = doc_service
        self.sdf = sdf
        self.signed_request = signed_request

        if self.signed_request:
            self.content = response
        else:
            _body = response.content.decode('utf-8')

            try:
                self.content = json.loads(_body)
            except:
                boto.log.error('Error indexing documents.\nResponse Content:\n{0}'
                               '\n\nSDF:\n{1}'.format(_body, self.sdf))
                raise boto.exception.BotoServerError(self.response.status_code, '',
                                                     body=_body)

        self.status = self.content['status']
        if self.status == 'error':
            self.errors = [e.get('message') for e in self.content.get('errors',
                                                                      [])]
            for e in self.errors:
                if "Illegal Unicode character" in e:
                    raise EncodingError("Illegal Unicode character in document")
                elif e == "The Content-Length is too long":
                    raise ContentTooLongError("Content was too long")
        else:
            self.errors = []

        self.adds = self.content['adds']
        self.deletes = self.content['deletes']
        self._check_num_ops('add', self.adds)
        self._check_num_ops('delete', self.deletes)

    def _check_num_ops(self, type_, response_num):
        """Raise exception if number of ops in response doesn't match commit

        :type type_: str
        :param type_: Type of commit operation: 'add' or 'delete'

        :type response_num: int
        :param response_num: Number of adds or deletes in the response.

        :raises: :class:`boto.cloudsearch2.document.CommitMismatchError`
        """
        commit_num = len([d for d in self.doc_service.documents_batch
                          if d['type'] == type_])

        if response_num != commit_num:
            if self.signed_request:
                boto.log.debug(self.response)
            else:
                boto.log.debug(self.response.content)
            # There will always be a commit mismatch error if there are any
            # errors on CloudSearch. self.errors gets lost when this
            # CommitMismatchError is raised, so whoever is using boto has no
            # idea why their commit failed and cannot report the cause by
            # parsing Amazon's error messages. Attach self.errors to the
            # exception since we already spent the time and effort collecting
            # them out of the response.
            exc = CommitMismatchError(
                'Incorrect number of {0}s returned. Commit: {1} Response: {2}'
                .format(type_, commit_num, response_num)
            )
            exc.errors = self.errors
            raise exc