Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/docutils/transforms/frontmatter.py @ 2:6af9afd405e9 draft
"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
author | shellac |
---|---|
date | Thu, 14 May 2020 14:56:58 -0400 |
parents | 26e78fe6e8c4 |
children |
comparison
equal
deleted
inserted
replaced
1:75ca89e9b81c | 2:6af9afd405e9 |
---|---|
1 # $Id: frontmatter.py 8389 2019-09-11 11:39:13Z milde $ | |
2 # Author: David Goodger, Ueli Schlaepfer <goodger@python.org> | |
3 # Copyright: This module has been placed in the public domain. | |
4 | |
5 """ | |
6 Transforms related to the front matter of a document or a section | |
7 (information found before the main text): | |
8 | |
9 - `DocTitle`: Used to transform a lone top level section's title to | |
10 the document title, promote a remaining lone top-level section's | |
11 title to the document subtitle, and determine the document's title | |
12 metadata (document['title']) based on the document title and/or the | |
13 "title" setting. | |
14 | |
15 - `SectionSubTitle`: Used to transform a lone subsection into a | |
16 subtitle. | |
17 | |
18 - `DocInfo`: Used to transform a bibliographic field list into docinfo | |
19 elements. | |
20 """ | |
21 | |
22 __docformat__ = 'reStructuredText' | |
23 | |
24 import re | |
25 import sys | |
26 | |
27 from docutils import nodes, utils | |
28 from docutils.transforms import TransformError, Transform | |
29 | |
30 | |
# Python 3 has no builtin ``unicode``; alias it to ``str`` so the rest of
# this module can keep using the Python 2 spelling.
if sys.version_info[0] >= 3:
    unicode = str  # noqa
33 | |
34 | |
class TitlePromoter(Transform):

    """
    Common base class of the `DocTitle` and `SectionSubTitle` transforms.

    It only supplies the tree-rewriting helpers used by those two
    transforms and is not meant to be applied on its own.
    """

    def promote_title(self, node):
        """
        Lift the title of a lone child section up to `node`.

        A tree of the form ::

            <node>
                <section>
                    <title>
                    ...

        is rewritten as ::

            <node>
                <title>
                ...

        `node` is normally a document.

        Return True if a section was promoted, False if `node` has no
        promotion candidate (see `candidate_index`).  Raise `TypeError`
        if `node` is not a `nodes.Element`.
        """
        if not isinstance(node, nodes.Element):
            raise TypeError('node must be of Element-derived type.')

        # `node` must not start with a title of its own yet.
        already_titled = len(node) and isinstance(node[0], nodes.title)
        assert not already_titled

        section, index = self.candidate_index(node)
        if index is None:
            return False

        # Hand the section's attributes over to the node.
        # NOTE: Pass ``replace=False`` to keep attributes that already
        #       exist in node instead of overwriting them with those
        #       of the section.
        # NOTE: Drop ``and_source`` to NOT copy the section's 'source'
        #       attribute.
        node.update_all_atts_concatenating(section, replace=True, and_source=True)

        # New child list (setup_child is called automatically for all
        # nodes): the section title comes first, followed by whatever
        # was in the node before the section, followed by the rest of
        # the section.
        section_title = section[:1]
        before_section = node[:index]
        section_body = section[1:]
        node[:] = section_title + before_section + section_body
        assert isinstance(node[0], nodes.title)
        return True

    def promote_subtitle(self, node):
        """
        Turn the title of a lone subsection of `node` into a subtitle.

        A node tree of the form ::

            <node>
                <title>
                <section>
                    <title>
                    ...

        is rewritten as ::

            <node>
                <title>
                <subtitle>
                ...

        Return True if a subsection was promoted, False if `node` has
        no promotion candidate (see `candidate_index`).  Raise
        `TypeError` if `node` is not a `nodes.Element`.
        """
        if not isinstance(node, nodes.Element):
            raise TypeError('node must be of Element-derived type.')

        subsection, index = self.candidate_index(node)
        if index is None:
            return False

        # The new subtitle takes over the subsection's attributes.
        # NOTE: Pass ``replace=False`` to keep attributes that already
        #       exist on the subtitle instead of overwriting them.
        # NOTE: Drop ``and_source`` to NOT copy the subsection's
        #       'source' attribute.
        subtitle = nodes.subtitle()
        subtitle.update_all_atts_concatenating(subsection, replace=True, and_source=True)

        # The children of the subsection's title become the children
        # of the subtitle.
        subtitle[:] = subsection[0][:]

        title_part = node[:1]
        before_section = node[1:index]
        subsection_body = subsection[1:]
        node[:] = title_part + [subtitle] + before_section + subsection_body
        return True

    def candidate_index(self, node):
        """
        Locate the section of `node` that may be promoted.

        The candidate is the first child that is not a
        `nodes.PreBibliographic` instance.  It only qualifies if it is
        also the last child of `node` and is a `nodes.section`.

        Return the tuple ``(candidate, index)``, or ``(None, None)`` if
        there is no valid candidate.
        """
        index = node.first_child_not_matching_class(
            nodes.PreBibliographic)
        if index is None:
            return None, None
        if len(node) > index + 1:
            # Something follows the candidate, so it is not "lone".
            return None, None
        candidate = node[index]
        if not isinstance(candidate, nodes.section):
            return None, None
        return candidate, index
140 | |
141 | |
class DocTitle(TitlePromoter):

    """
    reStructuredText_ offers no explicit syntax for a document title or
    subtitle.  They are given implicitly instead, and this two-step
    transform "raises" or "promotes" the title(s), together with the
    contents of the corresponding sections, to the document level.

    1. If the first non-comment element of the document is a single
       top-level section, that section's title becomes the document's
       title and the section's contents become the immediate contents
       of the document.  The lone top-level section header must be the
       first non-comment element in the document.

       For example, take this input text::

           =================
            Top-Level Title
           =================

           A paragraph.

       Once parsed, it looks like this::

           <document>
               <section names="top-level title">
                   <title>
                       Top-Level Title
                   <paragraph>
                       A paragraph.

       After running the DocTitle transform, we have::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <paragraph>
                   A paragraph.

    2. If step 1 determined a document title, a subtitle is looked for
       next.

       If the first non-comment element of the lone top-level section
       is itself a single second-level section, the title of that
       section is promoted to the document's subtitle and its contents
       become the immediate contents of the document.  Given this
       input text::

           =================
            Top-Level Title
           =================

           Second-Level Title
           ~~~~~~~~~~~~~~~~~~

           A paragraph.

       After parsing and running the Section Promotion transform, the
       result is::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <subtitle names="second-level title">
                   Second-Level Title
               <paragraph>
                   A paragraph.

       (Note that the implicit hyperlink target generated by the
       "Second-Level Title" is preserved on the "subtitle" element
       itself.)

    Comment elements found before the document title or subtitle are
    collected and inserted as the first body elements after the
    title(s).

    The transform also sets the document's metadata title
    (document['title']).

    .. _reStructuredText: http://docutils.sf.net/rst.html
    """

    default_priority = 320

    def set_metadata(self):
        """
        Fill in the document['title'] metadata title.

        The value is taken from the first available of these sources:

        * Existing document['title'] attribute (left unchanged).
        * "title" setting.
        * Document title node (as promoted by promote_title).
        """
        document = self.document
        if document.hasattr('title'):
            return
        configured_title = document.settings.title
        if configured_title is not None:
            document['title'] = configured_title
        elif len(document) and isinstance(document[0], nodes.title):
            document['title'] = document[0].astext()

    def apply(self):
        """
        Promote title and subtitle (unless the "doctitle_xform" setting
        is false), then set the metadata title in either case.
        """
        document = self.document
        promotion_enabled = getattr(document.settings, 'doctitle_xform', 1)
        # promote_(sub)title are defined in the TitlePromoter base class.
        # A subtitle is only looked for once a title has been promoted.
        if promotion_enabled and self.promote_title(document):
            self.promote_subtitle(document)
        # Set document['title'].
        self.set_metadata()
252 | |
253 | |
class SectionSubTitle(TitlePromoter):

    """
    The counterpart of document subtitles for sections.  For example, ::

        <section>
            <title>
                Title
            <section>
                <title>
                    Subtitle
                ...

    is transformed into ::

        <section>
            <title>
                Title
            <subtitle>
                Subtitle
            ...

    For details refer to the docstring of DocTitle.
    """

    default_priority = 350

    def apply(self):
        """
        Try to promote a subtitle in every section of the document,
        unless the "sectsubtitle_xform" setting is false.
        """
        enabled = getattr(self.document.settings, 'sectsubtitle_xform', 1)
        if enabled:
            # The tree is modified while it is being walked, but only
            # in its not-yet-visited part, so the iterator returned by
            # _traverse() is not corrupted.
            for section in self.document._traverse(nodes.section):
                self.promote_subtitle(section)
289 | |
290 | |
class DocInfo(Transform):

    """
    This transform is specific to the reStructuredText_ markup syntax;
    see "Bibliographic Fields" in the `reStructuredText Markup
    Specification`_ for a high-level description. This transform
    should be run *after* the `DocTitle` transform.

    Given a field list as the first non-comment element after the
    document title and subtitle (if present), registered bibliographic
    field names are transformed to the corresponding DTD elements,
    becoming child elements of the "docinfo" element (except for a
    dedication and/or an abstract, which become "topic" elements after
    "docinfo").

    For example, given this document fragment after parsing::

        <document>
            <title>
                Document Title
            <field_list>
                <field>
                    <field_name>
                        Author
                    <field_body>
                        <paragraph>
                            A. Name
                <field>
                    <field_name>
                        Status
                    <field_body>
                        <paragraph>
                            $RCSfile$
            ...

    After running the bibliographic field list transform, the
    resulting document tree would look like this::

        <document>
            <title>
                Document Title
            <docinfo>
                <author>
                    A. Name
                <status>
                    frontmatter.py
            ...

    The "Status" field contained an expanded RCS keyword, which is
    normally (but optionally) cleaned up by the transform. The sole
    contents of the field body must be a paragraph containing an
    expanded RCS keyword of the form "$keyword: expansion text $". Any
    RCS keyword can be processed in any bibliographic field. The
    dollar signs and leading RCS keyword name are removed. Extra
    processing is done for the following RCS keywords:

    - "RCSfile" expands to the name of the file in the RCS or CVS
      repository, which is the name of the source file with a ",v"
      suffix appended. The transform will remove the ",v" suffix.

    - "Date" expands to the format "YYYY/MM/DD hh:mm:ss" (in the UTC
      time zone). The RCS Keywords transform will extract just the
      date itself and transform it to an ISO 8601 format date, as in
      "2000-12-31".

      (Since the source file for this text is itself stored under CVS,
      we can't show an example of the "Date" RCS keyword because we
      can't prevent any RCS keywords used in this explanation from
      being expanded. Only the "RCSfile" keyword is stable; its
      expansion text changes only if the file name changes.)

    .. _reStructuredText: http://docutils.sf.net/rst.html
    .. _reStructuredText Markup Specification:
       http://docutils.sf.net/docs/ref/rst/restructuredtext.html
    """

    default_priority = 340

    biblio_nodes = {
        'author': nodes.author,
        'authors': nodes.authors,
        'organization': nodes.organization,
        'address': nodes.address,
        'contact': nodes.contact,
        'version': nodes.version,
        'revision': nodes.revision,
        'status': nodes.status,
        'date': nodes.date,
        'copyright': nodes.copyright,
        'dedication': nodes.topic,
        'abstract': nodes.topic}
    """Canonical field name (lowercased) to node class name mapping for
    bibliographic fields (field_list)."""

    def apply(self):
        """
        Replace a leading bibliographic field list by docinfo elements.

        Nothing is done if the "docinfo_xform" setting is false, if the
        document has no child other than `nodes.PreBibliographic`
        instances, or if the first such child is not a field list.
        """
        if not getattr(self.document.settings, 'docinfo_xform', 1):
            return
        document = self.document
        index = document.first_child_not_matching_class(
            nodes.PreBibliographic)
        if index is None:
            return
        candidate = document[index]
        if isinstance(candidate, nodes.field_list):
            # The new nodes go right after the leading Titular and
            # Decorative children, which may be before `index`.
            biblioindex = document.first_child_not_matching_class(
                (nodes.Titular, nodes.Decorative))
            nodelist = self.extract_bibliographic(candidate)
            del document[index]  # untransformed field list (candidate)
            document[biblioindex:biblioindex] = nodelist

    def extract_bibliographic(self, field_list):
        """
        Convert the fields of `field_list` to bibliographic elements.

        Return a list with a "docinfo" node (if it has any children)
        followed by the "dedication" and "abstract" topics (if given).
        Fields that cannot be converted are moved to the docinfo node
        unchanged, apart from RCS keyword clean-up and an extra class
        value derived from the field name.
        """
        docinfo = nodes.docinfo()
        bibliofields = self.language.bibliographic_fields
        labels = self.language.labels
        topics = {'dedication': None, 'abstract': None}
        for field in field_list:
            try:
                name = field[0][0].astext()
                normedname = nodes.fully_normalize_name(name)
                if not (len(field) == 2 and normedname in bibliofields
                        and self.check_empty_biblio_field(field, name)):
                    raise TransformError
                canonical = bibliofields[normedname]
                biblioclass = self.biblio_nodes[canonical]
                if issubclass(biblioclass, nodes.TextElement):
                    # Simple fields must consist of a single paragraph.
                    if not self.check_compound_biblio_field(field, name):
                        raise TransformError
                    utils.clean_rcs_keywords(
                        field[1][0], self.rcs_keyword_substitutions)
                    docinfo.append(biblioclass('', '', *field[1][0]))
                elif issubclass(biblioclass, nodes.authors):
                    self.extract_authors(field, name, docinfo)
                elif issubclass(biblioclass, nodes.topic):
                    if topics[canonical]:
                        field[-1] += self.document.reporter.warning(
                            'There can only be one "%s" field.' % name,
                            base_node=field)
                        raise TransformError
                    title = nodes.title(name, labels[canonical])
                    title[0].rawsource = labels[canonical]
                    topics[canonical] = biblioclass(
                        '', title, classes=[canonical], *field[1].children)
                else:
                    docinfo.append(biblioclass('', *field[1].children))
            except TransformError:
                # Fall back to keeping the field as a generic field.
                if len(field[-1]) == 1 \
                        and isinstance(field[-1][0], nodes.paragraph):
                    utils.clean_rcs_keywords(
                        field[-1][0], self.rcs_keyword_substitutions)
                # Add an identifier made from the normalized field name
                # to the field's class values.
                classvalue = nodes.make_id(normedname)
                if classvalue:
                    field['classes'].append(classvalue)
                docinfo.append(field)
        nodelist = []
        if len(docinfo) != 0:
            nodelist.append(docinfo)
        for name in ('dedication', 'abstract'):
            if topics[name]:
                nodelist.append(topics[name])
        return nodelist

    def check_empty_biblio_field(self, field, name):
        """
        Return 1 if the body of `field` has content.

        Otherwise append a warning to the field body and return None.
        """
        if len(field[-1]) < 1:
            field[-1] += self.document.reporter.warning(
                'Cannot extract empty bibliographic field "%s".' % name,
                base_node=field)
            return None
        return 1

    def check_compound_biblio_field(self, field, name):
        """
        Return 1 if the body of `field` is exactly one paragraph.

        Otherwise append a warning to the field body and return None.
        """
        if len(field[-1]) > 1:
            field[-1] += self.document.reporter.warning(
                'Cannot extract compound bibliographic field "%s".' % name,
                base_node=field)
            return None
        if not isinstance(field[-1][0], nodes.paragraph):
            field[-1] += self.document.reporter.warning(
                'Cannot extract bibliographic field "%s" containing '
                'anything other than a single paragraph.' % name,
                base_node=field)
            return None
        return 1

    # (pattern, replacement) pairs passed to `utils.clean_rcs_keywords`.
    # The specific "Date" and "RCSfile" patterns come before the generic
    # "$keyword: text $" pattern.
    rcs_keyword_substitutions = [
        (re.compile(r'\$' r'Date: (\d\d\d\d)[-/](\d\d)[-/](\d\d)[ T][\d:]+'
                    r'[^$]* \$', re.IGNORECASE), r'\1-\2-\3'),
        (re.compile(r'\$' r'RCSfile: (.+),v \$', re.IGNORECASE), r'\1'),
        (re.compile(r'\$[a-zA-Z]+: (.+) \$'), r'\1'),
    ]

    def extract_authors(self, field, name, docinfo):
        """
        Append an "authors" node built from `field` to `docinfo`.

        The field body may be a single paragraph, a single bullet list,
        or several paragraphs.  If no author can be extracted, a
        warning is appended to the field body and `TransformError` is
        re-raised for the caller to handle.
        """
        try:
            if len(field[1]) == 1:
                if isinstance(field[1][0], nodes.paragraph):
                    authors = self.authors_from_one_paragraph(field)
                elif isinstance(field[1][0], nodes.bullet_list):
                    authors = self.authors_from_bullet_list(field)
                else:
                    raise TransformError
            else:
                authors = self.authors_from_paragraphs(field)
            authornodes = [nodes.author('', '', *author)
                           for author in authors if author]
            if len(authornodes) >= 1:
                docinfo.append(nodes.authors('', *authornodes))
            else:
                raise TransformError
        except TransformError:
            field[-1] += self.document.reporter.warning(
                'Bibliographic field "%s" incompatible with extraction: '
                'it must contain either a single paragraph (with authors '
                'separated by one of "%s"), multiple paragraphs (one per '
                'author), or a bullet list with one paragraph (one author) '
                'per item.'
                % (name, ''.join(self.language.author_separators)),
                base_node=field)
            raise

    def authors_from_one_paragraph(self, field):
        """
        Return a list of one-element lists of Text nodes, one per author.

        The paragraph text is split at the first of the language's
        author separators that yields more than one name.  Separators
        are matched literally.  If no separator applies (or none is
        defined), the whole text is taken as a single author name.

        Raise `TransformError` if the field body contains no text.
        """
        # @@ keep original formatting? (e.g. ``:authors: A. Test, *et-al*``)
        text = ''.join(unicode(node)
                       for node in field[1].traverse(nodes.Text))
        if not text:
            raise TransformError
        # Default for the case that the language defines no separators.
        authornames = [text]
        for authorsep in self.language.author_separators:
            # Don't split at escaped `authorsep` (preceded by a null
            # character).  The separator is escaped so that regular
            # expression metacharacters in it are matched literally.
            pattern = '(?<!\x00)' + re.escape(authorsep)
            authornames = re.split(pattern, text)
            if len(authornames) > 1:
                break
        authornames = (name.strip() for name in authornames)
        authors = [[nodes.Text(name, utils.unescape(name, True))]
                   for name in authornames if name]
        return authors

    def authors_from_bullet_list(self, field):
        """
        Return the children of the paragraph of each bullet list item.

        Comments in the list are skipped.  Raise `TransformError` if an
        item is not exactly one paragraph or if no author is found.
        """
        authors = []
        for item in field[1][0]:
            if isinstance(item, nodes.comment):
                continue
            if len(item) != 1 or not isinstance(item[0], nodes.paragraph):
                raise TransformError
            authors.append(item[0].children)
        if not authors:
            raise TransformError
        return authors

    def authors_from_paragraphs(self, field):
        """
        Return the children of each paragraph in the field body.

        Comments are skipped.  Raise `TransformError` if the field body
        contains anything other than paragraphs and comments.
        """
        for item in field[1]:
            if not isinstance(item, (nodes.paragraph, nodes.comment)):
                raise TransformError
        authors = [item.children for item in field[1]
                   if not isinstance(item, nodes.comment)]
        return authors