comparison env/lib/python3.7/site-packages/docutils/transforms/frontmatter.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 # $Id: frontmatter.py 8389 2019-09-11 11:39:13Z milde $
2 # Author: David Goodger, Ueli Schlaepfer <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
4
5 """
6 Transforms related to the front matter of a document or a section
7 (information found before the main text):
8
9 - `DocTitle`: Used to transform a lone top level section's title to
10 the document title, promote a remaining lone top-level section's
11 title to the document subtitle, and determine the document's title
12 metadata (document['title']) based on the document title and/or the
13 "title" setting.
14
15 - `SectionSubTitle`: Used to transform a lone subsection into a
16 subtitle.
17
18 - `DocInfo`: Used to transform a bibliographic field list into docinfo
19 elements.
20 """
21
# Declares the markup in which this module's docstrings are written.
__docformat__ = 'reStructuredText'
23
24 import re
25 import sys
26
27 from docutils import nodes, utils
28 from docutils.transforms import TransformError, Transform
29
30
# Python 3 has no separate ``unicode`` builtin.  Alias it to ``str`` so
# the code in this module can call ``unicode()`` on either major version.
if sys.version_info[0] >= 3:
    unicode = str  # noqa
33
34
class TitlePromoter(Transform):

    """
    Shared machinery for the `DocTitle` and `SectionSubTitle` transforms.

    This abstract base class only supplies the helpers that lift the
    title of a lone section into the element containing that section.
    """

    def promote_title(self, node):
        """
        Replace a lone child section of `node` by its title and contents.

        A tree of the form ::

            <node>
                <section>
                    <title>
                    ...

        is rewritten to ::

            <node>
                <title>
                ...

        `node` is normally a document and must not begin with a title
        yet.  The attributes of the section are merged into `node`.

        Return True if a section was promoted and False if no suitable
        candidate was found.  Raise TypeError if `node` is not a
        `nodes.Element`.
        """
        if not isinstance(node, nodes.Element):
            raise TypeError('node must be of Element-derived type.')

        # A title must not be in place already.
        assert not (len(node) and isinstance(node[0], nodes.title))

        section, position = self.candidate_index(node)
        if position is None:
            return False

        # Merge the section's attributes into the node.
        # NOTE: pass ``replace=False`` to keep attributes that already
        #       exist in `node` instead of overwriting them.
        # NOTE: drop ``and_source`` to leave the 'source' attribute of
        #       `node` untouched.
        node.update_all_atts_concatenating(
            section, replace=True, and_source=True)

        # New child list: the section title first, then whatever preceded
        # the section in the node, then the remaining section contents.
        # (setup_child is called automatically for all nodes.)
        leading = node[:position]
        node[:] = section[:1] + leading + section[1:]
        assert isinstance(node[0], nodes.title)
        return True

    def promote_subtitle(self, node):
        """
        Turn a lone subsection of `node` into a subtitle.

        A tree of the form ::

            <node>
                <title>
                <section>
                    <title>
                    ...

        is rewritten to ::

            <node>
                <title>
                <subtitle>
                ...

        The new subtitle receives the subsection's attributes and the
        children of the subsection's title.

        Return True if a subtitle was created and False if no suitable
        candidate was found.  Raise TypeError if `node` is not a
        `nodes.Element`.
        """
        if not isinstance(node, nodes.Element):
            raise TypeError('node must be of Element-derived type.')

        subsection, position = self.candidate_index(node)
        if position is None:
            return False

        subtitle = nodes.subtitle()

        # Hand the subsection's attributes over to the new subtitle.
        # NOTE: pass ``replace=False`` to keep attributes that already
        #       exist on the target instead of overwriting them.
        # NOTE: drop ``and_source`` to not copy the 'source' attribute.
        subtitle.update_all_atts_concatenating(
            subsection, replace=True, and_source=True)

        # The subtitle takes over the contents of the subsection's title.
        subtitle[:] = subsection[0][:]

        title_part = node[:1]                # the existing title
        before_section = node[1:position]    # what preceded the subsection
        node[:] = (title_part + [subtitle] + before_section
                   + subsection[1:])         # rest of the subsection
        return True

    def candidate_index(self, node):
        """
        Locate the section that may be promoted.

        The candidate is the first child of `node` that is not a
        `nodes.PreBibliographic` instance.  It qualifies only if it is a
        `nodes.section` and is the last child of `node`.

        Return the tuple (candidate, index), or (None, None) if there is
        no valid candidate.
        """
        position = node.first_child_not_matching_class(
            nodes.PreBibliographic)
        if position is None:
            return None, None
        if len(node) > position + 1:
            # Something follows the candidate, so it is not "lone".
            return None, None
        candidate = node[position]
        if not isinstance(candidate, nodes.section):
            return None, None
        return candidate, position
140
141
class DocTitle(TitlePromoter):

    """
    Derive the document title and subtitle from the section structure.

    reStructuredText_ offers no syntax for stating a document title or
    subtitle explicitly.  They are supplied implicitly instead, and this
    two-step transform "raises" or "promotes" the title(s), along with
    the corresponding section contents, to the document level.

    1. If the document contains a single top-level section as its first
       non-comment element, that section's title becomes the document's
       title and the section's contents become the document's immediate
       contents.  The lone top-level section header must be the first
       non-comment element in the document.

       For example, this input text::

           =================
            Top-Level Title
           =================

           A paragraph.

       is parsed to::

           <document>
               <section names="top-level title">
                   <title>
                       Top-Level Title
                   <paragraph>
                       A paragraph.

       After running the DocTitle transform, the tree is::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <paragraph>
                   A paragraph.

    2. If step 1 determined a document title, a subtitle is looked for.

       If the lone top-level section itself contains a single
       second-level section as its first non-comment element, that
       section's title is promoted to the document's subtitle and its
       contents become the document's immediate contents.  Given this
       input text::

           =================
            Top-Level Title
           =================

           Second-Level Title
           ~~~~~~~~~~~~~~~~~~

           A paragraph.

       parsing and running the transform results in::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <subtitle names="second-level title">
                   Second-Level Title
               <paragraph>
                   A paragraph.

       (Note that the implicit hyperlink target generated by the
       "Second-Level Title" is preserved on the "subtitle" element
       itself.)

    Comment elements found before the document title or subtitle are
    accumulated and inserted as the first body elements after the
    title(s).

    The transform also sets the document's metadata title
    (document['title']).

    .. _reStructuredText: http://docutils.sf.net/rst.html
    """

    default_priority = 320

    def set_metadata(self):
        """
        Fill in the document['title'] metadata title.

        The sources are consulted in this order of priority:

        * an already existing document['title'] attribute,
        * the "title" setting,
        * the document title node (as promoted by promote_title).
        """
        document = self.document
        if document.hasattr('title'):
            # An explicit metadata title wins; leave it alone.
            return
        configured = document.settings.title
        if configured is not None:
            document['title'] = configured
        elif len(document) and isinstance(document[0], nodes.title):
            document['title'] = document[0].astext()

    def apply(self):
        """Promote title and subtitle if enabled, then set the metadata."""
        enabled = getattr(self.document.settings, 'doctitle_xform', 1)
        # promote_title() and promote_subtitle() come from TitlePromoter.
        # A subtitle is only looked for once a title has been promoted.
        if enabled and self.promote_title(self.document):
            self.promote_subtitle(self.document)
        # The metadata title is set whether or not promotion took place.
        self.set_metadata()
252
253
class SectionSubTitle(TitlePromoter):

    """
    Promote lone subsections to section subtitles.

    This is the section-level counterpart of document subtitles.  For
    example, ::

        <section>
            <title>
                Title
            <section>
                <title>
                    Subtitle
                ...

    is transformed into ::

        <section>
            <title>
                Title
            <subtitle>
                Subtitle
            ...

    See the docstring of DocTitle for details.
    """

    default_priority = 350

    def apply(self):
        """Try subtitle promotion on every section, if enabled."""
        if getattr(self.document.settings, 'sectsubtitle_xform', 1):
            # The tree is modified while it is being walked, but only in
            # its not-yet-visited part, so the iterator returned by
            # _traverse() is not corrupted.
            for section in self.document._traverse(nodes.section):
                self.promote_subtitle(section)
289
290
class DocInfo(Transform):

    """
    This transform is specific to the reStructuredText_ markup syntax;
    see "Bibliographic Fields" in the `reStructuredText Markup
    Specification`_ for a high-level description.  This transform
    should be run *after* the `DocTitle` transform.

    Given a field list as the first non-comment element after the
    document title and subtitle (if present), registered bibliographic
    field names are transformed to the corresponding DTD elements,
    becoming child elements of the "docinfo" element (except for a
    dedication and/or an abstract, which become "topic" elements after
    "docinfo").

    For example, given this document fragment after parsing::

        <document>
            <title>
                Document Title
            <field_list>
                <field>
                    <field_name>
                        Author
                    <field_body>
                        <paragraph>
                            A. Name
                <field>
                    <field_name>
                        Status
                    <field_body>
                        <paragraph>
                            $RCSfile$
            ...

    After running the bibliographic field list transform, the
    resulting document tree would look like this::

        <document>
            <title>
                Document Title
            <docinfo>
                <author>
                    A. Name
                <status>
                    frontmatter.py
            ...

    The "Status" field contained an expanded RCS keyword, which is
    normally (but optionally) cleaned up by the transform.  The sole
    contents of the field body must be a paragraph containing an
    expanded RCS keyword of the form "$keyword: expansion text $".  Any
    RCS keyword can be processed in any bibliographic field.  The
    dollar signs and leading RCS keyword name are removed.  Extra
    processing is done for the following RCS keywords:

    - "RCSfile" expands to the name of the file in the RCS or CVS
      repository, which is the name of the source file with a ",v"
      suffix appended.  The transform will remove the ",v" suffix.

    - "Date" expands to the format "YYYY/MM/DD hh:mm:ss" (in the UTC
      time zone).  The RCS Keywords transform will extract just the
      date itself and transform it to an ISO 8601 format date, as in
      "2000-12-31".

      (Since the source file for this text is itself stored under CVS,
      we can't show an example of the "Date" RCS keyword because we
      can't prevent any RCS keywords used in this explanation from
      being expanded.  Only the "RCSfile" keyword is stable; its
      expansion text changes only if the file name changes.)

    .. _reStructuredText: http://docutils.sf.net/rst.html
    .. _reStructuredText Markup Specification:
       http://docutils.sf.net/docs/ref/rst/restructuredtext.html
    """

    default_priority = 340

    # Canonical field name (lowcased) to node class mapping for
    # bibliographic fields (field_list).
    biblio_nodes = {
        'author': nodes.author,
        'authors': nodes.authors,
        'organization': nodes.organization,
        'address': nodes.address,
        'contact': nodes.contact,
        'version': nodes.version,
        'revision': nodes.revision,
        'status': nodes.status,
        'date': nodes.date,
        'copyright': nodes.copyright,
        'dedication': nodes.topic,
        'abstract': nodes.topic}

    def apply(self):
        """
        Replace the leading field list of the document by docinfo nodes.

        Nothing happens if the "docinfo_xform" setting is false or if the
        first child that is not `nodes.PreBibliographic` is not a field
        list.
        """
        if not getattr(self.document.settings, 'docinfo_xform', 1):
            return
        document = self.document
        index = document.first_child_not_matching_class(
            nodes.PreBibliographic)
        if index is None:
            return
        candidate = document[index]
        if isinstance(candidate, nodes.field_list):
            # The result is inserted right after any titular and
            # decorative elements, which may be earlier than `index`.
            biblioindex = document.first_child_not_matching_class(
                (nodes.Titular, nodes.Decorative))
            nodelist = self.extract_bibliographic(candidate)
            del document[index]  # untransformed field list (candidate)
            document[biblioindex:biblioindex] = nodelist

    def extract_bibliographic(self, field_list):
        """
        Convert `field_list` into bibliographic nodes.

        Each field whose normalized name is a registered bibliographic
        field is converted to an instance of the class registered in
        `biblio_nodes`.  A field that cannot be converted is appended to
        the docinfo element unchanged, apart from an extra class value
        derived from its name and RCS keyword cleanup.

        Return a list holding the docinfo element (if it has children),
        followed by the "dedication" and "abstract" topics (if present).
        """
        docinfo = nodes.docinfo()
        bibliofields = self.language.bibliographic_fields
        labels = self.language.labels
        topics = {'dedication': None, 'abstract': None}
        for field in field_list:
            try:
                name = field[0][0].astext()
                normedname = nodes.fully_normalize_name(name)
                if not (len(field) == 2 and normedname in bibliofields
                        and self.check_empty_biblio_field(field, name)):
                    raise TransformError
                canonical = bibliofields[normedname]
                biblioclass = self.biblio_nodes[canonical]
                if issubclass(biblioclass, nodes.TextElement):
                    # Simple text field: exactly one paragraph allowed.
                    if not self.check_compound_biblio_field(field, name):
                        raise TransformError
                    utils.clean_rcs_keywords(
                        field[1][0], self.rcs_keyword_substitutions)
                    docinfo.append(biblioclass('', '', *field[1][0]))
                elif issubclass(biblioclass, nodes.authors):
                    self.extract_authors(field, name, docinfo)
                elif issubclass(biblioclass, nodes.topic):
                    if topics[canonical]:
                        field[-1] += self.document.reporter.warning(
                            'There can only be one "%s" field.' % name,
                            base_node=field)
                        raise TransformError
                    title = nodes.title(name, labels[canonical])
                    title[0].rawsource = labels[canonical]
                    topics[canonical] = biblioclass(
                        '', title, classes=[canonical], *field[1].children)
                else:
                    docinfo.append(biblioclass('', *field[1].children))
            except TransformError:
                # Fallback: keep the field itself inside docinfo.
                if (len(field[-1]) == 1
                        and isinstance(field[-1][0], nodes.paragraph)):
                    utils.clean_rcs_keywords(
                        field[-1][0], self.rcs_keyword_substitutions)
                classvalue = nodes.make_id(normedname)
                if classvalue:
                    field['classes'].append(classvalue)
                docinfo.append(field)
        nodelist = []
        # Keep the explicit length test: it asks "has children?", which is
        # not necessarily what the truth value of a node object means.
        if len(docinfo) != 0:
            nodelist.append(docinfo)
        for name in ('dedication', 'abstract'):
            if topics[name]:
                nodelist.append(topics[name])
        return nodelist

    def check_empty_biblio_field(self, field, name):
        """
        Check that the body of `field` is not empty.

        Return 1 if the field body has at least one child.  Otherwise
        add a warning to the field body and return None.
        """
        if len(field[-1]) < 1:
            field[-1] += self.document.reporter.warning(
                'Cannot extract empty bibliographic field "%s".' % name,
                base_node=field)
            return None
        return 1

    def check_compound_biblio_field(self, field, name):
        """
        Check that the body of `field` consists of a single paragraph.

        Return 1 if so.  Otherwise add a warning to the field body and
        return None.
        """
        if len(field[-1]) > 1:
            field[-1] += self.document.reporter.warning(
                'Cannot extract compound bibliographic field "%s".' % name,
                base_node=field)
            return None
        if not isinstance(field[-1][0], nodes.paragraph):
            field[-1] += self.document.reporter.warning(
                'Cannot extract bibliographic field "%s" containing '
                'anything other than a single paragraph.' % name,
                base_node=field)
            return None
        return 1

    # (pattern, replacement) pairs handed to utils.clean_rcs_keywords().
    # The leading r'\$' is kept as a separate string literal so that the
    # keywords in this source file do not look like expandable RCS
    # keywords themselves (presumably; see the class docstring).
    rcs_keyword_substitutions = [
        (re.compile(r'\$' r'Date: (\d\d\d\d)[-/](\d\d)[-/](\d\d)[ T][\d:]+'
                    r'[^$]* \$', re.IGNORECASE), r'\1-\2-\3'),
        (re.compile(r'\$' r'RCSfile: (.+),v \$', re.IGNORECASE), r'\1'),
        (re.compile(r'\$[a-zA-Z]+: (.+) \$'), r'\1'),]

    def extract_authors(self, field, name, docinfo):
        """
        Append an "authors" node built from `field` to `docinfo`.

        The field body may be a single paragraph, a single bullet list,
        or several paragraphs.  If no author can be extracted, a warning
        is added to the field body and the TransformError is re-raised.
        """
        try:
            if len(field[1]) == 1:
                if isinstance(field[1][0], nodes.paragraph):
                    authors = self.authors_from_one_paragraph(field)
                elif isinstance(field[1][0], nodes.bullet_list):
                    authors = self.authors_from_bullet_list(field)
                else:
                    raise TransformError
            else:
                authors = self.authors_from_paragraphs(field)
            authornodes = [nodes.author('', '', *author)
                           for author in authors if author]
            if len(authornodes) >= 1:
                docinfo.append(nodes.authors('', *authornodes))
            else:
                raise TransformError
        except TransformError:
            field[-1] += self.document.reporter.warning(
                'Bibliographic field "%s" incompatible with extraction: '
                'it must contain either a single paragraph (with authors '
                'separated by one of "%s"), multiple paragraphs (one per '
                'author), or a bullet list with one paragraph (one author) '
                'per item.'
                % (name, ''.join(self.language.author_separators)),
                base_node=field)
            raise

    def authors_from_one_paragraph(self, field):
        """Return a list of one-element Text node lists, one per author.

        The author names are split at the first separator from the
        language's `author_separators` that yields more than one name
        (default: ";" or ",").  Separators are matched literally.  If no
        separator splits the text, it is treated as a single author.

        Raise TransformError if the field body contains no text.
        """
        # @@ keep original formatting? (e.g. ``:authors: A. Test, *et-al*``)
        text = ''.join(unicode(node)
                       for node in field[1].traverse(nodes.Text))
        if not text:
            raise TransformError
        # Default covers an empty separator list (single author).
        authornames = [text]
        for authorsep in self.language.author_separators:
            # Don't split at an escaped (null-prefixed) `authorsep`.  The
            # separator is escaped so that it is taken literally even if
            # it contains regular expression metacharacters.
            pattern = '(?<!\x00)%s' % re.escape(authorsep)
            authornames = re.split(pattern, text)
            if len(authornames) > 1:
                break
        authornames = (name.strip() for name in authornames)
        authors = [[nodes.Text(name, utils.unescape(name, True))]
                   for name in authornames if name]
        return authors

    def authors_from_bullet_list(self, field):
        """
        Return a list with the paragraph children of each list item.

        Comments in the list are skipped.  Raise TransformError if an
        item is not exactly one paragraph or if no author is found.
        """
        authors = []
        for item in field[1][0]:
            if isinstance(item, nodes.comment):
                continue
            if len(item) != 1 or not isinstance(item[0], nodes.paragraph):
                raise TransformError
            authors.append(item[0].children)
        if not authors:
            raise TransformError
        return authors

    def authors_from_paragraphs(self, field):
        """
        Return a list with the children of each paragraph in the body.

        Comments are skipped.  Raise TransformError if the field body
        contains anything other than paragraphs and comments.
        """
        for item in field[1]:
            if not isinstance(item, (nodes.paragraph, nodes.comment)):
                raise TransformError
        authors = [item.children for item in field[1]
                   if not isinstance(item, nodes.comment)]
        return authors
548 return authors