Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/docutils/transforms/frontmatter.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 # $Id: frontmatter.py 8389 2019-09-11 11:39:13Z milde $ | |
| 2 # Author: David Goodger, Ueli Schlaepfer <goodger@python.org> | |
| 3 # Copyright: This module has been placed in the public domain. | |
| 4 | |
| 5 """ | |
| 6 Transforms related to the front matter of a document or a section | |
| 7 (information found before the main text): | |
| 8 | |
| 9 - `DocTitle`: Used to transform a lone top level section's title to | |
| 10 the document title, promote a remaining lone top-level section's | |
| 11 title to the document subtitle, and determine the document's title | |
| 12 metadata (document['title']) based on the document title and/or the | |
| 13 "title" setting. | |
| 14 | |
| 15 - `SectionSubTitle`: Used to transform a lone subsection into a | |
| 16 subtitle. | |
| 17 | |
| 18 - `DocInfo`: Used to transform a bibliographic field list into docinfo | |
| 19 elements. | |
| 20 """ | |
| 21 | |
| 22 __docformat__ = 'reStructuredText' | |
| 23 | |
| 24 import re | |
| 25 import sys | |
| 26 | |
| 27 from docutils import nodes, utils | |
| 28 from docutils.transforms import TransformError, Transform | |
| 29 | |
| 30 | |
# Python 3 has no separate ``unicode`` type; alias the name to ``str`` so the
# ``unicode(...)`` call used later in this module works on both major versions.
if sys.version_info[0] >= 3:
    unicode = str  # noqa
| 33 | |
| 34 | |
class TitlePromoter(Transform):

    """
    Abstract base class holding the promotion helpers shared by the
    DocTitle and SectionSubTitle transforms.
    """

    def promote_title(self, node):
        """
        Lift the title of a lone child section up into `node`.

        A tree shaped like this::

            <node>
                <section>
                    <title>
                    ...

        is rewritten to::

            <node>
                <title>
                ...

        `node` is normally a document.  It must not start with a title
        already.

        Return True if a section was promoted, False if `node` has no
        suitable candidate section.  Raise TypeError if `node` is not
        an `Element`.
        """
        if not isinstance(node, nodes.Element):
            raise TypeError('node must be of Element-derived type.')

        # Promoting into a node that already begins with a title would
        # leave it with two titles.
        assert len(node) == 0 or not isinstance(node[0], nodes.title)

        section, index = self.candidate_index(node)
        if index is None:
            return False

        # Merge the section's attributes into the node.
        # NOTE: pass ``replace=False`` to keep attributes that already
        #       exist on the node instead of overwriting them.
        # NOTE: drop ``and_source`` to leave the 'source' attribute of
        #       the section behind.
        node.update_all_atts_concatenating(
            section, replace=True, and_source=True)

        # Assigning to the slice calls setup_child for every new child.
        section_title = section[:1]
        before_section = node[:index]
        section_body = section[1:]
        node[:] = section_title + before_section + section_body
        assert isinstance(node[0], nodes.title)
        return True

    def promote_subtitle(self, node):
        """
        Turn the title of a lone subsection into a subtitle of `node`.

        A tree shaped like this::

            <node>
                <title>
                <section>
                    <title>
                    ...

        is rewritten to::

            <node>
                <title>
                <subtitle>
                ...

        Return True if a subsection was promoted, False if `node` has
        no suitable candidate section.  Raise TypeError if `node` is
        not an `Element`.
        """
        if not isinstance(node, nodes.Element):
            raise TypeError('node must be of Element-derived type.')

        subsection, index = self.candidate_index(node)
        if index is None:
            return False

        # The new subtitle takes over the subsection's attributes.
        # NOTE: pass ``replace=False`` to keep attributes that already
        #       exist on the subtitle instead of overwriting them.
        # NOTE: drop ``and_source`` to leave the 'source' attribute of
        #       the subsection behind.
        subtitle = nodes.subtitle()
        subtitle.update_all_atts_concatenating(
            subsection, replace=True, and_source=True)

        # The subtitle's content is the content of the subsection title.
        subtitle[:] = subsection[0][:]

        title_part = node[:1]
        before_section = node[1:index]
        subsection_body = subsection[1:]
        node[:] = (title_part + [subtitle]
                   + before_section + subsection_body)
        return True

    def candidate_index(self, node):
        """
        Locate the section that may be promoted, together with its index.

        The candidate is the first child of `node` that is not a
        `PreBibliographic` element; it only qualifies if it is also the
        last child and is a section.

        Return ``(section, index)``, or ``(None, None)`` if there is no
        valid candidate.
        """
        index = node.first_child_not_matching_class(nodes.PreBibliographic)
        if index is None:
            return None, None
        # Anything following the candidate disqualifies it.
        if len(node) > index + 1:
            return None, None
        candidate = node[index]
        if not isinstance(candidate, nodes.section):
            return None, None
        return candidate, index
| 140 | |
| 141 | |
class DocTitle(TitlePromoter):

    """
    reStructuredText_ offers no explicit syntax for a document title or
    subtitle.  They are given implicitly instead, and this two-step
    transform "raises" (promotes) the title(s), along with the contents
    of their sections, to the document level.

    1. When the first non-comment element of the document is a single
       top-level section, that section's title becomes the document
       title and the section's contents become the immediate contents
       of the document.  The lone top-level section header has to be
       the first non-comment element in the document.

       Take this input text as an example::

           =================
            Top-Level Title
           =================

           A paragraph.

       Parsing it yields::

           <document>
               <section names="top-level title">
                   <title>
                       Top-Level Title
                   <paragraph>
                       A paragraph.

       and the DocTitle transform turns that into::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <paragraph>
                   A paragraph.

    2. Only if step 1 found a document title, a subtitle is looked for.

       When the lone top-level section in turn has a single
       second-level section as its first non-comment element, the
       title of that section is promoted to the document subtitle and
       its contents become the immediate contents of the document.
       For this input text::

           =================
            Top-Level Title
           =================

           Second-Level Title
           ~~~~~~~~~~~~~~~~~~

           A paragraph.

       parsing followed by the Section Promotion transform gives::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <subtitle names="second-level title">
                   Second-Level Title
               <paragraph>
                   A paragraph.

       (The implicit hyperlink target generated by the "Second-Level
       Title" stays on the "subtitle" element itself.)

    Comment elements found before the document title or subtitle are
    collected and inserted as the first body elements after the
    title(s).

    The transform also sets the document's metadata title
    (document['title']).

    .. _reStructuredText: http://docutils.sf.net/rst.html
    """

    default_priority = 320

    def set_metadata(self):
        """
        Fill in the document['title'] metadata title.

        The sources, from highest to lowest priority:

        * An already existing document['title'] attribute.
        * The "title" setting.
        * The document title node (as promoted by promote_title).
        """
        document = self.document
        if document.hasattr('title'):
            # An explicit metadata title always wins.
            return
        configured_title = document.settings.title
        if configured_title is not None:
            document['title'] = configured_title
        elif len(document) and isinstance(document[0], nodes.title):
            document['title'] = document[0].astext()

    def apply(self):
        """
        Promote title and subtitle (unless the "doctitle_xform" setting
        is false), then set the metadata title in either case.
        """
        document = self.document
        # promote_title / promote_subtitle come from TitlePromoter.  A
        # subtitle is only attempted once a title has been promoted.
        if (getattr(document.settings, 'doctitle_xform', 1)
                and self.promote_title(document)):
            self.promote_subtitle(document)
        # Set document['title'].
        self.set_metadata()
| 252 | |
| 253 | |
class SectionSubTitle(TitlePromoter):

    """
    The section-level counterpart of document subtitles.  A tree such
    as ::

        <section>
            <title>
                Title
            <section>
                <title>
                    Subtitle
                ...

    becomes ::

        <section>
            <title>
                Title
            <subtitle>
                Subtitle
            ...

    See the docstring of DocTitle for the details.
    """

    default_priority = 350

    def apply(self):
        """
        Try subtitle promotion on every section of the document, unless
        the "sectsubtitle_xform" setting is false.
        """
        if getattr(self.document.settings, 'sectsubtitle_xform', 1):
            # The tree is modified while it is being walked, but only in
            # the part not visited yet, so the iterator returned by
            # _traverse() stays valid.
            for section in self.document._traverse(nodes.section):
                self.promote_subtitle(section)
| 289 | |
| 290 | |
class DocInfo(Transform):

    """
    This transform is specific to the reStructuredText_ markup syntax;
    see "Bibliographic Fields" in the `reStructuredText Markup
    Specification`_ for a high-level description. This transform
    should be run *after* the `DocTitle` transform.

    Given a field list as the first non-comment element after the
    document title and subtitle (if present), registered bibliographic
    field names are transformed to the corresponding DTD elements,
    becoming child elements of the "docinfo" element (except for a
    dedication and/or an abstract, which become "topic" elements after
    "docinfo").

    For example, given this document fragment after parsing::

        <document>
            <title>
                Document Title
            <field_list>
                <field>
                    <field_name>
                        Author
                    <field_body>
                        <paragraph>
                            A. Name
                <field>
                    <field_name>
                        Status
                    <field_body>
                        <paragraph>
                            $RCSfile$
            ...

    After running the bibliographic field list transform, the
    resulting document tree would look like this::

        <document>
            <title>
                Document Title
            <docinfo>
                <author>
                    A. Name
                <status>
                    frontmatter.py
            ...

    The "Status" field contained an expanded RCS keyword, which is
    normally (but optionally) cleaned up by the transform. The sole
    contents of the field body must be a paragraph containing an
    expanded RCS keyword of the form "$keyword: expansion text $". Any
    RCS keyword can be processed in any bibliographic field. The
    dollar signs and leading RCS keyword name are removed. Extra
    processing is done for the following RCS keywords:

    - "RCSfile" expands to the name of the file in the RCS or CVS
      repository, which is the name of the source file with a ",v"
      suffix appended. The transform will remove the ",v" suffix.

    - "Date" expands to the format "YYYY/MM/DD hh:mm:ss" (in the UTC
      time zone). The RCS Keywords transform will extract just the
      date itself and transform it to an ISO 8601 format date, as in
      "2000-12-31".

      (Since the source file for this text is itself stored under CVS,
      we can't show an example of the "Date" RCS keyword because we
      can't prevent any RCS keywords used in this explanation from
      being expanded. Only the "RCSfile" keyword is stable; its
      expansion text changes only if the file name changes.)

    .. _reStructuredText: http://docutils.sf.net/rst.html
    .. _reStructuredText Markup Specification:
       http://docutils.sf.net/docs/ref/rst/restructuredtext.html
    """

    default_priority = 340

    biblio_nodes = {
          'author': nodes.author,
          'authors': nodes.authors,
          'organization': nodes.organization,
          'address': nodes.address,
          'contact': nodes.contact,
          'version': nodes.version,
          'revision': nodes.revision,
          'status': nodes.status,
          'date': nodes.date,
          'copyright': nodes.copyright,
          'dedication': nodes.topic,
          'abstract': nodes.topic}
    """Canonical field name (lowcased) to node class name mapping for
    bibliographic fields (field_list)."""

    def apply(self):
        """
        Replace a leading bibliographic field list by the nodes built
        from it.

        Does nothing if the "docinfo_xform" setting is false, or if the
        first child that is not a `PreBibliographic` element is not a
        field list.
        """
        if not getattr(self.document.settings, 'docinfo_xform', 1):
            return
        document = self.document
        index = document.first_child_not_matching_class(
              nodes.PreBibliographic)
        if index is None:
            return
        candidate = document[index]
        if isinstance(candidate, nodes.field_list):
            # The new nodes go right after any Titular/Decorative
            # children, which may be before the field list's old place.
            biblioindex = document.first_child_not_matching_class(
                  (nodes.Titular, nodes.Decorative))
            nodelist = self.extract_bibliographic(candidate)
            del document[index]         # untransformed field list (candidate)
            document[biblioindex:biblioindex] = nodelist

    def extract_bibliographic(self, field_list):
        """
        Convert the fields of `field_list` to bibliographic nodes.

        Fields that cannot be converted (unregistered name, wrong
        structure, duplicate topic, ...) are kept as generic fields
        inside the docinfo element; a class value derived from the
        normalized field name is added to them.

        Return a list holding the docinfo element (if it is not empty)
        followed by the "dedication" and "abstract" topics (if present),
        in that order.
        """
        docinfo = nodes.docinfo()
        bibliofields = self.language.bibliographic_fields
        labels = self.language.labels
        topics = {'dedication': None, 'abstract': None}
        for field in field_list:
            try:
                name = field[0][0].astext()
                normedname = nodes.fully_normalize_name(name)
                if not (len(field) == 2 and normedname in bibliofields
                        and self.check_empty_biblio_field(field, name)):
                    raise TransformError
                canonical = bibliofields[normedname]
                biblioclass = self.biblio_nodes[canonical]
                if issubclass(biblioclass, nodes.TextElement):
                    # Simple text fields: body must be a single paragraph.
                    if not self.check_compound_biblio_field(field, name):
                        raise TransformError
                    utils.clean_rcs_keywords(
                          field[1][0], self.rcs_keyword_substitutions)
                    docinfo.append(biblioclass('', '', *field[1][0]))
                elif issubclass(biblioclass, nodes.authors):
                    self.extract_authors(field, name, docinfo)
                elif issubclass(biblioclass, nodes.topic):
                    # "dedication" and "abstract" may occur only once.
                    if topics[canonical]:
                        field[-1] += self.document.reporter.warning(
                            'There can only be one "%s" field.' % name,
                            base_node=field)
                        raise TransformError
                    title = nodes.title(name, labels[canonical])
                    title[0].rawsource = labels[canonical]
                    topics[canonical] = biblioclass(
                        '', title, classes=[canonical], *field[1].children)
                else:
                    docinfo.append(biblioclass('', *field[1].children))
            except TransformError:
                # Fall back to a generic field; still clean up a lone
                # paragraph holding an RCS keyword.
                if len(field[-1]) == 1 \
                       and isinstance(field[-1][0], nodes.paragraph):
                    utils.clean_rcs_keywords(
                        field[-1][0], self.rcs_keyword_substitutions)
                # if normedname not in bibliofields:
                classvalue = nodes.make_id(normedname)
                if classvalue:
                    field['classes'].append(classvalue)
                docinfo.append(field)
        nodelist = []
        if len(docinfo) != 0:
            nodelist.append(docinfo)
        for name in ('dedication', 'abstract'):
            if topics[name]:
                nodelist.append(topics[name])
        return nodelist

    def check_empty_biblio_field(self, field, name):
        """
        Return 1 if the body of `field` has content.

        Otherwise append a warning to the field body and return None.
        """
        if len(field[-1]) < 1:
            field[-1] += self.document.reporter.warning(
                  'Cannot extract empty bibliographic field "%s".' % name,
                  base_node=field)
            return None
        return 1

    def check_compound_biblio_field(self, field, name):
        """
        Return 1 if the body of `field` consists of exactly one paragraph.

        Otherwise append a warning to the field body and return None.
        The body is expected to be non-empty (see
        `check_empty_biblio_field`).
        """
        if len(field[-1]) > 1:
            field[-1] += self.document.reporter.warning(
                  'Cannot extract compound bibliographic field "%s".' % name,
                  base_node=field)
            return None
        if not isinstance(field[-1][0], nodes.paragraph):
            field[-1] += self.document.reporter.warning(
                  'Cannot extract bibliographic field "%s" containing '
                  'anything other than a single paragraph.' % name,
                  base_node=field)
            return None
        return 1

    # (pattern, replacement) pairs passed to `utils.clean_rcs_keywords`.
    # The leading ``r'\$'`` is kept as a separate literal -- presumably so
    # that version control does not expand the keyword in this file itself.
    rcs_keyword_substitutions = [
          (re.compile(r'\$' r'Date: (\d\d\d\d)[-/](\d\d)[-/](\d\d)[ T][\d:]+'
                      r'[^$]* \$', re.IGNORECASE), r'\1-\2-\3'),
          (re.compile(r'\$' r'RCSfile: (.+),v \$', re.IGNORECASE), r'\1'),
          (re.compile(r'\$[a-zA-Z]+: (.+) \$'), r'\1'),]

    def extract_authors(self, field, name, docinfo):
        """
        Append an "authors" node built from `field` to `docinfo`.

        The field body may be a single paragraph (authors separated by a
        locale-dependent separator), a bullet list (one author per
        item), or several paragraphs (one author each).

        On failure a warning is appended to the field body and the
        `TransformError` is re-raised for the caller to handle.
        """
        try:
            if len(field[1]) == 1:
                if isinstance(field[1][0], nodes.paragraph):
                    authors = self.authors_from_one_paragraph(field)
                elif isinstance(field[1][0], nodes.bullet_list):
                    authors = self.authors_from_bullet_list(field)
                else:
                    raise TransformError
            else:
                authors = self.authors_from_paragraphs(field)
            authornodes = [nodes.author('', '', *author)
                           for author in authors if author]
            if len(authornodes) >= 1:
                docinfo.append(nodes.authors('', *authornodes))
            else:
                raise TransformError
        except TransformError:
            field[-1] += self.document.reporter.warning(
                  'Bibliographic field "%s" incompatible with extraction: '
                  'it must contain either a single paragraph (with authors '
                  'separated by one of "%s"), multiple paragraphs (one per '
                  'author), or a bullet list with one paragraph (one author) '
                  'per item.'
                  % (name, ''.join(self.language.author_separators)),
                  base_node=field)
            raise

    def authors_from_one_paragraph(self, field):
        """Return a list of one-element lists, each holding the Text node
        for one author name.

        The set of separators is locale dependent (default: ";"- or ",").
        The separators are tried in order; the first one that actually
        splits the text is used.

        Raise `TransformError` if the field body contains no text.
        """
        # @@ keep original formatting? (e.g. ``:authors: A. Test, *et-al*``)
        text = ''.join(unicode(node)
                       for node in field[1].traverse(nodes.Text))
        if not text:
            raise TransformError
        # Start from the unsplit text so that an empty separator list
        # yields a single author instead of an unbound local.
        authornames = [text]
        for authorsep in self.language.author_separators:
            # don't split at escaped `authorsep`; the separator is literal
            # text, so protect any regular expression metacharacters:
            pattern = '(?<!\x00)%s' % re.escape(authorsep)
            authornames = re.split(pattern, text)
            if len(authornames) > 1:
                break
        authornames = (name.strip() for name in authornames)
        authors = [[nodes.Text(name, utils.unescape(name, True))]
                   for name in authornames if name]
        return authors

    def authors_from_bullet_list(self, field):
        """
        Return one list of child nodes per bullet list item (= author).

        Comments inside the list are skipped.  Raise `TransformError` if
        an item is not exactly one paragraph, or if no author is found.
        """
        authors = []
        for item in field[1][0]:
            if isinstance(item, nodes.comment):
                continue
            if len(item) != 1 or not isinstance(item[0], nodes.paragraph):
                raise TransformError
            authors.append(item[0].children)
        if not authors:
            raise TransformError
        return authors

    def authors_from_paragraphs(self, field):
        """
        Return one list of child nodes per paragraph (= author).

        Comments in the field body are skipped.  Raise `TransformError`
        if the body contains anything but paragraphs and comments.
        """
        for item in field[1]:
            if not isinstance(item, (nodes.paragraph, nodes.comment)):
                raise TransformError
        authors = [item.children for item in field[1]
                   if not isinstance(item, nodes.comment)]
        return authors
