guppy_basecaller: env/lib/python3.7/site-packages/docutils/transforms/frontmatter.py comparison

comparison env/lib/python3.7/site-packages/docutils/transforms/frontmatter.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"

author	shellac
date	Sat, 02 May 2020 07:14:21 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:26e78fe6e8c4
+# $Id: frontmatter.py 8389 2019-09-11 11:39:13Z milde $
+# Author: David Goodger, Ueli Schlaepfer <goodger@python.org>
+# Copyright: This module has been placed in the public domain.
+"""
+Transforms related to the front matter of a document or a section
+(information found before the main text):
+- `DocTitle`: Used to transform a lone top level section's title to
+the document title, promote a remaining lone top-level section's
+title to the document subtitle, and determine the document's title
+metadata (document['title']) based on the document title and/or the
+"title" setting.
+- `SectionSubTitle`: Used to transform a lone subsection into a
+subtitle.
+- `DocInfo`: Used to transform a bibliographic field list into docinfo
+elements.
+"""
+__docformat__ = 'reStructuredText'
+import re
+import sys
+from docutils import nodes, utils
+from docutils.transforms import TransformError, Transform
# Python 3 has no separate ``unicode`` builtin; alias it to ``str`` so code
# further down in this module can keep calling ``unicode(...)``.
if sys.version_info[0] >= 3:
    unicode = str  # noqa
class TitlePromoter(Transform):

    """
    Abstract base class for the `DocTitle` and `SectionSubTitle` transforms.

    It supplies the promotion helpers those transforms call.
    """

    def promote_title(self, node):
        """
        Hoist the title of a lone child section up into `node`.

        A tree shaped like this::

            <node>
                <section>
                    <title>
                    ...

        is rewritten to::

            <node>
                <title>
                ...

        `node` is normally a document.

        Return True if a title was promoted and False if no candidate
        section was found.  Raise TypeError if `node` is not a
        `nodes.Element`.
        """
        if not isinstance(node, nodes.Element):
            raise TypeError('node must be of Element-derived type.')

        # Precondition: `node` must not have a title yet.
        assert not (len(node) and isinstance(node[0], nodes.title))

        section, index = self.candidate_index(node)
        if index is None:
            return False

        # Copy the section's attributes onto the node.
        # NOTE: Pass `replace=False` to keep attributes that already
        #       exist in node instead of replacing them.
        # NOTE: Drop `and_source` to NOT copy the 'source' attribute
        #       from section.
        node.update_all_atts_concatenating(
            section, replace=True, and_source=True)

        # setup_child is called automatically for all nodes.
        heading = section[:1]     # the section title
        leading = node[:index]    # what was in the node before the section
        remainder = section[1:]   # what was in the section
        node[:] = heading + leading + remainder
        assert isinstance(node[0], nodes.title)
        return True

    def promote_subtitle(self, node):
        """
        Turn a lone subsection of `node` into a subtitle.

        A tree shaped like this::

            <node>
                <title>
                <section>
                    <title>
                    ...

        is rewritten to::

            <node>
                <title>
                <subtitle>
                ...

        Return True if a subtitle was created and False if no candidate
        subsection was found.  Raise TypeError if `node` is not a
        `nodes.Element`.
        """
        if not isinstance(node, nodes.Element):
            raise TypeError('node must be of Element-derived type.')

        subsection, index = self.candidate_index(node)
        if index is None:
            return False

        subtitle = nodes.subtitle()

        # Copy the subsection's attributes onto the new subtitle.
        # NOTE: Pass `replace=False` to keep attributes that already
        #       exist instead of replacing them.
        # NOTE: Drop `and_source` to NOT copy the 'source' attribute
        #       from the subsection.
        subtitle.update_all_atts_concatenating(
            subsection, replace=True, and_source=True)

        # The subtitle takes over the children of the subsection's title.
        subtitle[:] = subsection[0][:]

        title_part = node[:1]         # the existing title
        leading = node[1:index]       # what was before the subsection
        remainder = subsection[1:]    # what was in the subsection
        node[:] = title_part + [subtitle] + leading + remainder
        return True

    def candidate_index(self, node):
        """
        Find and return the promotion candidate and its index.

        The candidate is the first child of `node` that is not a
        `nodes.PreBibliographic` instance; it only qualifies if it is the
        last child of `node` and is a `nodes.section`.

        Return (None, None) if no valid candidate was found.
        """
        index = node.first_child_not_matching_class(nodes.PreBibliographic)
        if index is None:
            return None, None
        if len(node) > (index + 1):
            # Something follows the candidate, so it is not "lone".
            return None, None
        candidate = node[index]
        if not isinstance(candidate, nodes.section):
            return None, None
        return candidate, index
class DocTitle(TitlePromoter):

    """
    In reStructuredText_, there is no way to specify a document title
    and subtitle explicitly.  They are supplied implicitly instead, and
    this two-step transform "raises" or "promotes" the title(s) (and
    their corresponding section contents) to the document level.

    1. If the document contains a single top-level section as its
       first non-comment element, that section's title becomes the
       document's title, and the section's contents become the
       document's immediate contents.  The lone top-level section
       header must be the first non-comment element in the document.

       For example, take this input text::

           =================
            Top-Level Title
           =================

           A paragraph.

       Once parsed, it looks like this::

           <document>
               <section names="top-level title">
                   <title>
                       Top-Level Title
                   <paragraph>
                       A paragraph.

       After running the DocTitle transform, we have::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <paragraph>
                   A paragraph.

    2. If step 1 successfully determines the document title, the
       transform continues by checking for a subtitle.

       If the lone top-level section itself contains a single
       second-level section as its first non-comment element, that
       section's title is promoted to the document's subtitle, and
       that section's contents become the document's immediate
       contents.  Given this input text::

           =================
            Top-Level Title
           =================

           Second-Level Title
           ~~~~~~~~~~~~~~~~~~

           A paragraph.

       After parsing and running the Section Promotion transform, the
       result is::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <subtitle names="second-level title">
                   Second-Level Title
               <paragraph>
                   A paragraph.

       (Note that the implicit hyperlink target generated by the
       "Second-Level Title" is preserved on the "subtitle" element
       itself.)

    Any comment elements occurring before the document title or
    subtitle are accumulated and inserted as the first body elements
    after the title(s).

    This transform also sets the document's metadata title
    (document['title']).

    .. _reStructuredText: http://docutils.sf.net/rst.html
    """

    default_priority = 320

    def set_metadata(self):
        """
        Set the document['title'] metadata title.

        Sources, listed in order of priority:

        * Existing document['title'] attribute.
        * "title" setting.
        * Document title node (as promoted by promote_title).
        """
        document = self.document
        if document.hasattr('title'):
            # An explicit attribute always wins; leave it untouched.
            return
        configured = document.settings.title
        if configured is not None:
            document['title'] = configured
        elif len(document) and isinstance(document[0], nodes.title):
            document['title'] = document[0].astext()

    def apply(self):
        """
        Promote title and subtitle (unless disabled), then set metadata.

        Promotion is skipped when the `doctitle_xform` setting is false;
        a missing setting counts as enabled.  The metadata title is set
        in either case.
        """
        document = self.document
        enabled = getattr(document.settings, 'doctitle_xform', 1)
        # promote_(sub)title are defined in the TitlePromoter base class.
        # A subtitle is only looked for once a title has been promoted.
        if enabled and self.promote_title(document):
            self.promote_subtitle(document)
        # Set document['title'].
        self.set_metadata()
class SectionSubTitle(TitlePromoter):

    """
    This works like document subtitles, but for sections.  For example, ::

        <section>
            <title>
                Title
            <section>
                <title>
                    Subtitle
                ...

    is transformed into ::

        <section>
            <title>
                Title
            <subtitle>
                Subtitle
            ...

    For details refer to the docstring of DocTitle.
    """

    default_priority = 350

    def apply(self):
        """
        Try to promote a subtitle in every section of the document.

        Does nothing when the `sectsubtitle_xform` setting is false; a
        missing setting counts as enabled.
        """
        enabled = getattr(self.document.settings, 'sectsubtitle_xform', 1)
        if enabled:
            for section in self.document._traverse(nodes.section):
                # On our way through the node tree, we are modifying it
                # but only the not-yet-visited part, so that the iterator
                # returned by _traverse() is not corrupted.
                self.promote_subtitle(section)
class DocInfo(Transform):

    """
    This transform is specific to the reStructuredText_ markup syntax;
    see "Bibliographic Fields" in the `reStructuredText Markup
    Specification`_ for a high-level description.  This transform
    should be run *after* the `DocTitle` transform.

    Given a field list as the first non-comment element after the
    document title and subtitle (if present), registered bibliographic
    field names are transformed to the corresponding DTD elements,
    becoming child elements of the "docinfo" element (except for a
    dedication and/or an abstract, which become "topic" elements after
    "docinfo").

    For example, given this document fragment after parsing::

        <document>
            <title>
                Document Title
            <field_list>
                <field>
                    <field_name>
                        Author
                    <field_body>
                        <paragraph>
                            A. Name
                <field>
                    <field_name>
                        Status
                    <field_body>
                        <paragraph>
                            $RCSfile$
            ...

    After running the bibliographic field list transform, the
    resulting document tree would look like this::

        <document>
            <title>
                Document Title
            <docinfo>
                <author>
                    A. Name
                <status>
                    frontmatter.py
            ...

    The "Status" field contained an expanded RCS keyword, which is
    normally (but optionally) cleaned up by the transform.  The sole
    contents of the field body must be a paragraph containing an
    expanded RCS keyword of the form "$keyword: expansion text $".  Any
    RCS keyword can be processed in any bibliographic field.  The
    dollar signs and leading RCS keyword name are removed.  Extra
    processing is done for the following RCS keywords:

    - "RCSfile" expands to the name of the file in the RCS or CVS
      repository, which is the name of the source file with a ",v"
      suffix appended.  The transform will remove the ",v" suffix.

    - "Date" expands to the format "YYYY/MM/DD hh:mm:ss" (in the UTC
      time zone).  The RCS Keywords transform will extract just the
      date itself and transform it to an ISO 8601 format date, as in
      "2000-12-31".

      (Since the source file for this text is itself stored under CVS,
      we can't show an example of the "Date" RCS keyword because we
      can't prevent any RCS keywords used in this explanation from
      being expanded.  Only the "RCSfile" keyword is stable; its
      expansion text changes only if the file name changes.)

    .. _reStructuredText: http://docutils.sf.net/rst.html
    .. _reStructuredText Markup Specification:
       http://docutils.sf.net/docs/ref/rst/restructuredtext.html
    """

    default_priority = 340

    biblio_nodes = {
        'author': nodes.author,
        'authors': nodes.authors,
        'organization': nodes.organization,
        'address': nodes.address,
        'contact': nodes.contact,
        'version': nodes.version,
        'revision': nodes.revision,
        'status': nodes.status,
        'date': nodes.date,
        'copyright': nodes.copyright,
        'dedication': nodes.topic,
        'abstract': nodes.topic}
    """Canonical field name (lowcased) to node class name mapping for
    bibliographic fields (field_list)."""

    def apply(self):
        """
        Replace a leading bibliographic field list by docinfo/topic nodes.

        Does nothing when the `docinfo_xform` setting is false (a missing
        setting counts as enabled), or when the first child that is not
        a `nodes.PreBibliographic` instance is missing or is not a field
        list.
        """
        document = self.document
        if not getattr(document.settings, 'docinfo_xform', 1):
            return
        index = document.first_child_not_matching_class(
            nodes.PreBibliographic)
        if index is None:
            return
        candidate = document[index]
        if not isinstance(candidate, nodes.field_list):
            return
        # Insertion point: first child that is neither Titular nor
        # Decorative.  Must be computed before the field list is removed.
        biblioindex = document.first_child_not_matching_class(
            (nodes.Titular, nodes.Decorative))
        nodelist = self.extract_bibliographic(candidate)
        del document[index]         # untransformed field list (candidate)
        document[biblioindex:biblioindex] = nodelist

    def extract_bibliographic(self, field_list):
        """
        Convert the fields of `field_list` to bibliographic nodes.

        Return a list holding the "docinfo" node (only if it ended up
        with children), followed by the "dedication" and "abstract"
        topics (in that order) where present.  A field that cannot be
        converted is appended to the docinfo unchanged, apart from RCS
        keyword cleanup and an extra class value derived from its name.
        """
        docinfo = nodes.docinfo()
        bibliofields = self.language.bibliographic_fields
        labels = self.language.labels
        topics = {'dedication': None, 'abstract': None}
        for field in field_list:
            try:
                name = field[0][0].astext()
                normedname = nodes.fully_normalize_name(name)
                if not (len(field) == 2 and normedname in bibliofields
                        and self.check_empty_biblio_field(field, name)):
                    raise TransformError
                canonical = bibliofields[normedname]
                biblioclass = self.biblio_nodes[canonical]
                if issubclass(biblioclass, nodes.TextElement):
                    if not self.check_compound_biblio_field(field, name):
                        raise TransformError
                    utils.clean_rcs_keywords(
                        field[1][0], self.rcs_keyword_substitutions)
                    docinfo.append(biblioclass('', '', *field[1][0]))
                elif issubclass(biblioclass, nodes.authors):
                    self.extract_authors(field, name, docinfo)
                elif issubclass(biblioclass, nodes.topic):
                    if topics[canonical]:
                        # A second dedication/abstract is reported and
                        # then handled as a generic field.
                        field[-1] += self.document.reporter.warning(
                            'There can only be one "%s" field.' % name,
                            base_node=field)
                        raise TransformError
                    title = nodes.title(name, labels[canonical])
                    title[0].rawsource = labels[canonical]
                    topics[canonical] = biblioclass(
                        '', title, classes=[canonical], *field[1].children)
                else:
                    docinfo.append(biblioclass('', *field[1].children))
            except TransformError:
                # Fallback: keep the field itself inside the docinfo.
                if len(field[-1]) == 1 \
                        and isinstance(field[-1][0], nodes.paragraph):
                    utils.clean_rcs_keywords(
                        field[-1][0], self.rcs_keyword_substitutions)
                # if normedname not in bibliofields:
                classvalue = nodes.make_id(normedname)
                if classvalue:
                    field['classes'].append(classvalue)
                docinfo.append(field)
        nodelist = []
        # NOTE(review): the explicit length test is kept on purpose;
        # docutils nodes are presumably truthy even when empty -- confirm
        # before simplifying to ``if docinfo:``.
        if len(docinfo) != 0:
            nodelist.append(docinfo)
        for name in ('dedication', 'abstract'):
            if topics[name]:
                nodelist.append(topics[name])
        return nodelist

    def check_empty_biblio_field(self, field, name):
        """
        Return 1 if the body of `field` has content, else None.

        For an empty body, a warning naming `name` is added to it.
        """
        if len(field[-1]) < 1:
            field[-1] += self.document.reporter.warning(
                'Cannot extract empty bibliographic field "%s".' % name,
                base_node=field)
            return None
        return 1

    def check_compound_biblio_field(self, field, name):
        """
        Return 1 if the body of `field` is one single paragraph, else None.

        On failure, a warning naming `name` is added to the field body.
        """
        if len(field[-1]) > 1:
            field[-1] += self.document.reporter.warning(
                'Cannot extract compound bibliographic field "%s".' % name,
                base_node=field)
            return None
        if not isinstance(field[-1][0], nodes.paragraph):
            field[-1] += self.document.reporter.warning(
                'Cannot extract bibliographic field "%s" containing '
                'anything other than a single paragraph.' % name,
                base_node=field)
            return None
        return 1

    # (compiled pattern, replacement) pairs handed to
    # `utils.clean_rcs_keywords`.  The patterns are written as
    # r'\$' r'Keyword: ...' so that the literal text "$Keyword:" does not
    # occur in this source file.
    rcs_keyword_substitutions = [
        (re.compile(r'\$' r'Date: (\d\d\d\d)[-/](\d\d)[-/](\d\d)[ T][\d:]+'
                    r'[^$]* \$', re.IGNORECASE), r'\1-\2-\3'),
        (re.compile(r'\$' r'RCSfile: (.+),v \$', re.IGNORECASE), r'\1'),
        (re.compile(r'\$[a-zA-Z]+: (.+) \$'), r'\1'),]

    def extract_authors(self, field, name, docinfo):
        """
        Append an "authors" node built from `field` to `docinfo`.

        The field body may be a single paragraph (names separated by a
        language-specific separator), a single bullet list (one
        paragraph per item), or several paragraphs (one per author).

        Raise TransformError, after adding a warning to the field body,
        if the body has another shape or yields no author.
        """
        try:
            if len(field[1]) == 1:
                if isinstance(field[1][0], nodes.paragraph):
                    authors = self.authors_from_one_paragraph(field)
                elif isinstance(field[1][0], nodes.bullet_list):
                    authors = self.authors_from_bullet_list(field)
                else:
                    raise TransformError
            else:
                authors = self.authors_from_paragraphs(field)
            authornodes = [nodes.author('', '', *author)
                           for author in authors if author]
            if len(authornodes) >= 1:
                docinfo.append(nodes.authors('', *authornodes))
            else:
                raise TransformError
        except TransformError:
            field[-1] += self.document.reporter.warning(
                'Bibliographic field "%s" incompatible with extraction: '
                'it must contain either a single paragraph (with authors '
                'separated by one of "%s"), multiple paragraphs (one per '
                'author), or a bullet list with one paragraph (one author) '
                'per item.'
                % (name, ''.join(self.language.author_separators)),
                base_node=field)
            raise

    def authors_from_one_paragraph(self, field):
        """
        Return the author names found in a single-paragraph field body.

        The result is a list with one single-element list (holding a
        Text node) per non-empty author name.  The separators are
        language dependent and tried in order; the first one that
        actually splits the text is used.  If none does (or none is
        defined), the whole text counts as one author.

        Raise TransformError if the field body contains no text.
        """
        # @@ keep original formatting? (e.g. ``:authors: A. Test, *et-al*``)
        text = ''.join(unicode(node)
                       for node in field[1].traverse(nodes.Text))
        if not text:
            raise TransformError
        # Default for "no separator matched"; also keeps `authornames`
        # bound when the language defines no separators at all.
        authornames = [text]
        for authorsep in self.language.author_separators:
            # Separators are literal strings, so escape regex
            # metacharacters.  Don't split at an escaped `authorsep`
            # (one preceded by a NUL character).
            pattern = '(?<!\x00)%s' % re.escape(authorsep)
            parts = re.split(pattern, text)
            if len(parts) > 1:
                authornames = parts
                break
        authornames = (name.strip() for name in authornames)
        authors = [[nodes.Text(name, utils.unescape(name, True))]
                   for name in authornames if name]
        return authors

    def authors_from_bullet_list(self, field):
        """
        Return one list of child nodes per author from a bullet list body.

        Comment items are skipped.  Raise TransformError if any other
        item is not exactly one paragraph, or if no author is found.
        """
        authors = []
        for item in field[1][0]:
            if isinstance(item, nodes.comment):
                continue
            if len(item) != 1 or not isinstance(item[0], nodes.paragraph):
                raise TransformError
            authors.append(item[0].children)
        if not authors:
            raise TransformError
        return authors

    def authors_from_paragraphs(self, field):
        """
        Return one list of child nodes per paragraph of the field body.

        Comments are skipped.  Raise TransformError if the body contains
        anything other than paragraphs and comments.
        """
        for item in field[1]:
            if not isinstance(item, (nodes.paragraph, nodes.comment)):
                raise TransformError
        authors = [item.children for item in field[1]
                   if not isinstance(item, nodes.comment)]
        return authors

Mercurial > repos > shellac > guppy_basecaller

comparison env/lib/python3.7/site-packages/docutils/transforms/frontmatter.py @ 0:26e78fe6e8c4 draft