Mercurial > repos > shellac > guppy_basecaller
diff env/lib/python3.7/site-packages/docutils/utils/math/latex2mathml.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author | shellac |
---|---|
date | Sat, 02 May 2020 07:14:21 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# :Id: $Id: latex2mathml.py 8366 2019-08-27 12:09:19Z milde $
# :Copyright: © 2010 Günter Milde.
#             Based on rst2mathml.py from the latex_math sandbox project
#             © 2005 Jens Jørgen Mortensen
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
#    Copying and distribution of this file, with or without modification,
#    are permitted in any medium without royalty provided the copyright
#    notice and this notice are preserved.
#    This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause


"""Convert LaTeX math code into presentational MathML"""

# Based on the `latex_math` sandbox project by Jens Jørgen Mortensen

import docutils.utils.math.tex2unichar as tex2unichar

# TeX accent commands mapped to *spacing* accent characters
# (the corresponding combining characters are given in the comments).
over = {'acute': u'\u00B4',  # u'\u0301',
        'bar': u'\u00AF',  # u'\u0304',
        'breve': u'\u02D8',  # u'\u0306',
        'check': u'\u02C7',  # u'\u030C',
        'dot': u'\u02D9',  # u'\u0307',
        'ddot': u'\u00A8',  # u'\u0308',
        'dddot': u'\u20DB',
        'grave': u'`',  # u'\u0300',
        'hat': u'^',  # u'\u0302',
        'mathring': u'\u02DA',  # u'\u030A',
        'overleftrightarrow': u'\u20e1',
        # 'overline': # u'\u0305',
        'tilde': u'\u02DC',  # u'\u0303',
        'vec': u'\u20D7'}

Greek = {  # Capital Greek letters: (upright in TeX style)
    'Phi': u'\u03a6', 'Xi': u'\u039e', 'Sigma': u'\u03a3',
    'Psi': u'\u03a8', 'Delta': u'\u0394', 'Theta': u'\u0398',
    'Upsilon': u'\u03d2', 'Pi': u'\u03a0', 'Omega': u'\u03a9',
    'Gamma': u'\u0393', 'Lambda': u'\u039b'}

letters = tex2unichar.mathalpha

# NOTE: `special` is the very same dict object as `tex2unichar.mathbin`,
# so the updates below extend that imported table in place.
special = tex2unichar.mathbin         # Binary symbols
special.update(tex2unichar.mathrel)   # Relation symbols, arrow symbols
special.update(tex2unichar.mathord)   # Miscellaneous symbols
special.update(tex2unichar.mathop)    # Variable-sized symbols
special.update(tex2unichar.mathopen)  # Braces
special.update(tex2unichar.mathclose)  # Braces
special.update(tex2unichar.mathfence)

# Operators whose sub-/superscripts are set as under-/over-scripts.
sumintprod = ''.join([special[symbol] for symbol in
                      ['sum', 'int', 'oint', 'prod']])

functions = ['arccos', 'arcsin', 'arctan', 'arg', 'cos', 'cosh',
             'cot', 'coth', 'csc', 'deg', 'det', 'dim',
             'exp', 'gcd', 'hom', 'inf', 'ker', 'lg',
             'lim', 'liminf', 'limsup', 'ln', 'log', 'max',
             'min', 'Pr', 'sec', 'sin', 'sinh', 'sup',
             'tan', 'tanh',
             'injlim', 'varinjlim', 'varlimsup',
             'projlim', 'varliminf', 'varprojlim']


mathbb = {
    'A': u'\U0001D538',
    'B': u'\U0001D539',
    'C': u'\u2102',
    'D': u'\U0001D53B',
    'E': u'\U0001D53C',
    'F': u'\U0001D53D',
    'G': u'\U0001D53E',
    'H': u'\u210D',
    'I': u'\U0001D540',
    'J': u'\U0001D541',
    'K': u'\U0001D542',
    'L': u'\U0001D543',
    'M': u'\U0001D544',
    'N': u'\u2115',
    'O': u'\U0001D546',
    'P': u'\u2119',
    'Q': u'\u211A',
    'R': u'\u211D',
    'S': u'\U0001D54A',
    'T': u'\U0001D54B',
    'U': u'\U0001D54C',
    'V': u'\U0001D54D',
    'W': u'\U0001D54E',
    'X': u'\U0001D54F',
    'Y': u'\U0001D550',
    'Z': u'\u2124',
    }

mathscr = {
    'A': u'\U0001D49C',
    'B': u'\u212C',  # bernoulli function
    'C': u'\U0001D49E',
    'D': u'\U0001D49F',
    'E': u'\u2130',
    'F': u'\u2131',
    'G': u'\U0001D4A2',
    'H': u'\u210B',  # hamiltonian
    'I': u'\u2110',
    'J': u'\U0001D4A5',
    'K': u'\U0001D4A6',
    'L': u'\u2112',  # lagrangian
    'M': u'\u2133',  # physics m-matrix
    'N': u'\U0001D4A9',
    'O': u'\U0001D4AA',
    'P': u'\U0001D4AB',
    'Q': u'\U0001D4AC',
    'R': u'\u211B',
    'S': u'\U0001D4AE',
    'T': u'\U0001D4AF',
    'U': u'\U0001D4B0',
    'V': u'\U0001D4B1',
    'W': u'\U0001D4B2',
    'X': u'\U0001D4B3',
    'Y': u'\U0001D4B4',
    'Z': u'\U0001D4B5',
    'a': u'\U0001D4B6',
    'b': u'\U0001D4B7',
    'c': u'\U0001D4B8',
    'd': u'\U0001D4B9',
    'e': u'\u212F',
    'f': u'\U0001D4BB',
    'g': u'\u210A',
    'h': u'\U0001D4BD',
    'i': u'\U0001D4BE',
    'j': u'\U0001D4BF',
    'k': u'\U0001D4C0',
    'l': u'\U0001D4C1',
    'm': u'\U0001D4C2',
    'n': u'\U0001D4C3',
    'o': u'\u2134',  # order of
    'p': u'\U0001D4C5',
    'q': u'\U0001D4C6',
    'r': u'\U0001D4C7',
    's': u'\U0001D4C8',
    't': u'\U0001D4C9',
    'u': u'\U0001D4CA',
    'v': u'\U0001D4CB',
    'w': u'\U0001D4CC',
    'x': u'\U0001D4CD',
    'y': u'\U0001D4CE',
    'z': u'\U0001D4CF',
    }

# Operators that may follow ``\not``, mapped to their negated form.
negatables = {'=': u'\u2260',
              r'\in': u'\u2209',
              r'\equiv': u'\u2262'}


# LaTeX to MathML translation stuff:
class math(object):
    """Base class for MathML elements.

    The class name doubles as the XML tag name (see `xml_start()` and
    `xml_end()`), which is why the element classes are lower-case.
    """

    # Number of children at which the element counts as "full"
    # (see `full()`).  The default is large enough to be "unlimited".
    nchildren = 1000000

    def __init__(self, children=None, inline=None):
        """math([children]) -> MathML element

        children can be one child or a list of children.

        `inline` is only stored when it is not None; its presence marks
        the element as the ``<math>`` root (see `xml_start()`)."""

        self.children = []
        if children is not None:
            if isinstance(children, list):
                for child in children:
                    self.append(child)
            else:
                # Only one child:
                self.append(children)

        if inline is not None:
            self.inline = inline

    def __repr__(self):
        # Leaf classes (mx, mtext) do not call math.__init__() and thus
        # have no `children` attribute.
        if hasattr(self, 'children'):
            return self.__class__.__name__ + '(%s)' % \
                   ','.join([repr(child) for child in self.children])
        else:
            return self.__class__.__name__

    def full(self):
        """Return True if the element cannot take any more children."""

        return len(self.children) >= self.nchildren

    def append(self, child):
        """append(child) -> element

        Appends child and returns self if self is not full or first
        non-full parent."""

        assert not self.full()
        self.children.append(child)
        child.parent = self
        node = self
        while node.full():
            node = node.parent
        return node

    def delete_child(self):
        """delete_child() -> child

        Delete last child and return it.

        Raises IndexError if there are no children."""

        child = self.children[-1]
        del self.children[-1]
        return child

    def close(self):
        """close() -> parent

        Close element and return first non-full element."""

        parent = self.parent
        while parent.full():
            parent = parent.parent
        return parent

    def xml(self):
        """xml() -> list of XML string fragments

        The fragments are meant to be concatenated by the caller
        (see `tex2mathml()`)."""

        return self.xml_start() + self.xml_body() + self.xml_end()

    def xml_start(self):
        """Return the opening tag as a one-element list."""
        if not hasattr(self, 'inline'):
            return ['<%s>' % self.__class__.__name__]
        # Only the root element carries `inline`; it gets the namespace.
        xmlns = 'http://www.w3.org/1998/Math/MathML'
        if self.inline:
            return ['<math xmlns="%s">' % xmlns]
        else:
            return ['<math xmlns="%s" mode="display">' % xmlns]

    def xml_end(self):
        """Return the closing tag as a one-element list."""
        return ['</%s>' % self.__class__.__name__]

    def xml_body(self):
        """Return the concatenated XML fragments of all children."""
        xml = []
        for child in self.children:
            xml.extend(child.xml())
        return xml


class mrow(math):
    """Horizontal group; the start tag is preceded by a newline."""

    def xml_start(self):
        return ['\n<%s>' % self.__class__.__name__]


class mtable(math):
    """Table; the start tag is preceded by a newline."""

    def xml_start(self):
        return ['\n<%s>' % self.__class__.__name__]


class mtr(mrow):
    """Table row."""


class mtd(mrow):
    """Table cell."""


class mx(math):
    """Base class for mo, mi, and mn"""

    nchildren = 0

    def __init__(self, data):
        # Deliberately does not call math.__init__(): token elements
        # have no `children` list, only character data.
        self.data = data

    def xml_body(self):
        return [self.data]


class mo(mx):
    """Operator token."""

    # "<" and ">" must not appear literally in XML character data.
    translation = {'<': '&lt;', '>': '&gt;'}

    def xml_body(self):
        return [self.translation.get(self.data, self.data)]


class mi(mx):
    """Identifier token."""


class mn(mx):
    """Number token."""


class msub(math):
    nchildren = 2


class msup(math):
    nchildren = 2


class msqrt(math):
    nchildren = 1


class mroot(math):
    nchildren = 2


class mfrac(math):
    nchildren = 2


class msubsup(math):
    """Base with subscript and superscript.

    With ``reversed=True`` the second and third child are swapped the
    first time `xml()` is called (used by the parser when the
    superscript was given before the subscript)."""

    nchildren = 3

    def __init__(self, children=None, reversed=False):
        self.reversed = reversed
        math.__init__(self, children)

    def xml(self):
        if self.reversed:
            self.children[1:3] = [self.children[2], self.children[1]]
            # Swap only once, even if xml() is called again.
            self.reversed = False
        return math.xml(self)


class mfenced(math):
    """Fenced expression (``\\left ... \\right``).

    `closepar` is not set here; `handle_keyword()` assigns it when it
    encounters the matching ``\\right``."""

    translation = {'\\{': '{', '\\langle': u'\u2329',
                   '\\}': '}', '\\rangle': u'\u232A',
                   '.': ''}

    def __init__(self, par):
        self.openpar = par
        math.__init__(self)

    def xml_start(self):
        opening = self.translation.get(self.openpar, self.openpar)
        closing = self.translation.get(self.closepar, self.closepar)
        return ['<mfenced open="%s" close="%s">' % (opening, closing)]


class mspace(math):
    nchildren = 0


class mstyle(math):
    """Style wrapper; keyword arguments become XML attributes."""

    def __init__(self, children=None, nchildren=None, **kwargs):
        if nchildren is not None:
            self.nchildren = nchildren
        math.__init__(self, children)
        self.attrs = kwargs

    def xml_start(self):
        # Separate the attributes with spaces so that more than one
        # attribute still yields well-formed XML.
        attributes = ' '.join(['%s="%s"' % item
                               for item in self.attrs.items()])
        return ['<mstyle ' + attributes + '>']


class mover(math):
    """Base with over-script.

    With ``reversed=True`` the two children are swapped the first time
    `xml()` is called (used for accents, where the parser creates the
    accent before the base)."""

    nchildren = 2

    def __init__(self, children=None, reversed=False):
        self.reversed = reversed
        math.__init__(self, children)

    def xml(self):
        if self.reversed:
            self.children.reverse()
            # Swap only once, even if xml() is called again.
            self.reversed = False
        return math.xml(self)


class munder(math):
    nchildren = 2


class munderover(math):
    nchildren = 3

    def __init__(self, children=None):
        math.__init__(self, children)


class mtext(math):
    """Text token; `text` is written to the output verbatim."""

    nchildren = 0

    def __init__(self, text):
        # Like mx: no `children` list, only character data.
        self.text = text

    def xml_body(self):
        return [self.text]


def parse_latex_math(string, inline=True):
    """parse_latex_math(string [,inline]) -> MathML-tree

    Returns a MathML-tree parsed from string.  inline=True is for
    inline math and inline=False is for displayed math.

    tree is the whole tree and node is the current element.

    Raises SyntaxError for unsupported or malformed input."""

    # Normalize white-space:
    string = ' '.join(string.split())

    if inline:
        node = mrow()
        tree = math(node, inline=True)
    else:
        node = mtd()
        tree = math(mtable(mtr(node)), inline=False)

    while len(string) > 0:
        n = len(string)
        c = string[0]
        skip = 1  # number of characters consumed
        if n > 1:
            c2 = string[1]
        else:
            c2 = ''
        if c == ' ':
            pass
        elif c == '\\':
            # NOTE: an empty `c2` (backslash at the very end of the
            # input) also satisfies this membership test.
            if c2 in '{}':
                node = node.append(mo(c2))
                skip = 2
            elif c2 == ' ':
                node = node.append(mspace())
                skip = 2
            elif c2 == ',':  # TODO: small space
                node = node.append(mspace())
                skip = 2
            elif c2.isalpha():
                # We have a LaTeX-name:
                i = 2
                while i < n and string[i].isalpha():
                    i += 1
                name = string[1:i]
                node, skip = handle_keyword(name, node, string[i:])
                skip += i
            elif c2 == '\\':
                # End of a row:
                entry = mtd()
                row = mtr(entry)
                node.close().close().append(row)
                node = entry
                skip = 2
            else:
                raise SyntaxError(u'Syntax error: "%s%s"' % (c, c2))
        elif c.isalpha():
            node = node.append(mi(c))
        elif c.isdigit():
            node = node.append(mn(c))
        elif c in "+-*/=()[]|<>,.!?':;@":
            node = node.append(mo(c))
        elif c == '_':
            if not node.children:
                # Nothing to attach the subscript to.
                raise SyntaxError(u'Missing base for "%s"!' % c)
            child = node.delete_child()
            if isinstance(child, msup):
                # Superscript came first: swap the scripts on output.
                sub = msubsup(child.children, reversed=True)
            elif isinstance(child, mo) and child.data in sumintprod:
                sub = munder(child)
            else:
                sub = msub(child)
            node.append(sub)
            node = sub
        elif c == '^':
            if not node.children:
                # Nothing to attach the superscript to.
                raise SyntaxError(u'Missing base for "%s"!' % c)
            child = node.delete_child()
            if isinstance(child, msub):
                sup = msubsup(child.children)
            elif isinstance(child, mo) and child.data in sumintprod:
                sup = mover(child)
            elif (isinstance(child, munder) and
                  child.children[0].data in sumintprod):
                sup = munderover(child.children)
            else:
                sup = msup(child)
            node.append(sup)
            node = sup
        elif c == '{':
            row = mrow()
            node.append(row)
            node = row
        elif c == '}':
            node = node.close()
        elif c == '&':
            # Next table cell:
            entry = mtd()
            node.close().append(entry)
            node = entry
        else:
            raise SyntaxError(u'Illegal character: "%s"' % c)
        string = string[skip:]
    return tree


def handle_keyword(name, node, string):
    """handle_keyword(name, node, string) -> (node, skip)

    Process the LaTeX command `name` (given without the backslash).

    `node` is the current element and `string` is the input that
    follows the command name.  Returns the new current element and the
    number of characters of `string` that were consumed.

    Raises SyntaxError for unknown commands or malformed arguments."""

    skip = 0
    if len(string) > 0 and string[0] == ' ':
        # One blank after the command name belongs to the command.
        string = string[1:]
        skip = 1
    if name == 'begin':
        if not string.startswith('{matrix}'):
            raise SyntaxError(u'Environment not supported! '
                              u'Supported environment: "matrix".')
        skip += 8
        entry = mtd()
        table = mtable(mtr(entry))
        node.append(table)
        node = entry
    elif name == 'end':
        if not string.startswith('{matrix}'):
            raise SyntaxError(u'Expected "\\end{matrix}"!')
        skip += 8
        # Leave the cell, the row, and the table:
        node = node.close().close().close()
    elif name in ('text', 'mathrm'):
        # startswith() instead of string[0]: `string` may be empty.
        if not string.startswith('{'):
            raise SyntaxError(u'Expected "\\text{...}"!')
        i = string.find('}')
        if i == -1:
            raise SyntaxError(u'Expected "\\text{...}"!')
        node = node.append(mtext(string[1:i]))
        skip += i + 1
    elif name == 'sqrt':
        sqrt = msqrt()
        node.append(sqrt)
        node = sqrt
    elif name == 'frac':
        frac = mfrac()
        node.append(frac)
        node = frac
    elif name == 'left':
        for par in ['(', '[', '|', '\\{', '\\langle', '.']:
            if string.startswith(par):
                break
        else:
            raise SyntaxError(u'Missing left-brace!')
        fenced = mfenced(par)
        node.append(fenced)
        row = mrow()
        fenced.append(row)
        node = row
        skip += len(par)
    elif name == 'right':
        for par in [')', ']', '|', '\\}', '\\rangle', '.']:
            if string.startswith(par):
                break
        else:
            raise SyntaxError(u'Missing right-brace!')
        # Step out to the enclosing element and record the closing
        # delimiter on it (read by mfenced.xml_start()).
        node = node.close()
        node.closepar = par
        node = node.close()
        skip += len(par)
    elif name == 'not':
        for operator in negatables:
            if string.startswith(operator):
                break
        else:
            raise SyntaxError(u'Expected something to negate: "\\not ..."!')
        node = node.append(mo(negatables[operator]))
        skip += len(operator)
    elif name == 'mathbf':
        style = mstyle(nchildren=1, fontweight='bold')
        node.append(style)
        node = style
    elif name == 'mathbb':
        # Slices never raise, so short input and letters without a
        # table entry are reported as SyntaxError as well.
        if (string[:1] != '{' or string[1:2] not in mathbb
                or string[2:3] != '}'):
            raise SyntaxError(u'Expected something like "\\mathbb{A}"!')
        node = node.append(mi(mathbb[string[1]]))
        skip += 3
    elif name in ('mathscr', 'mathcal'):
        if (string[:1] != '{' or string[1:2] not in mathscr
                or string[2:3] != '}'):
            raise SyntaxError(u'Expected something like "\\mathscr{A}"!')
        node = node.append(mi(mathscr[string[1]]))
        skip += 3
    elif name == 'colon':  # "normal" colon, not binary operator
        node = node.append(mo(':'))  # TODO: add ``lspace="0pt"``
    elif name in Greek:  # Greek capitals (upright in "TeX style")
        node = node.append(mo(Greek[name]))
        # TODO: "ISO style" sets them italic. Could we use a class argument
        # to enable styling via CSS?
    elif name in letters:
        node = node.append(mi(letters[name]))
    elif name in special:
        node = node.append(mo(special[name]))
    elif name in functions:
        node = node.append(mo(name))
    elif name in over:
        # The accent is created first; mover swaps the children on output.
        ovr = mover(mo(over[name]), reversed=True)
        node.append(ovr)
        node = ovr
    else:
        raise SyntaxError(u'Unknown LaTeX command: ' + name)

    return node, skip


def tex2mathml(tex_math, inline=True):
    """Return string with MathML code corresponding to `tex_math`.

    `inline`=True is for inline math and `inline`=False for displayed math.
    """

    mathml_tree = parse_latex_math(tex_math, inline=inline)
    return ''.join(mathml_tree.xml())