comparison env/lib/python3.7/site-packages/docutils/utils/math/latex2mathml.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # :Id: $Id: latex2mathml.py 8366 2019-08-27 12:09:19Z milde $
5 # :Copyright: © 2010 Günter Milde.
6 # Based on rst2mathml.py from the latex_math sandbox project
7 # © 2005 Jens Jørgen Mortensen
8 # :License: Released under the terms of the `2-Clause BSD license`_, in short:
9 #
10 # Copying and distribution of this file, with or without modification,
11 # are permitted in any medium without royalty provided the copyright
12 # notice and this notice are preserved.
13 # This file is offered as-is, without any warranty.
14 #
15 # .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
16
17
18 """Convert LaTeX math code into presentational MathML"""
19
20 # Based on the `latex_math` sandbox project by Jens Jørgen Mortensen
21
22 import docutils.utils.math.tex2unichar as tex2unichar
23
24 # TeX spacing combining
# Accent commands mapped to the character that is placed over the base.
# The code point in the trailing comment is the alternative that was
# left disabled in favour of the one in use.
over = dict(
    acute=u'\u00B4',   # u'\u0301',
    bar=u'\u00AF',     # u'\u0304',
    breve=u'\u02D8',   # u'\u0306',
    check=u'\u02C7',   # u'\u030C',
    dot=u'\u02D9',     # u'\u0307',
    ddot=u'\u00A8',    # u'\u0308',
    dddot=u'\u20DB',
    grave=u'`',        # u'\u0300',
    hat=u'^',          # u'\u0302',
    mathring=u'\u02DA',  # u'\u030A',
    overleftrightarrow=u'\u20e1',
    # 'overline':      # u'\u0305',
    tilde=u'\u02DC',   # u'\u0303',
    vec=u'\u20D7',
)
39
# Capital Greek letters: (upright in TeX style)
Greek = {
    'Phi': u'\u03a6',
    'Xi': u'\u039e',
    'Sigma': u'\u03a3',
    'Psi': u'\u03a8',
    'Delta': u'\u0394',
    'Theta': u'\u0398',
    'Upsilon': u'\u03d2',
    'Pi': u'\u03a0',
    'Omega': u'\u03a9',
    'Gamma': u'\u0393',
    'Lambda': u'\u039b',
}
45
letters = tex2unichar.mathalpha

# Build `special` from a *copy* of `mathbin`: updating the imported dict
# in place would silently add every other symbol class to
# `tex2unichar.mathbin` itself.
special = dict(tex2unichar.mathbin)       # Binary symbols
special.update(tex2unichar.mathrel)       # Relation symbols, arrow symbols
special.update(tex2unichar.mathord)       # Miscellaneous symbols
special.update(tex2unichar.mathop)        # Variable-sized symbols
special.update(tex2unichar.mathopen)      # Braces
special.update(tex2unichar.mathclose)     # Braces
special.update(tex2unichar.mathfence)

# Characters of the operators whose sub-/superscripts are rendered as
# under-/over-scripts by the parser.
sumintprod = ''.join([special[symbol] for symbol in
                      ['sum', 'int', 'oint', 'prod']])
58
# Names of the LaTeX commands that are rendered as named operators.
functions = (
    'arccos arcsin arctan arg cos cosh '
    'cot coth csc deg det dim '
    'exp gcd hom inf ker lg '
    'lim liminf limsup ln log max '
    'min Pr sec sin sinh sup '
    'tan tanh '
    'injlim varinjlim varlimsup '
    'projlim varliminf varprojlim'
).split()
67
68
# Double-struck ("blackboard bold") capitals, keyed by the plain letter.
mathbb = {
    'A': u'\U0001D538', 'B': u'\U0001D539', 'C': u'\u2102',
    'D': u'\U0001D53B', 'E': u'\U0001D53C', 'F': u'\U0001D53D',
    'G': u'\U0001D53E', 'H': u'\u210D', 'I': u'\U0001D540',
    'J': u'\U0001D541', 'K': u'\U0001D542', 'L': u'\U0001D543',
    'M': u'\U0001D544', 'N': u'\u2115', 'O': u'\U0001D546',
    'P': u'\u2119', 'Q': u'\u211A', 'R': u'\u211D',
    'S': u'\U0001D54A', 'T': u'\U0001D54B', 'U': u'\U0001D54C',
    'V': u'\U0001D54D', 'W': u'\U0001D54E', 'X': u'\U0001D54F',
    'Y': u'\U0001D550', 'Z': u'\u2124',
}
97
# Script letters (used for both \mathscr and \mathcal), keyed by the
# plain letter.
mathscr = {
    # capitals
    'A': u'\U0001D49C',
    'B': u'\u212C',  # bernoulli function
    'C': u'\U0001D49E', 'D': u'\U0001D49F',
    'E': u'\u2130', 'F': u'\u2131',
    'G': u'\U0001D4A2',
    'H': u'\u210B',  # hamiltonian
    'I': u'\u2110',
    'J': u'\U0001D4A5', 'K': u'\U0001D4A6',
    'L': u'\u2112',  # lagrangian
    'M': u'\u2133',  # physics m-matrix
    'N': u'\U0001D4A9', 'O': u'\U0001D4AA', 'P': u'\U0001D4AB',
    'Q': u'\U0001D4AC',
    'R': u'\u211B',
    'S': u'\U0001D4AE', 'T': u'\U0001D4AF', 'U': u'\U0001D4B0',
    'V': u'\U0001D4B1', 'W': u'\U0001D4B2', 'X': u'\U0001D4B3',
    'Y': u'\U0001D4B4', 'Z': u'\U0001D4B5',
    # small letters
    'a': u'\U0001D4B6', 'b': u'\U0001D4B7', 'c': u'\U0001D4B8',
    'd': u'\U0001D4B9',
    'e': u'\u212F',
    'f': u'\U0001D4BB',
    'g': u'\u210A',
    'h': u'\U0001D4BD', 'i': u'\U0001D4BE', 'j': u'\U0001D4BF',
    'k': u'\U0001D4C0', 'l': u'\U0001D4C1', 'm': u'\U0001D4C2',
    'n': u'\U0001D4C3',
    'o': u'\u2134',  # order of
    'p': u'\U0001D4C5', 'q': u'\U0001D4C6', 'r': u'\U0001D4C7',
    's': u'\U0001D4C8', 't': u'\U0001D4C9', 'u': u'\U0001D4CA',
    'v': u'\U0001D4CB', 'w': u'\U0001D4CC', 'x': u'\U0001D4CD',
    'y': u'\U0001D4CE', 'z': u'\U0001D4CF',
}
152
# Operators that may follow "\not", mapped to their negated character.
negatables = {
    '=': u'\u2260',
    '\\in': u'\u2209',
    '\\equiv': u'\u2262',
}
156
157 # LaTeX to MathML translation stuff:
class math(object):
    """Base class for MathML elements.

    Child elements are kept in ``self.children``; `append` also stores a
    back reference to the container in the ``parent`` attribute of every
    appended child.
    """

    # Number of children the element holds; `full` reports True once
    # this many have been appended.
    nchildren = 1000000

    def __init__(self, children=None, inline=None):
        """math([children]) -> MathML element

        `children` can be one child or a list of children.
        `inline` is stored on the instance only when it is not None.
        """
        self.children = []
        if children is not None:
            if not isinstance(children, list):
                # Only one child:
                children = [children]
            for child in children:
                self.append(child)

        if inline is not None:
            self.inline = inline

    def __repr__(self):
        name = self.__class__.__name__
        try:
            children = self.children
        except AttributeError:
            # Subclasses that bypass this __init__ have no child list.
            return name
        return name + '(%s)' % ','.join([repr(child) for child in children])

    def full(self):
        """Return True if there is no room for more children."""
        return not len(self.children) < self.nchildren

    def append(self, child):
        """append(child) -> element

        Append `child` and return the element that takes the next
        child: self if it is not full, else its first non-full ancestor.
        """
        assert not self.full()
        self.children.append(child)
        child.parent = self
        target = self
        while target.full():
            target = target.parent
        return target

    def delete_child(self):
        """delete_child() -> child

        Remove the last child and return it.
        """
        return self.children.pop()

    def close(self):
        """close() -> parent

        Close the element and return its first non-full ancestor.
        """
        ancestor = self.parent
        while ancestor.full():
            ancestor = ancestor.parent
        return ancestor

    def xml(self):
        """xml() -> list of XML string fragments (start, body, end)."""
        start = self.xml_start()
        body = self.xml_body()
        return start + body + self.xml_end()

    def xml_start(self):
        """Return the opening tag as a one-element list.

        Instances with an ``inline`` attribute get the namespaced
        ``<math>`` tag; all others a tag named after their class.
        """
        try:
            inline = self.inline
        except AttributeError:
            return ['<%s>' % self.__class__.__name__]
        xmlns = 'http://www.w3.org/1998/Math/MathML'
        if inline:
            return ['<math xmlns="%s">' % xmlns]
        return ['<math xmlns="%s" mode="display">' % xmlns]

    def xml_end(self):
        """Return the closing tag (named after the class) as a list."""
        return ['</%s>' % self.__class__.__name__]

    def xml_body(self):
        """Return the concatenated XML fragments of all children."""
        return [fragment
                for child in self.children
                for fragment in child.xml()]
248
class mrow(math):
    """Row element; its opening tag is put on a new line."""

    def xml_start(self):
        tag = '\n<%s>' % self.__class__.__name__
        return [tag]
252
class mtable(math):
    """Table element; its opening tag is put on a new line."""

    def xml_start(self):
        tag = '\n<%s>' % self.__class__.__name__
        return [tag]
256
class mtr(mrow):
    """Table row; behaves like `mrow` but is emitted under its own name."""
class mtd(mrow):
    """Table entry; behaves like `mrow` but is emitted under its own name."""
259
class mx(math):
    """Base class for mo, mi, and mn

    These elements hold a string (``data``) instead of child elements.
    The base class initializer is not called, so instances have no
    ``children`` attribute.
    """

    nchildren = 0

    def __init__(self, data):
        self.data = data

    def xml_body(self):
        content = self.data
        return [content]
269
class mo(mx):
    """Operator element; "<" and ">" are written as XML entities."""

    translation = {'<': '&lt;', '>': '&gt;'}

    def xml_body(self):
        data = self.data
        # Anything without an entry in `translation` is emitted unchanged.
        return [self.translation.get(data, data)]
274
class mi(mx):
    """Identifier element."""
class mn(mx):
    """Number element."""
277
class msub(math):
    """Subscript element: takes two children."""

    nchildren = 2
280
class msup(math):
    """Superscript element: takes two children."""

    nchildren = 2
283
class msqrt(math):
    """Square root element: takes one child."""

    nchildren = 1
286
class mroot(math):
    """Root element: takes two children."""

    nchildren = 2
289
class mfrac(math):
    """Fraction element: takes two children."""

    nchildren = 2
292
class msubsup(math):
    """Element with three children.

    With `reversed` set, the second and third child are exchanged once,
    on the first call of `xml`.
    """

    nchildren = 3

    def __init__(self, children=None, reversed=False):
        self.reversed = reversed
        math.__init__(self, children)

    def xml(self):
        if self.reversed:
            kids = self.children
            kids[1], kids[2] = kids[2], kids[1]
            # Reset the flag so that a second call does not swap back.
            self.reversed = False
        return math.xml(self)
305
class mfenced(math):
    """Expression enclosed in a pair of delimiters.

    The opening delimiter is passed to the constructor; ``closepar`` is
    not set here and must be assigned before `xml_start` is called.
    """

    # LaTeX delimiter -> character written to the open/close attribute.
    translation = {'\\{': '{', '\\}': '}',
                   '\\langle': u'\u2329', '\\rangle': u'\u232A',
                   '.': ''}

    def __init__(self, par):
        self.openpar = par
        math.__init__(self)

    def xml_start(self):
        lookup = self.translation.get
        opening = lookup(self.openpar, self.openpar)
        closing = lookup(self.closepar, self.closepar)
        return ['<mfenced open="%s" close="%s">' % (opening, closing)]
318
class mspace(math):
    """Space element: takes no children."""

    nchildren = 0
321
class mstyle(math):
    """Style element; keyword arguments become XML attributes."""

    def __init__(self, children=None, nchildren=None, **kwargs):
        """mstyle([children, nchildren], **attributes) -> element

        `nchildren`, if given, overrides the class-level child count for
        this instance.  All other keyword arguments are stored in
        ``self.attrs`` and written as attributes of the opening tag.
        """
        if nchildren is not None:
            self.nchildren = nchildren
        math.__init__(self, children)
        self.attrs = kwargs

    def xml_start(self):
        # Every attribute is preceded by a space; without the separator,
        # two or more attributes would run together into malformed XML.
        return (['<mstyle']
                + [' %s="%s"' % item for item in self.attrs.items()]
                + ['>'])
332
class mover(math):
    """Element with two children.

    With `reversed` set, the order of the children is inverted once, on
    the first call of `xml`.
    """

    nchildren = 2

    def __init__(self, children=None, reversed=False):
        self.reversed = reversed
        math.__init__(self, children)

    def xml(self):
        if self.reversed:
            self.children[:] = self.children[::-1]
            # Reset the flag so that a second call does not swap back.
            self.reversed = False
        return math.xml(self)
344
class munder(math):
    """Underscript element: takes two children."""

    nchildren = 2
347
class munderover(math):
    """Under- and overscript element: takes three children."""

    nchildren = 3

    def __init__(self, children=None):
        # Only `children` is accepted; `inline` is not exposed here.
        super(munderover, self).__init__(children)
352
class mtext(math):
    """Text element; holds a string instead of child elements.

    The base class initializer is not called, so instances have no
    ``children`` attribute.
    """

    nchildren = 0

    def __init__(self, text):
        self.text = text

    def xml_body(self):
        # The text is arbitrary user input: escape the XML markup
        # characters ("&" first, so the entities themselves survive).
        escaped = (self.text.replace('&', '&amp;')
                            .replace('<', '&lt;')
                            .replace('>', '&gt;'))
        return [escaped]
360
def parse_latex_math(string, inline=True):
    """parse_latex_math(string [,inline]) -> MathML-tree

    Returns a MathML-tree parsed from string.  inline=True is for
    inline math and inline=False is for displayed math.

    tree is the whole tree and node is the current element.

    Raises SyntaxError for input that cannot be parsed: unknown
    commands, illegal characters, a trailing backslash, a sub- or
    superscript without a base, and unmatched "}", "&" or row ends.
    """

    # Normalize white-space:
    string = ' '.join(string.split())

    if inline:
        node = mrow()
        tree = math(node, inline=True)
    else:
        node = mtd()
        tree = math(mtable(mtr(node)), inline=False)

    while len(string) > 0:
        n = len(string)
        c = string[0]
        skip = 1  # number of characters consumed
        if n > 1:
            c2 = string[1]
        else:
            c2 = ''
        if c == ' ':
            pass
        elif c == '\\':
            # `c2` is empty for a trailing backslash; the empty string
            # is "in" every string, so test for it explicitly.
            if c2 and c2 in '{}':
                node = node.append(mo(c2))
                skip = 2
            elif c2 == ' ':
                node = node.append(mspace())
                skip = 2
            elif c2 == ',':  # TODO: small space
                node = node.append(mspace())
                skip = 2
            elif c2.isalpha():
                # We have a LaTeX-name:
                i = 2
                while i < n and string[i].isalpha():
                    i += 1
                name = string[1:i]
                node, skip = handle_keyword(name, node, string[i:])
                skip += i
            elif c2 == '\\':
                # End of a row:
                entry = mtd()
                row = mtr(entry)
                try:
                    table = node.close().close()
                except AttributeError:
                    # Ran past the root element (it has no parent).
                    raise SyntaxError(u'Unexpected end of row: "\\\\"')
                table.append(row)
                node = entry
                skip = 2
            else:
                raise SyntaxError(u'Syntax error: "%s%s"' % (c, c2))
        elif c.isalpha():
            node = node.append(mi(c))
        elif c.isdigit():
            node = node.append(mn(c))
        elif c in "+-*/=()[]|<>,.!?':;@":
            node = node.append(mo(c))
        elif c == '_':
            if not node.children:
                raise SyntaxError(u'Missing base for "_"')
            child = node.delete_child()
            if isinstance(child, msup):
                sub = msubsup(child.children, reversed=True)
            elif isinstance(child, mo) and child.data in sumintprod:
                sub = munder(child)
            else:
                sub = msub(child)
            node.append(sub)
            node = sub
        elif c == '^':
            if not node.children:
                raise SyntaxError(u'Missing base for "^"')
            child = node.delete_child()
            if isinstance(child, msub):
                sup = msubsup(child.children)
            elif isinstance(child, mo) and child.data in sumintprod:
                sup = mover(child)
            elif (isinstance(child, munder) and
                  child.children[0].data in sumintprod):
                sup = munderover(child.children)
            else:
                sup = msup(child)
            node.append(sup)
            node = sup
        elif c == '{':
            row = mrow()
            node.append(row)
            node = row
        elif c == '}':
            try:
                node = node.close()
            except AttributeError:
                # Ran past the root element (it has no parent).
                raise SyntaxError(u'Unmatched "}"')
        elif c == '&':
            entry = mtd()
            try:
                row = node.close()
            except AttributeError:
                # Ran past the root element (it has no parent).
                raise SyntaxError(u'Unexpected column separator: "&"')
            row.append(entry)
            node = entry
        else:
            raise SyntaxError(u'Illegal character: "%s"' % c)
        string = string[skip:]
    return tree
459
460
461 def handle_keyword(name, node, string):
462 skip = 0
463 if len(string) > 0 and string[0] == ' ':
464 string = string[1:]
465 skip = 1
466 if name == 'begin':
467 if not string.startswith('{matrix}'):
468 raise SyntaxError(u'Environment not supported! '
469 u'Supported environment: "matrix".')
470 skip += 8
471 entry = mtd()
472 table = mtable(mtr(entry))
473 node.append(table)
474 node = entry
475 elif name == 'end':
476 if not string.startswith('{matrix}'):
477 raise SyntaxError(u'Expected "\\end{matrix}"!')
478 skip += 8
479 node = node.close().close().close()
480 elif name in ('text', 'mathrm'):
481 if string[0] != '{':
482 raise SyntaxError(u'Expected "\\text{...}"!')
483 i = string.find('}')
484 if i == -1:
485 raise SyntaxError(u'Expected "\\text{...}"!')
486 node = node.append(mtext(string[1:i]))
487 skip += i + 1
488 elif name == 'sqrt':
489 sqrt = msqrt()
490 node.append(sqrt)
491 node = sqrt
492 elif name == 'frac':
493 frac = mfrac()
494 node.append(frac)
495 node = frac
496 elif name == 'left':
497 for par in ['(', '[', '|', '\\{', '\\langle', '.']:
498 if string.startswith(par):
499 break
500 else:
501 raise SyntaxError(u'Missing left-brace!')
502 fenced = mfenced(par)
503 node.append(fenced)
504 row = mrow()
505 fenced.append(row)
506 node = row
507 skip += len(par)
508 elif name == 'right':
509 for par in [')', ']', '|', '\\}', '\\rangle', '.']:
510 if string.startswith(par):
511 break
512 else:
513 raise SyntaxError(u'Missing right-brace!')
514 node = node.close()
515 node.closepar = par
516 node = node.close()
517 skip += len(par)
518 elif name == 'not':
519 for operator in negatables:
520 if string.startswith(operator):
521 break
522 else:
523 raise SyntaxError(u'Expected something to negate: "\\not ..."!')
524 node = node.append(mo(negatables[operator]))
525 skip += len(operator)
526 elif name == 'mathbf':
527 style = mstyle(nchildren=1, fontweight='bold')
528 node.append(style)
529 node = style
530 elif name == 'mathbb':
531 if string[0] != '{' or not string[1].isupper() or string[2] != '}':
532 raise SyntaxError(u'Expected something like "\\mathbb{A}"!')
533 node = node.append(mi(mathbb[string[1]]))
534 skip += 3
535 elif name in ('mathscr', 'mathcal'):
536 if string[0] != '{' or string[2] != '}':
537 raise SyntaxError(u'Expected something like "\\mathscr{A}"!')
538 node = node.append(mi(mathscr[string[1]]))
539 skip += 3
540 elif name == 'colon': # "normal" colon, not binary operator
541 node = node.append(mo(':')) # TODO: add ``lspace="0pt"``
542 elif name in Greek: # Greek capitals (upright in "TeX style")
543 node = node.append(mo(Greek[name]))
544 # TODO: "ISO style" sets them italic. Could we use a class argument
545 # to enable styling via CSS?
546 elif name in letters:
547 node = node.append(mi(letters[name]))
548 elif name in special:
549 node = node.append(mo(special[name]))
550 elif name in functions:
551 node = node.append(mo(name))
552 elif name in over:
553 ovr = mover(mo(over[name]), reversed=True)
554 node.append(ovr)
555 node = ovr
556 else:
557 raise SyntaxError(u'Unknown LaTeX command: ' + name)
558
559 return node, skip
560
def tex2mathml(tex_math, inline=True):
    """Return the MathML code for the LaTeX math code `tex_math` as string.

    `inline`=True is for inline math and `inline`=False for displayed math.
    """
    tree = parse_latex_math(tex_math, inline=inline)
    fragments = tree.xml()
    return ''.join(fragments)