Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/docutils/utils/math/latex2mathml.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author | shellac |
---|---|
date | Sat, 02 May 2020 07:14:21 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:26e78fe6e8c4 |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 # :Id: $Id: latex2mathml.py 8366 2019-08-27 12:09:19Z milde $ | |
5 # :Copyright: © 2010 Günter Milde. | |
6 # Based on rst2mathml.py from the latex_math sandbox project | |
7 # © 2005 Jens Jørgen Mortensen | |
8 # :License: Released under the terms of the `2-Clause BSD license`_, in short: | |
9 # | |
10 # Copying and distribution of this file, with or without modification, | |
11 # are permitted in any medium without royalty provided the copyright | |
12 # notice and this notice are preserved. | |
13 # This file is offered as-is, without any warranty. | |
14 # | |
15 # .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause | |
16 | |
17 | |
18 """Convert LaTeX math code into presentational MathML""" | |
19 | |
20 # Based on the `latex_math` sandbox project by Jens Jørgen Mortensen | |
21 | |
22 import docutils.utils.math.tex2unichar as tex2unichar | |
23 | |
24 # TeX spacing combining | |
# Accent characters placed over a base by the TeX accent macros, keyed by
# macro name.  Where one was noted, the combining counterpart of the
# character is given in the trailing comment.
over = {
    'acute': u'\u00B4',               # u'\u0301'
    'bar': u'\u00AF',                 # u'\u0304'
    'breve': u'\u02D8',               # u'\u0306'
    'check': u'\u02C7',               # u'\u030C'
    'dot': u'\u02D9',                 # u'\u0307'
    'ddot': u'\u00A8',                # u'\u0308'
    'dddot': u'\u20DB',
    'grave': u'`',                    # u'\u0300'
    'hat': u'^',                      # u'\u0302'
    'mathring': u'\u02DA',            # u'\u030A'
    'overleftrightarrow': u'\u20e1',
    # 'overline' is not supported (its combining form would be u'\u0305')
    'tilde': u'\u02DC',               # u'\u0303'
    'vec': u'\u20D7',
}
39 | |
# Capital Greek letters (upright in TeX style), keyed by TeX macro name.
Greek = {
    'Phi': u'\u03a6',
    'Xi': u'\u039e',
    'Sigma': u'\u03a3',
    'Psi': u'\u03a8',
    'Delta': u'\u0394',
    'Theta': u'\u0398',
    'Upsilon': u'\u03d2',
    'Pi': u'\u03a0',
    'Omega': u'\u03a9',
    'Gamma': u'\u0393',
    'Lambda': u'\u039b',
}
45 | |
# Letter-like symbols, keyed by TeX macro name.
letters = tex2unichar.mathalpha

# Merged table of all operator-like symbols, keyed by TeX macro name.
# Start from a *copy* of ``mathbin``: binding the name directly to the
# imported dict would make the ``update()`` calls below modify
# ``tex2unichar.mathbin`` itself.
special = dict(tex2unichar.mathbin)     # Binary symbols
special.update(tex2unichar.mathrel)     # Relation symbols, arrow symbols
special.update(tex2unichar.mathord)     # Miscellaneous symbols
special.update(tex2unichar.mathop)      # Variable-sized symbols
special.update(tex2unichar.mathopen)    # Braces
special.update(tex2unichar.mathclose)   # Braces
special.update(tex2unichar.mathfence)

# The characters for \sum, \int, \oint and \prod as one string; the parser
# tests operator membership in it to decide between under/over placement
# and ordinary sub-/superscripts.
sumintprod = ''.join([special[symbol]
                      for symbol in ['sum', 'int', 'oint', 'prod']])
58 | |
# Names of function macros (``\sin``, ``\lim``, ...) that are emitted
# literally, using the macro name itself as the operator text.
functions = [
    'arccos', 'arcsin', 'arctan', 'arg',
    'cos', 'cosh', 'cot', 'coth', 'csc',
    'deg', 'det', 'dim',
    'exp', 'gcd', 'hom', 'inf', 'ker', 'lg',
    'lim', 'liminf', 'limsup', 'ln', 'log',
    'max', 'min', 'Pr',
    'sec', 'sin', 'sinh', 'sup',
    'tan', 'tanh',
    'injlim', 'varinjlim', 'varlimsup',
    'projlim', 'varliminf', 'varprojlim',
]
67 | |
68 | |
# Double-struck ("blackboard bold") capitals used for ``\mathbb{X}``,
# keyed by the plain capital letter.
mathbb = {
    'A': u'\U0001D538', 'B': u'\U0001D539', 'C': u'\u2102',
    'D': u'\U0001D53B', 'E': u'\U0001D53C', 'F': u'\U0001D53D',
    'G': u'\U0001D53E', 'H': u'\u210D', 'I': u'\U0001D540',
    'J': u'\U0001D541', 'K': u'\U0001D542', 'L': u'\U0001D543',
    'M': u'\U0001D544', 'N': u'\u2115', 'O': u'\U0001D546',
    'P': u'\u2119', 'Q': u'\u211A', 'R': u'\u211D',
    'S': u'\U0001D54A', 'T': u'\U0001D54B', 'U': u'\U0001D54C',
    'V': u'\U0001D54D', 'W': u'\U0001D54E', 'X': u'\U0001D54F',
    'Y': u'\U0001D550', 'Z': u'\u2124',
}
97 | |
# Script letters used for ``\mathscr{x}`` and ``\mathcal{x}``, keyed by the
# plain ASCII letter.
mathscr = {
    # capitals
    'A': u'\U0001D49C',
    'B': u'\u212C',  # bernoulli function
    'C': u'\U0001D49E', 'D': u'\U0001D49F',
    'E': u'\u2130', 'F': u'\u2131',
    'G': u'\U0001D4A2',
    'H': u'\u210B',  # hamiltonian
    'I': u'\u2110',
    'J': u'\U0001D4A5', 'K': u'\U0001D4A6',
    'L': u'\u2112',  # lagrangian
    'M': u'\u2133',  # physics m-matrix
    'N': u'\U0001D4A9', 'O': u'\U0001D4AA', 'P': u'\U0001D4AB',
    'Q': u'\U0001D4AC',
    'R': u'\u211B',
    'S': u'\U0001D4AE', 'T': u'\U0001D4AF', 'U': u'\U0001D4B0',
    'V': u'\U0001D4B1', 'W': u'\U0001D4B2', 'X': u'\U0001D4B3',
    'Y': u'\U0001D4B4', 'Z': u'\U0001D4B5',
    # small letters
    'a': u'\U0001D4B6', 'b': u'\U0001D4B7', 'c': u'\U0001D4B8',
    'd': u'\U0001D4B9',
    'e': u'\u212F',
    'f': u'\U0001D4BB',
    'g': u'\u210A',
    'h': u'\U0001D4BD', 'i': u'\U0001D4BE', 'j': u'\U0001D4BF',
    'k': u'\U0001D4C0', 'l': u'\U0001D4C1', 'm': u'\U0001D4C2',
    'n': u'\U0001D4C3',
    'o': u'\u2134',  # order of
    'p': u'\U0001D4C5', 'q': u'\U0001D4C6', 'r': u'\U0001D4C7',
    's': u'\U0001D4C8', 't': u'\U0001D4C9', 'u': u'\U0001D4CA',
    'v': u'\U0001D4CB', 'w': u'\U0001D4CC', 'x': u'\U0001D4CD',
    'y': u'\U0001D4CE', 'z': u'\U0001D4CF',
}
152 | |
# Operators that may follow ``\not``, mapped to their negated character.
negatables = {
    '=': u'\u2260',
    r'\in': u'\u2209',
    r'\equiv': u'\u2262',
}
156 | |
157 # LaTeX to MathML translation stuff: | |
class math(object):
    """Base class for MathML elements.

    An element holds a list of child elements and serialises itself as
    ``<name>...</name>``, where *name* is the name of its class.  An
    instance created with an `inline` argument acts as the root ``<math>``
    element and additionally emits the MathML namespace.
    """

    # Maximal number of children.  An element counts as "full" once it
    # holds this many; the large default means "practically unlimited".
    nchildren = 1000000

    def __init__(self, children=None, inline=None):
        """math([children]) -> MathML element

        children can be one child or a list of children.
        inline, if given, marks the element as root element:
        True for inline math, False for displayed math."""

        self.children = []
        if children is not None:
            if isinstance(children, list):
                for child in children:
                    self.append(child)
            else:
                # Only one child:
                self.append(children)

        # The attribute is deliberately left unset for non-root elements;
        # `xml_start()` tests for its presence.
        if inline is not None:
            self.inline = inline

    def __repr__(self):
        # Subclasses that override __init__ without calling this class'
        # __init__ have no `children` attribute.
        if hasattr(self, 'children'):
            return self.__class__.__name__ + '(%s)' % \
                   ','.join([repr(child) for child in self.children])
        else:
            return self.__class__.__name__

    def full(self):
        """Return True if the element cannot take any more children."""

        return len(self.children) >= self.nchildren

    def append(self, child):
        """append(child) -> element

        Appends child and returns self if self is not full or first
        non-full parent."""

        assert not self.full()
        self.children.append(child)
        child.parent = self
        node = self
        # Walk up until an element with room for more children is found.
        while node.full():
            node = node.parent
        return node

    def delete_child(self):
        """delete_child() -> child

        Delete last child and return it."""

        child = self.children[-1]
        del self.children[-1]
        return child

    def close(self):
        """close() -> parent

        Close element and return first non-full element."""

        parent = self.parent
        while parent.full():
            parent = parent.parent
        return parent

    def xml(self):
        """xml() -> list of XML string fragments"""

        return self.xml_start() + self.xml_body() + self.xml_end()

    def xml_start(self):
        """Return the start tag as a list of string fragments."""
        if not hasattr(self, 'inline'):
            return ['<%s>' % self.__class__.__name__]
        xmlns = 'http://www.w3.org/1998/Math/MathML'
        if self.inline:
            return ['<math xmlns="%s">' % xmlns]
        # `display="block"` is the current way to request display style.
        # The deprecated `mode="display"` is kept alongside it for
        # consumers that still look for the old attribute.
        return ['<math xmlns="%s" display="block" mode="display">' % xmlns]

    def xml_end(self):
        """Return the end tag as a list of string fragments."""
        return ['</%s>' % self.__class__.__name__]

    def xml_body(self):
        """Return the serialised children as a list of string fragments."""
        xml = []
        for child in self.children:
            xml.extend(child.xml())
        return xml
248 | |
class mrow(math):
    """Element serialised as ``<mrow>``, started on a new output line."""

    def xml_start(self):
        tag = self.__class__.__name__
        return ['\n<' + tag + '>']


class mtable(math):
    """Element serialised as ``<mtable>``, started on a new output line."""

    def xml_start(self):
        tag = self.__class__.__name__
        return ['\n<' + tag + '>']


class mtr(mrow):
    """Element serialised as ``<mtr>``; behaves like `mrow`."""


class mtd(mrow):
    """Element serialised as ``<mtd>``; behaves like `mrow`."""
259 | |
class mx(math):
    """Common base of the token elements mo, mi, and mn.

    A token carries a string (`data`) instead of child elements.
    """

    nchildren = 0

    def __init__(self, data):
        # The base class initialiser is intentionally not called:
        # only the payload string is stored.
        self.data = data

    def xml_body(self):
        """Return the payload as a one-element list of fragments."""
        content = self.data
        return [content]
269 | |
class mo(mx):
    """Operator token, serialised as ``<mo>``."""

    # Characters that must not appear literally in XML character data,
    # mapped to the corresponding predefined entities.
    translation = {'&': '&amp;', '<': '&lt;', '>': '&gt;'}

    def xml_body(self):
        """Return the operator, XML-escaped where necessary."""
        return [self.translation.get(self.data, self.data)]
274 | |
class mi(mx):
    """Token element serialised as ``<mi>``."""


class mn(mx):
    """Token element serialised as ``<mn>``."""
277 | |
class msub(math):
    """Element serialised as ``<msub>``; full after two children."""
    nchildren = 2


class msup(math):
    """Element serialised as ``<msup>``; full after two children."""
    nchildren = 2


class msqrt(math):
    """Element serialised as ``<msqrt>``; full after one child."""
    nchildren = 1


class mroot(math):
    """Element serialised as ``<mroot>``; full after two children."""
    nchildren = 2


class mfrac(math):
    """Element serialised as ``<mfrac>``; full after two children."""
    nchildren = 2
292 | |
class msubsup(math):
    """Element serialised as ``<msubsup>``; full after three children.

    With ``reversed=True`` the second and third child are exchanged
    once, the first time the element is serialised.
    """

    nchildren = 3

    def __init__(self, children=None, reversed=False):
        self.reversed = reversed
        math.__init__(self, children)

    def xml(self):
        if self.reversed:
            kids = self.children
            # Exchange second and third child; clearing the flag makes
            # sure a repeated call does not swap them back.
            kids[1], kids[2] = kids[2], kids[1]
            self.reversed = False
        return math.xml(self)
305 | |
class mfenced(math):
    """Element serialised as ``<mfenced open="..." close="...">``.

    The opening delimiter is passed to the constructor; the closing
    delimiter must be stored in the attribute `closepar` before the
    element is serialised.
    """

    # TeX delimiter -> character written to the attribute value.
    # All other delimiters are written unchanged.
    translation = {'\\{': '{', '\\langle': u'\u2329',
                   '\\}': '}', '\\rangle': u'\u232A',
                   '.': ''}

    def __init__(self, par):
        self.openpar = par
        math.__init__(self)

    def xml_start(self):
        lookup = self.translation.get
        left = lookup(self.openpar, self.openpar)
        right = lookup(self.closepar, self.closepar)
        return ['<mfenced open="%s" close="%s">' % (left, right)]
318 | |
class mspace(math):
    """Element serialised as ``<mspace>``; takes no children."""
    nchildren = 0
321 | |
class mstyle(math):
    """Element serialised as ``<mstyle ...>``.

    Keyword arguments other than `children` and `nchildren` become XML
    attributes of the start tag.  `nchildren`, if given, overrides the
    maximal number of children for this instance.
    """

    def __init__(self, children=None, nchildren=None, **kwargs):
        if nchildren is not None:
            self.nchildren = nchildren
        math.__init__(self, children)
        self.attrs = kwargs

    def xml_start(self):
        """Return the start tag with all attributes as string fragments."""
        # Attributes must be separated by white-space; joining them without
        # a separator produces invalid markup for more than one attribute.
        attributes = ' '.join(['%s="%s"' % item
                               for item in self.attrs.items()])
        return ['<mstyle ', attributes, '>']
332 | |
class mover(math):
    """Element serialised as ``<mover>``; full after two children.

    With ``reversed=True`` the order of the children is reversed once,
    the first time the element is serialised.
    """

    nchildren = 2

    def __init__(self, children=None, reversed=False):
        self.reversed = reversed
        math.__init__(self, children)

    def xml(self):
        if not self.reversed:
            return math.xml(self)
        # Flip the children in place and clear the flag so that a second
        # call does not flip them back.
        self.children.reverse()
        self.reversed = False
        return math.xml(self)
344 | |
class munder(math):
    """Element serialised as ``<munder>``; full after two children."""
    nchildren = 2


class munderover(math):
    """Element serialised as ``<munderover>``; full after three children."""
    nchildren = 3

    def __init__(self, children=None):
        # Only `children` is accepted (no `inline` argument).
        math.__init__(self, children)
352 | |
class mtext(math):
    """Text element, serialised as ``<mtext>``; takes no children."""

    nchildren = 0

    def __init__(self, text):
        # The base class initialiser is intentionally not called:
        # only the text is stored.
        self.text = text

    def xml_body(self):
        """Return the text as XML character data (one-element list)."""
        # The text comes verbatim from the LaTeX source and may contain
        # characters that are markup in XML.  "&" must be replaced first
        # so that the entities inserted afterwards are not escaped again.
        text = self.text
        for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
            text = text.replace(char, entity)
        return [text]
360 | |
def parse_latex_math(string, inline=True):
    """parse_latex_math(string [,inline]) -> MathML-tree

    Returns a MathML-tree parsed from string.  inline=True is for
    inline math and inline=False is for displayed math.

    Raises SyntaxError for input that cannot be translated (unknown
    command, illegal character, sub-/superscript without a base, ...).

    tree is the whole tree and node is the current element."""

    # Normalize white-space:
    string = ' '.join(string.split())

    if inline:
        node = mrow()
        tree = math(node, inline=True)
    else:
        # Displayed math is wrapped in a one-cell table so that
        # "&" and "\\" can add further cells and rows.
        node = mtd()
        tree = math(mtable(mtr(node)), inline=False)

    while string:
        n = len(string)
        c = string[0]
        skip = 1  # number of characters consumed
        if n > 1:
            c2 = string[1]
        else:
            c2 = ''
        if c == ' ':
            pass
        elif c == '\\':
            # Compare against the two characters explicitly: a substring
            # test (``c2 in '{}'``) would also succeed for the empty
            # string, i.e. for a lone backslash at the end of the input.
            if c2 in ('{', '}'):
                node = node.append(mo(c2))
                skip = 2
            elif c2 == ' ':
                node = node.append(mspace())
                skip = 2
            elif c2 == ',':  # TODO: small space
                node = node.append(mspace())
                skip = 2
            elif c2.isalpha():
                # We have a LaTeX-name:
                i = 2
                while i < n and string[i].isalpha():
                    i += 1
                name = string[1:i]
                node, skip = handle_keyword(name, node, string[i:])
                skip += i
            elif c2 == '\\':
                # End of a row:
                entry = mtd()
                row = mtr(entry)
                node.close().close().append(row)
                node = entry
                skip = 2
            else:
                raise SyntaxError(u'Syntax error: "%s%s"' % (c, c2))
        elif c.isalpha():
            node = node.append(mi(c))
        elif c.isdigit():
            node = node.append(mn(c))
        elif c in "+-*/=()[]|<>,.!?':;@":
            node = node.append(mo(c))
        elif c == '_':
            if not node.children:
                # Report bad input as SyntaxError (like every other input
                # problem) instead of failing with an IndexError.
                raise SyntaxError(u'Missing base for subscript "_"!')
            child = node.delete_child()
            if isinstance(child, msup):
                # The superscript came first: collect base, superscript
                # and subscript in input order, swap when serialising.
                sub = msubsup(child.children, reversed=True)
            elif isinstance(child, mo) and child.data in sumintprod:
                sub = munder(child)
            else:
                sub = msub(child)
            node.append(sub)
            node = sub
        elif c == '^':
            if not node.children:
                raise SyntaxError(u'Missing base for superscript "^"!')
            child = node.delete_child()
            if isinstance(child, msub):
                sup = msubsup(child.children)
            elif isinstance(child, mo) and child.data in sumintprod:
                sup = mover(child)
            elif (isinstance(child, munder) and
                  child.children[0].data in sumintprod):
                sup = munderover(child.children)
            else:
                sup = msup(child)
            node.append(sup)
            node = sup
        elif c == '{':
            row = mrow()
            node.append(row)
            node = row
        elif c == '}':
            node = node.close()
        elif c == '&':
            # Next table cell:
            entry = mtd()
            node.close().append(entry)
            node = entry
        else:
            raise SyntaxError(u'Illegal character: "%s"' % c)
        string = string[skip:]
    return tree
459 | |
460 | |
def handle_keyword(name, node, string):
    """Process the LaTeX command `name` and return ``(node, skip)``.

    name:   command name without the leading backslash.
    node:   current element of the MathML tree.
    string: input following the command name.

    Returns the new current element and the number of characters of
    `string` consumed by the command's arguments (including one leading
    space, if present).

    Raises SyntaxError for unknown commands and malformed arguments.
    """
    skip = 0
    if len(string) > 0 and string[0] == ' ':
        string = string[1:]
        skip = 1
    if name == 'begin':
        if not string.startswith('{matrix}'):
            raise SyntaxError(u'Environment not supported! '
                              u'Supported environment: "matrix".')
        skip += 8
        entry = mtd()
        table = mtable(mtr(entry))
        node.append(table)
        node = entry
    elif name == 'end':
        if not string.startswith('{matrix}'):
            raise SyntaxError(u'Expected "\\end{matrix}"!')
        skip += 8
        # Leave cell, row, and table:
        node = node.close().close().close()
    elif name in ('text', 'mathrm'):
        # `startswith` also copes with the command being the last thing
        # in the input (indexing an empty string raises IndexError).
        if not string.startswith('{'):
            raise SyntaxError(u'Expected "\\text{...}"!')
        i = string.find('}')
        if i == -1:
            raise SyntaxError(u'Expected "\\text{...}"!')
        node = node.append(mtext(string[1:i]))
        skip += i + 1
    elif name == 'sqrt':
        sqrt = msqrt()
        node.append(sqrt)
        node = sqrt
    elif name == 'frac':
        frac = mfrac()
        node.append(frac)
        node = frac
    elif name == 'left':
        for par in ['(', '[', '|', '\\{', '\\langle', '.']:
            if string.startswith(par):
                break
        else:
            raise SyntaxError(u'Missing left-brace!')
        fenced = mfenced(par)
        node.append(fenced)
        row = mrow()
        fenced.append(row)
        node = row
        skip += len(par)
    elif name == 'right':
        for par in [')', ']', '|', '\\}', '\\rangle', '.']:
            if string.startswith(par):
                break
        else:
            raise SyntaxError(u'Missing right-brace!')
        # Step out of the row opened by "\left", record the closing
        # delimiter on the enclosing element, then step out of that.
        node = node.close()
        node.closepar = par
        node = node.close()
        skip += len(par)
    elif name == 'not':
        for operator in negatables:
            if string.startswith(operator):
                break
        else:
            raise SyntaxError(u'Expected something to negate: "\\not ..."!')
        node = node.append(mo(negatables[operator]))
        skip += len(operator)
    elif name == 'mathbf':
        style = mstyle(nchildren=1, fontweight='bold')
        node.append(style)
        node = style
    elif name == 'mathbb':
        # Exactly "{X}" with X a letter of the table is supported.  The
        # length test and the table lookup turn truncated input and
        # unsupported letters into a SyntaxError.
        if (len(string) < 3 or string[0] != '{' or string[2] != '}'
                or string[1] not in mathbb):
            raise SyntaxError(u'Expected something like "\\mathbb{A}"!')
        node = node.append(mi(mathbb[string[1]]))
        skip += 3
    elif name in ('mathscr', 'mathcal'):
        if (len(string) < 3 or string[0] != '{' or string[2] != '}'
                or string[1] not in mathscr):
            raise SyntaxError(u'Expected something like "\\mathscr{A}"!')
        node = node.append(mi(mathscr[string[1]]))
        skip += 3
    elif name == 'colon':  # "normal" colon, not binary operator
        node = node.append(mo(':'))  # TODO: add ``lspace="0pt"``
    elif name in Greek:  # Greek capitals (upright in "TeX style")
        node = node.append(mo(Greek[name]))
        # TODO: "ISO style" sets them italic. Could we use a class argument
        # to enable styling via CSS?
    elif name in letters:
        node = node.append(mi(letters[name]))
    elif name in special:
        node = node.append(mo(special[name]))
    elif name in functions:
        node = node.append(mo(name))
    elif name in over:
        # The accent is stored first and the base appended afterwards;
        # `reversed=True` restores the order when serialising.
        ovr = mover(mo(over[name]), reversed=True)
        node.append(ovr)
        node = ovr
    else:
        raise SyntaxError(u'Unknown LaTeX command: ' + name)

    return node, skip
560 | |
def tex2mathml(tex_math, inline=True):
    """Translate the LaTeX math code `tex_math` to a MathML string.

    Pass ``inline=True`` for inline math, ``inline=False`` for
    displayed math.
    """
    tree = parse_latex_math(tex_math, inline=inline)
    fragments = tree.xml()
    return ''.join(fragments)