comparison env/lib/python3.7/site-packages/mistune.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
comparison
equal deleted inserted replaced
4:79f47841a781 5:9b1c78e6ba9c
1 # coding: utf-8
2 """
3 mistune
4 ~~~~~~~
5
6 The fastest markdown parser in pure Python with renderer feature.
7
8 :copyright: (c) 2014 - 2018 by Hsiaoming Yang.
9 """
10
11 import re
12 import inspect
13
# Package metadata.
__version__ = '0.8.4'
__author__ = 'Hsiaoming Yang <me@lepture.com>'
# Public names exported by ``from mistune import *``.
__all__ = [
    'BlockGrammar', 'BlockLexer',
    'InlineGrammar', 'InlineLexer',
    'Renderer', 'Markdown',
    'markdown', 'escape',
]


# One or more whitespace characters; _keyify collapses each run to a space.
_key_pattern = re.compile(r'\s+')
# A single non-word character.
_nonalpha_pattern = re.compile(r'\W')
# An "&" that is NOT the start of an entity such as "&amp;" or "&#39;".
_escape_pattern = re.compile(r'&(?!#?\w+;)')
# Windows ("\r\n") and bare "\r" line endings, normalized by preprocessing().
_newline_pattern = re.compile(r'\r\n|\r')
# Leading "> " marker at the start of every line of a block quote.
_block_quote_leading_pattern = re.compile(r'^ *> ?', flags=re.M)
# The four-space indent at the start of every line of an indented code block.
_block_code_leading_pattern = re.compile(r'^ {4}', re.M)
# Tag names treated as inline-level; any other tag name counts as a block
# tag (see _block_tag below).
_inline_tags = [
    'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data',
    'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark',
    'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del',
    'img', 'font',
]
# Block tags whose content is never run through the inline lexer.
_pre_tags = ['pre', 'script', 'style']
# Regex fragment placed after a tag name: a word boundary that is not
# followed by ":/" or by "...@".
# NOTE(review): presumably this keeps autolinks such as <http://x> and
# <me@example.com> from being parsed as HTML tags -- confirm.
_valid_end = r'(?!:/|[^\w\s@]*@)\b'
# Regex fragment for one HTML attribute, with an optional quoted or bare value.
_valid_attr = r'''\s*[a-zA-Z\-](?:\s*\=\s*(?:"[^"]*"|'[^']*'|[^\s'">]+))?'''
# Regex fragment for a tag name that is not one of _inline_tags.
_block_tag = r'(?!(?:%s)\b)\w+%s' % ('|'.join(_inline_tags), _valid_end)
# URL schemes that escape_link() refuses to emit.
_scheme_blacklist = ('javascript:', 'vbscript:')
41
42
43 def _pure_pattern(regex):
44 pattern = regex.pattern
45 if pattern.startswith('^'):
46 pattern = pattern[1:]
47 return pattern
48
49
def _keyify(key):
    """Normalize a link or footnote key so it can be used for lookups.

    The key is lower-cased and HTML-escaped (quotes included), then every
    run of whitespace is collapsed into a single space.

    :param key: the raw key text taken from the markdown source.
    """
    escaped = escape(key.lower(), quote=True)
    return _key_pattern.sub(' ', escaped)
53
54
def escape(text, quote=False, smart_amp=True):
    """Replace special characters "&", "<" and ">" to HTML-safe sequences.

    The original cgi.escape will always escape "&", but you can control
    this one for a smart escape amp.

    :param text: the text to escape.
    :param quote: if set to True, " and ' will be escaped.
    :param smart_amp: if set to False, & will always be escaped.
    """
    # Ampersands must be handled first so the entities produced below are
    # not escaped a second time.
    if smart_amp:
        text = _escape_pattern.sub('&amp;', text)
    else:
        text = text.replace('&', '&amp;')

    replacements = [('<', '&lt;'), ('>', '&gt;')]
    if quote:
        replacements += [('"', '&quot;'), ("'", '&#39;')]

    for char, entity in replacements:
        text = text.replace(char, entity)
    return text
74
75
def escape_link(url):
    """Remove dangerous URL schemes like javascript: and escape afterwards.

    :param url: the raw link target taken from the markdown source.
    :returns: an empty string when the URL starts with a blacklisted
              scheme, otherwise the URL escaped for use in an attribute.
    """
    lower_url = url.lower().strip('\x00\x1a \n\r\t')

    # Drop everything that is not alphanumeric, "/" or ":" so that schemes
    # obfuscated with embedded characters are still recognised. This does
    # not depend on the scheme being tested, so it is computed only once.
    compact = re.sub(r'[^A-Za-z0-9\/:]+', '', lower_url)
    if any(compact.startswith(scheme) for scheme in _scheme_blacklist):
        return ''
    return escape(url, quote=True, smart_amp=False)
84
85
def preprocessing(text, tab=4):
    """Normalize raw markdown text before it is handed to the block lexer.

    Line endings are unified to ``\\n``, tabs are expanded, the ``\\u2424``
    sequence is replaced by a newline, and lines that consist only of
    spaces are emptied.

    :param text: raw markdown text.
    :param tab: tab size passed to ``str.expandtabs``.
    """
    unified = _newline_pattern.sub('\n', text).expandtabs(tab)
    unified = unified.replace('\u2424', '\n')
    return re.sub(r'^ +$', '', unified, flags=re.M)
92
93
class BlockGrammar(object):
    """Grammars for block level tokens.

    Every attribute is a compiled regular expression anchored at the start
    of the remaining text. ``BlockLexer`` looks rules up by attribute name
    and dispatches a match to its ``parse_<name>`` method.
    """

    # Link reference definition: ``[key]: <link> "title"``.
    def_links = re.compile(
        r'^ *\[([^^\]]+)\]: *'  # [key]:
        r'<?([^\s>]+)>?'  # <link> or link
        r'(?: +["(]([^\n]+)[")])? *(?:\n+|$)'
    )
    # Footnote definition: ``[^key]: text`` plus indented continuation lines.
    def_footnotes = re.compile(
        r'^\[\^([^\]]+)\]: *('
        r'[^\n]*(?:\n+|$)'  # [^key]:
        r'(?: {1,}[^\n]*(?:\n+|$))*'
        r')'
    )

    # One or more blank lines.
    newline = re.compile(r'^\n+')
    # Code block made of lines indented by four spaces.
    block_code = re.compile(r'^( {4}[^\n]+\n*)+')
    # Fenced code block delimited by ``` or ~~~ with an optional language.
    fences = re.compile(
        r'^ *(`{3,}|~{3,}) *([^`\s]+)? *\n'  # ```lang
        r'([\s\S]+?)\s*'
        r'\1 *(?:\n+|$)'  # ```
    )
    # Horizontal rule: three or more of "-", "*" or "_".
    hrule = re.compile(r'^ {0,3}[-*_](?: *[-*_]){2,} *(?:\n+|$)')
    # ATX heading: one to six leading "#".
    heading = re.compile(r'^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)')
    # Setext heading: a text line underlined with "=" or "-".
    lheading = re.compile(r'^([^\n]+)\n *(=|-)+ *(?:\n+|$)')
    # Block quote: lines introduced by ">".
    block_quote = re.compile(r'^( *>[^\n]+(\n[^\n]+)*\n*)+')
    # A whole list. Group 2 holds the first bullet; the alternatives in the
    # trailing non-capturing group describe where the list ends.
    list_block = re.compile(
        r'^( *)(?=[*+-]|\d+\.)(([*+-])?(?:\d+\.)?) [\s\S]+?'
        r'(?:'
        r'\n+(?=\1?(?:[-*_] *){3,}(?:\n+|$))'  # hrule
        r'|\n+(?=%s)'  # def links
        r'|\n+(?=%s)'  # def footnotes
        r'|\n+(?=\1(?(3)\d+\.|[*+-]) )'  # heterogeneous bullet
        r'|\n{2,}'
        r'(?! )'
        r'(?!\1(?:[*+-]|\d+\.) )\n*'
        r'|'
        r'\s*$)' % (
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
        )
    )
    # A single item inside a list block (used with ``findall``).
    list_item = re.compile(
        r'^(( *)(?:[*+-]|\d+\.) [^\n]*'
        r'(?:\n(?!\2(?:[*+-]|\d+\.) )[^\n]*)*)',
        flags=re.M
    )
    # The bullet (or "1." number) at the start of a list item.
    list_bullet = re.compile(r'^ *(?:[*+-]|\d+\.) +')
    # Paragraph: consecutive lines up to the start of any other block
    # construct listed below.
    # NOTE(review): the ``.replace(r'\1', ...)`` calls renumber
    # back-references of the embedded patterns, presumably to account for the
    # capture groups that precede them here -- confirm the chosen numbers.
    paragraph = re.compile(
        r'^((?:[^\n]+\n?(?!'
        r'%s|%s|%s|%s|%s|%s|%s|%s|%s'
        r'))+)\n*' % (
            _pure_pattern(fences).replace(r'\1', r'\2'),
            _pure_pattern(list_block).replace(r'\1', r'\3'),
            _pure_pattern(hrule),
            _pure_pattern(heading),
            _pure_pattern(lheading),
            _pure_pattern(block_quote),
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
            '<' + _block_tag,
        )
    )
    # Block level HTML: a comment, an element with a closing tag, or a
    # single (possibly self-closing) tag.
    block_html = re.compile(
        r'^ *(?:%s|%s|%s) *(?:\n{2,}|\s*$)' % (
            r'<!--[\s\S]*?-->',
            r'<(%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_block_tag, _valid_attr),
            r'<%s(?:%s)*?\s*\/?>' % (_block_tag, _valid_attr),
        )
    )
    # Table whose rows start with a pipe.
    table = re.compile(
        r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*'
    )
    # Table whose rows do not start with a pipe.
    nptable = re.compile(
        r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*'
    )
    # Fallback: the rest of the current line.
    text = re.compile(r'^[^\n]+')
171
172
class BlockLexer(object):
    """Block level lexer for block grammars.

    The lexer repeatedly tries the active rules against the start of the
    remaining text and appends token dictionaries to ``self.tokens``. Link
    and footnote definitions are collected in ``self.def_links`` and
    ``self.def_footnotes``.
    """
    grammar_class = BlockGrammar

    # Rules tried, in order, at the top level of a document.
    default_rules = [
        'newline', 'hrule', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'block_quote',
        'list_block', 'block_html', 'def_links',
        'def_footnotes', 'table', 'paragraph', 'text'
    ]

    # Rules tried inside a list item.
    list_rules = (
        'newline', 'block_code', 'fences', 'lheading', 'hrule',
        'block_quote', 'list_block', 'block_html', 'text',
    )

    # Rules tried inside a footnote definition.
    footnote_rules = (
        'newline', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'hrule', 'block_quote',
        'list_block', 'block_html', 'table', 'paragraph', 'text'
    )

    def __init__(self, rules=None, **kwargs):
        """Create a lexer.

        :param rules: a grammar instance; defaults to ``grammar_class()``.
        :param max_recursive_depth: keyword option, maximum nesting depth
            of lists and block quotes that is still parsed recursively
            (default 6).
        """
        self.tokens = []
        self.def_links = {}
        self.def_footnotes = {}

        if not rules:
            rules = self.grammar_class()

        self.rules = rules
        self._max_recursive_depth = kwargs.get('max_recursive_depth', 6)
        self._list_depth = 0
        self._blockquote_depth = 0

    def __call__(self, text, rules=None):
        return self.parse(text, rules)

    def parse(self, text, rules=None):
        """Tokenize ``text`` and return the accumulated token list.

        :param text: the text to tokenize.
        :param rules: names of the grammar rules to try, in order;
            defaults to ``default_rules``.
        :raises RuntimeError: when no rule matches the remaining text.
        """
        text = text.rstrip('\n')

        if not rules:
            rules = self.default_rules

        def manipulate(text):
            # Apply the first rule that matches the start of ``text``.
            for key in rules:
                rule = getattr(self.rules, key)
                m = rule.match(text)
                if not m:
                    continue
                getattr(self, 'parse_%s' % key)(m)
                return m
            return False  # pragma: no cover

        while text:
            m = manipulate(text)
            if m is not False:
                # Consume what the rule matched and carry on.
                text = text[len(m.group(0)):]
                continue
            if text:  # pragma: no cover
                raise RuntimeError('Infinite loop at: %s' % text)
        return self.tokens

    def parse_newline(self, m):
        """Emit a ``newline`` token for more than one consecutive newline."""
        length = len(m.group(0))
        if length > 1:
            self.tokens.append({'type': 'newline'})

    def parse_block_code(self, m):
        """Emit a ``code`` token for an indented code block."""
        # clean leading whitespace
        code = _block_code_leading_pattern.sub('', m.group(0))
        self.tokens.append({
            'type': 'code',
            'lang': None,
            'text': code,
        })

    def parse_fences(self, m):
        """Emit a ``code`` token for a fenced code block."""
        self.tokens.append({
            'type': 'code',
            'lang': m.group(2),
            'text': m.group(3),
        })

    def parse_heading(self, m):
        """Emit a ``heading`` token for an ATX ("#") heading."""
        self.tokens.append({
            'type': 'heading',
            'level': len(m.group(1)),
            'text': m.group(2),
        })

    def parse_lheading(self, m):
        """Parse setext heading."""
        self.tokens.append({
            'type': 'heading',
            'level': 1 if m.group(2) == '=' else 2,
            'text': m.group(1),
        })

    def parse_hrule(self, m):
        """Emit an ``hrule`` token."""
        self.tokens.append({'type': 'hrule'})

    def parse_list_block(self, m):
        """Emit the tokens of a list, recursing into its items.

        Once the nesting depth exceeds ``max_recursive_depth`` the whole
        match is emitted as one plain text item instead of being parsed.
        """
        bull = m.group(2)
        self.tokens.append({
            'type': 'list_start',
            'ordered': '.' in bull,
        })
        self._list_depth += 1
        if self._list_depth > self._max_recursive_depth:
            self.tokens.append({'type': 'list_item_start'})
            self.parse_text(m)
            self.tokens.append({'type': 'list_item_end'})
        else:
            cap = m.group(0)
            self._process_list_item(cap, bull)
        self.tokens.append({'type': 'list_end'})
        self._list_depth -= 1

    def _process_list_item(self, cap, bull):
        """Split a list block into items and tokenize each of them.

        :param cap: the full text matched by the ``list_block`` rule.
        :param bull: the first bullet of the list (currently unused).
        """
        items = self.rules.list_item.findall(cap)

        _next = False
        last = len(items) - 1

        for i, groups in enumerate(items):
            item = groups[0]

            # remove the bullet
            space = len(item)
            item = self.rules.list_bullet.sub('', item)

            # outdent
            if '\n ' in item:
                # ``space`` becomes the width of the removed bullet.
                space = space - len(item)
                pattern = re.compile(r'^ {1,%d}' % space, flags=re.M)
                item = pattern.sub('', item)

            # determine whether item is loose or not
            loose = _next
            if not loose and re.search(r'\n\n(?!\s*$)', item):
                loose = True

            # An item that ends with a newline (and is not the last one)
            # makes both itself and the following item loose.
            if i != last and item:
                _next = item.endswith('\n')
                if not loose:
                    loose = _next

            if loose:
                t = 'loose_item_start'
            else:
                t = 'list_item_start'

            self.tokens.append({'type': t})
            # recurse
            self.parse(item, self.list_rules)
            self.tokens.append({'type': 'list_item_end'})

    def parse_block_quote(self, m):
        """Emit the tokens of a block quote, recursing into its content.

        Once the nesting depth exceeds ``max_recursive_depth`` the match is
        emitted as plain text instead of being parsed.
        """
        self.tokens.append({'type': 'block_quote_start'})
        self._blockquote_depth += 1
        if self._blockquote_depth > self._max_recursive_depth:
            self.parse_text(m)
        else:
            # clean leading >
            cap = _block_quote_leading_pattern.sub('', m.group(0))
            self.parse(cap)
        self.tokens.append({'type': 'block_quote_end'})
        self._blockquote_depth -= 1

    def parse_def_links(self, m):
        """Record a link reference definition; no token is emitted."""
        key = _keyify(m.group(1))
        self.def_links[key] = {
            'link': m.group(2),
            'title': m.group(3),
        }

    def parse_def_footnotes(self, m):
        """Record a footnote definition and tokenize its content."""
        key = _keyify(m.group(1))
        if key in self.def_footnotes:
            # footnote is already defined
            return

        # 0 means "defined"; the inline lexer later stores the reference
        # index under the same key.
        self.def_footnotes[key] = 0

        self.tokens.append({
            'type': 'footnote_start',
            'key': key,
        })

        text = m.group(2)

        if '\n' in text:
            lines = text.split('\n')
            # Smallest non-zero indentation among the continuation lines.
            whitespace = None
            for line in lines[1:]:
                space = len(line) - len(line.lstrip())
                if space and (not whitespace or space < whitespace):
                    whitespace = space
            # Strip that indentation from every continuation line.
            newlines = [lines[0]]
            for line in lines[1:]:
                newlines.append(line[whitespace:])
            text = '\n'.join(newlines)

        self.parse(text, self.footnote_rules)

        self.tokens.append({
            'type': 'footnote_end',
            'key': key,
        })

    def parse_table(self, m):
        """Emit a ``table`` token for a table with leading pipes."""
        item = self._process_table(m)

        cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3))
        cells = cells.split('\n')
        for i, v in enumerate(cells):
            # Drop the outer pipes, then split on unescaped pipes.
            v = re.sub(r'^ *\| *| *\| *$', '', v)
            cells[i] = re.split(r' *(?<!\\)\| *', v)

        item['cells'] = self._process_cells(cells)
        self.tokens.append(item)

    def parse_nptable(self, m):
        """Emit a ``table`` token for a table without leading pipes."""
        item = self._process_table(m)

        cells = re.sub(r'\n$', '', m.group(3))
        cells = cells.split('\n')
        for i, v in enumerate(cells):
            cells[i] = re.split(r' *(?<!\\)\| *', v)

        item['cells'] = self._process_cells(cells)
        self.tokens.append(item)

    def _process_table(self, m):
        """Build the ``table`` token (header and alignment) from a match."""
        header = re.sub(r'^ *| *\| *$', '', m.group(1))
        header = re.split(r' *\| *', header)
        align = re.sub(r' *|\| *$', '', m.group(2))
        align = re.split(r' *\| *', align)

        for i, v in enumerate(align):
            if re.search(r'^ *-+: *$', v):
                align[i] = 'right'
            elif re.search(r'^ *:-+: *$', v):
                align[i] = 'center'
            elif re.search(r'^ *:-+ *$', v):
                align[i] = 'left'
            else:
                align[i] = None

        item = {
            'type': 'table',
            'header': header,
            'align': align,
        }
        return item

    def _process_cells(self, cells):
        """Replace escaped pipes (``\\|``) in every cell, in place.

        :param cells: list of rows, each a list of cell strings.
        :returns: the same ``cells`` list.
        """
        for i, line in enumerate(cells):
            for c, cell in enumerate(line):
                # de-escape any pipe inside the cell here; a raw string
                # avoids the invalid "\|" escape of a normal literal.
                cells[i][c] = re.sub(r'\\\|', '|', cell)

        return cells

    def parse_block_html(self, m):
        """Emit a token for block level HTML.

        When the match has no captured tag name (group 1) a ``close_html``
        token holding the raw text is emitted; otherwise an ``open_html``
        token with tag, attributes and inner text.
        """
        tag = m.group(1)
        if not tag:
            text = m.group(0)
            self.tokens.append({
                'type': 'close_html',
                'text': text
            })
        else:
            attr = m.group(2)
            text = m.group(3)
            self.tokens.append({
                'type': 'open_html',
                'tag': tag,
                'extra': attr,
                'text': text
            })

    def parse_paragraph(self, m):
        """Emit a ``paragraph`` token."""
        text = m.group(1).rstrip('\n')
        self.tokens.append({'type': 'paragraph', 'text': text})

    def parse_text(self, m):
        """Emit a ``text`` token holding the whole match."""
        text = m.group(0)
        self.tokens.append({'type': 'text', 'text': text})
464
465
class InlineGrammar(object):
    """Grammars for inline level tokens.

    Every attribute is a compiled regular expression anchored at the start
    of the remaining text. ``InlineLexer`` looks rules up by attribute name
    and dispatches a match to its ``output_<name>`` method.
    """

    # Backslash escape of a punctuation character.
    escape = re.compile(r'^\\([\\`*{}\[\]()#+\-.!_>~|])')  # \* \+ \! ....
    # Inline HTML: a comment, an element with a closing tag, or a single
    # (possibly self-closing) tag.
    inline_html = re.compile(
        r'^(?:%s|%s|%s)' % (
            r'<!--[\s\S]*?-->',
            r'<(\w+%s)((?:%s)*?)\s*>([\s\S]*?)<\/\1>' % (
                _valid_end, _valid_attr),
            r'<\w+%s(?:%s)*?\s*\/?>' % (_valid_end, _valid_attr),
        )
    )
    # <http://example.com> or <me@example.com>; group 2 is ":" or "@".
    autolink = re.compile(r'^<([^ >]+(@|:)[^ >]+)>')
    # [text](url "title") and ![alt](src "title").
    link = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\('
        r'''\s*(<)?([\s\S]*?)(?(2)>)(?:\s+['"]([\s\S]*?)['"])?\s*'''
        r'\)'
    )
    # [text][key] and ![alt][key].
    reflink = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\s*\[([^^\]]*)\]'
    )
    # [key] and ![key] with no explicit target.
    nolink = re.compile(r'^!?\[((?:\[[^\]]*\]|[^\[\]])*)\]')
    # Bare http:// or https:// URL.
    url = re.compile(r'''^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])''')
    double_emphasis = re.compile(
        r'^_{2}([\s\S]+?)_{2}(?!_)'  # __word__
        r'|'
        r'^\*{2}([\s\S]+?)\*{2}(?!\*)'  # **word**
    )
    emphasis = re.compile(
        r'^\b_((?:__|[^_])+?)_\b'  # _word_
        r'|'
        r'^\*((?:\*\*|[^\*])+?)\*(?!\*)'  # *word*
    )
    code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)')  # `code`
    # Two or more trailing spaces followed by a newline.
    linebreak = re.compile(r'^ {2,}\n(?!\s*$)')
    strikethrough = re.compile(r'^~~(?=\S)([\s\S]*?\S)~~')  # ~~word~~
    # Footnote reference: [^key].
    footnote = re.compile(r'^\[\^([^\]]+)\]')
    # Plain text up to the next character that may start another rule.
    text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| {2,}\n|$)')

    def hard_wrap(self):
        """Grammar for hard wrap linebreak. You don't need to add two
        spaces at the end of a line.

        Replaces ``linebreak`` and ``text`` on this instance so that any
        newline (optionally preceded by spaces) counts as a line break.
        """
        self.linebreak = re.compile(r'^ *\n(?!\s*$)')
        self.text = re.compile(
            r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| *\n|$)'
        )
517
518
class InlineLexer(object):
    """Inline level lexer for inline grammars.

    The lexer repeatedly tries the active rules against the start of the
    remaining text and concatenates what the renderer returns for each
    match.
    """
    grammar_class = InlineGrammar

    # Rules tried, in order, for ordinary inline text.
    default_rules = [
        'escape', 'inline_html', 'autolink', 'url',
        'footnote', 'link', 'reflink', 'nolink',
        'double_emphasis', 'emphasis', 'code',
        'linebreak', 'strikethrough', 'text',
    ]
    # Rules tried for text found inside HTML tags (no footnotes).
    inline_html_rules = [
        'escape', 'inline_html', 'autolink', 'url', 'link', 'reflink',
        'nolink', 'double_emphasis', 'emphasis', 'code',
        'linebreak', 'strikethrough', 'text',
    ]

    def __init__(self, renderer, rules=None, **kwargs):
        """Create a lexer.

        :param renderer: the renderer whose methods produce the output.
        :param rules: a grammar instance; defaults to ``grammar_class()``.
        :param kwargs: options; they are overridden by ``renderer.options``.
            ``hard_wrap`` and ``parse_inline_html`` are read here.
        """
        self.renderer = renderer
        self.links = {}
        self.footnotes = {}
        self.footnote_index = 0

        if not rules:
            rules = self.grammar_class()

        kwargs.update(self.renderer.options)
        if kwargs.get('hard_wrap'):
            rules.hard_wrap()

        self.rules = rules

        self._in_link = False
        self._in_footnote = False
        self._parse_inline_html = kwargs.get('parse_inline_html')

    def __call__(self, text, rules=None):
        return self.output(text, rules)

    def setup(self, links, footnotes):
        """Install the link and footnote definitions of a new document."""
        self.footnote_index = 0
        self.links = links or {}
        self.footnotes = footnotes or {}

    def output(self, text, rules=None):
        """Render inline ``text`` and return the renderer's output.

        :param text: the inline text to render.
        :param rules: names of the grammar rules to try, in order;
            defaults to ``default_rules``. The sequence is never modified.
        :raises RuntimeError: when no rule produces output for the
            remaining text.
        """
        text = text.rstrip('\n')
        # Always work on a private copy: the 'footnote' rule may be removed
        # below and that must not leak into the caller's (or the class's)
        # rule list.
        rules = list(rules or self.default_rules)

        if self._in_footnote and 'footnote' in rules:
            rules.remove('footnote')

        output = self.renderer.placeholder()

        def manipulate(text):
            # Apply the first rule that matches and yields output; a rule
            # returning None declines the match and the next one is tried.
            for key in rules:
                pattern = getattr(self.rules, key)
                m = pattern.match(text)
                if not m:
                    continue
                self.line_match = m
                out = getattr(self, 'output_%s' % key)(m)
                if out is not None:
                    return m, out
            return False  # pragma: no cover

        while text:
            ret = manipulate(text)
            if ret is not False:
                m, out = ret
                output += out
                text = text[len(m.group(0)):]
                continue
            if text:  # pragma: no cover
                raise RuntimeError('Infinite loop at: %s' % text)

        return output

    def output_escape(self, m):
        text = m.group(1)
        return self.renderer.escape(text)

    def output_autolink(self, m):
        link = m.group(1)
        # Group 2 is the separator that was matched: "@" marks an email.
        is_email = m.group(2) == '@'
        return self.renderer.autolink(link, is_email)

    def output_url(self, m):
        link = m.group(1)
        # A bare URL inside link text is rendered as plain text.
        if self._in_link:
            return self.renderer.text(link)
        return self.renderer.autolink(link, False)

    def output_inline_html(self, m):
        tag = m.group(1)
        if self._parse_inline_html and tag in _inline_tags:
            text = m.group(3)
            if tag == 'a':
                # Restore the previous state afterwards so that an <a>
                # nested in link text does not clear the outer flag.
                was_in_link = self._in_link
                self._in_link = True
                text = self.output(text, rules=self.inline_html_rules)
                self._in_link = was_in_link
            else:
                text = self.output(text, rules=self.inline_html_rules)
            extra = m.group(2) or ''
            html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
        else:
            html = m.group(0)
        return self.renderer.inline_html(html)

    def output_footnote(self, m):
        key = _keyify(m.group(1))
        # Decline unknown footnotes and footnotes already referenced.
        if key not in self.footnotes:
            return None
        if self.footnotes[key]:
            return None
        self.footnote_index += 1
        self.footnotes[key] = self.footnote_index
        return self.renderer.footnote_ref(key, self.footnote_index)

    def output_link(self, m):
        return self._process_link(m, m.group(3), m.group(4))

    def output_reflink(self, m):
        key = _keyify(m.group(2) or m.group(1))
        if key not in self.links:
            return None
        ret = self.links[key]
        return self._process_link(m, ret['link'], ret['title'])

    def output_nolink(self, m):
        key = _keyify(m.group(1))
        if key not in self.links:
            return None
        ret = self.links[key]
        return self._process_link(m, ret['link'], ret['title'])

    def _process_link(self, m, link, title=None):
        """Render a link or, when the match starts with "!", an image."""
        line = m.group(0)
        text = m.group(1)
        if line[0] == '!':
            return self.renderer.image(link, title, text)

        # Restore the previous state afterwards so that a nested link does
        # not clear the flag of an enclosing one.
        was_in_link = self._in_link
        self._in_link = True
        text = self.output(text)
        self._in_link = was_in_link
        return self.renderer.link(link, title, text)

    def output_double_emphasis(self, m):
        text = m.group(2) or m.group(1)
        text = self.output(text)
        return self.renderer.double_emphasis(text)

    def output_emphasis(self, m):
        text = m.group(2) or m.group(1)
        text = self.output(text)
        return self.renderer.emphasis(text)

    def output_code(self, m):
        text = m.group(2)
        return self.renderer.codespan(text)

    def output_linebreak(self, m):
        return self.renderer.linebreak()

    def output_strikethrough(self, m):
        text = self.output(m.group(1))
        return self.renderer.strikethrough(text)

    def output_text(self, m):
        text = m.group(0)
        return self.renderer.text(text)
692
693
class Renderer(object):
    """The default HTML renderer for rendering Markdown.

    Keyword arguments are stored in ``self.options``; the options read by
    this class are ``escape``, ``skip_style``, ``use_xhtml`` and
    ``parse_block_html``.
    """

    def __init__(self, **kwargs):
        self.options = kwargs

    def placeholder(self):
        """Returns the default, empty output value for the renderer.

        All renderer methods use the '+=' operator to append to this value.
        Default is a string so rendering HTML can build up a result string with
        the rendered Markdown.

        Can be overridden by Renderer subclasses to be types like an empty
        list, allowing the renderer to create a tree-like structure to
        represent the document (which can then be reprocessed later into a
        separate format like docx or pdf).
        """
        return ''

    def block_code(self, code, lang=None):
        """Rendering block level code. ``pre > code``.

        :param code: text content of the code block.
        :param lang: language of the given code.
        """
        code = code.rstrip('\n')
        if not lang:
            code = escape(code, smart_amp=False)
            return '<pre><code>%s\n</code></pre>\n' % code
        code = escape(code, quote=True, smart_amp=False)
        # The language comes straight from the document and ends up inside
        # an attribute value, so it must be escaped as well.
        lang = escape(lang, quote=True, smart_amp=False)
        return '<pre><code class="lang-%s">%s\n</code></pre>\n' % (lang, code)

    def block_quote(self, text):
        """Rendering <blockquote> with the given text.

        :param text: text content of the blockquote.
        """
        return '<blockquote>%s\n</blockquote>\n' % text.rstrip('\n')

    def block_html(self, html):
        """Rendering block level pure html content.

        :param html: text content of the html snippet.
        """
        if self.options.get('skip_style') and \
           html.lower().startswith('<style'):
            return ''
        if self.options.get('escape'):
            return escape(html)
        return html

    def header(self, text, level, raw=None):
        """Rendering header/heading tags like ``<h1>`` ``<h2>``.

        :param text: rendered text content for the header.
        :param level: a number for the header level, for example: 1.
        :param raw: raw text content of the header.
        """
        return '<h%d>%s</h%d>\n' % (level, text, level)

    def hrule(self):
        """Rendering method for ``<hr>`` tag."""
        if self.options.get('use_xhtml'):
            return '<hr />\n'
        return '<hr>\n'

    def list(self, body, ordered=True):
        """Rendering list tags like ``<ul>`` and ``<ol>``.

        :param body: body contents of the list.
        :param ordered: whether this list is ordered or not.
        """
        tag = 'ol' if ordered else 'ul'
        return '<%s>\n%s</%s>\n' % (tag, body, tag)

    def list_item(self, text):
        """Rendering list item snippet. Like ``<li>``."""
        return '<li>%s</li>\n' % text

    def paragraph(self, text):
        """Rendering paragraph tags. Like ``<p>``."""
        return '<p>%s</p>\n' % text.strip(' ')

    def table(self, header, body):
        """Rendering table element. Wrap header and body in it.

        :param header: header part of the table.
        :param body: body part of the table.
        """
        return (
            '<table>\n<thead>%s</thead>\n'
            '<tbody>\n%s</tbody>\n</table>\n'
        ) % (header, body)

    def table_row(self, content):
        """Rendering a table row. Like ``<tr>``.

        :param content: content of current table row.
        """
        return '<tr>\n%s</tr>\n' % content

    def table_cell(self, content, **flags):
        """Rendering a table cell. Like ``<th>`` ``<td>``.

        :param content: content of current table cell.
        :param header: whether this is header or not.
        :param align: align of current table cell.
        """
        tag = 'th' if flags['header'] else 'td'
        align = flags['align']
        if not align:
            return '<%s>%s</%s>\n' % (tag, content, tag)
        return '<%s style="text-align:%s">%s</%s>\n' % (
            tag, align, content, tag
        )

    def double_emphasis(self, text):
        """Rendering **strong** text.

        :param text: text content for emphasis.
        """
        return '<strong>%s</strong>' % text

    def emphasis(self, text):
        """Rendering *emphasis* text.

        :param text: text content for emphasis.
        """
        return '<em>%s</em>' % text

    def codespan(self, text):
        """Rendering inline `code` text.

        :param text: text content for inline code.
        """
        text = escape(text.rstrip(), smart_amp=False)
        return '<code>%s</code>' % text

    def linebreak(self):
        """Rendering line break like ``<br>``."""
        if self.options.get('use_xhtml'):
            return '<br />\n'
        return '<br>\n'

    def strikethrough(self, text):
        """Rendering ~~strikethrough~~ text.

        :param text: text content for strikethrough.
        """
        return '<del>%s</del>' % text

    def text(self, text):
        """Rendering unformatted text.

        The text is returned unescaped when the ``parse_block_html``
        option is set.

        :param text: text content.
        """
        if self.options.get('parse_block_html'):
            return text
        return escape(text)

    def escape(self, text):
        """Rendering escape sequence.

        :param text: text content.
        """
        return escape(text)

    def autolink(self, link, is_email=False):
        """Rendering a given link or email address.

        :param link: link content or email address.
        :param is_email: whether this is an email or not.
        """
        text = link = escape_link(link)
        if is_email:
            link = 'mailto:%s' % link
        return '<a href="%s">%s</a>' % (link, text)

    def link(self, link, title, text):
        """Rendering a given link with content and title.

        :param link: href link for ``<a>`` tag.
        :param title: title content for `title` attribute.
        :param text: text content for description.
        """
        link = escape_link(link)
        if not title:
            return '<a href="%s">%s</a>' % (link, text)
        title = escape(title, quote=True)
        return '<a href="%s" title="%s">%s</a>' % (link, title, text)

    def image(self, src, title, text):
        """Rendering a image with title and text.

        :param src: source link of the image.
        :param title: title text of the image.
        :param text: alt text of the image.
        """
        src = escape_link(src)
        text = escape(text, quote=True)
        if title:
            title = escape(title, quote=True)
            html = '<img src="%s" alt="%s" title="%s"' % (src, text, title)
        else:
            html = '<img src="%s" alt="%s"' % (src, text)
        if self.options.get('use_xhtml'):
            return '%s />' % html
        return '%s>' % html

    def inline_html(self, html):
        """Rendering span level pure html content.

        :param html: text content of the html snippet.
        """
        if self.options.get('escape'):
            return escape(html)
        return html

    def newline(self):
        """Rendering newline element."""
        return ''

    def footnote_ref(self, key, index):
        """Rendering the ref anchor of a footnote.

        :param key: identity key for the footnote.
        :param index: the index count of current footnote.
        """
        html = (
            '<sup class="footnote-ref" id="fnref-%s">'
            '<a href="#fn-%s">%d</a></sup>'
        ) % (escape(key), escape(key), index)
        return html

    def footnote_item(self, key, text):
        """Rendering a footnote item.

        :param key: identity key for the footnote.
        :param text: text content of the footnote.
        """
        back = (
            '<a href="#fnref-%s" class="footnote">&#8617;</a>'
        ) % escape(key)
        text = text.rstrip()
        closing = '</p>'
        if text.endswith(closing):
            # Insert the back link before the final </p>. Plain slicing is
            # used instead of re.sub so that a backslash in the key is not
            # interpreted as a replacement escape.
            text = text[:-len(closing)] + back + closing
        else:
            text = '%s<p>%s</p>' % (text, back)
        html = '<li id="fn-%s">%s</li>\n' % (escape(key), text)
        return html

    def footnotes(self, text):
        """Wrapper for all footnotes.

        :param text: contents of all footnotes.
        """
        html = '<div class="footnotes">\n%s<ol>%s</ol>\n</div>\n'
        return html % (self.hrule(), text)
959
960
class Markdown(object):
    """The Markdown parser.

    :param renderer: An instance of ``Renderer``.
    :param inline: An inline lexer class or instance.
    :param block: A block lexer class or instance.
    """
    def __init__(self, renderer=None, inline=None, block=None, **kwargs):
        if not renderer:
            renderer = Renderer(**kwargs)
        else:
            # Options stored on an explicit renderer take precedence.
            kwargs.update(renderer.options)

        self.renderer = renderer

        # Lexers given as classes are instantiated with the options.
        if inline and inspect.isclass(inline):
            inline = inline(renderer, **kwargs)
        if block and inspect.isclass(block):
            block = block(**kwargs)

        if inline:
            self.inline = inline
        else:
            self.inline = InlineLexer(renderer, **kwargs)

        self.block = block or BlockLexer(BlockGrammar())
        self.footnotes = []
        self.tokens = []

        # detect if it should parse text in block html
        self._parse_block_html = kwargs.get('parse_block_html')

    def __call__(self, text):
        return self.parse(text)

    def render(self, text):
        """Render the Markdown text.

        :param text: markdown formatted text content.
        """
        return self.parse(text)

    def parse(self, text):
        """Render ``text`` and append the collected footnotes, if any.

        The link and footnote definitions held by the lexers are reset
        afterwards.

        :param text: markdown formatted text content.
        """
        out = self.output(preprocessing(text))

        keys = self.block.def_footnotes

        # reset block
        self.block.def_links = {}
        self.block.def_footnotes = {}

        # reset inline
        self.inline.links = {}
        self.inline.footnotes = {}

        if not self.footnotes:
            return out

        # Keep only footnotes that were referenced (non-zero index) and
        # sort them so that pop() yields them in reference order.
        footnotes = filter(lambda o: keys.get(o['key']), self.footnotes)
        self.footnotes = sorted(
            footnotes, key=lambda o: keys.get(o['key']), reverse=True
        )

        body = self.renderer.placeholder()
        while self.footnotes:
            note = self.footnotes.pop()
            body += self.renderer.footnote_item(
                note['key'], note['text']
            )

        out += self.renderer.footnotes(body)
        return out

    def pop(self):
        """Remove the next token, store it as ``self.token`` and return it.

        Returns None when no tokens are left.
        """
        if not self.tokens:
            return None
        self.token = self.tokens.pop()
        return self.token

    def peek(self):
        """Return the next token without removing it, or None."""
        if self.tokens:
            return self.tokens[-1]
        return None  # pragma: no cover

    def output(self, text, rules=None):
        """Tokenize ``text`` with the block lexer and render every token.

        :param text: preprocessed markdown text.
        :param rules: block rule names handed to the block lexer.
        """
        self.tokens = self.block(text, rules)
        # Reverse so that pop() hands out tokens in document order.
        self.tokens.reverse()

        self.inline.setup(self.block.def_links, self.block.def_footnotes)

        out = self.renderer.placeholder()
        while self.pop():
            out += self.tok()
        return out

    def tok(self):
        """Render the current token via its ``output_<type>`` method."""
        t = self.token['type']

        # special cases
        if t.endswith('_start'):
            t = t[:-6]

        return getattr(self, 'output_%s' % t)()

    def tok_text(self):
        """Merge consecutive ``text`` tokens and render them inline."""
        text = self.token['text']
        upcoming = self.peek()
        # peek() returns None once the tokens are exhausted.
        while upcoming is not None and upcoming['type'] == 'text':
            text += '\n' + self.pop()['text']
            upcoming = self.peek()
        return self.inline(text)

    def output_newline(self):
        return self.renderer.newline()

    def output_hrule(self):
        return self.renderer.hrule()

    def output_heading(self):
        return self.renderer.header(
            self.inline(self.token['text']),
            self.token['level'],
            self.token['text'],
        )

    def output_code(self):
        return self.renderer.block_code(
            self.token['text'], self.token['lang']
        )

    def output_table(self):
        aligns = self.token['align']
        aligns_length = len(aligns)
        cell = self.renderer.placeholder()

        # header part
        header = self.renderer.placeholder()
        for i, value in enumerate(self.token['header']):
            # Columns beyond the alignment row get no alignment.
            align = aligns[i] if i < aligns_length else None
            flags = {'header': True, 'align': align}
            cell += self.renderer.table_cell(self.inline(value), **flags)

        header += self.renderer.table_row(cell)

        # body part
        body = self.renderer.placeholder()
        for i, row in enumerate(self.token['cells']):
            cell = self.renderer.placeholder()
            for j, value in enumerate(row):
                align = aligns[j] if j < aligns_length else None
                flags = {'header': False, 'align': align}
                cell += self.renderer.table_cell(self.inline(value), **flags)
            body += self.renderer.table_row(cell)

        return self.renderer.table(header, body)

    def output_block_quote(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'block_quote_end':
            body += self.tok()
        return self.renderer.block_quote(body)

    def output_list(self):
        ordered = self.token['ordered']
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_end':
            body += self.tok()
        return self.renderer.list(body, ordered)

    def output_list_item(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_item_end':
            # Text in a tight item is rendered inline, without <p>.
            if self.token['type'] == 'text':
                body += self.tok_text()
            else:
                body += self.tok()

        return self.renderer.list_item(body)

    def output_loose_item(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_item_end':
            body += self.tok()
        return self.renderer.list_item(body)

    def output_footnote(self):
        # The rendered footnote is stored for parse() and contributes
        # nothing at its position in the document.
        self.inline._in_footnote = True
        body = self.renderer.placeholder()
        key = self.token['key']
        while self.pop()['type'] != 'footnote_end':
            body += self.tok()
        self.footnotes.append({'key': key, 'text': body})
        self.inline._in_footnote = False
        return self.renderer.placeholder()

    def output_close_html(self):
        text = self.token['text']
        return self.renderer.block_html(text)

    def output_open_html(self):
        text = self.token['text']
        tag = self.token['tag']
        if self._parse_block_html and tag not in _pre_tags:
            text = self.inline(text, rules=self.inline.inline_html_rules)
        extra = self.token.get('extra') or ''
        html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
        return self.renderer.block_html(html)

    def output_paragraph(self):
        return self.renderer.paragraph(self.inline(self.token['text']))

    def output_text(self):
        return self.renderer.paragraph(self.tok_text())
1172
1173
def markdown(text, escape=True, **kwargs):
    """Render markdown formatted text to html.

    :param text: markdown formatted text content.
    :param escape: if set to False, all html tags will not be escaped.
    :param use_xhtml: output with xhtml tags.
    :param hard_wrap: if set to True, it will use the GFM line breaks feature.
    :param parse_block_html: parse text only in block level html.
    :param parse_inline_html: parse text only in inline level html.
    """
    parser = Markdown(escape=escape, **kwargs)
    return parser(text)