Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/mistune.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author | shellac |
---|---|
date | Mon, 01 Jun 2020 08:59:25 -0400 |
parents | 79f47841a781 |
children |
comparison
equal
deleted
inserted
replaced
4:79f47841a781 | 5:9b1c78e6ba9c |
---|---|
1 # coding: utf-8 | |
2 """ | |
3 mistune | |
4 ~~~~~~~ | |
5 | |
6 The fastest markdown parser in pure Python with renderer feature. | |
7 | |
8 :copyright: (c) 2014 - 2018 by Hsiaoming Yang. | |
9 """ | |
10 | |
11 import re | |
12 import inspect | |
13 | |
14 __version__ = '0.8.4' | |
15 __author__ = 'Hsiaoming Yang <me@lepture.com>' | |
16 __all__ = [ | |
17 'BlockGrammar', 'BlockLexer', | |
18 'InlineGrammar', 'InlineLexer', | |
19 'Renderer', 'Markdown', | |
20 'markdown', 'escape', | |
21 ] | |
22 | |
23 | |
# Runs of whitespace inside a reference key; collapsed to one space by
# ``_keyify``.
_key_pattern = re.compile(r'\s+')
_nonalpha_pattern = re.compile(r'\W')
# An ampersand that is not already the start of an entity ("&name;" or
# "&#name;"); used by ``escape`` when ``smart_amp`` is on.
_escape_pattern = re.compile(r'&(?!#?\w+;)')
# Windows / old-Mac line endings, normalised to "\n" by ``preprocessing``.
_newline_pattern = re.compile(r'\r\n|\r')
# Leading "> " marker of every block-quote line.
_block_quote_leading_pattern = re.compile(r'^ *> ?', flags=re.M)
# Four-space indent of every indented-code line.
_block_code_leading_pattern = re.compile(r'^ {4}', re.M)

# Tags treated as inline HTML; any other tag name counts as block level.
_inline_tags = [
    'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data',
    'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark',
    'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del',
    'img', 'font',
]
_pre_tags = ['pre', 'script', 'style']

# Lookahead placed after a tag name: rejects "name:/..." and "name@..." so
# that URLs and e-mail addresses in angle brackets are not taken for tags.
_valid_end = r'(?!:/|[^\w\s@]*@)\b'
_valid_attr = r'''\s*[a-zA-Z\-](?:\s*\=\s*(?:"[^"]*"|'[^']*'|[^\s'">]+))?'''
# A tag name that is not one of ``_inline_tags``.
_block_tag = r'(?!(?:%s)\b)\w+%s' % ('|'.join(_inline_tags), _valid_end)
# URL schemes that ``escape_link`` refuses to emit.
_scheme_blacklist = ('javascript:', 'vbscript:')
41 | |
42 | |
def _pure_pattern(regex):
    """Return the source of a compiled ``regex`` without a leading ``^``.

    The grammar classes use this to embed one rule inside another (for
    example as a lookahead), where the start-of-string anchor must go.

    :param regex: a compiled regular expression object.
    """
    source = regex.pattern
    return source[1:] if source.startswith('^') else source
48 | |
49 | |
def _keyify(key):
    """Normalise a link or footnote ``key`` for dictionary lookup.

    The key is lower-cased and HTML-escaped (quotes included), then every
    run of whitespace is collapsed to a single space.
    """
    escaped = escape(key.lower(), quote=True)
    return _key_pattern.sub(' ', escaped)
53 | |
54 | |
def escape(text, quote=False, smart_amp=True):
    """Replace special characters "&", "<" and ">" to HTML-safe sequences.

    The original cgi.escape will always escape "&", but you can control
    this one for a smart escape amp.

    :param text: the string to escape.
    :param quote: if set to True, " and ' will be escaped.
    :param smart_amp: if set to False, & will always be escaped. When True,
                      an "&" that already starts an entity is left alone.
    :returns: the escaped string.
    """
    # The ampersand must be handled first, otherwise the "&" of the
    # entities produced below would be escaped a second time.
    if smart_amp:
        text = _escape_pattern.sub('&amp;', text)
    else:
        text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    if quote:
        text = text.replace('"', '&quot;')
        text = text.replace("'", '&#39;')
    return text
74 | |
75 | |
def escape_link(url):
    """Remove dangerous URL schemes like javascript: and escape afterwards.

    :param url: the raw link target.
    :returns: an empty string when the URL starts with a blacklisted
              scheme, otherwise the HTML-escaped URL (quotes included).
    """
    trimmed = url.lower().strip('\x00\x1a \n\r\t')
    # Keep only letters, digits, "/" and ":" so that a scheme hidden behind
    # other characters is still recognised.
    compact = re.sub(r'[^A-Za-z0-9\/:]+', '', trimmed)
    if any(compact.startswith(scheme) for scheme in _scheme_blacklist):
        return ''
    return escape(url, quote=True, smart_amp=False)
84 | |
85 | |
def preprocessing(text, tab=4):
    """Normalise raw markdown text before it is handed to the block lexer.

    Line endings become "\\n", tabs are expanded to ``tab`` columns, the
    U+2424 "symbol for newline" character becomes a real newline, and lines
    consisting only of spaces are emptied.

    :param text: raw markdown source.
    :param tab: tab stop width passed to ``str.expandtabs``.
    """
    text = _newline_pattern.sub('\n', text).expandtabs(tab)
    text = text.replace('\u2424', '\n')
    return re.sub(r'^ +$', '', text, flags=re.M)
92 | |
93 | |
class BlockGrammar(object):
    """Grammars for block level tokens.

    Every attribute is a compiled pattern anchored at the start of the
    remaining text; ``BlockLexer`` looks rules up by attribute name.
    """

    # Link reference definition.
    def_links = re.compile(
        r'^ *\[([^^\]]+)\]: *'  # [key]:
        r'<?([^\s>]+)>?'  # <link> or link
        r'(?: +["(]([^\n]+)[")])? *(?:\n+|$)'
    )
    # Footnote definition, including its indented continuation lines.
    def_footnotes = re.compile(
        r'^\[\^([^\]]+)\]: *('
        r'[^\n]*(?:\n+|$)'  # [^key]:
        r'(?: {1,}[^\n]*(?:\n+|$))*'
        r')'
    )

    newline = re.compile(r'^\n+')
    # One or more lines indented by four spaces.
    block_code = re.compile(r'^( {4}[^\n]+\n*)+')
    fences = re.compile(
        r'^ *(`{3,}|~{3,}) *([^`\s]+)? *\n'  # ```lang
        r'([\s\S]+?)\s*'
        r'\1 *(?:\n+|$)'  # ```
    )
    hrule = re.compile(r'^ {0,3}[-*_](?: *[-*_]){2,} *(?:\n+|$)')
    # ATX heading: one to six "#" followed by the title.
    heading = re.compile(r'^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)')
    # Setext heading: a title line underlined with "=" or "-".
    lheading = re.compile(r'^([^\n]+)\n *(=|-)+ *(?:\n+|$)')
    block_quote = re.compile(r'^( *>[^\n]+(\n[^\n]+)*\n*)+')
    # A whole list; the alternatives below are the ways the list can end.
    list_block = re.compile(
        r'^( *)(?=[*+-]|\d+\.)(([*+-])?(?:\d+\.)?) [\s\S]+?'
        r'(?:'
        r'\n+(?=\1?(?:[-*_] *){3,}(?:\n+|$))'  # hrule
        r'|\n+(?=%s)'  # def links
        r'|\n+(?=%s)'  # def footnotes
        r'|\n+(?=\1(?(3)\d+\.|[*+-]) )'  # heterogeneous bullet
        r'|\n{2,}'
        r'(?! )'
        r'(?!\1(?:[*+-]|\d+\.) )\n*'
        r'|'
        r'\s*$)' % (
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
        )
    )
    # A single item inside an already matched list block.
    list_item = re.compile(
        r'^(( *)(?:[*+-]|\d+\.) [^\n]*'
        r'(?:\n(?!\2(?:[*+-]|\d+\.) )[^\n]*)*)',
        flags=re.M
    )
    list_bullet = re.compile(r'^ *(?:[*+-]|\d+\.) +')
    # A paragraph runs until a line that would start another block. The
    # embedded rules sit inside two extra groups, so their back-references
    # are renumbered (\1 -> \2 for fences, \1 -> \3 for lists).
    paragraph = re.compile(
        r'^((?:[^\n]+\n?(?!'
        r'%s|%s|%s|%s|%s|%s|%s|%s|%s'
        r'))+)\n*' % (
            _pure_pattern(fences).replace(r'\1', r'\2'),
            _pure_pattern(list_block).replace(r'\1', r'\3'),
            _pure_pattern(hrule),
            _pure_pattern(heading),
            _pure_pattern(lheading),
            _pure_pattern(block_quote),
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
            '<' + _block_tag,
        )
    )
    # An HTML comment, a paired block-level tag, or a lone block-level tag.
    block_html = re.compile(
        r'^ *(?:%s|%s|%s) *(?:\n{2,}|\s*$)' % (
            r'<!--[\s\S]*?-->',
            r'<(%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_block_tag, _valid_attr),
            r'<%s(?:%s)*?\s*\/?>' % (_block_tag, _valid_attr),
        )
    )
    # Table whose rows start with a pipe.
    table = re.compile(
        r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*'
    )
    # Table whose rows have no leading pipe.
    nptable = re.compile(
        r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*'
    )
    text = re.compile(r'^[^\n]+')
171 | |
172 | |
class BlockLexer(object):
    """Block level lexer for block grammars.

    Turns markdown text into a flat list of token dictionaries
    (``self.tokens``) and records link and footnote definitions in
    ``self.def_links`` / ``self.def_footnotes``.
    """
    grammar_class = BlockGrammar

    # Rule names tried, in this order, when no rule list is given.
    default_rules = [
        'newline', 'hrule', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'block_quote',
        'list_block', 'block_html', 'def_links',
        'def_footnotes', 'table', 'paragraph', 'text'
    ]

    # Rule names used for the body of a list item.
    list_rules = (
        'newline', 'block_code', 'fences', 'lheading', 'hrule',
        'block_quote', 'list_block', 'block_html', 'text',
    )

    # Rule names used for the body of a footnote definition.
    footnote_rules = (
        'newline', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'hrule', 'block_quote',
        'list_block', 'block_html', 'table', 'paragraph', 'text'
    )

    def __init__(self, rules=None, **kwargs):
        """Create a lexer.

        :param rules: a grammar object exposing one compiled pattern per
                      rule name; defaults to ``grammar_class()``.
        :param max_recursive_depth: keyword option (default 6) limiting how
                      deeply lists and block quotes are parsed recursively.
        """
        self.tokens = []
        self.def_links = {}
        self.def_footnotes = {}

        if not rules:
            rules = self.grammar_class()

        self.rules = rules
        self._max_recursive_depth = kwargs.get('max_recursive_depth', 6)
        self._list_depth = 0
        self._blockquote_depth = 0

    def __call__(self, text, rules=None):
        return self.parse(text, rules)

    def parse(self, text, rules=None):
        """Tokenize ``text`` and return ``self.tokens``.

        Tokens are appended to the existing list, which is what allows the
        ``parse_*`` handlers to call this method recursively.

        :param text: markdown text.
        :param rules: iterable of rule names to try, in order; defaults to
                      ``default_rules``.
        :raises RuntimeError: if no rule matches the remaining text.
        """
        text = text.rstrip('\n')

        if not rules:
            rules = self.default_rules

        def manipulate(text):
            # The first rule that matches wins; its handler emits tokens.
            for key in rules:
                rule = getattr(self.rules, key)
                m = rule.match(text)
                if not m:
                    continue
                getattr(self, 'parse_%s' % key)(m)
                return m
            return False  # pragma: no cover

        while text:
            m = manipulate(text)
            if m is not False:
                text = text[len(m.group(0)):]
                continue
            if text:  # pragma: no cover
                raise RuntimeError('Infinite loop at: %s' % text)
        return self.tokens

    def parse_newline(self, m):
        """Emit a ``newline`` token for a run of two or more newlines."""
        length = len(m.group(0))
        if length > 1:
            self.tokens.append({'type': 'newline'})

    def parse_block_code(self, m):
        """Emit a ``code`` token for an indented code block."""
        # clean leading whitespace
        code = _block_code_leading_pattern.sub('', m.group(0))
        self.tokens.append({
            'type': 'code',
            'lang': None,
            'text': code,
        })

    def parse_fences(self, m):
        """Emit a ``code`` token for a fenced code block."""
        self.tokens.append({
            'type': 'code',
            'lang': m.group(2),
            'text': m.group(3),
        })

    def parse_heading(self, m):
        """Emit a ``heading`` token; the level is the number of ``#``."""
        self.tokens.append({
            'type': 'heading',
            'level': len(m.group(1)),
            'text': m.group(2),
        })

    def parse_lheading(self, m):
        """Parse setext heading."""
        self.tokens.append({
            'type': 'heading',
            'level': 1 if m.group(2) == '=' else 2,
            'text': m.group(1),
        })

    def parse_hrule(self, m):
        """Emit an ``hrule`` token."""
        self.tokens.append({'type': 'hrule'})

    def parse_list_block(self, m):
        """Emit ``list_start`` ... ``list_end`` around the list's items.

        Beyond ``max_recursive_depth`` nested lists, the whole match is
        emitted as one plain text item instead of being parsed further.
        """
        bull = m.group(2)
        self.tokens.append({
            'type': 'list_start',
            'ordered': '.' in bull,
        })
        self._list_depth += 1
        if self._list_depth > self._max_recursive_depth:
            self.tokens.append({'type': 'list_item_start'})
            self.parse_text(m)
            self.tokens.append({'type': 'list_item_end'})
        else:
            cap = m.group(0)
            self._process_list_item(cap, bull)
        self.tokens.append({'type': 'list_end'})
        self._list_depth -= 1

    def _process_list_item(self, cap, bull):
        """Split the list text ``cap`` into items and tokenize each one.

        :param cap: the full text matched by the ``list_block`` rule.
        :param bull: the bullet of the list (not used here).
        """
        items = self.rules.list_item.findall(cap)

        _next = False
        length = len(items)

        for i, groups in enumerate(items):
            item = groups[0]

            # remove the bullet
            space = len(item)
            item = self.rules.list_bullet.sub('', item)

            # outdent continuation lines by at most the bullet's width
            if '\n ' in item:
                space = space - len(item)
                pattern = re.compile(r'^ {1,%d}' % space, flags=re.M)
                item = pattern.sub('', item)

            # determine whether item is loose or not
            loose = _next
            if not loose and re.search(r'\n\n(?!\s*$)', item):
                loose = True

            # an item ending in a newline makes itself and the following
            # item loose
            if i != length - 1 and item:
                _next = item.endswith('\n')
                if not loose:
                    loose = _next

            if loose:
                t = 'loose_item_start'
            else:
                t = 'list_item_start'

            self.tokens.append({'type': t})
            # recurse
            self.parse(item, self.list_rules)
            self.tokens.append({'type': 'list_item_end'})

    def parse_block_quote(self, m):
        """Emit ``block_quote_start`` ... ``block_quote_end``.

        Beyond ``max_recursive_depth`` nested quotes, the match is emitted
        as plain text instead of being parsed further.
        """
        self.tokens.append({'type': 'block_quote_start'})
        self._blockquote_depth += 1
        if self._blockquote_depth > self._max_recursive_depth:
            self.parse_text(m)
        else:
            # clean leading >
            cap = _block_quote_leading_pattern.sub('', m.group(0))
            self.parse(cap)
        self.tokens.append({'type': 'block_quote_end'})
        self._blockquote_depth -= 1

    def parse_def_links(self, m):
        """Record a link definition; no token is emitted."""
        key = _keyify(m.group(1))
        self.def_links[key] = {
            'link': m.group(2),
            'title': m.group(3),
        }

    def parse_def_footnotes(self, m):
        """Record a footnote definition and tokenize its body.

        Only the first definition of a key is used; later ones are ignored.
        """
        key = _keyify(m.group(1))
        if key in self.def_footnotes:
            # footnote is already defined
            return

        # 0 marks the footnote as defined; the inline lexer later stores a
        # non-zero index here when it numbers a reference.
        self.def_footnotes[key] = 0

        self.tokens.append({
            'type': 'footnote_start',
            'key': key,
        })

        text = m.group(2)

        if '\n' in text:
            lines = text.split('\n')
            # smallest non-zero indent among the continuation lines
            whitespace = None
            for line in lines[1:]:
                space = len(line) - len(line.lstrip())
                if space and (not whitespace or space < whitespace):
                    whitespace = space
            newlines = [lines[0]]
            for line in lines[1:]:
                # line[None:] is the whole line when nothing is indented
                newlines.append(line[whitespace:])
            text = '\n'.join(newlines)

        self.parse(text, self.footnote_rules)

        self.tokens.append({
            'type': 'footnote_end',
            'key': key,
        })

    def parse_table(self, m):
        """Emit a ``table`` token for a table with leading pipes."""
        item = self._process_table(m)

        cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3))
        cells = cells.split('\n')
        for i, v in enumerate(cells):
            v = re.sub(r'^ *\| *| *\| *$', '', v)
            # split on pipes that are not backslash-escaped
            cells[i] = re.split(r' *(?<!\\)\| *', v)

        item['cells'] = self._process_cells(cells)
        self.tokens.append(item)

    def parse_nptable(self, m):
        """Emit a ``table`` token for a table without leading pipes."""
        item = self._process_table(m)

        cells = re.sub(r'\n$', '', m.group(3))
        cells = cells.split('\n')
        for i, v in enumerate(cells):
            cells[i] = re.split(r' *(?<!\\)\| *', v)

        item['cells'] = self._process_cells(cells)
        self.tokens.append(item)

    def _process_table(self, m):
        """Build the ``table`` token (header and alignment) from a match."""
        header = re.sub(r'^ *| *\| *$', '', m.group(1))
        header = re.split(r' *\| *', header)
        align = re.sub(r' *|\| *$', '', m.group(2))
        align = re.split(r' *\| *', align)

        for i, v in enumerate(align):
            if re.search(r'^ *-+: *$', v):
                align[i] = 'right'
            elif re.search(r'^ *:-+: *$', v):
                align[i] = 'center'
            elif re.search(r'^ *:-+ *$', v):
                align[i] = 'left'
            else:
                align[i] = None

        item = {
            'type': 'table',
            'header': header,
            'align': align,
        }
        return item

    def _process_cells(self, cells):
        """Replace escaped pipes (``\\|``) with ``|`` in every cell.

        ``cells`` is a list of rows, each a list of strings; it is modified
        in place and also returned.
        """
        for row in cells:
            for c, cell in enumerate(row):
                # de-escape any pipe inside the cell here; a plain string
                # replace avoids the invalid "\|" escape the regex literal
                # used to contain
                row[c] = cell.replace('\\|', '|')

        return cells

    def parse_block_html(self, m):
        """Emit ``open_html`` for a paired tag, otherwise ``close_html``."""
        tag = m.group(1)
        if not tag:
            text = m.group(0)
            self.tokens.append({
                'type': 'close_html',
                'text': text
            })
        else:
            attr = m.group(2)
            text = m.group(3)
            self.tokens.append({
                'type': 'open_html',
                'tag': tag,
                'extra': attr,
                'text': text
            })

    def parse_paragraph(self, m):
        """Emit a ``paragraph`` token."""
        text = m.group(1).rstrip('\n')
        self.tokens.append({'type': 'paragraph', 'text': text})

    def parse_text(self, m):
        """Emit a ``text`` token holding the whole match."""
        text = m.group(0)
        self.tokens.append({'type': 'text', 'text': text})
464 | |
465 | |
class InlineGrammar(object):
    """Grammars for inline level tokens.

    Every attribute is a compiled pattern anchored at the start of the
    remaining text; ``InlineLexer`` looks rules up by attribute name.
    """

    # Backslash escape of a punctuation character.
    escape = re.compile(r'^\\([\\`*{}\[\]()#+\-.!_>~|])')  # \* \+ \! ....
    # An HTML comment, a paired tag, or a lone tag.
    inline_html = re.compile(
        r'^(?:%s|%s|%s)' % (
            r'<!--[\s\S]*?-->',
            r'<(\w+%s)((?:%s)*?)\s*>([\s\S]*?)<\/\1>' % (
                _valid_end, _valid_attr),
            r'<\w+%s(?:%s)*?\s*\/?>' % (_valid_end, _valid_attr),
        )
    )
    # <scheme:...> or <user@host>; group 2 tells the two apart.
    autolink = re.compile(r'^<([^ >]+(@|:)[^ >]+)>')
    # [text](url "title") and the image form with a leading "!".
    link = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\('
        r'''\s*(<)?([\s\S]*?)(?(2)>)(?:\s+['"]([\s\S]*?)['"])?\s*'''
        r'\)'
    )
    # [text][key]
    reflink = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\s*\[([^^\]]*)\]'
    )
    # [key]
    nolink = re.compile(r'^!?\[((?:\[[^\]]*\]|[^\[\]])*)\]')
    # Bare http(s) URL.
    url = re.compile(r'''^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])''')
    double_emphasis = re.compile(
        r'^_{2}([\s\S]+?)_{2}(?!_)'  # __word__
        r'|'
        r'^\*{2}([\s\S]+?)\*{2}(?!\*)'  # **word**
    )
    emphasis = re.compile(
        r'^\b_((?:__|[^_])+?)_\b'  # _word_
        r'|'
        r'^\*((?:\*\*|[^\*])+?)\*(?!\*)'  # *word*
    )
    code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)')  # `code`
    # Two or more trailing spaces before a newline.
    linebreak = re.compile(r'^ {2,}\n(?!\s*$)')
    strikethrough = re.compile(r'^~~(?=\S)([\s\S]*?\S)~~')  # ~~word~~
    footnote = re.compile(r'^\[\^([^\]]+)\]')
    # Plain text up to the next character that could start another rule.
    text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| {2,}\n|$)')

    def hard_wrap(self):
        """Grammar for hard wrap linebreak. You don't need to add two
        spaces at the end of a line.

        Replaces ``linebreak`` and ``text`` on this instance only.
        """
        self.linebreak = re.compile(r'^ *\n(?!\s*$)')
        self.text = re.compile(
            r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| *\n|$)'
        )
517 | |
518 | |
class InlineLexer(object):
    """Inline level lexer for inline grammars.

    Walks a piece of text with the inline rules and asks the renderer to
    produce output for every match.
    """
    grammar_class = InlineGrammar

    # Rule names tried, in this order, when no rule list is given.
    default_rules = [
        'escape', 'inline_html', 'autolink', 'url',
        'footnote', 'link', 'reflink', 'nolink',
        'double_emphasis', 'emphasis', 'code',
        'linebreak', 'strikethrough', 'text',
    ]
    # Rule names used for the content of an inline HTML tag (no footnotes).
    inline_html_rules = [
        'escape', 'inline_html', 'autolink', 'url', 'link', 'reflink',
        'nolink', 'double_emphasis', 'emphasis', 'code',
        'linebreak', 'strikethrough', 'text',
    ]

    def __init__(self, renderer, rules=None, **kwargs):
        """Create a lexer.

        :param renderer: renderer object; its ``options`` override
                         ``kwargs``.
        :param rules: a grammar object exposing one compiled pattern per
                      rule name; defaults to ``grammar_class()``.
        :param kwargs: options; ``hard_wrap`` and ``parse_inline_html``
                       are read here.
        """
        self.renderer = renderer
        self.links = {}
        self.footnotes = {}
        self.footnote_index = 0

        if not rules:
            rules = self.grammar_class()

        kwargs.update(self.renderer.options)
        if kwargs.get('hard_wrap'):
            rules.hard_wrap()

        self.rules = rules

        self._in_link = False
        self._in_footnote = False
        self._parse_inline_html = kwargs.get('parse_inline_html')

    def __call__(self, text, rules=None):
        return self.output(text, rules)

    def setup(self, links, footnotes):
        """Install the link and footnote definitions for a new document."""
        self.footnote_index = 0
        self.links = links or {}
        self.footnotes = footnotes or {}

    def output(self, text, rules=None):
        """Render ``text`` with the inline rules.

        :param text: the inline markdown text.
        :param rules: iterable of rule names to try, in order; defaults to
                      ``default_rules``. The iterable is never modified.
        :returns: the accumulated renderer output.
        :raises RuntimeError: if no rule produces output for the remaining
                      text.
        """
        text = text.rstrip('\n')
        # Always work on a private copy: the 'footnote' rule may be removed
        # below, and that must not leak into a caller's or class-level list.
        rules = list(rules or self.default_rules)

        if self._in_footnote and 'footnote' in rules:
            rules.remove('footnote')

        output = self.renderer.placeholder()

        def manipulate(text):
            for key in rules:
                pattern = getattr(self.rules, key)
                m = pattern.match(text)
                if not m:
                    continue
                self.line_match = m
                out = getattr(self, 'output_%s' % key)(m)
                # a handler returning None declines the match, so the next
                # rule gets a chance
                if out is not None:
                    return m, out
            return False  # pragma: no cover

        while text:
            ret = manipulate(text)
            if ret is not False:
                m, out = ret
                output += out
                text = text[len(m.group(0)):]
                continue
            if text:  # pragma: no cover
                raise RuntimeError('Infinite loop at: %s' % text)

        return output

    def _output_in_link(self, text, rules=None):
        """Render ``text`` as the content of a link.

        ``_in_link`` is raised for the duration of the call and then put
        back to its previous value, so a link nested inside another link
        does not switch URL autolinking back on for the outer one.
        """
        previous = self._in_link
        self._in_link = True
        try:
            return self.output(text, rules)
        finally:
            self._in_link = previous

    def output_escape(self, m):
        text = m.group(1)
        return self.renderer.escape(text)

    def output_autolink(self, m):
        link = m.group(1)
        # group 2 is the separator that made this an autolink: "@" or ":"
        is_email = m.group(2) == '@'
        return self.renderer.autolink(link, is_email)

    def output_url(self, m):
        link = m.group(1)
        if self._in_link:
            # no anchors inside anchors: render the URL as plain text
            return self.renderer.text(link)
        return self.renderer.autolink(link, False)

    def output_inline_html(self, m):
        tag = m.group(1)
        if self._parse_inline_html and tag in _inline_tags:
            text = m.group(3)
            if tag == 'a':
                text = self._output_in_link(text, self.inline_html_rules)
            else:
                text = self.output(text, rules=self.inline_html_rules)
            extra = m.group(2) or ''
            html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
        else:
            html = m.group(0)
        return self.renderer.inline_html(html)

    def output_footnote(self, m):
        key = _keyify(m.group(1))
        if key not in self.footnotes:
            return None
        if self.footnotes[key]:
            # already referenced once; only the first reference is numbered
            return None
        self.footnote_index += 1
        self.footnotes[key] = self.footnote_index
        return self.renderer.footnote_ref(key, self.footnote_index)

    def output_link(self, m):
        return self._process_link(m, m.group(3), m.group(4))

    def output_reflink(self, m):
        key = _keyify(m.group(2) or m.group(1))
        if key not in self.links:
            return None
        ret = self.links[key]
        return self._process_link(m, ret['link'], ret['title'])

    def output_nolink(self, m):
        key = _keyify(m.group(1))
        if key not in self.links:
            return None
        ret = self.links[key]
        return self._process_link(m, ret['link'], ret['title'])

    def _process_link(self, m, link, title=None):
        """Render a link, or an image when the match starts with ``!``."""
        line = m.group(0)
        text = m.group(1)
        if line[0] == '!':
            return self.renderer.image(link, title, text)

        text = self._output_in_link(text)
        return self.renderer.link(link, title, text)

    def output_double_emphasis(self, m):
        text = m.group(2) or m.group(1)
        text = self.output(text)
        return self.renderer.double_emphasis(text)

    def output_emphasis(self, m):
        text = m.group(2) or m.group(1)
        text = self.output(text)
        return self.renderer.emphasis(text)

    def output_code(self, m):
        text = m.group(2)
        return self.renderer.codespan(text)

    def output_linebreak(self, m):
        return self.renderer.linebreak()

    def output_strikethrough(self, m):
        text = self.output(m.group(1))
        return self.renderer.strikethrough(text)

    def output_text(self, m):
        text = m.group(0)
        return self.renderer.text(text)
692 | |
693 | |
class Renderer(object):
    """The default HTML renderer for rendering Markdown.
    """

    def __init__(self, **kwargs):
        # Options read by the methods below: skip_style, escape, use_xhtml,
        # parse_block_html.
        self.options = kwargs

    def placeholder(self):
        """Returns the default, empty output value for the renderer.

        All renderer methods use the '+=' operator to append to this value.
        Default is a string so rendering HTML can build up a result string with
        the rendered Markdown.

        Can be overridden by Renderer subclasses to be types like an empty
        list, allowing the renderer to create a tree-like structure to
        represent the document (which can then be reprocessed later into a
        separate format like docx or pdf).
        """
        return ''

    def block_code(self, code, lang=None):
        """Rendering block level code. ``pre > code``.

        :param code: text content of the code block.
        :param lang: language of the given code.
        """
        code = code.rstrip('\n')
        if not lang:
            code = escape(code, smart_amp=False)
            return '<pre><code>%s\n</code></pre>\n' % code
        code = escape(code, quote=True, smart_amp=False)
        # The language comes straight from the document and ends up inside
        # an attribute value, so it must be escaped as well.
        lang = escape(lang, quote=True)
        return '<pre><code class="lang-%s">%s\n</code></pre>\n' % (lang, code)

    def block_quote(self, text):
        """Rendering <blockquote> with the given text.

        :param text: text content of the blockquote.
        """
        return '<blockquote>%s\n</blockquote>\n' % text.rstrip('\n')

    def block_html(self, html):
        """Rendering block level pure html content.

        :param html: text content of the html snippet.
        """
        if self.options.get('skip_style') and \
           html.lower().startswith('<style'):
            return ''
        if self.options.get('escape'):
            return escape(html)
        return html

    def header(self, text, level, raw=None):
        """Rendering header/heading tags like ``<h1>`` ``<h2>``.

        :param text: rendered text content for the header.
        :param level: a number for the header level, for example: 1.
        :param raw: raw text content of the header.
        """
        return '<h%d>%s</h%d>\n' % (level, text, level)

    def hrule(self):
        """Rendering method for ``<hr>`` tag."""
        if self.options.get('use_xhtml'):
            return '<hr />\n'
        return '<hr>\n'

    def list(self, body, ordered=True):
        """Rendering list tags like ``<ul>`` and ``<ol>``.

        :param body: body contents of the list.
        :param ordered: whether this list is ordered or not.
        """
        tag = 'ul'
        if ordered:
            tag = 'ol'
        return '<%s>\n%s</%s>\n' % (tag, body, tag)

    def list_item(self, text):
        """Rendering list item snippet. Like ``<li>``."""
        return '<li>%s</li>\n' % text

    def paragraph(self, text):
        """Rendering paragraph tags. Like ``<p>``."""
        return '<p>%s</p>\n' % text.strip(' ')

    def table(self, header, body):
        """Rendering table element. Wrap header and body in it.

        :param header: header part of the table.
        :param body: body part of the table.
        """
        return (
            '<table>\n<thead>%s</thead>\n'
            '<tbody>\n%s</tbody>\n</table>\n'
        ) % (header, body)

    def table_row(self, content):
        """Rendering a table row. Like ``<tr>``.

        :param content: content of current table row.
        """
        return '<tr>\n%s</tr>\n' % content

    def table_cell(self, content, **flags):
        """Rendering a table cell. Like ``<th>`` ``<td>``.

        :param content: content of current table cell.
        :param header: whether this is header or not.
        :param align: align of current table cell.
        """
        if flags['header']:
            tag = 'th'
        else:
            tag = 'td'
        align = flags['align']
        if not align:
            return '<%s>%s</%s>\n' % (tag, content, tag)
        return '<%s style="text-align:%s">%s</%s>\n' % (
            tag, align, content, tag
        )

    def double_emphasis(self, text):
        """Rendering **strong** text.

        :param text: text content for emphasis.
        """
        return '<strong>%s</strong>' % text

    def emphasis(self, text):
        """Rendering *emphasis* text.

        :param text: text content for emphasis.
        """
        return '<em>%s</em>' % text

    def codespan(self, text):
        """Rendering inline `code` text.

        :param text: text content for inline code.
        """
        text = escape(text.rstrip(), smart_amp=False)
        return '<code>%s</code>' % text

    def linebreak(self):
        """Rendering line break like ``<br>``."""
        if self.options.get('use_xhtml'):
            return '<br />\n'
        return '<br>\n'

    def strikethrough(self, text):
        """Rendering ~~strikethrough~~ text.

        :param text: text content for strikethrough.
        """
        return '<del>%s</del>' % text

    def text(self, text):
        """Rendering unformatted text.

        :param text: text content.
        """
        if self.options.get('parse_block_html'):
            return text
        return escape(text)

    def escape(self, text):
        """Rendering escape sequence.

        :param text: text content.
        """
        return escape(text)

    def autolink(self, link, is_email=False):
        """Rendering a given link or email address.

        :param link: link content or email address.
        :param is_email: whether this is an email or not.
        """
        text = link = escape_link(link)
        if is_email:
            link = 'mailto:%s' % link
        return '<a href="%s">%s</a>' % (link, text)

    def link(self, link, title, text):
        """Rendering a given link with content and title.

        :param link: href link for ``<a>`` tag.
        :param title: title content for `title` attribute.
        :param text: text content for description.
        """
        link = escape_link(link)
        if not title:
            return '<a href="%s">%s</a>' % (link, text)
        title = escape(title, quote=True)
        return '<a href="%s" title="%s">%s</a>' % (link, title, text)

    def image(self, src, title, text):
        """Rendering a image with title and text.

        :param src: source link of the image.
        :param title: title text of the image.
        :param text: alt text of the image.
        """
        src = escape_link(src)
        text = escape(text, quote=True)
        if title:
            title = escape(title, quote=True)
            html = '<img src="%s" alt="%s" title="%s"' % (src, text, title)
        else:
            html = '<img src="%s" alt="%s"' % (src, text)
        if self.options.get('use_xhtml'):
            return '%s />' % html
        return '%s>' % html

    def inline_html(self, html):
        """Rendering span level pure html content.

        :param html: text content of the html snippet.
        """
        if self.options.get('escape'):
            return escape(html)
        return html

    def newline(self):
        """Rendering newline element."""
        return ''

    def footnote_ref(self, key, index):
        """Rendering the ref anchor of a footnote.

        :param key: identity key for the footnote.
        :param index: the index count of current footnote.
        """
        html = (
            '<sup class="footnote-ref" id="fnref-%s">'
            '<a href="#fn-%s">%d</a></sup>'
        ) % (escape(key), escape(key), index)
        return html

    def footnote_item(self, key, text):
        """Rendering a footnote item.

        :param key: identity key for the footnote.
        :param text: text content of the footnote.
        """
        back = (
            '<a href="#fnref-%s" class="footnote">↩</a>'
        ) % escape(key)
        text = text.rstrip()
        closing = '</p>'
        if text.endswith(closing):
            # Splice the back link in front of the final </p> by slicing.
            # Using it as a regex replacement template would treat any
            # backslash in the key as an escape or group reference.
            text = '%s%s%s' % (text[:-len(closing)], back, closing)
        else:
            text = '%s<p>%s</p>' % (text, back)
        html = '<li id="fn-%s">%s</li>\n' % (escape(key), text)
        return html

    def footnotes(self, text):
        """Wrapper for all footnotes.

        :param text: contents of all footnotes.
        """
        html = '<div class="footnotes">\n%s<ol>%s</ol>\n</div>\n'
        return html % (self.hrule(), text)
959 | |
960 | |
961 class Markdown(object): | |
962 """The Markdown parser. | |
963 | |
964 :param renderer: An instance of ``Renderer``. | |
965 :param inline: An inline lexer class or instance. | |
966 :param block: A block lexer class or instance. | |
967 """ | |
def __init__(self, renderer=None, inline=None, block=None, **kwargs):
    """Wire up the renderer and the two lexers.

    :param renderer: a renderer instance; a ``Renderer`` built from
                     ``kwargs`` is used when omitted. When given, its
                     ``options`` override ``kwargs``.
    :param inline: an inline lexer class or instance.
    :param block: a block lexer class or instance.
    :param kwargs: options handed to the renderer and the lexers.
    """
    if not renderer:
        renderer = Renderer(**kwargs)
    else:
        kwargs.update(renderer.options)

    self.renderer = renderer

    if inline and inspect.isclass(inline):
        inline = inline(renderer, **kwargs)
    if block and inspect.isclass(block):
        block = block(**kwargs)

    if inline:
        self.inline = inline
    else:
        self.inline = InlineLexer(renderer, **kwargs)

    if not block:
        # The default block lexer must honour the recursion limit just as
        # a user-supplied lexer class does. Only that option is forwarded,
        # so unrelated keyword arguments cannot clash with the lexer's own
        # parameters.
        block_options = {}
        if 'max_recursive_depth' in kwargs:
            block_options['max_recursive_depth'] = (
                kwargs['max_recursive_depth']
            )
        block = BlockLexer(BlockGrammar(), **block_options)
    self.block = block
    self.footnotes = []
    self.tokens = []

    # detect if it should parse text in block html
    self._parse_block_html = kwargs.get('parse_block_html')
992 | |
def __call__(self, text):
    """Calling the parser is the same as calling ``parse``."""
    rendered = self.parse(text)
    return rendered
995 | |
def render(self, text):
    """Render the Markdown text.

    Alias of ``parse``.

    :param text: markdown formatted text content.
    """
    rendered = self.parse(text)
    return rendered
1002 | |
1003 def parse(self, text): | |
1004 out = self.output(preprocessing(text)) | |
1005 | |
1006 keys = self.block.def_footnotes | |
1007 | |
1008 # reset block | |
1009 self.block.def_links = {} | |
1010 self.block.def_footnotes = {} | |
1011 | |
1012 # reset inline | |
1013 self.inline.links = {} | |
1014 self.inline.footnotes = {} | |
1015 | |
1016 if not self.footnotes: | |
1017 return out | |
1018 | |
1019 footnotes = filter(lambda o: keys.get(o['key']), self.footnotes) | |
1020 self.footnotes = sorted( | |
1021 footnotes, key=lambda o: keys.get(o['key']), reverse=True | |
1022 ) | |
1023 | |
1024 body = self.renderer.placeholder() | |
1025 while self.footnotes: | |
1026 note = self.footnotes.pop() | |
1027 body += self.renderer.footnote_item( | |
1028 note['key'], note['text'] | |
1029 ) | |
1030 | |
1031 out += self.renderer.footnotes(body) | |
1032 return out | |
1033 | |
1034 def pop(self): | |
1035 if not self.tokens: | |
1036 return None | |
1037 self.token = self.tokens.pop() | |
1038 return self.token | |
1039 | |
1040 def peek(self): | |
1041 if self.tokens: | |
1042 return self.tokens[-1] | |
1043 return None # pragma: no cover | |
1044 | |
1045 def output(self, text, rules=None): | |
1046 self.tokens = self.block(text, rules) | |
1047 self.tokens.reverse() | |
1048 | |
1049 self.inline.setup(self.block.def_links, self.block.def_footnotes) | |
1050 | |
1051 out = self.renderer.placeholder() | |
1052 while self.pop(): | |
1053 out += self.tok() | |
1054 return out | |
1055 | |
1056 def tok(self): | |
1057 t = self.token['type'] | |
1058 | |
1059 # sepcial cases | |
1060 if t.endswith('_start'): | |
1061 t = t[:-6] | |
1062 | |
1063 return getattr(self, 'output_%s' % t)() | |
1064 | |
1065 def tok_text(self): | |
1066 text = self.token['text'] | |
1067 while self.peek()['type'] == 'text': | |
1068 text += '\n' + self.pop()['text'] | |
1069 return self.inline(text) | |
1070 | |
1071 def output_newline(self): | |
1072 return self.renderer.newline() | |
1073 | |
1074 def output_hrule(self): | |
1075 return self.renderer.hrule() | |
1076 | |
1077 def output_heading(self): | |
1078 return self.renderer.header( | |
1079 self.inline(self.token['text']), | |
1080 self.token['level'], | |
1081 self.token['text'], | |
1082 ) | |
1083 | |
1084 def output_code(self): | |
1085 return self.renderer.block_code( | |
1086 self.token['text'], self.token['lang'] | |
1087 ) | |
1088 | |
1089 def output_table(self): | |
1090 aligns = self.token['align'] | |
1091 aligns_length = len(aligns) | |
1092 cell = self.renderer.placeholder() | |
1093 | |
1094 # header part | |
1095 header = self.renderer.placeholder() | |
1096 for i, value in enumerate(self.token['header']): | |
1097 align = aligns[i] if i < aligns_length else None | |
1098 flags = {'header': True, 'align': align} | |
1099 cell += self.renderer.table_cell(self.inline(value), **flags) | |
1100 | |
1101 header += self.renderer.table_row(cell) | |
1102 | |
1103 # body part | |
1104 body = self.renderer.placeholder() | |
1105 for i, row in enumerate(self.token['cells']): | |
1106 cell = self.renderer.placeholder() | |
1107 for j, value in enumerate(row): | |
1108 align = aligns[j] if j < aligns_length else None | |
1109 flags = {'header': False, 'align': align} | |
1110 cell += self.renderer.table_cell(self.inline(value), **flags) | |
1111 body += self.renderer.table_row(cell) | |
1112 | |
1113 return self.renderer.table(header, body) | |
1114 | |
1115 def output_block_quote(self): | |
1116 body = self.renderer.placeholder() | |
1117 while self.pop()['type'] != 'block_quote_end': | |
1118 body += self.tok() | |
1119 return self.renderer.block_quote(body) | |
1120 | |
1121 def output_list(self): | |
1122 ordered = self.token['ordered'] | |
1123 body = self.renderer.placeholder() | |
1124 while self.pop()['type'] != 'list_end': | |
1125 body += self.tok() | |
1126 return self.renderer.list(body, ordered) | |
1127 | |
1128 def output_list_item(self): | |
1129 body = self.renderer.placeholder() | |
1130 while self.pop()['type'] != 'list_item_end': | |
1131 if self.token['type'] == 'text': | |
1132 body += self.tok_text() | |
1133 else: | |
1134 body += self.tok() | |
1135 | |
1136 return self.renderer.list_item(body) | |
1137 | |
1138 def output_loose_item(self): | |
1139 body = self.renderer.placeholder() | |
1140 while self.pop()['type'] != 'list_item_end': | |
1141 body += self.tok() | |
1142 return self.renderer.list_item(body) | |
1143 | |
1144 def output_footnote(self): | |
1145 self.inline._in_footnote = True | |
1146 body = self.renderer.placeholder() | |
1147 key = self.token['key'] | |
1148 while self.pop()['type'] != 'footnote_end': | |
1149 body += self.tok() | |
1150 self.footnotes.append({'key': key, 'text': body}) | |
1151 self.inline._in_footnote = False | |
1152 return self.renderer.placeholder() | |
1153 | |
1154 def output_close_html(self): | |
1155 text = self.token['text'] | |
1156 return self.renderer.block_html(text) | |
1157 | |
1158 def output_open_html(self): | |
1159 text = self.token['text'] | |
1160 tag = self.token['tag'] | |
1161 if self._parse_block_html and tag not in _pre_tags: | |
1162 text = self.inline(text, rules=self.inline.inline_html_rules) | |
1163 extra = self.token.get('extra') or '' | |
1164 html = '<%s%s>%s</%s>' % (tag, extra, text, tag) | |
1165 return self.renderer.block_html(html) | |
1166 | |
1167 def output_paragraph(self): | |
1168 return self.renderer.paragraph(self.inline(self.token['text'])) | |
1169 | |
1170 def output_text(self): | |
1171 return self.renderer.paragraph(self.tok_text()) | |
1172 | |
1173 | |
def markdown(text, escape=True, **kwargs):
    """Convert markdown formatted text to html in a single call.

    A ``Markdown`` parser is built from the given options and applied to
    ``text``.

    :param text: markdown formatted text content.
    :param escape: if set to False, all html tags will not be escaped.
    :param use_xhtml: output with xhtml tags.
    :param hard_wrap: if set to True, it will use the GFM line breaks feature.
    :param parse_block_html: parse text only in block level html.
    :param parse_inline_html: parse text only in inline level html.
    """
    parser = Markdown(escape=escape, **kwargs)
    return parser(text)