comparison env/lib/python3.7/site-packages/docutils/utils/code_analyzer.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 #!/usr/bin/python
2 # coding: utf-8
3
4 """Lexical analysis of formal languages (i.e. code) using Pygments."""
5
6 # :Author: Georg Brandl; Felix Wiemann; Günter Milde
7 # :Date: $Date: 2019-08-26 18:46:50 +0200 (Mo, 26. Aug 2019) $
8 # :Copyright: This module has been placed in the public domain.
9
10 from docutils import ApplicationError
11 try:
12 from pkg_resources import DistributionNotFound as ResourceError
13 except (ImportError, RuntimeError):
14 class ResourceError(ApplicationError):
15 pass # stub
16 try:
17 import pygments
18 from pygments.lexers import get_lexer_by_name
19 from pygments.formatters.html import _get_ttype_class
20 with_pygments = True
21 except ImportError:
22 with_pygments = False
23
24 # Filter the following token types from the list of class arguments:
25 unstyled_tokens = ['token', # Token (base token type)
26 'text', # Token.Text
27 ''] # short name for Token and Text
28 # (Add, e.g., Token.Punctuation with ``unstyled_tokens += ['punctuation']``.)
29
class LexerError(ApplicationError):
    """Raised when code cannot be analyzed.

    `Lexer` raises this if Pygments is not available or if no Pygments
    lexer is found for the requested language.
    """
32
class Lexer(object):
    """Parse `code` lines and yield "classified" tokens.

    Arguments

      code       -- string of source code to parse,
      language   -- formal language the code is written in,
      tokennames -- either 'long', 'short', or 'none' (see below).

    Merge subsequent tokens of the same token-type.

    Iterating over an instance yields the tokens as ``(tokentype, value)``
    tuples. The value of `tokennames` configures the naming of the tokentype:

      'long':  downcased full token type name,
      'short': short name defined by pygments.token.STANDARD_TYPES
               (= class argument used in pygments html output),
      'none':  skip lexical analysis.
    """

    def __init__(self, code, language, tokennames='short'):
        """
        Set up a lexical analyzer for `code` in `language`.

        No lexer is set up (and iteration yields the code unchanged) if
        `language` is '' or 'text', or if `tokennames` is 'none'.

        Raise `LexerError` if Pygments is not available or if it provides
        no lexer for `language`.
        """
        self.code = code
        self.language = language
        self.tokennames = tokennames
        self.lexer = None
        # get lexical analyzer for `language`:
        if language in ('', 'text') or tokennames == 'none':
            return
        if not with_pygments:
            raise LexerError('Cannot analyze code. '
                             'Pygments package not found.')
        try:
            self.lexer = get_lexer_by_name(self.language)
        except (pygments.util.ClassNotFound, ResourceError):
            raise LexerError('Cannot analyze code. '
                             'No Pygments lexer found for "%s".' % language)
        # Since version 1.2. (released Jan 01, 2010) Pygments has a
        # TokenMergeFilter. ``self.merge(tokens)`` in __iter__ could
        # be replaced by ``self.lexer.add_filter('tokenmerge')`` in __init__.
        # However, `merge` below also strips a final newline added by pygments.
        #
        # self.lexer.add_filter('tokenmerge')

    def merge(self, tokens):
        """Merge subsequent tokens of same token-type.

        `tokens` is an iterable of ``(tokentype, value)`` pairs; adjacent
        pairs whose token type is the same object are joined into one.

        Also strip the final newline (added by pygments).
        An empty `tokens` iterable yields nothing.
        """
        tokens = iter(tokens)
        try:
            (lasttype, lastval) = next(tokens)
        except StopIteration:
            # Nothing to merge. Returning here keeps the StopIteration from
            # escaping the generator (a RuntimeError under PEP 479).
            return
        for ttype, value in tokens:
            if ttype is lasttype:
                lastval += value
            else:
                yield (lasttype, lastval)
                (lasttype, lastval) = (ttype, value)
        # Only the very last token can carry the trailing newline.
        if lastval.endswith('\n'):
            lastval = lastval[:-1]
        if lastval:
            yield (lasttype, lastval)

    def __iter__(self):
        """Parse self.code and yield "classified" tokens.

        Yield ``(classes, value)`` tuples, where `classes` is a list of
        class arguments with the entries of `unstyled_tokens` filtered out.
        Without a lexer, the complete code is yielded as a single token
        with an empty class list.
        """
        if self.lexer is None:
            yield ([], self.code)
            return
        tokens = pygments.lex(self.code, self.lexer)
        for tokentype, value in self.merge(tokens):
            if self.tokennames == 'long':  # long CSS class args
                classes = str(tokentype).lower().split('.')
            else:  # short CSS class args
                classes = [_get_ttype_class(tokentype)]
            classes = [cls for cls in classes if cls not in unstyled_tokens]
            yield (classes, value)
112
113
class NumberLines(object):
    """Prefix every code line with a line-number token.

    Arguments

       tokens    -- iterable of ``(classes, value)`` tuples
       startline -- first line number
       endline   -- last line number

    Iterating over an instance yields the tokens with a
    ``(['ln'], '<the line number>')`` token added for every code line.
    Tokens spanning several lines are split at the line breaks.
    """

    def __init__(self, tokens, startline, endline):
        """Store the token source and prepare the line-number format."""
        self.tokens = tokens
        self.startline = startline
        # Right-align all numbers to the width of the largest one,
        # e.g. endline == 100 -> fmt_str = '%3d '
        width = len(str(endline))
        self.fmt_str = '%%%dd ' % width

    def __iter__(self):
        """Yield the tokens interleaved with ``(['ln'], number)`` tokens."""
        current = self.startline
        yield (['ln'], self.fmt_str % current)
        for classes, text in self.tokens:
            head, newline, rest = text.partition('\n')
            # Every line break inside the token closes a code line and
            # opens the next one, which gets its own number token.
            while newline:
                yield (classes, head + '\n')
                current += 1
                yield (['ln'], self.fmt_str % current)
                head, newline, rest = rest.partition('\n')
            # Remainder after the last line break (may be empty).
            yield (classes, head)