diff env/lib/python3.7/site-packages/docutils/utils/code_analyzer.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author:   shellac
date:     Sat, 02 May 2020 07:14:21 -0400
parents:  (none)
children: (none)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.7/site-packages/docutils/utils/code_analyzer.py	Sat May 02 07:14:21 2020 -0400
@@ -0,0 +1,142 @@
+#!/usr/bin/python
+# coding: utf-8
+
+"""Lexical analysis of formal languages (i.e. code) using Pygments."""
+
+# :Author: Georg Brandl; Felix Wiemann; Günter Milde
+# :Date: $Date: 2019-08-26 18:46:50 +0200 (Mo, 26. Aug 2019) $
+# :Copyright: This module has been placed in the public domain.
+
+from docutils import ApplicationError
+try:
+    from pkg_resources import DistributionNotFound as ResourceError
+except (ImportError, RuntimeError):
+    class ResourceError(ApplicationError):
+        pass  # stub
+try:
+    import pygments
+    from pygments.lexers import get_lexer_by_name
+    from pygments.formatters.html import _get_ttype_class
+    with_pygments = True
+except ImportError:
+    with_pygments = False
+
+# Filter the following token types from the list of class arguments:
+unstyled_tokens = ['token',  # Token (base token type)
+                   'text',   # Token.Text
+                   '']       # short name for Token and Text
+# (Add, e.g., Token.Punctuation with ``unstyled_tokens += ['punctuation']``.)
+
+class LexerError(ApplicationError):
+    pass
+
+class Lexer(object):
+    """Parse `code` lines and yield "classified" tokens.
+
+    Arguments
+
+      code       -- string of source code to parse,
+      language   -- formal language the code is written in,
+      tokennames -- either 'long', 'short', or 'none' (see below).
+
+    Merge subsequent tokens of the same token-type.
+
+    Iterating over an instance yields the tokens as ``(tokentype, value)``
+    tuples. The value of `tokennames` configures the naming of the tokentype:
+
+      'long':  downcased full token type name,
+      'short': short name defined by pygments.token.STANDARD_TYPES
+               (= class argument used in pygments html output),
+      'none':  skip lexical analysis.
+    """

+
+    def __init__(self, code, language, tokennames='short'):
+        """
+        Set up a lexical analyzer for `code` in `language`.
+        """
+        self.code = code
+        self.language = language
+        self.tokennames = tokennames
+        self.lexer = None
+        # get lexical analyzer for `language`:
+        if language in ('', 'text') or tokennames == 'none':
+            return
+        if not with_pygments:
+            raise LexerError('Cannot analyze code. '
+                             'Pygments package not found.')
+        try:
+            self.lexer = get_lexer_by_name(self.language)
+        except (pygments.util.ClassNotFound, ResourceError):
+            raise LexerError('Cannot analyze code. '
+                             'No Pygments lexer found for "%s".' % language)
+        # self.lexer.add_filter('tokenmerge')
+        # Since version 1.2 (released Jan 01, 2010) Pygments has a
+        # TokenMergeFilter, so ``self.merge(tokens)`` in __iter__ could
+        # be replaced by ``self.lexer.add_filter('tokenmerge')`` in __init__.
+        # However, `merge` below also strips a final newline added by pygments.
+        #
+        # self.lexer.add_filter('tokenmerge')
+
+    def merge(self, tokens):
+        """Merge subsequent tokens of same token-type.
+
+        Also strip the final newline (added by pygments).
+        """
+        tokens = iter(tokens)
+        (lasttype, lastval) = next(tokens)
+        for ttype, value in tokens:
+            if ttype is lasttype:
+                lastval += value
+            else:
+                yield (lasttype, lastval)
+                (lasttype, lastval) = (ttype, value)
+        if lastval.endswith('\n'):
+            lastval = lastval[:-1]
+        if lastval:
+            yield (lasttype, lastval)
+
+    def __iter__(self):
+        """Parse self.code and yield "classified" tokens.
+        """
+        if self.lexer is None:
+            yield ([], self.code)
+            return
+        tokens = pygments.lex(self.code, self.lexer)
+        for tokentype, value in self.merge(tokens):
+            if self.tokennames == 'long':  # long CSS class args
+                classes = str(tokentype).lower().split('.')
+            else:  # short CSS class args
+                classes = [_get_ttype_class(tokentype)]
+            classes = [cls for cls in classes if cls not in unstyled_tokens]
+            yield (classes, value)
+
+
+class NumberLines(object):
+    """Insert linenumber-tokens at the start of every code line.
+
+    Arguments
+
+      tokens    -- iterable of ``(classes, value)`` tuples
+      startline -- first line number
+      endline   -- last line number
+
+    Iterating over an instance yields the tokens with a
+    ``(['ln'], '<the line number>')`` token added for every code line.
+    Multi-line tokens are split."""
+
+    def __init__(self, tokens, startline, endline):
+        self.tokens = tokens
+        self.startline = startline
+        # pad linenumbers, e.g. endline == 100 -> fmt_str = '%3d '
+        self.fmt_str = '%%%dd ' % len(str(endline))
+
+    def __iter__(self):
+        lineno = self.startline
+        yield (['ln'], self.fmt_str % lineno)
+        for ttype, value in self.tokens:
+            lines = value.split('\n')
+            for line in lines[:-1]:
+                yield (ttype, line + '\n')
+                lineno += 1
+                yield (['ln'], self.fmt_str % lineno)
+            yield (ttype, lines[-1])