pygments/lexers/special.py @master
# -*- coding: utf-8 -*-
"""
    pygments.lexers.special
    ~~~~~~~~~~~~~~~~~~~~~~~

    Special lexers.

    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer
from pygments.token import Token, Error, Text
from pygments.util import get_choice_opt, text_type, BytesIO

__all__ = ['TextLexer', 'RawTokenLexer']
class TextLexer(Lexer):
    """
    "Null" lexer, doesn't highlight anything.
    """
    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']
    mimetypes = ['text/plain']
    priority = 0.01

    def get_tokens_unprocessed(self, text):
        yield 0, Text, text

    # no `self` by design: Pygments' lexer metaclass wraps analyse_text
    # into a static analysator that is called with the text alone
    def analyse_text(text):
        return TextLexer.priority
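
For orientation, a minimal sketch (not part of the file) of what the null lexer does: the whole input comes back as a single Text token, and lexer guessing only falls back to it because analyse_text reports the rock-bottom priority of 0.01.

from pygments.lexers.special import TextLexer

for ttype, value in TextLexer().get_tokens('no highlighting here\n'):
    print(ttype, repr(value))
# prints: Token.Text 'no highlighting here\n'
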
# cache of token type strings already resolved to token type objects
_ttype_cache = {}

# one line of raw token data, including the trailing newline
line_re = re.compile(b'.*?\n')
class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`. This
    lexer raises exceptions during parsing if the token stream in the
    file is malformed.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
    """
    name = 'Raw token data'
    aliases = ['raw']
    filenames = []
    mimetypes = ['application/x-pygments-tokens']

    def __init__(self, **options):
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)
    def get_tokens(self, text):
        if isinstance(text, text_type):
            # raw token stream never has any non-ASCII characters
            text = text.encode('ascii')
        if self.compress == 'gz':
            import gzip
            gzipfile = gzip.GzipFile('', 'rb', 9, BytesIO(text))
            text = gzipfile.read()
        elif self.compress == 'bz2':
            import bz2
            text = bz2.decompress(text)

        # do not call Lexer.get_tokens() because we do not want Unicode
        # decoding to occur, and stripping is not optional.
        text = text.strip(b'\n') + b'\n'
        for i, t, v in self.get_tokens_unprocessed(text):
            yield t, v
    def get_tokens_unprocessed(self, text):
        length = 0
        for match in line_re.finditer(text):
            try:
                ttypestr, val = match.group().split(b'\t', 1)
            except ValueError:
                val = match.group().decode('ascii', 'replace')
                ttype = Error
            else:
                ttype = _ttype_cache.get(ttypestr)
                if not ttype:
                    ttype = Token
                    # token names are pure ASCII; decode so the string
                    # operations below also work on Python 3 bytes input
                    ttypes = ttypestr.decode('ascii').split('.')[1:]
                    for ttype_ in ttypes:
                        if not ttype_ or not ttype_[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, ttype_)
                    _ttype_cache[ttypestr] = ttype
                # strip the repr() quoting and the trailing newline from
                # the value; Python 2 reprs of unicode values carry a u''
                # prefix, Python 3 reprs do not
                if val.startswith((b"u'", b'u"')):
                    val = val[2:-2]
                else:
                    val = val[1:-2]
                val = val.decode('unicode-escape')
            yield length, ttype, val
            length += len(val)
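
An illustrative round trip (again not part of the file), assuming Pygments is installed: RawTokenFormatter writes one "Token.Type<TAB>repr-of-value" line per token, and RawTokenLexer recreates the token stream from exactly that format; the lexer's compress option mirrors the formatter's option of the same name.

from pygments import highlight
from pygments.formatters import RawTokenFormatter
from pygments.lexers import PythonLexer
from pygments.lexers.special import RawTokenLexer

# format Python source as raw token data (`raw` is bytes) ...
raw = highlight('x = 1\n', PythonLexer(), RawTokenFormatter())
# ... then recreate the original token stream from it
for ttype, value in RawTokenLexer().get_tokens(raw):
    print(ttype, repr(value))

# the same works on a compressed stream
raw_gz = highlight('x = 1\n', PythonLexer(), RawTokenFormatter(compress='gz'))
tokens = list(RawTokenLexer(compress='gz').get_tokens(raw_gz))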