]> jfr.im git - dlqueue.git/blob - venv/lib/python3.11/site-packages/pip/_vendor/pygments/formatters/latex.py
init: venv aand flask
[dlqueue.git] / venv / lib / python3.11 / site-packages / pip / _vendor / pygments / formatters / latex.py
1 """
2 pygments.formatters.latex
3 ~~~~~~~~~~~~~~~~~~~~~~~~~
4
5 Formatter for LaTeX fancyvrb output.
6
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9 """
10
11 from io import StringIO
12
13 from pip._vendor.pygments.formatter import Formatter
14 from pip._vendor.pygments.lexer import Lexer, do_insertions
15 from pip._vendor.pygments.token import Token, STANDARD_TYPES
16 from pip._vendor.pygments.util import get_bool_opt, get_int_opt
17
18
# Names exported by ``from ... import *``; only the formatter is listed, so
# LatexEmbeddedLexer and the helper functions must be imported explicitly.
__all__ = ['LatexFormatter']
20
21
# Characters that need escaping inside the Verbatim environment, mapped to
# the two-letter suffix of the macro that prints them (\<prefix>Z<suffix>).
_TEX_ESCAPE_CODES = {
    '\\': 'bs',
    '{': 'ob',
    '}': 'cb',
    '^': 'ca',
    '_': 'us',
    '&': 'am',
    '<': 'lt',
    '>': 'gt',
    '#': 'sh',
    '%': 'pc',
    '$': 'dl',
    '-': 'hy',
    "'": 'sq',
    '"': 'dq',
    '~': 'ti',
}


def escape_tex(text, commandprefix):
    """
    Return `text` with every LaTeX-special character replaced by the
    corresponding ``\\<commandprefix>Zxx{}`` macro call.

    The substitution is done in a single pass, so characters that are part
    of an inserted macro call are never escaped a second time and no
    placeholder characters are needed (control characters already present
    in `text` are passed through untouched).
    """
    macro = r'\%sZ' % commandprefix
    table = {ord(char): macro + code + '{}'
             for char, code in _TEX_ESCAPE_CODES.items()}
    return text.translate(table)
41
42
# Skeleton of the complete document written by
# LatexFormatter.format_unencoded when the `full` option is set.  It is
# filled in with %-formatting using the keys docclass, encoding, preamble,
# styledefs, title and code.
DOC_TEMPLATE = r'''
\documentclass{%(docclass)s}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage[%(encoding)s]{inputenc}
%(preamble)s

%(styledefs)s

\begin{document}

\section*{%(title)s}

%(code)s
\end{document}
'''
59
60 ## Small explanation of the mess below :)
61 #
62 # The previous version of the LaTeX formatter just assigned a command to
63 # each token type defined in the current style. That obviously is
64 # problematic if the highlighted code is produced for a different style
65 # than the style commands themselves.
66 #
67 # This version works much like the HTML formatter which assigns multiple
68 # CSS classes to each <span> tag, from the most specific to the least
69 # specific token type, thus falling back to the parent token type if one
70 # is not defined. Here, the classes are there too and use the same short
71 # forms given in token.STANDARD_TYPES.
72 #
73 # Highlighted code now only uses one custom command, which by default is
74 # \PY and selectable by the commandprefix option (and in addition the
75 # escapes \PYZat, \PYZlb and \PYZrb which haven't been renamed for
76 # backwards compatibility purposes).
77 #
78 # \PY has two arguments: the classes, separated by +, and the text to
79 # render in that style. The classes are resolved into the respective
80 # style commands by magic, which serves to ignore unknown classes.
81 #
82 # The magic macros are:
83 # * \PY@it, \PY@bf, etc. are unconditionally wrapped around the text
84 # to render in \PY@do. Their definition determines the style.
85 # * \PY@reset resets \PY@it etc. to do nothing.
86 # * \PY@toks parses the list of classes, using magic inspired by the
87 # keyval package (but modified to use plusses instead of commas
88 # because fancyvrb redefines commas inside its environments).
89 # * \PY@tok processes one class, calling the \PY@tok@classname command
90 # if it exists.
91 # * \PY@tok@classname sets the \PY@it etc. to reflect the chosen style
92 # for its class.
93 # * \PY resets the style, parses the classnames and then calls \PY@do.
94 #
# Tip: to read this code, print it out in substituted form using e.g.
# >>> print(STYLE_TEMPLATE % {'cp': 'PY', 'styles': ''})
97
# Macro definitions returned by LatexFormatter.get_style_defs().  The
# template is %-formatted with two keys: `cp` (the command prefix) and
# `styles` (the per-token-class \@namedef lines).  Because of the
# %-formatting, every literal percent sign in the TeX code is written as %%.
STYLE_TEMPLATE = r'''
\makeatletter
\def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%%
\let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%%
\let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax}
\def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname}
\def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%%
\%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi}
\def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%%
\%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}}
\def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}}

%(styles)s

\def\%(cp)sZbs{\char`\\}
\def\%(cp)sZus{\char`\_}
\def\%(cp)sZob{\char`\{}
\def\%(cp)sZcb{\char`\}}
\def\%(cp)sZca{\char`\^}
\def\%(cp)sZam{\char`\&}
\def\%(cp)sZlt{\char`\<}
\def\%(cp)sZgt{\char`\>}
\def\%(cp)sZsh{\char`\#}
\def\%(cp)sZpc{\char`\%%}
\def\%(cp)sZdl{\char`\$}
\def\%(cp)sZhy{\char`\-}
\def\%(cp)sZsq{\char`\'}
\def\%(cp)sZdq{\char`\"}
\def\%(cp)sZti{\char`\~}
%% for compatibility with earlier versions
\def\%(cp)sZat{@}
\def\%(cp)sZlb{[}
\def\%(cp)sZrb{]}
\makeatother
'''
133
134
def _get_ttype_name(ttype):
    """
    Return the short class name for `ttype`.

    A type listed in STANDARD_TYPES with a non-empty name maps directly to
    that name.  Otherwise the name of the closest ancestor known to
    STANDARD_TYPES is used, followed by the last component of every type
    walked through on the way up (outermost first).
    """
    short = STANDARD_TYPES.get(ttype)
    if short:
        return short

    walked = []
    node = ttype
    while short is None:
        walked.append(node[-1])
        node = node.parent
        short = STANDARD_TYPES.get(node)
    # Components were collected innermost-first; emit them outermost-first.
    return short + ''.join(reversed(walked))
145
146
class LatexFormatter(Formatter):
    r"""
    Format tokens as LaTeX code. This needs the `fancyvrb` and `color`
    standard packages.

    Without the `full` option, code is formatted as one ``Verbatim``
    environment, like this:

    .. sourcecode:: latex

        \begin{Verbatim}[commandchars=\\\{\}]
        \PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}):
            \PY{k}{pass}
        \end{Verbatim}

    Wrapping can be disabled using the `nowrap` option.

    The special command used here (``\PY``) and all the other macros it needs
    are output by the `get_style_defs` method.

    With the `full` option, a complete LaTeX document is output, including
    the command definitions in the preamble.

    The `get_style_defs()` method of a `LatexFormatter` returns a string
    containing ``\def`` commands defining the macros needed inside the
    ``Verbatim`` environments.

    Additional options accepted:

    `nowrap`
        If set to ``True``, don't wrap the tokens at all, not even inside a
        ``\begin{Verbatim}`` environment. This disables most other options
        (default: ``False``).

    `style`
        The style to use, can be a string or a Style subclass (default:
        ``'default'``).

    `full`
        Tells the formatter to output a "full" document, i.e. a complete
        self-contained document (default: ``False``).

    `title`
        If `full` is true, the title that should be used to caption the
        document (default: ``''``).

    `docclass`
        If the `full` option is enabled, this is the document class to use
        (default: ``'article'``).

    `preamble`
        If the `full` option is enabled, this can be further preamble commands,
        e.g. ``\usepackage`` (default: ``''``).

    `linenos`
        If set to ``True``, output line numbers (default: ``False``).

    `linenostart`
        The line number for the first line (default: ``1``).

    `linenostep`
        If set to a number n > 1, only every nth line number is printed.

    `verboptions`
        Additional options given to the Verbatim environment (see the *fancyvrb*
        docs for possible values) (default: ``''``).

    `commandprefix`
        The LaTeX commands used to produce colored output are constructed
        using this prefix and some letters (default: ``'PY'``).

        .. versionadded:: 0.7
        .. versionchanged:: 0.10
           The default is now ``'PY'`` instead of ``'C'``.

    `texcomments`
        If set to ``True``, enables LaTeX comment lines.  That is, LaTeX markup
        in comment tokens is not escaped so that LaTeX can render it (default:
        ``False``).

        .. versionadded:: 1.2

    `mathescape`
        If set to ``True``, enables LaTeX math mode escape in comments. That
        is, ``'$...$'`` inside a comment will trigger math mode (default:
        ``False``).

        .. versionadded:: 1.2

    `escapeinside`
        If set to a string of length 2, enables escaping to LaTeX. Text
        delimited by these 2 characters is read as LaTeX code and
        typeset accordingly. It has no effect in string literals. It has
        no effect in comments if `texcomments` or `mathescape` is
        set. (default: ``''``).

        .. versionadded:: 2.0

    `envname`
        Allows you to pick an alternative environment name replacing Verbatim.
        The alternate environment still has to support Verbatim's option syntax.
        (default: ``'Verbatim'``).

        .. versionadded:: 2.0
    """
    name = 'LaTeX'
    aliases = ['latex', 'tex']
    filenames = ['*.tex']

    def __init__(self, **options):
        """
        Read the options described in the class docstring and build the
        style tables used by `get_style_defs` and `format_unencoded`.
        """
        Formatter.__init__(self, **options)
        self.nowrap = get_bool_opt(options, 'nowrap', False)
        self.docclass = options.get('docclass', 'article')
        self.preamble = options.get('preamble', '')
        self.linenos = get_bool_opt(options, 'linenos', False)
        # Negative numbers are folded to their magnitude.
        self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
        self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
        self.verboptions = options.get('verboptions', '')
        # Parsed for callers that pass it; nothing in this class reads it.
        self.nobackground = get_bool_opt(options, 'nobackground', False)
        self.commandprefix = options.get('commandprefix', 'PY')
        self.texcomments = get_bool_opt(options, 'texcomments', False)
        self.mathescape = get_bool_opt(options, 'mathescape', False)
        self.escapeinside = options.get('escapeinside', '')
        # Only a two-character value is honoured; anything else switches
        # the feature off.
        if len(self.escapeinside) == 2:
            self.left = self.escapeinside[0]
            self.right = self.escapeinside[1]
        else:
            self.escapeinside = ''
        self.envname = options.get('envname', 'Verbatim')

        self._create_stylesheet()

    def _create_stylesheet(self):
        """
        Build ``self.ttype2name`` (token type -> short class name) and
        ``self.cmd2def`` (class name -> LaTeX code defining its look) from
        ``self.style``.  Token types without any styling are left out.
        """
        t2n = self.ttype2name = {Token: ''}
        c2d = self.cmd2def = {}
        cp = self.commandprefix

        def rgbcolor(col):
            # 'rrggbb' -> 'r,g,b' with components scaled to 0..1; a missing
            # colour is rendered as white.
            if col:
                return ','.join(['%.2f' % (int(col[i] + col[i + 1], 16) / 255.0)
                                 for i in (0, 2, 4)])
            else:
                return '1,1,1'

        # (style attribute, hook macro suffix, LaTeX command), in emission
        # order.  roman/sans/mono all assign the @ff hook, so when several
        # are set the last one listed here wins.
        font_hooks = (
            ('bold', 'bf', r'\textbf'),
            ('italic', 'it', r'\textit'),
            ('underline', 'ul', r'\underline'),
            ('roman', 'ff', r'\textrm'),
            ('sans', 'ff', r'\textsf'),
            ('mono', 'ff', r'\texttt'),
        )

        for ttype, ndef in self.style:
            name = _get_ttype_name(ttype)
            cmndef = ''
            for attribute, hook, command in font_hooks:
                if ndef[attribute]:
                    cmndef += r'\let\$$@%s=%s' % (hook, command)
            if ndef['color']:
                cmndef += (r'\def\$$@tc##1{\textcolor[rgb]{%s}{##1}}' %
                           rgbcolor(ndef['color']))
            if ndef['border']:
                # A bordered box also carries the background (white if the
                # style defines none).
                cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{\string -\fboxrule}'
                           r'\fcolorbox[rgb]{%s}{%s}{\strut ##1}}}' %
                           (rgbcolor(ndef['border']),
                            rgbcolor(ndef['bgcolor'])))
            elif ndef['bgcolor']:
                cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{0pt}'
                           r'\colorbox[rgb]{%s}{\strut ##1}}}' %
                           rgbcolor(ndef['bgcolor']))
            if cmndef == '':
                continue
            # '$$' is the placeholder for the command prefix.
            cmndef = cmndef.replace('$$', cp)
            t2n[ttype] = name
            c2d[name] = cmndef

    def get_style_defs(self, arg=''):
        """
        Return the command sequences needed to define the commands
        used to format text in the verbatim environment. ``arg`` is ignored.
        """
        cp = self.commandprefix
        styles = [r'\@namedef{%s@tok@%s}{%s}' % (cp, name, definition)
                  for name, definition in self.cmd2def.items()]
        return STYLE_TEMPLATE % {'cp': cp,
                                 'styles': '\n'.join(styles)}

    def _escape_comment(self, value):
        """
        Return the text of a comment token escaped according to the
        `texcomments`, `mathescape` and `escapeinside` options (checked in
        that order; the first one that is enabled decides).
        """
        cp = self.commandprefix

        if self.texcomments:
            # Try to guess comment starting lexeme and escape it ...
            start = value[0:1]
            for char in value[1:]:
                if char != start[0]:
                    break
                start += char
            # ... but do not escape inside comment.
            return escape_tex(start, cp) + value[len(start):]

        if self.mathescape:
            # Only escape parts not inside a math environment: splitting
            # on '$' leaves the non-math text at the even positions.
            parts = value.split('$')
            for i in range(0, len(parts), 2):
                parts[i] = escape_tex(parts[i], cp)
            return '$'.join(parts)

        if self.escapeinside:
            pieces = []
            text = value
            while text:
                before, sep1, text = text.partition(self.left)
                if not sep1:
                    pieces.append(escape_tex(before, cp))
                    continue
                inner, sep2, text = text.partition(self.right)
                if sep2:
                    # Properly delimited: pass the inner text through raw.
                    pieces.append(escape_tex(before, cp) + inner)
                else:
                    # Unterminated escape: treat everything as plain text.
                    pieces.append(escape_tex(before + sep1 + inner, cp))
            return ''.join(pieces)

        return escape_tex(value, cp)

    def format_unencoded(self, tokensource, outfile):
        """
        Write the LaTeX rendering of `tokensource`, an iterable of
        ``(tokentype, value)`` pairs, to the file-like object `outfile`.

        Unless `nowrap` is set the output is wrapped in the configured
        Verbatim-like environment; with `full` it is additionally embedded
        in a complete document built from ``DOC_TEMPLATE``.
        """
        # TODO: add support for background colors
        t2n = self.ttype2name
        cp = self.commandprefix

        if self.full:
            # Collect the code first; it is inserted into the document
            # template at the end.
            realoutfile = outfile
            outfile = StringIO()

        if not self.nowrap:
            outfile.write('\\begin{' + self.envname + '}[commandchars=\\\\\\{\\}')
            if self.linenos:
                start, step = self.linenostart, self.linenostep
                outfile.write(',numbers=left' +
                              (',firstnumber=%d' % start if start else '') +
                              (',stepnumber=%d' % step if step else ''))
            if self.mathescape or self.texcomments or self.escapeinside:
                # Restore the usual meaning of $, ^ and _ so that embedded
                # LaTeX/math works inside the environment.
                outfile.write(',codes={\\catcode`\\$=3\\catcode`\\^=7'
                              '\\catcode`\\_=8\\relax}')
            if self.verboptions:
                outfile.write(',' + self.verboptions)
            outfile.write(']\n')

        for ttype, value in tokensource:
            if ttype in Token.Comment:
                value = self._escape_comment(value)
            elif ttype not in Token.Escape:
                # Token.Escape values are raw LaTeX and stay untouched.
                value = escape_tex(value, cp)

            # Class names from the most general to the most specific type.
            styles = []
            while ttype is not Token:
                try:
                    styles.append(t2n[ttype])
                except KeyError:
                    # not in current style
                    styles.append(_get_ttype_name(ttype))
                ttype = ttype.parent
            styleval = '+'.join(reversed(styles))

            if styleval:
                # The macro call must not span lines, so every line of the
                # value gets its own call.
                spl = value.split('\n')
                for line in spl[:-1]:
                    if line:
                        outfile.write("\\%s{%s}{%s}" % (cp, styleval, line))
                    outfile.write('\n')
                if spl[-1]:
                    outfile.write("\\%s{%s}{%s}" % (cp, styleval, spl[-1]))
            else:
                outfile.write(value)

        if not self.nowrap:
            outfile.write('\\end{' + self.envname + '}\n')

        if self.full:
            encoding = self.encoding or 'utf8'
            # Map known existing encodings to the names used by the LaTeX
            # distribution; the lookup ignores case and '-' vs '_'.
            encoding = {
                'utf_8': 'utf8',
                'latin_1': 'latin1',
                'iso_8859_1': 'latin1',
            }.get(encoding.replace('-', '_').lower(), encoding)
            realoutfile.write(DOC_TEMPLATE % {
                'docclass': self.docclass,
                'preamble': self.preamble,
                'title': self.title,
                'encoding': encoding,
                'styledefs': self.get_style_defs(),
                'code': outfile.getvalue(),
            })
439
class LatexEmbeddedLexer(Lexer):
    """
    Wrap the lexer of the language being formatted so that text between
    the `left` and `right` delimiters is passed through as LaTeX.

    The text is first scanned with the language lexer to locate strings
    and comments.  Everything else is merged into runs which are searched
    for delimited segments; those become Token.Escape tokens.  The text
    with the escaped segments removed is then lexed again by the language
    lexer and the escape tokens are spliced back in.
    """
    def __init__(self, left, right, lang, **options):
        self.left = left
        self.right = right
        self.lang = lang
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        # Same idea as DelegatingLexer.get_tokens_unprocessed: strip the
        # escape tokens out of the text, remember where they belong, lex
        # the remainder and re-insert them.
        plain_chunks = []
        plain_length = 0
        insertions = []
        pending = []
        for pos, ttype, chunk in self._find_safe_escape_tokens(text):
            if ttype is not None:
                pending.append((pos, ttype, chunk))
                continue
            if pending:
                insertions.append((plain_length, pending))
                pending = []
            plain_chunks.append(chunk)
            plain_length += len(chunk)
        if pending:
            insertions.append((plain_length, pending))
        return do_insertions(
            insertions,
            self.lang.get_tokens_unprocessed(''.join(plain_chunks)))

    def _find_safe_escape_tokens(self, text):
        """ find escape tokens that are not in strings or comments """
        def is_protected(ttype):
            return ttype in Token.Comment or ttype in Token.String

        merged = self._filter_to(self.lang.get_tokens_unprocessed(text),
                                 is_protected)
        for offset, ttype, chunk in merged:
            if ttype is not None:
                # String or comment: handed on as ordinary text.
                yield offset, None, chunk
                continue
            for rel, sub_type, sub_chunk in self._find_escape_tokens(chunk):
                yield offset + rel, sub_type, sub_chunk

    def _filter_to(self, it, pred):
        """ Keep only the tokens that match `pred`, merge the others together """
        run = ''
        run_start = 0
        for pos, ttype, chunk in it:
            if not pred(ttype):
                if not run:
                    run_start = pos
                run += chunk
                continue
            # A matching token ends the current run of merged text.
            if run:
                yield run_start, None, run
                run = ''
            yield pos, ttype, chunk
        if run:
            yield run_start, None, run

    def _find_escape_tokens(self, text):
        """ Find escape tokens within text, give token=None otherwise """
        pos = 0
        remaining = text
        while remaining:
            plain, opener, remaining = remaining.partition(self.left)
            if plain:
                yield pos, None, plain
                pos += len(plain)
            if not opener:
                continue
            body, closer, remaining = remaining.partition(self.right)
            if closer:
                yield pos + len(opener), Token.Escape, body
                pos += len(opener) + len(body) + len(closer)
            else:
                # Opening delimiter without a closing one: flag it and
                # keep scanning the text that followed it.
                yield pos, Token.Error, opener
                pos += len(opener)
                remaining = body
519 yield index, Token.Error, sep1
520 index += len(sep1)
521 text = b