"""
    pygments.formatters.latex
    ~~~~~~~~~~~~~~~~~~~~~~~~~

    Formatter for LaTeX fancyvrb output.

    :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from io import StringIO

from pip._vendor.pygments.formatter import Formatter
from pip._vendor.pygments.lexer import Lexer, do_insertions
from pip._vendor.pygments.token import Token, STANDARD_TYPES
from pip._vendor.pygments.util import get_bool_opt, get_int_opt


__all__ = ['LatexFormatter']

21 | ||
def escape_tex(text, commandprefix):
    """Escape LaTeX special characters in *text*.

    Each special character is replaced by a ``\\<commandprefix>Z..{}``
    macro call (the macros themselves are defined by STYLE_TEMPLATE).
    Backslash and braces are routed through sentinel bytes first so that
    the macro text inserted for them is not re-escaped by later steps.
    """
    # Stage 1: hide the characters whose replacement text would itself
    # contain characters escaped below.
    text = (text.replace('\\', '\x00')
                .replace('{', '\x01')
                .replace('}', '\x02'))

    # Stage 2: map each sentinel / special character to its macro.
    # Order matters only in that the sentinels come first.
    for char, macro in (
        ('\x00', 'Zbs'),   # backslash
        ('\x01', 'Zob'),   # open brace
        ('\x02', 'Zcb'),   # close brace
        ('^', 'Zca'),
        ('_', 'Zus'),
        ('&', 'Zam'),
        ('<', 'Zlt'),
        ('>', 'Zgt'),
        ('#', 'Zsh'),
        ('%', 'Zpc'),
        ('$', 'Zdl'),
        ('-', 'Zhy'),
        ("'", 'Zsq'),
        ('"', 'Zdq'),
        ('~', 'Zti'),
    ):
        text = text.replace(char, '\\%s%s{}' % (commandprefix, macro))
    return text
41 | ||
42 | ||
# Skeleton of the standalone LaTeX document emitted when the `full`
# option is set.  %-interpolated keys: docclass, encoding, preamble,
# styledefs, title, code (see LatexFormatter.format_unencoded).
DOC_TEMPLATE = r'''
\documentclass{%(docclass)s}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage[%(encoding)s]{inputenc}
%(preamble)s

%(styledefs)s

\begin{document}

\section*{%(title)s}

%(code)s
\end{document}
'''
59 | ||
## Small explanation of the mess below :)
#
# The previous version of the LaTeX formatter just assigned a command to
# each token type defined in the current style.  That obviously is
# problematic if the highlighted code is produced for a different style
# than the style commands themselves.
#
# This version works much like the HTML formatter which assigns multiple
# CSS classes to each <span> tag, from the most specific to the least
# specific token type, thus falling back to the parent token type if one
# is not defined.  Here, the classes are there too and use the same short
# forms given in token.STANDARD_TYPES.
#
# Highlighted code now only uses one custom command, which by default is
# \PY and selectable by the commandprefix option (and in addition the
# escapes \PYZat, \PYZlb and \PYZrb which haven't been renamed for
# backwards compatibility purposes).
#
# \PY has two arguments: the classes, separated by +, and the text to
# render in that style.  The classes are resolved into the respective
# style commands by magic, which serves to ignore unknown classes.
#
# The magic macros are:
# * \PY@it, \PY@bf, etc. are unconditionally wrapped around the text
#   to render in \PY@do.  Their definition determines the style.
# * \PY@reset resets \PY@it etc. to do nothing.
# * \PY@toks parses the list of classes, using magic inspired by the
#   keyval package (but modified to use plusses instead of commas
#   because fancyvrb redefines commas inside its environments).
# * \PY@tok processes one class, calling the \PY@tok@classname command
#   if it exists.
# * \PY@tok@classname sets the \PY@it etc. to reflect the chosen style
#   for its class.
# * \PY resets the style, parses the classnames and then calls \PY@do.
#
# Tip: to read this code, print it out in substituted form using e.g.
# >>> print(STYLE_TEMPLATE % {'cp': 'PY'})
97 | ||
# LaTeX macro definitions emitted by get_style_defs().  Only two keys are
# %-interpolated: `cp` (the command prefix, default 'PY') and `styles`
# (one \@namedef per token class).  '%%' is a literal percent sign.
STYLE_TEMPLATE = r'''
\makeatletter
\def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%%
    \let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%%
    \let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax}
\def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname}
\def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%%
    \%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi}
\def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%%
    \%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}}
\def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}}

%(styles)s

\def\%(cp)sZbs{\char`\\}
\def\%(cp)sZus{\char`\_}
\def\%(cp)sZob{\char`\{}
\def\%(cp)sZcb{\char`\}}
\def\%(cp)sZca{\char`\^}
\def\%(cp)sZam{\char`\&}
\def\%(cp)sZlt{\char`\<}
\def\%(cp)sZgt{\char`\>}
\def\%(cp)sZsh{\char`\#}
\def\%(cp)sZpc{\char`\%%}
\def\%(cp)sZdl{\char`\$}
\def\%(cp)sZhy{\char`\-}
\def\%(cp)sZsq{\char`\'}
\def\%(cp)sZdq{\char`\"}
\def\%(cp)sZti{\char`\~}
%% for compatibility with earlier versions
\def\%(cp)sZat{@}
\def\%(cp)sZlb{[}
\def\%(cp)sZrb{]}
\makeatother
'''
133 | ||
134 | ||
def _get_ttype_name(ttype):
    """Return the short class name used in \\PY{...} for a token type.

    Token types listed in STANDARD_TYPES map directly to their shorthand.
    Otherwise the token hierarchy is climbed towards the root until an
    ancestor with a standard shorthand is found, and the unnamed subtype
    names are appended to it (same scheme as the HTML formatter).
    """
    shorthand = STANDARD_TYPES.get(ttype)
    if shorthand:
        return shorthand
    # Accumulate the trailing subtype names while walking up the tree.
    tail = ''
    while shorthand is None:
        tail = ttype[-1] + tail
        ttype = ttype.parent
        shorthand = STANDARD_TYPES.get(ttype)
    return shorthand + tail
145 | ||
146 | ||
class LatexFormatter(Formatter):
    r"""
    Format tokens as LaTeX code. This needs the `fancyvrb` and `color`
    standard packages.

    Without the `full` option, code is formatted as one ``Verbatim``
    environment, like this:

    .. sourcecode:: latex

        \begin{Verbatim}[commandchars=\\\{\}]
        \PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}):
            \PY{k}{pass}
        \end{Verbatim}

    Wrapping can be disabled using the `nowrap` option.

    The special command used here (``\PY``) and all the other macros it needs
    are output by the `get_style_defs` method.

    With the `full` option, a complete LaTeX document is output, including
    the command definitions in the preamble.

    The `get_style_defs()` method of a `LatexFormatter` returns a string
    containing ``\def`` commands defining the macros needed inside the
    ``Verbatim`` environments.

    Additional options accepted:

    `nowrap`
        If set to ``True``, don't wrap the tokens at all, not even inside a
        ``\begin{Verbatim}`` environment. This disables most other options
        (default: ``False``).

    `style`
        The style to use, can be a string or a Style subclass (default:
        ``'default'``).

    `full`
        Tells the formatter to output a "full" document, i.e. a complete
        self-contained document (default: ``False``).

    `title`
        If `full` is true, the title that should be used to caption the
        document (default: ``''``).

    `docclass`
        If the `full` option is enabled, this is the document class to use
        (default: ``'article'``).

    `preamble`
        If the `full` option is enabled, this can be further preamble commands,
        e.g. ``\usepackage`` (default: ``''``).

    `linenos`
        If set to ``True``, output line numbers (default: ``False``).

    `linenostart`
        The line number for the first line (default: ``1``).

    `linenostep`
        If set to a number n > 1, only every nth line number is printed.

    `verboptions`
        Additional options given to the Verbatim environment (see the *fancyvrb*
        docs for possible values) (default: ``''``).

    `commandprefix`
        The LaTeX commands used to produce colored output are constructed
        using this prefix and some letters (default: ``'PY'``).

        .. versionadded:: 0.7
        .. versionchanged:: 0.10
           The default is now ``'PY'`` instead of ``'C'``.

    `texcomments`
        If set to ``True``, enables LaTeX comment lines. That is, LaTex markup
        in comment tokens is not escaped so that LaTeX can render it (default:
        ``False``).

        .. versionadded:: 1.2

    `mathescape`
        If set to ``True``, enables LaTeX math mode escape in comments. That
        is, ``'$...$'`` inside a comment will trigger math mode (default:
        ``False``).

        .. versionadded:: 1.2

    `escapeinside`
        If set to a string of length 2, enables escaping to LaTeX. Text
        delimited by these 2 characters is read as LaTeX code and
        typeset accordingly. It has no effect in string literals. It has
        no effect in comments if `texcomments` or `mathescape` is
        set. (default: ``''``).

        .. versionadded:: 2.0

    `envname`
        Allows you to pick an alternative environment name replacing Verbatim.
        The alternate environment still has to support Verbatim's option syntax.
        (default: ``'Verbatim'``).

        .. versionadded:: 2.0
    """
    name = 'LaTeX'
    aliases = ['latex', 'tex']
    filenames = ['*.tex']

    def __init__(self, **options):
        """Read the formatter options (documented on the class) and
        precompute the per-token-type style commands."""
        Formatter.__init__(self, **options)
        self.nowrap = get_bool_opt(options, 'nowrap', False)
        self.docclass = options.get('docclass', 'article')
        self.preamble = options.get('preamble', '')
        self.linenos = get_bool_opt(options, 'linenos', False)
        # abs() keeps negative values from reaching fancyvrb's
        # firstnumber/stepnumber options.
        self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
        self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
        self.verboptions = options.get('verboptions', '')
        # NOTE(review): stored but never read in this file -- background
        # color support is still a TODO in format_unencoded.
        self.nobackground = get_bool_opt(options, 'nobackground', False)
        self.commandprefix = options.get('commandprefix', 'PY')
        self.texcomments = get_bool_opt(options, 'texcomments', False)
        self.mathescape = get_bool_opt(options, 'mathescape', False)
        self.escapeinside = options.get('escapeinside', '')
        if len(self.escapeinside) == 2:
            self.left = self.escapeinside[0]    # opening escape delimiter
            self.right = self.escapeinside[1]   # closing escape delimiter
        else:
            # Anything other than exactly two characters disables escaping.
            self.escapeinside = ''
        self.envname = options.get('envname', 'Verbatim')

        self._create_stylesheet()

    def _create_stylesheet(self):
        """Precompute LaTeX command bodies for every token type in the style.

        Fills ``self.ttype2name`` (token type -> short class name) and
        ``self.cmd2def`` (short class name -> LaTeX command body), which
        are consumed by `get_style_defs` and `format_unencoded`.
        """
        t2n = self.ttype2name = {Token: ''}
        c2d = self.cmd2def = {}
        cp = self.commandprefix

        def rgbcolor(col):
            # Convert an 'RRGGBB' hex string to the 'r,g,b' decimal triple
            # (components in 0..1) expected by \textcolor[rgb]{...}.
            # No color given means white.
            if col:
                return ','.join(['%.2f' % (int(col[i] + col[i + 1], 16) / 255.0)
                                 for i in (0, 2, 4)])
            else:
                return '1,1,1'

        for ttype, ndef in self.style:
            name = _get_ttype_name(ttype)
            # '$$' is a placeholder for the command prefix; it is
            # substituted once the whole body is assembled.
            cmndef = ''
            if ndef['bold']:
                cmndef += r'\let\$$@bf=\textbf'
            if ndef['italic']:
                cmndef += r'\let\$$@it=\textit'
            if ndef['underline']:
                cmndef += r'\let\$$@ul=\underline'
            if ndef['roman']:
                cmndef += r'\let\$$@ff=\textrm'
            if ndef['sans']:
                cmndef += r'\let\$$@ff=\textsf'
            if ndef['mono']:
                # NOTE(review): 'mono' maps to \textsf, same as 'sans';
                # \texttt looks more plausible -- confirm against upstream
                # Pygments before changing.
                cmndef += r'\let\$$@ff=\textsf'
            if ndef['color']:
                cmndef += (r'\def\$$@tc##1{\textcolor[rgb]{%s}{##1}}' %
                           rgbcolor(ndef['color']))
            if ndef['border']:
                # Border wins over plain background: draw a framed color box.
                cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{\string -\fboxrule}'
                           r'\fcolorbox[rgb]{%s}{%s}{\strut ##1}}}' %
                           (rgbcolor(ndef['border']),
                            rgbcolor(ndef['bgcolor'])))
            elif ndef['bgcolor']:
                cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{0pt}'
                           r'\colorbox[rgb]{%s}{\strut ##1}}}' %
                           rgbcolor(ndef['bgcolor']))
            if cmndef == '':
                # Unstyled token type: no macro needed at all.
                continue
            cmndef = cmndef.replace('$$', cp)
            t2n[ttype] = name
            c2d[name] = cmndef

    def get_style_defs(self, arg=''):
        """
        Return the command sequences needed to define the commands
        used to format text in the verbatim environment. ``arg`` is ignored.
        """
        cp = self.commandprefix
        styles = []
        for name, definition in self.cmd2def.items():
            # One \<cp>@tok@<name> macro per styled token class.
            styles.append(r'\@namedef{%s@tok@%s}{%s}' % (cp, name, definition))
        return STYLE_TEMPLATE % {'cp': self.commandprefix,
                                 'styles': '\n'.join(styles)}

    def format_unencoded(self, tokensource, outfile):
        """Write the LaTeX rendering of ``tokensource`` to ``outfile``."""
        # TODO: add support for background colors
        t2n = self.ttype2name
        cp = self.commandprefix

        if self.full:
            # Buffer the highlighted code; it is spliced into DOC_TEMPLATE
            # and written to the real output file at the end.
            realoutfile = outfile
            outfile = StringIO()

        if not self.nowrap:
            # commandchars lets \PY{...}{...} work inside the verbatim
            # environment.
            outfile.write('\\begin{' + self.envname + '}[commandchars=\\\\\\{\\}')
            if self.linenos:
                start, step = self.linenostart, self.linenostep
                outfile.write(',numbers=left' +
                              (start and ',firstnumber=%d' % start or '') +
                              (step and ',stepnumber=%d' % step or ''))
            if self.mathescape or self.texcomments or self.escapeinside:
                # Restore TeX catcodes for $, ^ and _ so pass-through LaTeX
                # (math mode, comments, escapeinside) is interpreted.
                outfile.write(',codes={\\catcode`\\$=3\\catcode`\\^=7'
                              '\\catcode`\\_=8\\relax}')
            if self.verboptions:
                outfile.write(',' + self.verboptions)
            outfile.write(']\n')

        for ttype, value in tokensource:
            if ttype in Token.Comment:
                if self.texcomments:
                    # Try to guess comment starting lexeme and escape it ...
                    start = value[0:1]
                    for i in range(1, len(value)):
                        if start[0] != value[i]:
                            break
                        start += value[i]

                    value = value[len(start):]
                    start = escape_tex(start, cp)

                    # ... but do not escape inside comment.
                    value = start + value
                elif self.mathescape:
                    # Only escape parts not inside a math environment.
                    parts = value.split('$')
                    in_math = False
                    for i, part in enumerate(parts):
                        if not in_math:
                            parts[i] = escape_tex(part, cp)
                        in_math = not in_math
                    value = '$'.join(parts)
                elif self.escapeinside:
                    # Escape everything except the left...right delimited
                    # segments, which pass through as raw LaTeX.
                    text = value
                    value = ''
                    while text:
                        a, sep1, text = text.partition(self.left)
                        if sep1:
                            b, sep2, text = text.partition(self.right)
                            if sep2:
                                value += escape_tex(a, cp) + b
                            else:
                                # Unterminated escape: treat it literally.
                                value += escape_tex(a + sep1 + b, cp)
                        else:
                            value += escape_tex(a, cp)
                else:
                    value = escape_tex(value, cp)
            elif ttype not in Token.Escape:
                value = escape_tex(value, cp)
            # Collect style class names from the most specific token type up
            # to (but excluding) the root Token.
            styles = []
            while ttype is not Token:
                try:
                    styles.append(t2n[ttype])
                except KeyError:
                    # not in current style
                    styles.append(_get_ttype_name(ttype))
                ttype = ttype.parent
            styleval = '+'.join(reversed(styles))
            if styleval:
                # Emit one \PY{...}{...} per line so line breaks stay
                # outside the command arguments.
                spl = value.split('\n')
                for line in spl[:-1]:
                    if line:
                        outfile.write("\\%s{%s}{%s}" % (cp, styleval, line))
                    outfile.write('\n')
                if spl[-1]:
                    outfile.write("\\%s{%s}{%s}" % (cp, styleval, spl[-1]))
            else:
                outfile.write(value)

        if not self.nowrap:
            outfile.write('\\end{' + self.envname + '}\n')

        if self.full:
            encoding = self.encoding or 'utf8'
            # map known existing encodings from LaTeX distribution
            encoding = {
                'utf_8': 'utf8',
                'latin_1': 'latin1',
                'iso_8859_1': 'latin1',
            }.get(encoding.replace('-', '_'), encoding)
            realoutfile.write(DOC_TEMPLATE %
                              dict(docclass = self.docclass,
                                   preamble = self.preamble,
                                   title = self.title,
                                   encoding = encoding,
                                   styledefs = self.get_style_defs(),
                                   code = outfile.getvalue()))
438 | ||
439 | ||
class LatexEmbeddedLexer(Lexer):
    """
    This lexer takes one lexer as argument, the lexer for the language
    being formatted, and the left and right delimiters for escaped text.

    First everything is scanned using the language lexer to obtain
    strings and comments. All other consecutive tokens are merged and
    the resulting text is scanned for escaped segments, which are given
    the Token.Escape type. Finally text that is not escaped is scanned
    again with the language lexer.
    """
    def __init__(self, left, right, lang, **options):
        # left/right: delimiters marking escaped-to-LaTeX segments;
        # lang: the lexer for the language being highlighted.
        self.left = left
        self.right = right
        self.lang = lang
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Tokenize ``text``, yielding ``Token.Escape`` for escaped segments.

        Escape tokens are cut out before the text is handed to the language
        lexer, then re-inserted at the recorded offsets via do_insertions.
        """
        # find and remove all the escape tokens (replace with an empty string)
        # this is very similar to DelegatingLexer.get_tokens_unprocessed.
        buffered = ''
        insertions = []
        insertion_buf = []
        for i, t, v in self._find_safe_escape_tokens(text):
            if t is None:
                # Plain text: flush any pending escape tokens first, noting
                # where in `buffered` they must be re-inserted later.
                if insertion_buf:
                    insertions.append((len(buffered), insertion_buf))
                    insertion_buf = []
                buffered += v
            else:
                insertion_buf.append((i, t, v))
        if insertion_buf:
            insertions.append((len(buffered), insertion_buf))
        return do_insertions(insertions,
                             self.lang.get_tokens_unprocessed(buffered))

    def _find_safe_escape_tokens(self, text):
        """ find escape tokens that are not in strings or comments """
        for i, t, v in self._filter_to(
            self.lang.get_tokens_unprocessed(text),
            lambda t: t in Token.Comment or t in Token.String
        ):
            if t is None:
                # Merged run outside strings/comments: safe to scan for
                # escape delimiters.
                for i2, t2, v2 in self._find_escape_tokens(v):
                    yield i + i2, t2, v2
            else:
                # Inside a string or comment: pass through unscanned.
                yield i, None, v

    def _filter_to(self, it, pred):
        """ Keep only the tokens that match `pred`, merge the others together """
        buf = ''
        idx = 0
        for i, t, v in it:
            if pred(t):
                if buf:
                    # Flush the accumulated run of non-matching tokens as
                    # one (idx, None, text) item.
                    yield idx, None, buf
                    buf = ''
                yield i, t, v
            else:
                if not buf:
                    idx = i  # remember where this merged run starts
                buf += v
        if buf:
            yield idx, None, buf

    def _find_escape_tokens(self, text):
        """ Find escape tokens within text, give token=None otherwise """
        index = 0
        while text:
            a, sep1, text = text.partition(self.left)
            if a:
                yield index, None, a
                index += len(a)
            if sep1:
                b, sep2, text = text.partition(self.right)
                if sep2:
                    # Complete left...right pair: yield the payload as Escape.
                    yield index + len(sep1), Token.Escape, b
                    index += len(sep1) + len(b) + len(sep2)
                else:
                    # Unterminated escape: report the left delimiter as an
                    # error and keep scanning the remaining text.
                    yield index, Token.Error, sep1
                    index += len(sep1)
                    text = b