]>
Commit | Line | Data |
---|---|---|
e0df8241 JR |
1 | import re |
2 | import itertools | |
3 | import textwrap | |
4 | import functools | |
5 | ||
6 | try: | |
7 | from importlib.resources import files # type: ignore | |
8 | except ImportError: # pragma: nocover | |
9 | from pkg_resources.extern.importlib_resources import files # type: ignore | |
10 | ||
11 | from pkg_resources.extern.jaraco.functools import compose, method_cache | |
12 | from pkg_resources.extern.jaraco.context import ExceptionTrap | |
13 | ||
14 | ||
def substitution(old, new):
    """
    Build a replacer: a callable mapping a string ``s`` to
    ``s.replace(old, new)``.
    """
    def replace(s):
        return s.replace(old, new)

    return replace
20 | ||
21 | ||
def multi_substitution(*substitutions):
    """
    Take a sequence of pairs specifying substitutions, and create
    a function that performs those substitutions.

    >>> multi_substitution(('foo', 'bar'), ('bar', 'baz'))('foo')
    'baz'
    """
    replacers = [substitution(old, new) for old, new in substitutions]
    # compose applies its last argument first, so reverse the list to
    # apply the substitutions in the order given.
    replacers.reverse()
    return compose(*replacers)
35 | ||
36 | ||
class FoldedCase(str):
    """
    A case insensitive string class; behaves just like str
    except compares equal when the only variation is case.

    >>> s = FoldedCase('hello world')

    >>> s == 'Hello World'
    True

    >>> 'Hello World' == s
    True

    >>> s != 'Hello World'
    False

    >>> s.index('O')
    4

    >>> s.split('O')
    ['hell', ' w', 'rld']

    >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta']))
    ['alpha', 'Beta', 'GAMMA']

    Sequence membership is straightforward.

    >>> "Hello World" in [s]
    True
    >>> s in ["Hello World"]
    True

    You may test for set inclusion, but candidate and elements
    must both be folded.

    >>> FoldedCase("Hello World") in {s}
    True
    >>> s in {FoldedCase("Hello World")}
    True

    String inclusion works as long as the FoldedCase object
    is on the right.

    >>> "hello" in FoldedCase("Hello World")
    True

    But not if the FoldedCase object is on the left:

    >>> FoldedCase('hello') in 'Hello World'
    False

    In that case, use ``in_``:

    >>> FoldedCase('hello').in_('Hello World')
    True

    >>> FoldedCase('hello') > FoldedCase('Hello')
    False
    """

    # Comparisons fold both operands via .lower(); ``other`` is assumed
    # to be a str (or expose a compatible .lower()).
    def __lt__(self, other):
        return self.lower() < other.lower()

    def __gt__(self, other):
        return self.lower() > other.lower()

    def __eq__(self, other):
        return self.lower() == other.lower()

    def __ne__(self, other):
        return self.lower() != other.lower()

    # Hash the folded value so instances that compare equal (ignoring
    # case) also hash equal, keeping dict/set semantics consistent.
    def __hash__(self):
        return hash(self.lower())

    # Case-insensitive substring test (works when the FoldedCase is on
    # the right of ``in``). Uses the plain str.lower of self via super().
    def __contains__(self, other):
        return super().lower().__contains__(other.lower())

    def in_(self, other):
        "Does self appear in other?"
        return self in FoldedCase(other)

    # cache lower since it's likely to be called frequently.
    @method_cache
    def lower(self):
        return super().lower()

    # Case-insensitive index; raises ValueError (from str.index) when
    # ``sub`` does not occur.
    def index(self, sub):
        return self.lower().index(sub.lower())

    # Case-insensitive split. NOTE(review): maxsplit=0 means "no limit"
    # here (re.split semantics), unlike str.split where 0 means no splits.
    def split(self, splitter=' ', maxsplit=0):
        pattern = re.compile(re.escape(splitter), re.I)
        return pattern.split(self, maxsplit)
130 | ||
131 | ||
# Python 3.8 compatibility
# Trap used by `is_decodable` below: its ``passes`` decorator turns a
# callable into a predicate returning True when no UnicodeDecodeError
# was raised, False otherwise.
_unicode_trap = ExceptionTrap(UnicodeDecodeError)
134 | ||
135 | ||
@_unicode_trap.passes
def is_decodable(value):
    r"""
    Return True if the supplied value is decodable (using the default
    encoding).

    >>> is_decodable(b'\xff')
    False
    >>> is_decodable(b'\x32')
    True
    """
    # The ``passes`` decorator converts the outcome into a boolean:
    # True when ``value.decode()`` completes, False when it raises
    # UnicodeDecodeError. The body's return value is unused.
    value.decode()
148 | ||
149 | ||
def is_binary(value):
    r"""
    Return True if the value appears to be binary (that is, it's a byte
    string and isn't decodable).

    >>> is_binary(b'\xff')
    True
    >>> is_binary('\xff')
    False
    """
    # Text (or anything that isn't bytes) is never considered binary.
    if not isinstance(value, bytes):
        return False
    return not is_decodable(value)
161 | ||
162 | ||
def trim(s):
    r"""
    Trim something like a docstring to remove the whitespace that
    is common due to indentation and formatting.

    >>> trim("\n\tfoo = bar\n\t\tbar = baz\n")
    'foo = bar\n\tbar = baz'
    """
    dedented = textwrap.dedent(s)
    return dedented.strip()
172 | ||
173 | ||
def wrap(s):
    r"""
    Wrap lines of text, retaining existing newlines as
    paragraph markers.

    Each input line is treated as one paragraph, wrapped to the
    textwrap default width; paragraphs are re-joined with blank lines.

    >>> wrap('foo bar\nbaz')
    'foo bar\n\nbaz'
    """
    return '\n\n'.join(
        '\n'.join(textwrap.wrap(paragraph)) for paragraph in s.splitlines()
    )
205 | ||
206 | ||
def unwrap(s):
    r"""
    Given a multi-line string, return an unwrapped version.

    Paragraphs (runs of text separated by blank lines) are each
    collapsed onto a single line; the result has one line per paragraph.

    >>> unwrap('a b\nc\n\nd e')
    'a b c\nd e'
    """
    collapsed = (
        paragraph.replace('\n', ' ') for paragraph in re.split(r'\n\n+', s)
    )
    return '\n'.join(collapsed)
225 | ||
226 | ||
227 | ||
228 | ||
class Splitter(object):
    """
    Callable that splits any string with a fixed set of arguments.

    >>> s = Splitter(',')
    >>> s('hello, world, this is your, master calling')
    ['hello', ' world', ' this is your', ' master calling']
    """

    def __init__(self, *args):
        # Arguments forwarded verbatim to str.split on every call.
        self.args = args

    def __call__(self, s):
        return s.split(*self.args)
242 | ||
243 | ||
def indent(string, prefix=' ' * 4):
    """
    Prepend *prefix* to *string* (the string as a whole, not per line).

    >>> indent('foo')
    '    foo'
    """
    return ''.join((prefix, string))
250 | ||
251 | ||
class WordSet(tuple):
    """
    Given an identifier, return the words that identifier represents,
    whether in camel case, underscore-separated, etc.

    >>> WordSet.parse("camelCase")
    ('camel', 'Case')

    >>> WordSet.parse("under_sep")
    ('under', 'sep')

    Acronyms are retained.

    >>> WordSet.parse("firstSNL")
    ('first', 'SNL')

    >>> WordSet.parse("A simple test")
    ('A', 'simple', 'test')

    Multiple caps do not interfere with the first cap of another word.

    >>> WordSet.parse("myABCClass")
    ('my', 'ABC', 'Class')

    The result is a WordSet, so the words can be re-assembled in
    whatever form is needed.

    >>> WordSet.parse("myABCClass").underscore_separated()
    'my_ABC_Class'

    >>> WordSet.parse('a-command').camel_case()
    'ACommand'

    >>> WordSet.parse('someIdentifier').lowered().space_separated()
    'some identifier'

    Slicing returns another WordSet.

    >>> WordSet.parse('taken-out-of-context')[1:].underscore_separated()
    'out_of_context'

    >>> WordSet.from_class_name(WordSet()).lowered().space_separated()
    'word set'

    >>> example = WordSet.parse('figured it out')
    >>> example.headless_camel_case()
    'figuredItOut'
    >>> example.dash_separated()
    'figured-it-out'
    """

    # Either a capitalized-or-lower word, or a run of caps not followed
    # by a lowercase letter (an acronym).
    _pattern = re.compile('([A-Z]?[a-z]+)|([A-Z]+(?![a-z]))')

    def capitalized(self):
        "Return a new WordSet with each word capitalized."
        return WordSet(map(str.capitalize, self))

    def lowered(self):
        "Return a new WordSet with each word lowercased."
        return WordSet(map(str.lower, self))

    def camel_case(self):
        "Join the words, each capitalized: 'ACommand'."
        return ''.join(self.capitalized())

    def headless_camel_case(self):
        """
        Join as camel case but with the first word lowercased:
        'figuredItOut'. Raises StopIteration when the set is empty.
        """
        remainder = iter(self)
        head = next(remainder).lower()
        return head + WordSet(remainder).camel_case()

    def underscore_separated(self):
        "Join the words with underscores."
        return '_'.join(self)

    def dash_separated(self):
        "Join the words with dashes."
        return '-'.join(self)

    def space_separated(self):
        "Join the words with spaces."
        return ' '.join(self)

    def trim_right(self, item):
        """
        Remove the item from the end of the set.

        >>> WordSet.parse('foo bar').trim_right('foo')
        ('foo', 'bar')
        >>> WordSet.parse('foo bar').trim_right('bar')
        ('foo',)
        >>> WordSet.parse('').trim_right('bar')
        ()
        """
        if self and self[-1] == item:
            return self[:-1]
        return self

    def trim_left(self, item):
        """
        Remove the item from the beginning of the set.

        >>> WordSet.parse('foo bar').trim_left('foo')
        ('bar',)
        >>> WordSet.parse('foo bar').trim_left('bar')
        ('foo', 'bar')
        >>> WordSet.parse('').trim_left('bar')
        ()
        """
        if self and self[0] == item:
            return self[1:]
        return self

    def trim(self, item):
        """
        Remove the item from both ends of the set.

        >>> WordSet.parse('foo bar').trim('foo')
        ('bar',)
        """
        return self.trim_left(item).trim_right(item)

    def __getitem__(self, item):
        # Preserve the WordSet type for slices; single items remain str.
        result = super().__getitem__(item)
        return WordSet(result) if isinstance(item, slice) else result

    @classmethod
    def parse(cls, identifier):
        "Split *identifier* into its component words."
        found = cls._pattern.finditer(identifier)
        return WordSet(match.group(0) for match in found)

    @classmethod
    def from_class_name(cls, subject):
        "Parse the name of *subject*'s class into words."
        return cls.parse(subject.__class__.__name__)
379 | ||
380 | ||
# for backward compatibility: the historical ``words`` API is an alias
# of WordSet.parse.
words = WordSet.parse
383 | ||
384 | ||
def simple_html_strip(s):
    r"""
    Remove HTML from the string `s`.

    >>> str(simple_html_strip(''))
    ''

    >>> print(simple_html_strip('A <bold>stormy</bold> day in paradise'))
    A stormy day in paradise

    >>> print(simple_html_strip('What about<br/>\nmultiple lines?'))
    What about
    multiple lines?
    """
    # Group 1: comments; group 2: tags; group 3: text. Only text (group
    # 3) survives; comments and tags contribute nothing.
    html_stripper = re.compile('(<!--.*?-->)|(<[^>]*>)|([^<]+)', re.DOTALL)
    return ''.join(
        match.group(3) or '' for match in html_stripper.finditer(s)
    )
405 | ||
406 | ||
class SeparatedValues(str):
    """
    A string separated by a separator. Overrides __iter__ for getting
    the values.

    >>> list(SeparatedValues('a,b,c'))
    ['a', 'b', 'c']

    Whitespace is stripped and empty values are discarded.

    >>> list(SeparatedValues(' a, b , c, '))
    ['a', 'b', 'c']
    """

    separator = ','

    def __iter__(self):
        stripped = (value.strip() for value in self.split(self.separator))
        # filter(None, ...) discards the empty strings.
        return filter(None, stripped)
426 | ||
427 | ||
class Stripper:
    r"""
    Given a series of lines, find the common prefix and strip it from them.

    >>> lines = [
    ...     'abcdefg\n',
    ...     'abc\n',
    ...     'abcde\n',
    ... ]
    >>> res = Stripper.strip_prefix(lines)
    >>> res.prefix
    'abc'
    >>> list(res.lines)
    ['defg\n', '\n', 'de\n']

    If no prefix is common, nothing should be stripped.

    >>> lines = [
    ...     'abcd\n',
    ...     '1234\n',
    ... ]
    >>> res = Stripper.strip_prefix(lines)
    >>> res.prefix
    ''
    >>> list(res.lines)
    ['abcd\n', '1234\n']
    """

    # NOTE: the second doctest previously read ``res.prefix = ''``,
    # which *assigned* rather than asserted; fixed to check the value.

    def __init__(self, prefix, lines):
        self.prefix = prefix
        # Strip lazily: each line loses the prefix as ``lines`` is consumed.
        self.lines = map(self, lines)

    @classmethod
    def strip_prefix(cls, lines):
        """
        Compute the longest common prefix of *lines* and return a
        Stripper whose ``lines`` yields each line with it removed.
        Raises TypeError (from reduce) when *lines* is empty.
        """
        prefix_lines, lines = itertools.tee(lines)
        prefix = functools.reduce(cls.common_prefix, prefix_lines)
        return cls(prefix, lines)

    def __call__(self, line):
        """Return *line* with the stored prefix removed."""
        if not self.prefix:
            return line
        # Every line begins with the common prefix, so partition yields
        # an empty head and the remainder after the prefix.
        null, prefix, rest = line.partition(self.prefix)
        return rest

    @staticmethod
    def common_prefix(s1, s2):
        """
        Return the common prefix of two lines.
        """
        index = min(len(s1), len(s2))
        while s1[:index] != s2[:index]:
            index -= 1
        return s1[:index]
480 | ||
481 | ||
def remove_prefix(text, prefix):
    """
    Remove the prefix from the text if it exists.

    >>> remove_prefix('underwhelming performance', 'underwhelming ')
    'performance'

    >>> remove_prefix('something special', 'sample')
    'something special'
    """
    # Previously implemented with str.rpartition, which removed
    # everything up to and including the *last* occurrence of ``prefix``
    # anywhere in ``text`` (e.g. remove_prefix('aba', 'a') -> '' instead
    # of 'ba'), even when ``prefix`` was not actually a prefix. Use
    # str.removeprefix (3.9+) semantics instead.
    if prefix and text.startswith(prefix):
        return text[len(prefix):]
    return text
494 | ||
495 | ||
def remove_suffix(text, suffix):
    """
    Remove the suffix from the text if it exists.

    >>> remove_suffix('name.git', '.git')
    'name'

    >>> remove_suffix('something special', 'sample')
    'something special'
    """
    # Previously implemented with str.partition, which truncated at the
    # *first* occurrence of ``suffix`` anywhere in ``text`` (e.g.
    # remove_suffix('name.git.git', '.git') -> 'name' instead of
    # 'name.git'), even when ``suffix`` was not actually a suffix. Use
    # str.removesuffix (3.9+) semantics instead.
    if suffix and text.endswith(suffix):
        return text[: -len(suffix)]
    return text
508 | ||
509 | ||
def normalize_newlines(text):
    r"""
    Replace alternate newlines with the canonical newline.

    >>> normalize_newlines('Lorem Ipsum\u2029')
    'Lorem Ipsum\n'
    >>> normalize_newlines('Lorem Ipsum\r\n')
    'Lorem Ipsum\n'
    >>> normalize_newlines('Lorem Ipsum\x85')
    'Lorem Ipsum\n'
    """
    # '\r\n' is listed first so the alternation matches it before a
    # lone '\r' would.
    newlines = ['\r\n', '\r', '\n', '\u0085', '\u2028', '\u2029']
    return re.sub('|'.join(newlines), '\n', text)
524 | ||
525 | ||
526 | def _nonblank(str): | |
527 | return str and not str.startswith('#') | |
528 | ||
529 | ||
@functools.singledispatch
def yield_lines(iterable):
    r"""
    Yield valid lines of a string or iterable.

    >>> list(yield_lines(''))
    []
    >>> list(yield_lines(['foo', 'bar']))
    ['foo', 'bar']
    >>> list(yield_lines('foo\nbar'))
    ['foo', 'bar']
    >>> list(yield_lines('\nfoo\n#bar\nbaz #comment'))
    ['foo', 'baz #comment']
    >>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n']))
    ['foo', 'bar', 'baz', 'bing']
    """
    # Generic case: recurse into each element and flatten the results;
    # the str case is handled by a registered overload.
    return itertools.chain.from_iterable(
        yield_lines(item) for item in iterable
    )
547 | ||
548 | ||
@yield_lines.register(str)
def _(text):
    # str overload: split into lines, strip each, and drop blank lines
    # and '#'-comment lines.
    stripped = (line.strip() for line in text.splitlines())
    return filter(_nonblank, stripped)
552 | ||
553 | ||
def drop_comment(line):
    """
    Drop comments.

    >>> drop_comment('foo # bar')
    'foo'

    A hash without a space may be in a URL.

    >>> drop_comment('http://example.com/foo#bar')
    'http://example.com/foo#bar'
    """
    before, sep, comment = line.partition(' #')
    return before
567 | ||
568 | ||
def join_continuation(lines):
    r"""
    Join lines continued by a trailing backslash.

    >>> list(join_continuation(['foo \\', 'bar', 'baz']))
    ['foobar', 'baz']
    >>> list(join_continuation(['foo \\', 'bar \\', 'baz']))
    ['foobarbaz']

    Quirk (preserved for compatibility): the character preceding the
    backslash is also elided.

    >>> list(join_continuation(['goo\\', 'dly']))
    ['godly']

    If no line is available to continue, the pending line is suppressed.

    >>> list(join_continuation(['foo', 'bar\\', 'baz\\']))
    ['foo']
    """
    source = iter(lines)
    for line in source:
        # Keep absorbing lines while this one ends in a backslash.
        while line.endswith('\\'):
            try:
                line = line[:-2].strip() + next(source)
            except StopIteration:
                # Nothing left to continue with; drop the partial line.
                return
        yield line