]>
Commit | Line | Data |
---|---|---|
e0df8241 JR |
1 | import re |
2 | import itertools | |
3 | import textwrap | |
4 | import functools | |
5 | ||
6 | try: | |
7 | from importlib.resources import files # type: ignore | |
8 | except ImportError: # pragma: nocover | |
9 | from pkg_resources.extern.importlib_resources import files # type: ignore | |
10 | ||
11 | from pkg_resources.extern.jaraco.functools import compose, method_cache | |
12 | from pkg_resources.extern.jaraco.context import ExceptionTrap | |
13 | ||
14 | ||
def substitution(old, new):
    """
    Build a replacer: a callable mapping a string ``s`` to
    ``s.replace(old, new)``.
    """
    def replace(s):
        return s.replace(old, new)

    return replace
20 | ||
21 | ||
def multi_substitution(*substitutions):
    """
    Take a sequence of pairs specifying substitutions, and create
    a function that performs those substitutions.

    >>> multi_substitution(('foo', 'bar'), ('bar', 'baz'))('foo')
    'baz'
    """
    replacers = [substitution(old, new) for old, new in substitutions]
    # compose applies its last argument first, so reverse the list to
    # apply the substitutions in the order given.
    replacers.reverse()
    return compose(*replacers)
35 | ||
36 | ||
class FoldedCase(str):
    """
    A case insensitive string class; behaves just like str
    except compares equal when the only variation is case.

    >>> s = FoldedCase('hello world')

    >>> s == 'Hello World'
    True

    >>> 'Hello World' == s
    True

    >>> s != 'Hello World'
    False

    >>> s.index('O')
    4

    >>> s.split('O')
    ['hell', ' w', 'rld']

    >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta']))
    ['alpha', 'Beta', 'GAMMA']

    Sequence membership is straightforward.

    >>> "Hello World" in [s]
    True
    >>> s in ["Hello World"]
    True

    You may test for set inclusion, but candidate and elements
    must both be folded.

    >>> FoldedCase("Hello World") in {s}
    True
    >>> s in {FoldedCase("Hello World")}
    True

    String inclusion works as long as the FoldedCase object
    is on the right.

    >>> "hello" in FoldedCase("Hello World")
    True

    But not if the FoldedCase object is on the left:

    >>> FoldedCase('hello') in 'Hello World'
    False

    In that case, use ``in_``:

    >>> FoldedCase('hello').in_('Hello World')
    True

    >>> FoldedCase('hello') > FoldedCase('Hello')
    False
    """

    # Comparisons fold both operands via .lower(); ``other`` is assumed
    # to be a str (or expose a compatible .lower()).
    def __lt__(self, other):
        return self.lower() < other.lower()

    def __gt__(self, other):
        return self.lower() > other.lower()

    def __eq__(self, other):
        return self.lower() == other.lower()

    def __ne__(self, other):
        return self.lower() != other.lower()

    # Hash the folded value so instances that compare equal (ignoring
    # case) also hash equal, keeping dict/set semantics consistent.
    def __hash__(self):
        return hash(self.lower())

    # Case-insensitive substring test (works when the FoldedCase is on
    # the right of ``in``). Uses the plain str.lower of self via super().
    def __contains__(self, other):
        return super().lower().__contains__(other.lower())

    def in_(self, other):
        "Does self appear in other?"
        return self in FoldedCase(other)

    # cache lower since it's likely to be called frequently.
    @method_cache
    def lower(self):
        return super().lower()

    # Case-insensitive index; raises ValueError (from str.index) when
    # ``sub`` does not occur.
    def index(self, sub):
        return self.lower().index(sub.lower())

    # Case-insensitive split. NOTE(review): maxsplit=0 means "no limit"
    # here (re.split semantics), unlike str.split where 0 means no splits.
    def split(self, splitter=' ', maxsplit=0):
        pattern = re.compile(re.escape(splitter), re.I)
        return pattern.split(self, maxsplit)
130 | ||
131 | ||
# Python 3.8 compatibility
# Trap used by `is_decodable` below: its ``passes`` decorator turns a
# callable into a predicate returning True when no UnicodeDecodeError
# was raised, False otherwise.
_unicode_trap = ExceptionTrap(UnicodeDecodeError)
134 | ||
135 | ||
@_unicode_trap.passes
def is_decodable(value):
    r"""
    Return True if the supplied value is decodable (using the default
    encoding).

    >>> is_decodable(b'\xff')
    False
    >>> is_decodable(b'\x32')
    True
    """
    # The ``passes`` decorator converts the outcome into a boolean:
    # True when ``value.decode()`` completes, False when it raises
    # UnicodeDecodeError. The body's return value is unused.
    value.decode()
148 | ||
149 | ||
def is_binary(value):
    r"""
    Return True if the value appears to be binary (that is, it's a byte
    string and isn't decodable).

    >>> is_binary(b'\xff')
    True
    >>> is_binary('\xff')
    False
    """
    # Text (or anything that isn't bytes) is never considered binary.
    if not isinstance(value, bytes):
        return False
    return not is_decodable(value)
161 | ||
162 | ||
def trim(s):
    r"""
    Trim something like a docstring to remove the whitespace that
    is common due to indentation and formatting.

    >>> trim("\n\tfoo = bar\n\t\tbar = baz\n")
    'foo = bar\n\tbar = baz'
    """
    dedented = textwrap.dedent(s)
    return dedented.strip()
172 | ||
173 | ||
def wrap(s):
    r"""
    Wrap lines of text, retaining existing newlines as
    paragraph markers.

    Each input line is treated as one paragraph, wrapped to the
    textwrap default width; paragraphs are re-joined with blank lines.

    >>> wrap('foo bar\nbaz')
    'foo bar\n\nbaz'
    """
    return '\n\n'.join(
        '\n'.join(textwrap.wrap(paragraph)) for paragraph in s.splitlines()
    )
205 | ||
206 | ||
def unwrap(s):
    r"""
    Given a multi-line string, return an unwrapped version.

    Paragraphs (runs of text separated by blank lines) are each
    collapsed onto a single line; the result has one line per paragraph.

    >>> unwrap('a b\nc\n\nd e')
    'a b c\nd e'
    """
    collapsed = (
        paragraph.replace('\n', ' ') for paragraph in re.split(r'\n\n+', s)
    )
    return '\n'.join(collapsed)
225 | ||
226 | ||
227 | ||
228 | ||
class Splitter(object):
    """
    Callable that splits any string with a fixed set of arguments.

    >>> s = Splitter(',')
    >>> s('hello, world, this is your, master calling')
    ['hello', ' world', ' this is your', ' master calling']
    """

    def __init__(self, *args):
        # Arguments forwarded verbatim to str.split on every call.
        self.args = args

    def __call__(self, s):
        return s.split(*self.args)
242 | ||
243 | ||
def indent(string, prefix=' ' * 4):
    """
    Prepend *prefix* to *string* (the string as a whole, not per line).

    >>> indent('foo')
    '    foo'
    """
    return ''.join((prefix, string))
250 | ||
251 | ||
class WordSet(tuple):
    """
    Given an identifier, return the words that identifier represents,
    whether in camel case, underscore-separated, etc.

    >>> WordSet.parse("camelCase")
    ('camel', 'Case')

    >>> WordSet.parse("under_sep")
    ('under', 'sep')

    Acronyms are retained.

    >>> WordSet.parse("firstSNL")
    ('first', 'SNL')

    >>> WordSet.parse("A simple test")
    ('A', 'simple', 'test')

    Multiple caps do not interfere with the first cap of another word.

    >>> WordSet.parse("myABCClass")
    ('my', 'ABC', 'Class')

    The result is a WordSet, so the words can be re-assembled in
    whatever form is needed.

    >>> WordSet.parse("myABCClass").underscore_separated()
    'my_ABC_Class'

    >>> WordSet.parse('a-command').camel_case()
    'ACommand'

    >>> WordSet.parse('someIdentifier').lowered().space_separated()
    'some identifier'

    Slicing returns another WordSet.

    >>> WordSet.parse('taken-out-of-context')[1:].underscore_separated()
    'out_of_context'

    >>> WordSet.from_class_name(WordSet()).lowered().space_separated()
    'word set'

    >>> example = WordSet.parse('figured it out')
    >>> example.headless_camel_case()
    'figuredItOut'
    >>> example.dash_separated()
    'figured-it-out'
    """

    # Either a capitalized-or-lower word, or a run of caps not followed
    # by a lowercase letter (an acronym).
    _pattern = re.compile('([A-Z]?[a-z]+)|([A-Z]+(?![a-z]))')

    def capitalized(self):
        "Return a new WordSet with each word capitalized."
        return WordSet(map(str.capitalize, self))

    def lowered(self):
        "Return a new WordSet with each word lowercased."
        return WordSet(map(str.lower, self))

    def camel_case(self):
        "Join the words, each capitalized: 'ACommand'."
        return ''.join(self.capitalized())

    def headless_camel_case(self):
        """
        Join as camel case but with the first word lowercased:
        'figuredItOut'. Raises StopIteration when the set is empty.
        """
        remainder = iter(self)
        head = next(remainder).lower()
        return head + WordSet(remainder).camel_case()

    def underscore_separated(self):
        "Join the words with underscores."
        return '_'.join(self)

    def dash_separated(self):
        "Join the words with dashes."
        return '-'.join(self)

    def space_separated(self):
        "Join the words with spaces."
        return ' '.join(self)

    def trim_right(self, item):
        """
        Remove the item from the end of the set.

        >>> WordSet.parse('foo bar').trim_right('foo')
        ('foo', 'bar')
        >>> WordSet.parse('foo bar').trim_right('bar')
        ('foo',)
        >>> WordSet.parse('').trim_right('bar')
        ()
        """
        if self and self[-1] == item:
            return self[:-1]
        return self

    def trim_left(self, item):
        """
        Remove the item from the beginning of the set.

        >>> WordSet.parse('foo bar').trim_left('foo')
        ('bar',)
        >>> WordSet.parse('foo bar').trim_left('bar')
        ('foo', 'bar')
        >>> WordSet.parse('').trim_left('bar')
        ()
        """
        if self and self[0] == item:
            return self[1:]
        return self

    def trim(self, item):
        """
        Remove the item from both ends of the set.

        >>> WordSet.parse('foo bar').trim('foo')
        ('bar',)
        """
        return self.trim_left(item).trim_right(item)

    def __getitem__(self, item):
        # Preserve the WordSet type for slices; single items remain str.
        result = super().__getitem__(item)
        return WordSet(result) if isinstance(item, slice) else result

    @classmethod
    def parse(cls, identifier):
        "Split *identifier* into its component words."
        found = cls._pattern.finditer(identifier)
        return WordSet(match.group(0) for match in found)

    @classmethod
    def from_class_name(cls, subject):
        "Parse the name of *subject*'s class into words."
        return cls.parse(subject.__class__.__name__)
379 | ||
380 | ||
# for backward compatibility: the historical ``words`` API is an alias
# of WordSet.parse.
words = WordSet.parse
383 | ||
384 | ||
def simple_html_strip(s):
    r"""
    Remove HTML from the string `s`.

    >>> str(simple_html_strip(''))
    ''

    >>> print(simple_html_strip('A <bold>stormy</bold> day in paradise'))
    A stormy day in paradise

    >>> print(simple_html_strip('What about<br/>\nmultiple lines?'))
    What about
    multiple lines?
    """
    # Group 1: comments; group 2: tags; group 3: text. Only text (group
    # 3) survives; comments and tags contribute nothing.
    html_stripper = re.compile('(<!--.*?-->)|(<[^>]*>)|([^<]+)', re.DOTALL)
    return ''.join(
        match.group(3) or '' for match in html_stripper.finditer(s)
    )
405 | ||
406 | ||
class SeparatedValues(str):
    """
    A string separated by a separator. Overrides __iter__ for getting
    the values.

    >>> list(SeparatedValues('a,b,c'))
    ['a', 'b', 'c']

    Whitespace is stripped and empty values are discarded.

    >>> list(SeparatedValues(' a, b , c, '))
    ['a', 'b', 'c']
    """

    separator = ','

    def __iter__(self):
        stripped = (value.strip() for value in self.split(self.separator))
        # filter(None, ...) discards the empty strings.
        return filter(None, stripped)
426 | ||
427 | ||
class Stripper:
    r"""
    Given a series of lines, find the common prefix and strip it from them.

    >>> lines = [
    ...     'abcdefg\n',
    ...     'abc\n',
    ...     'abcde\n',
    ... ]
    >>> res = Stripper.strip_prefix(lines)
    >>> res.prefix
    'abc'
    >>> list(res.lines)
    ['defg\n', '\n', 'de\n']

    If no prefix is common, nothing should be stripped.

    >>> lines = [
    ...     'abcd\n',
    ...     '1234\n',
    ... ]
    >>> res = Stripper.strip_prefix(lines)
    >>> res.prefix
    ''
    >>> list(res.lines)
    ['abcd\n', '1234\n']
    """

    # NOTE: the second doctest previously read ``res.prefix = ''``,
    # which *assigned* rather than asserted; fixed to check the value.

    def __init__(self, prefix, lines):
        self.prefix = prefix
        # Strip lazily: each line loses the prefix as ``lines`` is consumed.
        self.lines = map(self, lines)

    @classmethod
    def strip_prefix(cls, lines):
        """
        Compute the longest common prefix of *lines* and return a
        Stripper whose ``lines`` yields each line with it removed.
        Raises TypeError (from reduce) when *lines* is empty.
        """
        prefix_lines, lines = itertools.tee(lines)
        prefix = functools.reduce(cls.common_prefix, prefix_lines)
        return cls(prefix, lines)

    def __call__(self, line):
        """Return *line* with the stored prefix removed."""
        if not self.prefix:
            return line
        # Every line begins with the common prefix, so partition yields
        # an empty head and the remainder after the prefix.
        null, prefix, rest = line.partition(self.prefix)
        return rest

    @staticmethod
    def common_prefix(s1, s2):
        """
        Return the common prefix of two lines.
        """
        index = min(len(s1), len(s2))
        while s1[:index] != s2[:index]:
            index -= 1
        return s1[:index]
480 | ||
481 | ||
def remove_prefix(text, prefix):
    """
    Remove the prefix from the text if it exists.

    >>> remove_prefix('underwhelming performance', 'underwhelming ')
    'performance'

    >>> remove_prefix('something special', 'sample')
    'something special'
    """
    # Previously implemented with str.rpartition, which removed
    # everything up to and including the *last* occurrence of ``prefix``
    # anywhere in ``text`` (e.g. remove_prefix('aba', 'a') -> '' instead
    # of 'ba'), even when ``prefix`` was not actually a prefix. Use
    # str.removeprefix (3.9+) semantics instead.
    if prefix and text.startswith(prefix):
        return text[len(prefix):]
    return text
494 | ||
495 | ||
def remove_suffix(text, suffix):
    """
    Remove the suffix from the text if it exists.

    >>> remove_suffix('name.git', '.git')
    'name'

    >>> remove_suffix('something special', 'sample')
    'something special'
    """
    # Previously implemented with str.partition, which truncated at the
    # *first* occurrence of ``suffix`` anywhere in ``text`` (e.g.
    # remove_suffix('name.git.git', '.git') -> 'name' instead of
    # 'name.git'), even when ``suffix`` was not actually a suffix. Use
    # str.removesuffix (3.9+) semantics instead.
    if suffix and text.endswith(suffix):
        return text[: -len(suffix)]
    return text
508 | ||
509 | ||
def normalize_newlines(text):
    r"""
    Replace alternate newlines with the canonical newline.

    >>> normalize_newlines('Lorem Ipsum\u2029')
    'Lorem Ipsum\n'
    >>> normalize_newlines('Lorem Ipsum\r\n')
    'Lorem Ipsum\n'
    >>> normalize_newlines('Lorem Ipsum\x85')
    'Lorem Ipsum\n'
    """
    # '\r\n' is listed first so the alternation matches it before a
    # lone '\r' would.
    newlines = ['\r\n', '\r', '\n', '\u0085', '\u2028', '\u2029']
    return re.sub('|'.join(newlines), '\n', text)
524 | ||
525 | ||
526 | def _nonblank(str): | |
527 | return str and not str.startswith('#') | |
528 | ||
529 | ||
@functools.singledispatch
def yield_lines(iterable):
    r"""
    Yield valid lines of a string or iterable.

    >>> list(yield_lines(''))
    []
    >>> list(yield_lines(['foo', 'bar']))
    ['foo', 'bar']
    >>> list(yield_lines('foo\nbar'))
    ['foo', 'bar']
    >>> list(yield_lines('\nfoo\n#bar\nbaz #comment'))
    ['foo', 'baz #comment']
    >>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n']))
    ['foo', 'bar', 'baz', 'bing']
    """
    # Generic case: recurse into each element and flatten the results;
    # the str case is handled by a registered overload.
    return itertools.chain.from_iterable(
        yield_lines(item) for item in iterable
    )
547 | ||
548 | ||
@yield_lines.register(str)
def _(text):
    # str overload: split into lines, strip each, and drop blank lines
    # and '#'-comment lines.
    stripped = (line.strip() for line in text.splitlines())
    return filter(_nonblank, stripped)
552 | ||
553 | ||
def drop_comment(line):
    """
    Drop comments.

    >>> drop_comment('foo # bar')
    'foo'

    A hash without a space may be in a URL.

    >>> drop_comment('http://example.com/foo#bar')
    'http://example.com/foo#bar'
    """
    before, sep, comment = line.partition(' #')
    return before
567 | ||
568 | ||
def join_continuation(lines):
    r"""
    Join lines continued by a trailing backslash.

    >>> list(join_continuation(['foo \\', 'bar', 'baz']))
    ['foobar', 'baz']
    >>> list(join_continuation(['foo \\', 'bar \\', 'baz']))
    ['foobarbaz']

    Quirk (preserved for compatibility): the character preceding the
    backslash is also elided.

    >>> list(join_continuation(['goo\\', 'dly']))
    ['godly']

    If no line is available to continue, the pending line is suppressed.

    >>> list(join_continuation(['foo', 'bar\\', 'baz\\']))
    ['foo']
    """
    source = iter(lines)
    for line in source:
        # Keep absorbing lines while this one ends in a backslash.
        while line.endswith('\\'):
            try:
                line = line[:-2].strip() + next(source)
            except StopIteration:
                # Nothing left to continue with; drop the partial line.
                return
        yield line