[yt-dlp.git] / yt_dlp / webvtt.py

"""
A partial parser for WebVTT segments. Interprets enough of the WebVTT stream
to be able to assemble a single stand-alone subtitle file, suitably adjusting
timestamps on the way, while everything else is passed through unmodified.

Regular expressions based on the W3C WebVTT specification
<https://www.w3.org/TR/webvtt1/>. The X-TIMESTAMP-MAP extension is described
in RFC 8216 §3.5 <https://tools.ietf.org/html/rfc8216#section-3.5>.
"""

import io
import re

from .utils import int_or_none, timetuple_from_msec


class _MatchParser:
    """
    A cursor over a string: syntax elements are matched at the current
    position, and the position is moved past whatever has been accepted.
    """

    def __init__(self, string):
        self._pos = 0
        self._data = string

    def match(self, r):
        """
        Test `r` against the data at the current position without moving.

        A compiled pattern yields the resulting re.Match (or None); a plain
        string yields its own length if the data continues with it, and None
        otherwise. Any other type raises ValueError.
        """
        if isinstance(r, re.Pattern):
            return r.match(self._data, self._pos)
        if not isinstance(r, str):
            raise ValueError(r)
        return len(r) if self._data.startswith(r, self._pos) else None

    def advance(self, by):
        """
        Move the position forward by the extent of `by` and return `by`
        unchanged, so that the result of match() can be passed straight
        through: None moves nothing, a re.Match moves by the length of the
        matched text, a string by its length and an int by its value.
        Any other type raises ValueError.
        """
        if isinstance(by, re.Match):
            step = by.end() - by.start()
        elif isinstance(by, str):
            step = len(by)
        elif isinstance(by, int):
            step = by
        elif by is None:
            step = 0
        else:
            raise ValueError(by)
        self._pos += step
        return by

    def consume(self, r):
        """
        match() followed by advance(): step over `r` if it is present and
        return what match() returned.
        """
        found = self.match(r)
        return self.advance(found)

    def child(self):
        """
        Return a child parser state starting at the current position.
        """
        return _MatchChildParser(self)


class _MatchChildParser(_MatchParser):
    """
    A parser state that shares its parent's data but keeps a position of
    its own, starting where the parent currently is. Nothing the child
    consumes affects the parent until commit() is called, which makes it
    possible to try a syntax element and simply drop the child to backtrack.
    """

    def __init__(self, parent):
        super().__init__(parent._data)
        self._pos = parent._pos
        self.__parent = parent

    def commit(self):
        """
        Move the parent to this child's current position and return the parent.
        """
        origin = self.__parent
        origin._pos = self._pos
        return origin


class ParseError(Exception):
    """
    Raised when the data at the parser's current position cannot be parsed.
    The message carries the position and up to 100 characters of the data
    that follows it.
    """

    def __init__(self, parser):
        pos = parser._pos
        snippet = parser._data[pos:pos + 100]
        super().__init__(f'Parse error at position {pos} (near {snippet!r})')


# While the specification <https://www.w3.org/TR/webvtt1/#webvtt-timestamp>
# prescribes that hours must be *2 or more* digits, timestamps with a single
# digit for the hour part have been seen in the wild.
# See https://github.com/yt-dlp/yt-dlp/issues/921
#
# Groups: (1) hours (optional), (2) minutes, (3) seconds, (4) milliseconds
# (optional). The "." after the seconds is required even when the
# millisecond digits are missing.
_REGEX_TS = re.compile(r'''(?x)
    (?:([0-9]{1,}):)?
    ([0-9]{2}):
    ([0-9]{2})\.
    ([0-9]{3})?
''')
# Succeeds (with an empty match) only at the very end of the data
_REGEX_EOF = re.compile(r'\Z')
# One line terminator (CRLF, CR or LF); the `$` alternative additionally
# lets it succeed with an empty match when the data ends here
_REGEX_NL = re.compile(r'(?:\r\n|[\r\n]|$)')
# One or more consecutive line terminators
_REGEX_BLANK = re.compile(r'(?:\r\n|[\r\n])+')
# A possibly empty run of spaces and tabs; always matches
_REGEX_OPTIONAL_WHITESPACE = re.compile(r'[ \t]*')


def _parse_ts(ts):
    """
    Turn a WebVTT timestamp, given as the re.Match produced by _REGEX_TS,
    into an MPEG PES timestamp, i.e. a number of ticks of a 90 kHz clock.
    Groups that did not participate in the match count as zero.
    """
    # Milliseconds contributed by one unit of each group, in group order:
    # hours, minutes, seconds, milliseconds
    factors = (3600_000, 60_000, 1000, 1)
    total_msec = 0
    for digits, factor in zip(ts.groups(), factors):
        if digits:
            total_msec += int(digits) * factor
    # 90 ticks per millisecond
    return 90 * total_msec


def _format_ts(ts):
    """
    Render an MPEG PES timestamp (90 kHz ticks) as a WebVTT timestamp.
    The tick count is rounded to whole milliseconds first, so any finer
    precision is lost.
    """
    # Adding half a millisecond's worth of ticks (45) before the floor
    # division rounds to the nearest millisecond instead of truncating
    msec = int((ts + 45) // 90)
    return '%02u:%02u:%02u.%03u' % timetuple_from_msec(msec)


class Block:
    """
    Base class of all WebVTT blocks.

    The generic implementation treats a block as opaque text: subclasses
    provide a compiled `_REGEX`, and whatever it matches is stored in the
    `raw` attribute and written back out verbatim.
    """

    def __init__(self, **kwargs):
        # Every keyword argument becomes an instance attribute
        for name in kwargs:
            setattr(self, name, kwargs[name])

    @classmethod
    def parse(cls, parser):
        """
        Try to read a block of this type at the parser's current position.
        On success the parser is moved past the block and a new instance is
        returned; otherwise the parser is left alone and None is returned.
        """
        found = parser.match(cls._REGEX)
        if found:
            parser.advance(found)
            return cls(raw=found.group(0))
        return None

    def write_into(self, stream):
        """
        Write the block's original text to `stream`.
        """
        stream.write(self.raw)


class HeaderBlock(Block):
    """
    Marker base class for blocks that belong to the header part of a
    WebVTT file, that is, blocks that may only occur before the first
    cue block. Adds no behaviour of its own.
    """


class Magic(HeaderBlock):
    """
    The "WEBVTT" signature line together with the header lines that follow
    it, up to the first blank line or the end of the data.

    Attributes set by parse():
      extra  -- the text after "WEBVTT" on the signature line, including the
                leading space or tab, or None if there is none
      local  -- the LOCAL value of X-TIMESTAMP-MAP in 90 kHz ticks, or None
      mpegts -- the MPEGTS value of X-TIMESTAMP-MAP as an int, or None
      meta   -- the raw "key: value" metadata header lines, concatenated
    """
    _REGEX = re.compile(r'\ufeff?WEBVTT([ \t][^\r\n]*)?(?:\r\n|[\r\n])')

    # XXX: The X-TIMESTAMP-MAP extension is described in RFC 8216 §3.5
    # <https://tools.ietf.org/html/rfc8216#section-3.5>, but the RFC
    # doesn't specify the exact grammar nor where in the WebVTT
    # syntax it should be placed; the below has been devised based
    # on usage in the wild
    #
    # And strictly speaking, the presence of this extension violates
    # the W3C WebVTT spec. Oh well.

    _REGEX_TSMAP = re.compile(r'X-TIMESTAMP-MAP=')
    _REGEX_TSMAP_LOCAL = re.compile(r'LOCAL:')
    _REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)')
    _REGEX_TSMAP_SEP = re.compile(r'[ \t]*,[ \t]*')

    # This was removed from the spec in the 2017 revision;
    # the last spec draft to describe this syntax element is
    # <https://www.w3.org/TR/2015/WD-webvtt1-20151208/#webvtt-metadata-header>.
    # Nevertheless, YouTube keeps serving those
    _REGEX_META = re.compile(r'(?:(?!-->)[^\r\n])+:(?:(?!-->)[^\r\n])+(?:\r\n|[\r\n])')

    @classmethod
    def __parse_tsmap(cls, parser):
        """
        Parse the value of an X-TIMESTAMP-MAP header (the part after the
        "=") through the end of its line: a comma-separated list of
        LOCAL:<timestamp> and MPEGTS:<integer> components in any order.

        Returns a (local, mpegts) tuple; a component that does not appear
        in the header is returned as None. Raises ParseError on anything
        that is not a recognised component, separator or line end.
        """
        parser = parser.child()

        # Start from None so that a header carrying only one of the two
        # components yields None for the other one, rather than failing
        # with UnboundLocalError at the return statement below.
        local, mpegts = None, None

        while True:
            if parser.consume(cls._REGEX_TSMAP_LOCAL):
                m = parser.consume(_REGEX_TS)
                if m is None:
                    raise ParseError(parser)
                local = _parse_ts(m)
            else:
                m = parser.consume(cls._REGEX_TSMAP_MPEGTS)
                if not m:
                    raise ParseError(parser)
                mpegts = int_or_none(m.group(1))
                if mpegts is None:
                    raise ParseError(parser)
            # A separator means another component follows; a line
            # terminator (or the end of the data) ends the header
            if parser.consume(cls._REGEX_TSMAP_SEP):
                continue
            if parser.consume(_REGEX_NL):
                break
            raise ParseError(parser)

        parser.commit()
        return local, mpegts

    @classmethod
    def parse(cls, parser):
        """
        Parse the signature line and the header lines after it, advancing
        `parser` past the blank line (or end of data) that closes the header.

        Raises ParseError if the data does not start with a WEBVTT signature
        line or if a header line is neither an X-TIMESTAMP-MAP nor a
        "key: value" metadata line.
        """
        parser = parser.child()

        m = parser.consume(cls._REGEX)
        if not m:
            raise ParseError(parser)

        extra = m.group(1)
        local, mpegts = None, None
        meta_lines = []
        # The header runs until an empty line or the end of the data
        while not parser.consume(_REGEX_NL):
            if parser.consume(cls._REGEX_TSMAP):
                local, mpegts = cls.__parse_tsmap(parser)
                continue
            m = parser.consume(cls._REGEX_META)
            if m:
                meta_lines.append(m.group(0))
                continue
            raise ParseError(parser)
        parser.commit()
        return cls(extra=extra, mpegts=mpegts, local=local, meta=''.join(meta_lines))

    def write_into(self, stream):
        """
        Write the signature line, an X-TIMESTAMP-MAP line if either of
        `local` and `mpegts` is set to a non-zero value (a missing one is
        written as zero), the metadata lines, and the closing blank line.
        """
        stream.write('WEBVTT')
        if self.extra is not None:
            stream.write(self.extra)
        stream.write('\n')
        if self.local or self.mpegts:
            stream.write('X-TIMESTAMP-MAP=LOCAL:')
            stream.write(_format_ts(self.local if self.local is not None else 0))
            stream.write(',MPEGTS:')
            stream.write(str(self.mpegts if self.mpegts is not None else 0))
            stream.write('\n')
        if self.meta:
            stream.write(self.meta)
        stream.write('\n')


class StyleBlock(HeaderBlock):
    """
    A STYLE block, kept as raw text: the "STYLE" line, any number of
    non-empty lines that do not contain "-->", and the blank line that
    terminates the block.
    """
    _REGEX = re.compile(r'''(?x)
        STYLE[\ \t]*(?:\r\n|[\r\n])
        ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
        (?:\r\n|[\r\n])
    ''')


class RegionBlock(HeaderBlock):
    """
    A REGION definition block, kept as raw text: the "REGION" line, any
    number of non-empty settings lines that do not contain "-->", and the
    line terminator that closes the block.
    """
    # The W3C grammar puts a line terminator between "REGION" and the
    # settings list. Without it in the pattern, only the "REGION" line itself
    # would be matched and the settings lines would be left behind to fail
    # parsing later on. The terminator is optional here so that input with
    # the settings on the "REGION" line keeps being accepted as well.
    _REGEX = re.compile(r'''(?x)
        REGION[\ \t]*(?:\r\n|[\r\n])?
        ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
        (?:\r\n|[\r\n])
    ''')


class CommentBlock(Block):
    """
    A NOTE (comment) block, kept as raw text: "NOTE" followed by a space,
    tab or line terminator, any number of non-empty lines that do not
    contain "-->", and the blank line that terminates the block.
    """
    _REGEX = re.compile(r'''(?x)
        NOTE(?:\r\n|[\ \t\r\n])
        ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
        (?:\r\n|[\r\n])
    ''')


class CueBlock(Block):
    """
    A cue block. The payload is not interpreted.

    Attributes:
      id       -- the cue identifier line (without line terminator), or None
      start    -- start time in 90 kHz ticks
      end      -- end time in 90 kHz ticks
      settings -- the cue settings text from the timing line, or None
      text     -- the raw payload lines, including their line terminators

    Cues compare equal when all of these attributes are equal. Since
    __eq__ is defined without __hash__, instances are not hashable.
    """

    _REGEX_ID = re.compile(r'((?:(?!-->)[^\r\n])+)(?:\r\n|[\r\n])')
    _REGEX_ARROW = re.compile(r'[ \t]+-->[ \t]+')
    _REGEX_SETTINGS = re.compile(r'[ \t]+((?:(?!-->)[^\r\n])+)')
    _REGEX_PAYLOAD = re.compile(r'[^\r\n]+(?:\r\n|[\r\n])?')

    @classmethod
    def parse(cls, parser):
        """
        Try to read a cue block at the parser's current position.

        Returns the parsed cue and advances `parser` past its payload, or
        returns None and leaves `parser` untouched if there is no well-formed
        cue here.
        """
        # Work on a child state so that a failed attempt consumes nothing
        parser = parser.child()

        id_ = None
        m = parser.consume(cls._REGEX_ID)
        if m:
            id_ = m.group(1)

        m0 = parser.consume(_REGEX_TS)
        if not m0:
            return None
        if not parser.consume(cls._REGEX_ARROW):
            return None
        m1 = parser.consume(_REGEX_TS)
        if not m1:
            return None
        m2 = parser.consume(cls._REGEX_SETTINGS)
        parser.consume(_REGEX_OPTIONAL_WHITESPACE)
        if not parser.consume(_REGEX_NL):
            return None

        start = _parse_ts(m0)
        end = _parse_ts(m1)
        settings = m2.group(1) if m2 is not None else None

        # The payload is every following non-empty line; it ends at the
        # first empty line or at the end of the data
        text = io.StringIO()
        while True:
            m = parser.consume(cls._REGEX_PAYLOAD)
            if not m:
                break
            text.write(m.group(0))

        parser.commit()
        return cls(
            id=id_,
            start=start, end=end, settings=settings,
            text=text.getvalue(),
        )

    def write_into(self, stream):
        """
        Write the cue to `stream`: the identifier line if there is one, the
        timing line (with settings, if any), the payload, and a blank line.
        """
        if self.id is not None:
            stream.write(self.id)
            stream.write('\n')
        stream.write(_format_ts(self.start))
        stream.write(' --> ')
        stream.write(_format_ts(self.end))
        if self.settings is not None:
            stream.write(' ')
            stream.write(self.settings)
        stream.write('\n')
        stream.write(self.text)
        # The payload of a cue that ended at the end of the data may lack its
        # final line terminator. Supply it, so that the newline written below
        # really produces the blank line that separates this cue from
        # whatever is written next.
        if self.text and not self.text.endswith(('\r', '\n')):
            stream.write('\n')
        stream.write('\n')

    @property
    def as_json(self):
        """
        The cue's attributes as a plain dict; the inverse of from_json().
        """
        return {
            'id': self.id,
            'start': self.start,
            'end': self.end,
            'text': self.text,
            'settings': self.settings,
        }

    def __eq__(self, other):
        # Let Python fall back to its default handling for objects that are
        # not cues instead of failing on their missing `as_json` attribute
        if not isinstance(other, CueBlock):
            return NotImplemented
        return self.as_json == other.as_json

    @classmethod
    def from_json(cls, json):
        """
        Build a cue from a dict of the shape produced by as_json.
        """
        return cls(
            id=json['id'],
            start=json['start'],
            end=json['end'],
            text=json['text'],
            settings=json['settings'],
        )

    def hinges(self, other):
        """
        Tell whether `other` continues this cue: both have the same payload
        and settings, and `other` starts exactly where this cue ends (with
        neither cue ending before it starts).
        """
        if self.text != other.text:
            return False
        if self.settings != other.settings:
            return False
        return self.start <= self.end == other.start <= other.end


def parse_fragment(frag_content):
    """
    Generate the (partially) parsed blocks of a WebVTT file whose raw
    contents are given as a bytes object.

    The Magic block comes first, then the header blocks (regions, styles
    and comments), then comments and cues. Blank lines between blocks are
    skipped. ParseError is raised when the data cannot be parsed.
    """

    parser = _MatchParser(frag_content.decode())

    yield Magic.parse(parser)

    # Header section: runs until something shows up that is not a header
    # block, which is then left for the cue section to deal with
    while not parser.match(_REGEX_EOF):
        if parser.consume(_REGEX_BLANK):
            continue

        for block_type in (RegionBlock, StyleBlock, CommentBlock):
            block = block_type.parse(parser)
            if block:
                yield block  # XXX: or skip, in the case of comments
                break
        else:
            break

    # Cue section: anything that is neither a comment nor a cue is an error
    while not parser.match(_REGEX_EOF):
        if parser.consume(_REGEX_BLANK):
            continue

        block = CommentBlock.parse(parser) or CueBlock.parse(parser)
        if not block:
            raise ParseError(parser)
        yield block  # XXX: or skip, in the case of comments
Commit	Line	Data
4a2f19ab F	1	"""
	2	A partial parser for WebVTT segments. Interprets enough of the WebVTT stream
	3	to be able to assemble a single stand-alone subtitle file, suitably adjusting
	4	timestamps on the way, while everything else is passed through unmodified.
	5
	6	Regular expressions based on the W3C WebVTT specification
	7	<https://www.w3.org/TR/webvtt1/>. The X-TIMESTAMP-MAP extension is described
	8	in RFC 8216 §3.5 <https://tools.ietf.org/html/rfc8216#section-3.5>.
	9	"""
	10
4a2f19ab	11	import io
6929b41a	12	import re
f8271158	13
aa7785f8	14	from .utils import int_or_none, timetuple_from_msec
4a2f19ab F	15
4a2f19ab F	16
86e5f3ed	17	class _MatchParser:
4a2f19ab F	18	"""
	19	An object that maintains the current parsing position and allows
	20	conveniently advancing it as syntax elements are successfully parsed.
	21	"""
	22
	23	def __init__(self, string):
	24	self._data = string
	25	self._pos = 0
	26
	27	def match(self, r):
77f90330	28	if isinstance(r, re.Pattern):
4a2f19ab F	29	return r.match(self._data, self._pos)
	30	if isinstance(r, str):
	31	if self._data.startswith(r, self._pos):
	32	return len(r)
	33	return None
	34	raise ValueError(r)
	35
	36	def advance(self, by):
	37	if by is None:
	38	amt = 0
77f90330	39	elif isinstance(by, re.Match):
4a2f19ab F	40	amt = len(by.group(0))
	41	elif isinstance(by, str):
	42	amt = len(by)
	43	elif isinstance(by, int):
	44	amt = by
	45	else:
	46	raise ValueError(by)
	47	self._pos += amt
	48	return by
	49
	50	def consume(self, r):
	51	return self.advance(self.match(r))
	52
	53	def child(self):
	54	return _MatchChildParser(self)
	55
	56
	57	class _MatchChildParser(_MatchParser):
	58	"""
	59	A child parser state, which advances through the same data as
	60	its parent, but has an independent position. This is useful when
	61	advancing through syntax elements we might later want to backtrack
	62	from.
	63	"""
	64
	65	def __init__(self, parent):
86e5f3ed	66	super().__init__(parent._data)
4a2f19ab F	67	self.__parent = parent
	68	self._pos = parent._pos
	69
	70	def commit(self):
	71	"""
	72	Advance the parent state to the current position of this child state.
	73	"""
	74	self.__parent._pos = self._pos
	75	return self.__parent
	76
	77
	78	class ParseError(Exception):
	79	def __init__(self, parser):
add96eb9	80	data = parser._data[parser._pos:parser._pos + 100]
add96eb9	81	super().__init__(f'Parse error at position {parser._pos} (near {data!r})')
4a2f19ab F	82
4a2f19ab F	83
81a136b8	84	# While the specification <https://www.w3.org/TR/webvtt1/#webvtt-timestamp>
	85	# prescribes that hours must be 2 or more digits, timestamps with a single
	86	# digit for the hour part has been seen in the wild.
	87	# See https://github.com/yt-dlp/yt-dlp/issues/921
4a2f19ab	88	_REGEX_TS = re.compile(r'''(?x)
81a136b8	89	(?:([0-9]{1,}):)?
4a2f19ab F	90	([0-9]{2}):
	91	([0-9]{2})\.
	92	([0-9]{3})?
	93	''')
	94	_REGEX_EOF = re.compile(r'\Z')
f352a097	95	_REGEX_NL = re.compile(r'(?:\r\n\|[\r\n]\|$)')
4a2f19ab	96	_REGEX_BLANK = re.compile(r'(?:\r\n\|[\r\n])+')
15f22b48	97	_REGEX_OPTIONAL_WHITESPACE = re.compile(r'[ \t]*')
4a2f19ab F	98
	99
	100	def _parse_ts(ts):
	101	"""
	102	Convert a parsed WebVTT timestamp (a re.Match obtained from _REGEX_TS)
	103	into an MPEG PES timestamp: a tick counter at 90 kHz resolution.
	104	"""
19a03940	105	return 90 * sum(
19a03940	106	int(part or 0) * mult for part, mult in zip(ts.groups(), (3600_000, 60_000, 1000, 1)))
4a2f19ab F	107
	108
	109	def _format_ts(ts):
	110	"""
	111	Convert an MPEG PES timestamp into a WebVTT timestamp.
	112	This will lose sub-millisecond precision.
	113	"""
aa7785f8	114	return '%02u:%02u:%02u.%03u' % timetuple_from_msec(int((ts + 45) // 90))
4a2f19ab F	115
4a2f19ab F	116
86e5f3ed	117	class Block:
4a2f19ab F	118	"""
	119	An abstract WebVTT block.
	120	"""
	121
	122	def __init__(self, **kwargs):
	123	for key, val in kwargs.items():
	124	setattr(self, key, val)
	125
	126	@classmethod
	127	def parse(cls, parser):
	128	m = parser.match(cls._REGEX)
	129	if not m:
	130	return None
	131	parser.advance(m)
	132	return cls(raw=m.group(0))
	133
	134	def write_into(self, stream):
	135	stream.write(self.raw)
	136
	137
	138	class HeaderBlock(Block):
	139	"""
	140	A WebVTT block that may only appear in the header part of the file,
	141	i.e. before any cue blocks.
	142	"""
4a2f19ab F	143	pass
	144
	145
	146	class Magic(HeaderBlock):
	147	_REGEX = re.compile(r'\ufeff?WEBVTT([ \t][^\r\n]*)?(?:\r\n\|[\r\n])')
	148
	149	# XXX: The X-TIMESTAMP-MAP extension is described in RFC 8216 §3.5
	150	# <https://tools.ietf.org/html/rfc8216#section-3.5>, but the RFC
add96eb9	151	# doesn't specify the exact grammar nor where in the WebVTT
4a2f19ab F	152	# syntax it should be placed; the below has been devised based
	153	# on usage in the wild
	154	#
	155	# And strictly speaking, the presence of this extension violates
	156	# the W3C WebVTT spec. Oh well.
	157
	158	_REGEX_TSMAP = re.compile(r'X-TIMESTAMP-MAP=')
	159	_REGEX_TSMAP_LOCAL = re.compile(r'LOCAL:')
	160	_REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)')
81a136b8	161	_REGEX_TSMAP_SEP = re.compile(r'[ \t],[ \t]')
4a2f19ab	162
c646d76f	163	# This was removed from the spec in the 2017 revision;
	164	# the last spec draft to describe this syntax element is
	165	# <https://www.w3.org/TR/2015/WD-webvtt1-20151208/#webvtt-metadata-header>.
	166	# Nevertheless, YouTube keeps serving those
	167	_REGEX_META = re.compile(r'(?:(?!-->)[^\r\n])+:(?:(?!-->)[^\r\n])+(?:\r\n\|[\r\n])')
	168
4a2f19ab F	169	@classmethod
	170	def __parse_tsmap(cls, parser):
	171	parser = parser.child()
	172
	173	while True:
	174	m = parser.consume(cls._REGEX_TSMAP_LOCAL)
	175	if m:
	176	m = parser.consume(_REGEX_TS)
	177	if m is None:
	178	raise ParseError(parser)
	179	local = _parse_ts(m)
	180	if local is None:
	181	raise ParseError(parser)
	182	else:
	183	m = parser.consume(cls._REGEX_TSMAP_MPEGTS)
	184	if m:
	185	mpegts = int_or_none(m.group(1))
	186	if mpegts is None:
	187	raise ParseError(parser)
	188	else:
	189	raise ParseError(parser)
81a136b8	190	if parser.consume(cls._REGEX_TSMAP_SEP):
4a2f19ab F	191	continue
	192	if parser.consume(_REGEX_NL):
	193	break
	194	raise ParseError(parser)
	195
	196	parser.commit()
	197	return local, mpegts
	198
	199	@classmethod
	200	def parse(cls, parser):
	201	parser = parser.child()
	202
	203	m = parser.consume(cls._REGEX)
	204	if not m:
	205	raise ParseError(parser)
	206
	207	extra = m.group(1)
c646d76f	208	local, mpegts, meta = None, None, ''
	209	while not parser.consume(_REGEX_NL):
	210	if parser.consume(cls._REGEX_TSMAP):
	211	local, mpegts = cls.__parse_tsmap(parser)
	212	continue
	213	m = parser.consume(cls._REGEX_META)
	214	if m:
	215	meta += m.group(0)
	216	continue
4a2f19ab F	217	raise ParseError(parser)
4a2f19ab F	218	parser.commit()
c646d76f	219	return cls(extra=extra, mpegts=mpegts, local=local, meta=meta)
4a2f19ab F	220
	221	def write_into(self, stream):
	222	stream.write('WEBVTT')
	223	if self.extra is not None:
	224	stream.write(self.extra)
	225	stream.write('\n')
	226	if self.local or self.mpegts:
	227	stream.write('X-TIMESTAMP-MAP=LOCAL:')
	228	stream.write(_format_ts(self.local if self.local is not None else 0))
	229	stream.write(',MPEGTS:')
	230	stream.write(str(self.mpegts if self.mpegts is not None else 0))
	231	stream.write('\n')
c646d76f	232	if self.meta:
c646d76f	233	stream.write(self.meta)
4a2f19ab F	234	stream.write('\n')
	235
	236
	237	class StyleBlock(HeaderBlock):
	238	_REGEX = re.compile(r'''(?x)
	239	STYLE[\ \t]*(?:\r\n\|[\r\n])
	240	((?:(?!-->)[^\r\n])+(?:\r\n\|[\r\n]))*
	241	(?:\r\n\|[\r\n])
	242	''')
	243
	244
	245	class RegionBlock(HeaderBlock):
	246	_REGEX = re.compile(r'''(?x)
	247	REGION[\ \t]*
	248	((?:(?!-->)[^\r\n])+(?:\r\n\|[\r\n]))*
	249	(?:\r\n\|[\r\n])
	250	''')
	251
	252
	253	class CommentBlock(Block):
	254	_REGEX = re.compile(r'''(?x)
	255	NOTE(?:\r\n\|[\ \t\r\n])
	256	((?:(?!-->)[^\r\n])+(?:\r\n\|[\r\n]))*
	257	(?:\r\n\|[\r\n])
	258	''')
	259
	260
	261	class CueBlock(Block):
	262	"""
	263	A cue block. The payload is not interpreted.
	264	"""
	265
	266	_REGEX_ID = re.compile(r'((?:(?!-->)[^\r\n])+)(?:\r\n\|[\r\n])')
	267	_REGEX_ARROW = re.compile(r'[ \t]+-->[ \t]+')
	268	_REGEX_SETTINGS = re.compile(r'[ \t]+((?:(?!-->)[^\r\n])+)')
	269	_REGEX_PAYLOAD = re.compile(r'[^\r\n]+(?:\r\n\|[\r\n])?')
	270
	271	@classmethod
	272	def parse(cls, parser):
	273	parser = parser.child()
	274
add96eb9	275	id_ = None
4a2f19ab F	276	m = parser.consume(cls._REGEX_ID)
4a2f19ab F	277	if m:
add96eb9	278	id_ = m.group(1)
4a2f19ab F	279
	280	m0 = parser.consume(_REGEX_TS)
	281	if not m0:
	282	return None
	283	if not parser.consume(cls._REGEX_ARROW):
	284	return None
	285	m1 = parser.consume(_REGEX_TS)
	286	if not m1:
	287	return None
	288	m2 = parser.consume(cls._REGEX_SETTINGS)
298230e5	289	parser.consume(_REGEX_OPTIONAL_WHITESPACE)
4a2f19ab F	290	if not parser.consume(_REGEX_NL):
	291	return None
	292
	293	start = _parse_ts(m0)
	294	end = _parse_ts(m1)
	295	settings = m2.group(1) if m2 is not None else None
	296
	297	text = io.StringIO()
	298	while True:
	299	m = parser.consume(cls._REGEX_PAYLOAD)
	300	if not m:
	301	break
	302	text.write(m.group(0))
	303
	304	parser.commit()
	305	return cls(
add96eb9	306	id=id_,
4a2f19ab	307	start=start, end=end, settings=settings,
add96eb9	308	text=text.getvalue(),
4a2f19ab F	309	)
	310
	311	def write_into(self, stream):
	312	if self.id is not None:
	313	stream.write(self.id)
	314	stream.write('\n')
	315	stream.write(_format_ts(self.start))
	316	stream.write(' --> ')
	317	stream.write(_format_ts(self.end))
	318	if self.settings is not None:
	319	stream.write(' ')
	320	stream.write(self.settings)
	321	stream.write('\n')
	322	stream.write(self.text)
	323	stream.write('\n')
	324
333217f4 F	325	@property
	326	def as_json(self):
	327	return {
	328	'id': self.id,
	329	'start': self.start,
	330	'end': self.end,
	331	'text': self.text,
	332	'settings': self.settings,
	333	}
	334
25a3f4f5 F	335	def __eq__(self, other):
	336	return self.as_json == other.as_json
	337
	338	@classmethod
	339	def from_json(cls, json):
	340	return cls(
	341	id=json['id'],
	342	start=json['start'],
	343	end=json['end'],
	344	text=json['text'],
add96eb9	345	settings=json['settings'],
25a3f4f5 F	346	)
	347
	348	def hinges(self, other):
	349	if self.text != other.text:
	350	return False
	351	if self.settings != other.settings:
	352	return False
	353	return self.start <= self.end == other.start <= other.end
	354
4a2f19ab F	355
	356	def parse_fragment(frag_content):
	357	"""
	358	A generator that yields (partially) parsed WebVTT blocks when given
	359	a bytes object containing the raw contents of a WebVTT file.
	360	"""
	361
0f06bcd7	362	parser = _MatchParser(frag_content.decode())
4a2f19ab F	363
	364	yield Magic.parse(parser)
	365
	366	while not parser.match(_REGEX_EOF):
	367	if parser.consume(_REGEX_BLANK):
	368	continue
	369
	370	block = RegionBlock.parse(parser)
	371	if block:
	372	yield block
	373	continue
	374	block = StyleBlock.parse(parser)
	375	if block:
	376	yield block
	377	continue
	378	block = CommentBlock.parse(parser)
	379	if block:
	380	yield block # XXX: or skip
	381	continue
	382
	383	break
	384
	385	while not parser.match(_REGEX_EOF):
	386	if parser.consume(_REGEX_BLANK):
	387	continue
	388
	389	block = CommentBlock.parse(parser)
	390	if block:
	391	yield block # XXX: or skip
	392	continue
	393	block = CueBlock.parse(parser)
	394	if block:
	395	yield block
	396	continue
	397
	398	raise ParseError(parser)