[yt-dlp.git] / yt_dlp / downloader / youtube_live_chat.py

import json
import time
import urllib.error

from .fragment import FragmentFD
from ..utils import RegexNotFoundError, dict_get, int_or_none, try_get


class YoutubeLiveChatFD(FragmentFD):
    """ Downloads YouTube live chats fragment by fragment """

    def real_download(self, filename, info_dict):
        """
        Download a YouTube live chat (live or replay) fragment by fragment.

        The page at info_dict['url'] is fetched first to obtain the initial
        chat continuation token and the ytcfg (API key and innertube context).
        Continuations are then followed until no further one is returned, and
        every chat action is appended to the output as one JSON line.

        info_dict must contain 'video_id', 'url' and 'protocol' (either
        'youtube_live_chat' or 'youtube_live_chat_replay'); 'http_headers'
        is optional.

        Returns True when the continuation chain was followed to its end (or,
        in test mode, after the first fragment) and False when a required
        piece of data could not be obtained or a fragment download failed.
        """
        video_id = info_dict['video_id']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
        if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
            self.report_warning('Live chat download runs until the livestream ends. '
                                'If you wish to download the video simultaneously, run a separate yt-dlp instance')

        fragment_retries = self.params.get('fragment_retries', 0)
        test = self.params.get('test', False)

        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        from ..extractor.youtube import YoutubeBaseInfoExtractor

        ie = YoutubeBaseInfoExtractor(self.ydl)

        # Wall-clock start in milliseconds; live offsets are measured from it
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            """Download one fragment, merging extra headers over info_dict's http_headers"""
            http_headers = info_dict.get('http_headers', {})
            if headers:
                # copy first so the headers stored in info_dict are not modified
                http_headers = http_headers.copy()
                http_headers.update(headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
            """
            Append all replay actions as JSON lines and return
            (continuation_id, offset, click_tracking_params).

            offset is the videoOffsetTimeMsec of the last replayChatItemAction
            seen, or None when there was none; in that case no continuation is
            looked up, which ends the download loop.
            """
            offset = continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                if 'replayChatItemAction' in action:
                    replay_chat_item_action = action['replayChatItemAction']
                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                processed_fragment.extend(
                    json.dumps(action, ensure_ascii=False).encode() + b'\n')
            if offset is not None:
                continuation = try_get(
                    live_chat_continuation,
                    lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
                if continuation:
                    continuation_id = continuation.get('continuation')
                    click_tracking_params = continuation.get('clickTrackingParams')
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, offset, click_tracking_params

        def try_refresh_replay_beginning(live_chat_continuation):
            """
            Handle the first replay fragment: switch to the second view
            selector entry when it is present, otherwise parse the fragment
            like any other replay fragment.
            """
            # choose the second option that contains the unfiltered live chat replay
            refresh_continuation = try_get(
                live_chat_continuation,
                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
            if refresh_continuation:
                # no data yet but required to call _append_fragment
                self._append_fragment(ctx, b'')
                refresh_continuation_id = refresh_continuation.get('continuation')
                offset = 0
                click_tracking_params = refresh_continuation.get('trackingParams')
                return refresh_continuation_id, offset, click_tracking_params
            return parse_actions_replay(live_chat_continuation)

        # Offset of the most recent timestamped live action; kept across
        # fragments so that actions without a timestamp reuse the last one
        live_offset = 0

        def parse_actions_live(live_chat_continuation):
            """
            Append all live actions (wrapped to look like replay actions) as
            JSON lines, wait for the interval given by the continuation data
            and return (continuation_id, live_offset, click_tracking_params).
            """
            nonlocal live_offset
            continuation_id = click_tracking_params = timeout_ms = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                timestamp = self.parse_live_timestamp(action)
                if timestamp is not None:
                    live_offset = timestamp - start_time
                # compatibility with replay format
                pseudo_action = {
                    'replayChatItemAction': {'actions': [action]},
                    'videoOffsetTimeMsec': str(live_offset),
                    'isLive': True,
                }
                processed_fragment.extend(
                    json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
            continuation_data_getters = [
                lambda x: x['continuations'][0]['invalidationContinuationData'],
                lambda x: x['continuations'][0]['timedContinuationData'],
            ]
            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
            if continuation_data:
                continuation_id = continuation_data.get('continuation')
                click_tracking_params = continuation_data.get('clickTrackingParams')
                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
            # Write the parsed actions before waiting, so that they are not
            # held back (or lost on interruption) during the sleep below
            self._append_fragment(ctx, processed_fragment)
            if timeout_ms is not None:
                time.sleep(timeout_ms / 1000)
            return continuation_id, live_offset, click_tracking_params

        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
            """
            Download and parse one fragment, retrying on HTTP errors up to
            fragment_retries times.

            Returns (success, continuation_id, offset, click_tracking_params);
            on failure the last three values are None.
            """
            count = 0
            while count <= fragment_retries:
                try:
                    success = dl_fragment(url, request_data, headers)
                    if not success:
                        return False, None, None, None
                    raw_fragment = self._read_fragment(ctx)
                    # The first fragment is a page with embedded initial data;
                    # fall back to treating the fragment as plain JSON
                    try:
                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
                        data = None
                    if not data:
                        data = json.loads(raw_fragment)
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
                    if info_dict['protocol'] == 'youtube_live_chat_replay':
                        if frag_index == 1:
                            continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
                        else:
                            continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
                    elif info_dict['protocol'] == 'youtube_live_chat':
                        continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
                    return True, continuation_id, offset, click_tracking_params
                except urllib.error.HTTPError as err:
                    count += 1
                    if count <= fragment_retries:
                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
            # The loop only ends without returning once the retries are used up
            self.report_error('giving up after %s fragment retries' % fragment_retries)
            return False, None, None, None

        self._prepare_and_start_frag_download(ctx, info_dict)

        success = dl_fragment(info_dict['url'])
        if not success:
            return False
        raw_fragment = self._read_fragment(ctx)
        try:
            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
        except RegexNotFoundError:
            return False
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'],
            str)
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))

        if not ytcfg:
            return False
        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
        if not api_key or not innertube_context:
            return False
        if not continuation_id:
            # The page carried no usable chat continuation; without this
            # check the URL concatenation below would raise a TypeError
            return False
        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
        if info_dict['protocol'] == 'youtube_live_chat_replay':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
        elif info_dict['protocol'] == 'youtube_live_chat':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id

        frag_index = offset = 0
        click_tracking_params = None
        while continuation_id is not None:
            frag_index += 1
            request_data = {
                'context': innertube_context,
                'continuation': continuation_id,
            }
            if frag_index > 1:
                # Every fragment after the first is requested from the API
                # endpoint with a JSON body
                request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
                if click_tracking_params:
                    # NOTE: this writes into innertube_context itself, so the
                    # value stays in place for later requests until replaced
                    request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
                headers.update({'content-type': 'application/json'})
                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    url, frag_index, fragment_request_data, headers)
            else:
                # The first fragment is the chat page itself
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    chat_page_url, frag_index)
            if not success:
                return False
            if test:
                break

        self._finish_frag_download(ctx, info_dict)
        return True

    @staticmethod
    def parse_live_timestamp(action):
        """
        Return the timestamp carried by a live chat action, or None.

        The action is unwrapped step by step (action type -> item or banner
        -> renderer); ticker and banner renderers may wrap a regular message
        renderer, which is then used instead. The renderer's 'timestampUsec'
        is passed through int_or_none with a scale of 1000 (presumably
        turning microseconds into milliseconds - confirm against int_or_none).
        """
        message_renderer_keys = [
            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
        ]
        ticker_renderer_keys = [
            'liveChatTickerPaidMessageItemRenderer',
            'liveChatTickerSponsorItemRenderer',
        ]
        banner_renderer_keys = ['liveChatBannerRenderer']

        payload = dict_get(
            action,
            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
        if not isinstance(payload, dict):
            return None

        wrapped_item = dict_get(payload, ['item', 'bannerRenderer'])
        if not isinstance(wrapped_item, dict):
            return None

        # Lookup order is kept: text renderers, then ticker, then banner
        renderer = dict_get(
            wrapped_item,
            message_renderer_keys + ticker_renderer_keys + banner_renderer_keys)
        if not isinstance(renderer, dict):
            return None

        # Ticker and banner renderers nest the actual message one level deeper
        inner_item = try_get(
            renderer,
            [
                lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
                lambda x: x['contents'],
            ],
            dict)
        if inner_item:
            renderer = dict_get(inner_item, message_renderer_keys)
            if not isinstance(renderer, dict):
                return None

        return int_or_none(renderer.get('timestampUsec'), 1000)
Commit	Line	Data
a78e3a57	1	import json
c60ee3a2	2	import time
ac668111	3	import urllib.error
a78e3a57	4
a78e3a57	5	from .fragment import FragmentFD
f8271158	6	from ..utils import RegexNotFoundError, dict_get, int_or_none, try_get
a78e3a57	7
a78e3a57	8
c60ee3a2	9	class YoutubeLiveChatFD(FragmentFD):
c60ee3a2	10	""" Downloads YouTube live chats fragment by fragment """
a78e3a57	11
a78e3a57	12	def real_download(self, filename, info_dict):
	13	video_id = info_dict['video_id']
	14	self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
592b7485	15	if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
08d30158	16	self.report_warning('Live chat download runs until the livestream ends. '
08d30158	17	'If you wish to download the video simultaneously, run a separate yt-dlp instance')
a78e3a57	18
82e3f6eb	19	fragment_retries = self.params.get('fragment_retries', 0)
a78e3a57	20	test = self.params.get('test', False)
	21
	22	ctx = {
	23	'filename': filename,
	24	'live': True,
	25	'total_frags': None,
	26	}
	27
c487cf00	28	from ..extractor.youtube import YoutubeBaseInfoExtractor
	29
	30	ie = YoutubeBaseInfoExtractor(self.ydl)
a78e3a57	31
c60ee3a2	32	start_time = int(time.time() * 1000)
c60ee3a2	33
273762c8	34	def dl_fragment(url, data=None, headers=None):
	35	http_headers = info_dict.get('http_headers', {})
	36	if headers:
	37	http_headers = http_headers.copy()
	38	http_headers.update(headers)
	39	return self._download_fragment(ctx, url, info_dict, http_headers, data)
a78e3a57	40
c60ee3a2	41	def parse_actions_replay(live_chat_continuation):
c2603313	42	offset = continuation_id = click_tracking_params = None
c60ee3a2	43	processed_fragment = bytearray()
	44	for action in live_chat_continuation.get('actions', []):
	45	if 'replayChatItemAction' in action:
	46	replay_chat_item_action = action['replayChatItemAction']
	47	offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
	48	processed_fragment.extend(
0f06bcd7	49	json.dumps(action, ensure_ascii=False).encode() + b'\n')
c60ee3a2	50	if offset is not None:
c2603313	51	continuation = try_get(
c60ee3a2	52	live_chat_continuation,
c2603313	53	lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
	54	if continuation:
	55	continuation_id = continuation.get('continuation')
	56	click_tracking_params = continuation.get('clickTrackingParams')
c60ee3a2	57	self._append_fragment(ctx, processed_fragment)
c2603313	58	return continuation_id, offset, click_tracking_params
c60ee3a2	59
d534c452	60	def try_refresh_replay_beginning(live_chat_continuation):
d534c452	61	# choose the second option that contains the unfiltered live chat replay
c2603313	62	refresh_continuation = try_get(
d534c452	63	live_chat_continuation,
c2603313	64	lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
c2603313	65	if refresh_continuation:
d534c452	66	# no data yet but required to call _append_fragment
d534c452	67	self._append_fragment(ctx, b'')
c2603313	68	refresh_continuation_id = refresh_continuation.get('continuation')
	69	offset = 0
	70	click_tracking_params = refresh_continuation.get('trackingParams')
	71	return refresh_continuation_id, offset, click_tracking_params
d534c452	72	return parse_actions_replay(live_chat_continuation)
d534c452	73
c60ee3a2	74	live_offset = 0
	75
	76	def parse_actions_live(live_chat_continuation):
	77	nonlocal live_offset
c2603313	78	continuation_id = click_tracking_params = None
c60ee3a2	79	processed_fragment = bytearray()
	80	for action in live_chat_continuation.get('actions', []):
	81	timestamp = self.parse_live_timestamp(action)
	82	if timestamp is not None:
	83	live_offset = timestamp - start_time
	84	# compatibility with replay format
	85	pseudo_action = {
	86	'replayChatItemAction': {'actions': [action]},
	87	'videoOffsetTimeMsec': str(live_offset),
	88	'isLive': True,
	89	}
	90	processed_fragment.extend(
0f06bcd7	91	json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
c60ee3a2	92	continuation_data_getters = [
	93	lambda x: x['continuations'][0]['invalidationContinuationData'],
	94	lambda x: x['continuations'][0]['timedContinuationData'],
	95	]
	96	continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
	97	if continuation_data:
	98	continuation_id = continuation_data.get('continuation')
c2603313	99	click_tracking_params = continuation_data.get('clickTrackingParams')
c60ee3a2	100	timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
	101	if timeout_ms is not None:
	102	time.sleep(timeout_ms / 1000)
	103	self._append_fragment(ctx, processed_fragment)
c2603313	104	return continuation_id, live_offset, click_tracking_params
c60ee3a2	105
d534c452	106	def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
82e3f6eb	107	count = 0
	108	while count <= fragment_retries:
	109	try:
d71fd412	110	success = dl_fragment(url, request_data, headers)
82e3f6eb	111	if not success:
c2603313	112	return False, None, None, None
d71fd412	113	raw_fragment = self._read_fragment(ctx)
d534c452	114	try:
11f9be09	115	data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
d534c452	116	except RegexNotFoundError:
	117	data = None
	118	if not data:
	119	data = json.loads(raw_fragment)
82e3f6eb	120	live_chat_continuation = try_get(
	121	data,
	122	lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
d534c452	123	if info_dict['protocol'] == 'youtube_live_chat_replay':
d534c452	124	if frag_index == 1:
c2603313	125	continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
d534c452	126	else:
c2603313	127	continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
d534c452	128	elif info_dict['protocol'] == 'youtube_live_chat':
c2603313	129	continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
c2603313	130	return True, continuation_id, offset, click_tracking_params
ac668111	131	except urllib.error.HTTPError as err:
82e3f6eb	132	count += 1
	133	if count <= fragment_retries:
	134	self.report_retry_fragment(err, frag_index, count, fragment_retries)
	135	if count > fragment_retries:
	136	self.report_error('giving up after %s fragment retries' % fragment_retries)
c2603313	137	return False, None, None, None
82e3f6eb	138
3ba7740d	139	self._prepare_and_start_frag_download(ctx, info_dict)
a78e3a57	140
d71fd412	141	success = dl_fragment(info_dict['url'])
a78e3a57	142	if not success:
a78e3a57	143	return False
d71fd412	144	raw_fragment = self._read_fragment(ctx)
273762c8	145	try:
11f9be09	146	data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
273762c8	147	except RegexNotFoundError:
273762c8	148	return False
82e3f6eb	149	continuation_id = try_get(
	150	data,
	151	lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
a78e3a57	152	# no data yet but required to call _append_fragment
	153	self._append_fragment(ctx, b'')
	154
11f9be09	155	ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
273762c8	156
	157	if not ytcfg:
	158	return False
	159	api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
	160	innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
	161	if not api_key or not innertube_context:
	162	return False
c60ee3a2	163	visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
	164	if info_dict['protocol'] == 'youtube_live_chat_replay':
	165	url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
d534c452	166	chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
c60ee3a2	167	elif info_dict['protocol'] == 'youtube_live_chat':
c60ee3a2	168	url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
d534c452	169	chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
273762c8	170
82e3f6eb	171	frag_index = offset = 0
c2603313	172	click_tracking_params = None
a78e3a57	173	while continuation_id is not None:
82e3f6eb	174	frag_index += 1
273762c8	175	request_data = {
	176	'context': innertube_context,
	177	'continuation': continuation_id,
	178	}
	179	if frag_index > 1:
	180	request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
c2603313	181	if click_tracking_params:
c2603313	182	request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
99e9e001	183	headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
d534c452	184	headers.update({'content-type': 'application/json'})
0f06bcd7	185	fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
c2603313	186	success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
d534c452	187	url, frag_index, fragment_request_data, headers)
d534c452	188	else:
c2603313	189	success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
c2603313	190	chat_page_url, frag_index)
82e3f6eb	191	if not success:
	192	return False
	193	if test:
a78e3a57	194	break
a78e3a57	195
3ba7740d	196	self._finish_frag_download(ctx, info_dict)
a78e3a57	197	return True
c60ee3a2	198
	199	@staticmethod
	200	def parse_live_timestamp(action):
	201	action_content = dict_get(
	202	action,
	203	['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
	204	if not isinstance(action_content, dict):
	205	return None
	206	item = dict_get(action_content, ['item', 'bannerRenderer'])
	207	if not isinstance(item, dict):
	208	return None
	209	renderer = dict_get(item, [
	210	# text
	211	'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
	212	'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
	213	# ticker
	214	'liveChatTickerPaidMessageItemRenderer',
	215	'liveChatTickerSponsorItemRenderer',
	216	# banner
	217	'liveChatBannerRenderer',
	218	])
	219	if not isinstance(renderer, dict):
	220	return None
	221	parent_item_getters = [
	222	lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
	223	lambda x: x['contents'],
	224	]
	225	parent_item = try_get(renderer, parent_item_getters, dict)
	226	if parent_item:
	227	renderer = dict_get(parent_item, [
	228	'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
	229	'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
	230	])
	231	if not isinstance(renderer, dict):
	232	return None
	233	return int_or_none(renderer.get('timestampUsec'), 1000)