[yt-dlp.git] / yt_dlp / downloader / youtube_live_chat.py

from __future__ import division, unicode_literals

import json
import time

from .fragment import FragmentFD
from ..compat import compat_urllib_error
from ..utils import (
    try_get,
    dict_get,
    int_or_none,
    RegexNotFoundError,
)
from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE


class YoutubeLiveChatFD(FragmentFD):
    """ Downloads YouTube live chats fragment by fragment """

    FD_NAME = 'youtube_live_chat'

    def real_download(self, filename, info_dict):
        """Download a YouTube live chat (or its replay) fragment by fragment.

        Reads 'video_id', 'url' and 'protocol' from info_dict, plus the
        optional 'http_headers'. The protocol selects the endpoints:
        'youtube_live_chat_replay' for replays, 'youtube_live_chat' for
        ongoing streams.

        Every chat action is serialised as one JSON document per line and
        handed to self._append_fragment.

        Returns True when no further continuation is offered (or after the
        first fragment when the 'test' param is set), and False when a
        download fails or the page lacks the required initial data / ytcfg.
        """
        video_id = info_dict['video_id']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)

        fragment_retries = self.params.get('fragment_retries', 0)
        test = self.params.get('test', False)

        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        ie = YT_BaseIE(self.ydl)

        # Reference point (msec since the epoch) for the offsets of live messages
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            """Download url, merging headers over info_dict's 'http_headers'."""
            http_headers = info_dict.get('http_headers', {})
            if headers:
                # copy first so that info_dict's own headers are not modified
                http_headers = http_headers.copy()
                http_headers.update(headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
            """Append all replay actions; return (continuation_id, offset).

            offset is the 'videoOffsetTimeMsec' of the last replay item seen.
            Both values are None when the fragment holds no replay item,
            which ends the download loop.
            """
            offset = continuation_id = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                if 'replayChatItemAction' in action:
                    replay_chat_item_action = action['replayChatItemAction']
                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                processed_fragment.extend(
                    json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
            if offset is not None:
                continuation_id = try_get(
                    live_chat_continuation,
                    lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, offset

        def try_refresh_replay_beginning(live_chat_continuation):
            """Handle the first replay fragment; return (continuation_id, offset).

            If the header offers the second view option, restart from its
            continuation at offset 0 without storing any action. Otherwise
            the fragment is parsed like any other replay fragment.
            """
            # choose the second option that contains the unfiltered live chat replay
            refresh_continuation_id = try_get(
                live_chat_continuation,
                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData']['continuation'], str)
            if refresh_continuation_id:
                # no data yet but required to call _append_fragment
                self._append_fragment(ctx, b'')
                return refresh_continuation_id, 0
            return parse_actions_replay(live_chat_continuation)

        # Offset of the most recent timestamped live message; kept across
        # fragments so that actions without a timestamp reuse the last one
        live_offset = 0

        def parse_actions_live(live_chat_continuation):
            """Append all live actions; return (continuation_id, live_offset).

            Each action is wrapped in a replay-like pseudo action. After the
            fragment has been appended, waits for the 'timeoutMs' given by
            the continuation data, if any, before returning.
            """
            nonlocal live_offset
            continuation_id = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                timestamp = self.parse_live_timestamp(action)
                if timestamp is not None:
                    live_offset = timestamp - start_time
                # compatibility with replay format
                pseudo_action = {
                    'replayChatItemAction': {'actions': [action]},
                    'videoOffsetTimeMsec': str(live_offset),
                    'isLive': True,
                }
                processed_fragment.extend(
                    json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
            continuation_data_getters = [
                lambda x: x['continuations'][0]['invalidationContinuationData'],
                lambda x: x['continuations'][0]['timedContinuationData'],
            ]
            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
            timeout_ms = None
            if continuation_data:
                continuation_id = continuation_data.get('continuation')
                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
            # Hand over the received messages before waiting, so that an
            # interruption during the wait does not discard them
            self._append_fragment(ctx, processed_fragment)
            if timeout_ms is not None:
                time.sleep(timeout_ms / 1000)
            return continuation_id, live_offset

        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
            """Download and process one fragment, retrying on HTTP errors.

            Returns (success, continuation_id, offset); the last two are None
            whenever success is False.
            """
            count = 0
            while count <= fragment_retries:
                try:
                    success, raw_fragment = dl_fragment(url, request_data, headers)
                    if not success:
                        return False, None, None
                    # The chat page embeds its data in HTML while the API
                    # answers with plain JSON, so try the former first
                    try:
                        data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
                        data = None
                    if not data:
                        data = json.loads(raw_fragment)
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
                    if info_dict['protocol'] == 'youtube_live_chat_replay':
                        if frag_index == 1:
                            continuation_id, offset = try_refresh_replay_beginning(live_chat_continuation)
                        else:
                            continuation_id, offset = parse_actions_replay(live_chat_continuation)
                    elif info_dict['protocol'] == 'youtube_live_chat':
                        continuation_id, offset = parse_actions_live(live_chat_continuation)
                    return True, continuation_id, offset
                except compat_urllib_error.HTTPError as err:
                    count += 1
                    if count <= fragment_retries:
                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
            # The loop is only left without returning once all retries are used up
            self.report_error('giving up after %s fragment retries' % fragment_retries)
            return False, None, None

        self._prepare_and_start_frag_download(ctx)

        success, raw_fragment = dl_fragment(info_dict['url'])
        if not success:
            return False
        webpage = raw_fragment.decode('utf-8', 'replace')
        try:
            data = ie._extract_yt_initial_data(video_id, webpage)
        except RegexNotFoundError:
            return False
        # May be None when the page does not carry a live chat continuation
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        ytcfg = ie._extract_ytcfg(video_id, webpage)

        if not ytcfg:
            return False
        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
        if not api_key or not innertube_context:
            return False
        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
        # Only the prefix of the chat page URL is built here: continuation_id
        # may be None, in which case the loop below must simply be skipped
        # instead of failing on the string concatenation
        if info_dict['protocol'] == 'youtube_live_chat_replay':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
            chat_page_url_prefix = 'https://www.youtube.com/live_chat_replay?continuation='
        elif info_dict['protocol'] == 'youtube_live_chat':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
            chat_page_url_prefix = 'https://www.youtube.com/live_chat?continuation='

        frag_index = offset = 0
        while continuation_id is not None:
            frag_index += 1
            request_data = {
                'context': innertube_context,
                'continuation': continuation_id,
            }
            if frag_index > 1:
                # Later fragments are requested from the API endpoint
                request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
                headers = ie._generate_api_headers(ytcfg, visitor_data=visitor_data)
                headers.update({'content-type': 'application/json'})
                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
                success, continuation_id, offset = download_and_parse_fragment(
                    url, frag_index, fragment_request_data, headers)
            else:
                # The first fragment is the chat page itself
                success, continuation_id, offset = download_and_parse_fragment(
                    chat_page_url_prefix + continuation_id, frag_index)
            if not success:
                return False
            if test:
                break

        self._finish_frag_download(ctx)
        return True

    @staticmethod
    def parse_live_timestamp(action):
        """Extract the timestamp of a live chat action.

        Walks action -> item / banner -> renderer, following ticker and
        banner renderers to the message renderer they reference.

        Returns the renderer's 'timestampUsec' passed through
        int_or_none(..., 1000) (presumably converting usec to msec; confirm
        against int_or_none), or None when the action has no recognised
        shape.
        """
        action_content = dict_get(
            action,
            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
        if not isinstance(action_content, dict):
            return None
        item = dict_get(action_content, ['item', 'bannerRenderer'])
        if not isinstance(item, dict):
            return None
        renderer = dict_get(item, [
            # text
            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            # ticker
            'liveChatTickerPaidMessageItemRenderer',
            'liveChatTickerSponsorItemRenderer',
            # banner
            'liveChatBannerRenderer',
        ])
        if not isinstance(renderer, dict):
            return None
        # Tickers and banners only point to the message they were created
        # for; the timestamp is read from that nested message renderer
        parent_item_getters = [
            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
            lambda x: x['contents'],
        ]
        parent_item = try_get(renderer, parent_item_getters, dict)
        if parent_item:
            renderer = dict_get(parent_item, [
                'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
                'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            ])
            if not isinstance(renderer, dict):
                return None
        return int_or_none(renderer.get('timestampUsec'), 1000)
Commit	Line	Data
a78e3a57	1	from __future__ import division, unicode_literals
a78e3a57	2
a78e3a57	3	import json
c60ee3a2	4	import time
a78e3a57	5
a78e3a57	6	from .fragment import FragmentFD
82e3f6eb	7	from ..compat import compat_urllib_error
273762c8	8	from ..utils import (
273762c8	9	try_get,
c60ee3a2	10	dict_get,
c60ee3a2	11	int_or_none,
273762c8	12	RegexNotFoundError,
273762c8	13	)
82e3f6eb	14	from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
a78e3a57	15
a78e3a57	16
c60ee3a2	17	class YoutubeLiveChatFD(FragmentFD):
c60ee3a2	18	""" Downloads YouTube live chats fragment by fragment """
a78e3a57	19
c60ee3a2	20	FD_NAME = 'youtube_live_chat'
a78e3a57	21
	22	def real_download(self, filename, info_dict):
	23	video_id = info_dict['video_id']
	24	self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
	25
82e3f6eb	26	fragment_retries = self.params.get('fragment_retries', 0)
a78e3a57	27	test = self.params.get('test', False)
	28
	29	ctx = {
	30	'filename': filename,
	31	'live': True,
	32	'total_frags': None,
	33	}
	34
273762c8	35	ie = YT_BaseIE(self.ydl)
a78e3a57	36
c60ee3a2	37	start_time = int(time.time() * 1000)
c60ee3a2	38
273762c8	39	def dl_fragment(url, data=None, headers=None):
	40	http_headers = info_dict.get('http_headers', {})
	41	if headers:
	42	http_headers = http_headers.copy()
	43	http_headers.update(headers)
	44	return self._download_fragment(ctx, url, info_dict, http_headers, data)
a78e3a57	45
c60ee3a2	46	def parse_actions_replay(live_chat_continuation):
	47	offset = continuation_id = None
	48	processed_fragment = bytearray()
	49	for action in live_chat_continuation.get('actions', []):
	50	if 'replayChatItemAction' in action:
	51	replay_chat_item_action = action['replayChatItemAction']
	52	offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
	53	processed_fragment.extend(
	54	json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
	55	if offset is not None:
	56	continuation_id = try_get(
	57	live_chat_continuation,
	58	lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
	59	self._append_fragment(ctx, processed_fragment)
	60	return continuation_id, offset
	61
d534c452	62	def try_refresh_replay_beginning(live_chat_continuation):
	63	# choose the second option that contains the unfiltered live chat replay
	64	refresh_continuation_id = try_get(
	65	live_chat_continuation,
	66	lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData']['continuation'], str)
	67	if refresh_continuation_id:
	68	# no data yet but required to call _append_fragment
	69	self._append_fragment(ctx, b'')
	70	return refresh_continuation_id, 0
	71	return parse_actions_replay(live_chat_continuation)
	72
c60ee3a2	73	live_offset = 0
	74
	75	def parse_actions_live(live_chat_continuation):
	76	nonlocal live_offset
	77	continuation_id = None
	78	processed_fragment = bytearray()
	79	for action in live_chat_continuation.get('actions', []):
	80	timestamp = self.parse_live_timestamp(action)
	81	if timestamp is not None:
	82	live_offset = timestamp - start_time
	83	# compatibility with replay format
	84	pseudo_action = {
	85	'replayChatItemAction': {'actions': [action]},
	86	'videoOffsetTimeMsec': str(live_offset),
	87	'isLive': True,
	88	}
	89	processed_fragment.extend(
	90	json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
	91	continuation_data_getters = [
	92	lambda x: x['continuations'][0]['invalidationContinuationData'],
	93	lambda x: x['continuations'][0]['timedContinuationData'],
	94	]
	95	continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
	96	if continuation_data:
	97	continuation_id = continuation_data.get('continuation')
	98	timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
	99	if timeout_ms is not None:
	100	time.sleep(timeout_ms / 1000)
	101	self._append_fragment(ctx, processed_fragment)
	102	return continuation_id, live_offset
	103
d534c452	104	def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
82e3f6eb	105	count = 0
	106	while count <= fragment_retries:
	107	try:
c60ee3a2	108	success, raw_fragment = dl_fragment(url, request_data, headers)
82e3f6eb	109	if not success:
82e3f6eb	110	return False, None, None
d534c452	111	try:
	112	data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
	113	except RegexNotFoundError:
	114	data = None
	115	if not data:
	116	data = json.loads(raw_fragment)
82e3f6eb	117	live_chat_continuation = try_get(
	118	data,
	119	lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
d534c452	120	if info_dict['protocol'] == 'youtube_live_chat_replay':
	121	if frag_index == 1:
	122	continuation_id, offset = try_refresh_replay_beginning(live_chat_continuation)
	123	else:
	124	continuation_id, offset = parse_actions_replay(live_chat_continuation)
	125	elif info_dict['protocol'] == 'youtube_live_chat':
	126	continuation_id, offset = parse_actions_live(live_chat_continuation)
82e3f6eb	127	return True, continuation_id, offset
	128	except compat_urllib_error.HTTPError as err:
	129	count += 1
	130	if count <= fragment_retries:
	131	self.report_retry_fragment(err, frag_index, count, fragment_retries)
	132	if count > fragment_retries:
	133	self.report_error('giving up after %s fragment retries' % fragment_retries)
	134	return False, None, None
	135
a78e3a57	136	self._prepare_and_start_frag_download(ctx)
a78e3a57	137
83b20a97	138	success, raw_fragment = dl_fragment(info_dict['url'])
a78e3a57	139	if not success:
a78e3a57	140	return False
273762c8	141	try:
	142	data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
	143	except RegexNotFoundError:
	144	return False
82e3f6eb	145	continuation_id = try_get(
	146	data,
	147	lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
a78e3a57	148	# no data yet but required to call _append_fragment
	149	self._append_fragment(ctx, b'')
	150
273762c8	151	ytcfg = ie._extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
	152
	153	if not ytcfg:
	154	return False
	155	api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
	156	innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
	157	if not api_key or not innertube_context:
	158	return False
c60ee3a2	159	visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
	160	if info_dict['protocol'] == 'youtube_live_chat_replay':
	161	url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
d534c452	162	chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
c60ee3a2	163	elif info_dict['protocol'] == 'youtube_live_chat':
c60ee3a2	164	url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
d534c452	165	chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
273762c8	166
82e3f6eb	167	frag_index = offset = 0
a78e3a57	168	while continuation_id is not None:
82e3f6eb	169	frag_index += 1
273762c8	170	request_data = {
	171	'context': innertube_context,
	172	'continuation': continuation_id,
	173	}
	174	if frag_index > 1:
	175	request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
d534c452	176	headers = ie._generate_api_headers(ytcfg, visitor_data=visitor_data)
	177	headers.update({'content-type': 'application/json'})
	178	fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
	179	success, continuation_id, offset = download_and_parse_fragment(
	180	url, frag_index, fragment_request_data, headers)
	181	else:
	182	success, continuation_id, offset = download_and_parse_fragment(chat_page_url, frag_index)
82e3f6eb	183	if not success:
	184	return False
	185	if test:
a78e3a57	186	break
	187
	188	self._finish_frag_download(ctx)
a78e3a57	189	return True
c60ee3a2	190
	191	@staticmethod
	192	def parse_live_timestamp(action):
	193	action_content = dict_get(
	194	action,
	195	['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
	196	if not isinstance(action_content, dict):
	197	return None
	198	item = dict_get(action_content, ['item', 'bannerRenderer'])
	199	if not isinstance(item, dict):
	200	return None
	201	renderer = dict_get(item, [
	202	# text
	203	'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
	204	'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
	205	# ticker
	206	'liveChatTickerPaidMessageItemRenderer',
	207	'liveChatTickerSponsorItemRenderer',
	208	# banner
	209	'liveChatBannerRenderer',
	210	])
	211	if not isinstance(renderer, dict):
	212	return None
	213	parent_item_getters = [
	214	lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
	215	lambda x: x['contents'],
	216	]
	217	parent_item = try_get(renderer, parent_item_getters, dict)
	218	if parent_item:
	219	renderer = dict_get(parent_item, [
	220	'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
	221	'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
	222	])
	223	if not isinstance(renderer, dict):
	224	return None
	225	return int_or_none(renderer.get('timestampUsec'), 1000)