]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/youtube_live_chat.py
Standardize retry mechanism (#1649)
[yt-dlp.git] / yt_dlp / downloader / youtube_live_chat.py
CommitLineData
a78e3a57 1import json
c60ee3a2 2import time
ac668111 3import urllib.error
a78e3a57 4
5from .fragment import FragmentFD
be5c1ae8 6from ..utils import (
7 RegexNotFoundError,
8 RetryManager,
9 dict_get,
10 int_or_none,
11 try_get,
12)
a78e3a57 13
14
class YoutubeLiveChatFD(FragmentFD):
    """ Downloads YouTube live chats fragment by fragment """

    def real_download(self, filename, info_dict):
        """Download a live chat (or live chat replay) into `filename`.

        The watch page at info_dict['url'] is fetched first to obtain the
        initial continuation token and the ytcfg (API key + innertube
        context). The first chat fragment is then read from the chat page,
        and every following fragment from the innertube live-chat API, until
        no further continuation token is returned.

        Each chat action is appended to the output as one JSON document per
        line.

        @param filename   Output file name, stored in the fragment context
        @param info_dict  Must provide 'video_id', 'url' and 'protocol'
                          ('youtube_live_chat' or 'youtube_live_chat_replay');
                          'http_headers' is used when present
        @returns          True on success, False if any step failed
        """
        video_id = info_dict['video_id']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
        if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
            self.report_warning(
                'Live chat download runs until the livestream ends. '
                'If you wish to download the video simultaneously, run a separate yt-dlp instance')

        test = self.params.get('test', False)

        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        # Imported here rather than at module level, as in the original code
        # (presumably to avoid an import cycle with the extractor package)
        from ..extractor.youtube import YoutubeBaseInfoExtractor

        ie = YoutubeBaseInfoExtractor(self.ydl)

        # Wall-clock start of the download in milliseconds; live chat offsets
        # are computed relative to this instant
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            """Fetch one fragment, merging `headers` over info_dict's http_headers"""
            http_headers = info_dict.get('http_headers', {})
            if headers:
                # Copy first so that info_dict's own headers are not mutated
                http_headers = http_headers.copy()
                http_headers.update(headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
            """Append all replay actions to the output

            @returns  (continuation_id, offset, click_tracking_params);
                      offset is the 'videoOffsetTimeMsec' of the last replay
                      item seen, or None if there was none. The continuation
                      is only looked up when an offset was found
            """
            offset = continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                if 'replayChatItemAction' in action:
                    replay_chat_item_action = action['replayChatItemAction']
                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                # Every action is written out, whether or not it carried an offset
                processed_fragment.extend(
                    json.dumps(action, ensure_ascii=False).encode() + b'\n')
            if offset is not None:
                continuation = try_get(
                    live_chat_continuation,
                    lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
                if continuation:
                    continuation_id = continuation.get('continuation')
                    click_tracking_params = continuation.get('clickTrackingParams')
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, offset, click_tracking_params

        def try_refresh_replay_beginning(live_chat_continuation):
            """Switch to the unfiltered replay if the chat header offers it

            Falls back to parse_actions_replay() when the header does not
            contain the expected view selector entry.
            """
            # choose the second option that contains the unfiltered live chat replay
            refresh_continuation = try_get(
                live_chat_continuation,
                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
            if refresh_continuation:
                # no data yet but required to call _append_fragment
                self._append_fragment(ctx, b'')
                refresh_continuation_id = refresh_continuation.get('continuation')
                offset = 0
                click_tracking_params = refresh_continuation.get('trackingParams')
                return refresh_continuation_id, offset, click_tracking_params
            return parse_actions_replay(live_chat_continuation)

        live_offset = 0

        def parse_actions_live(live_chat_continuation):
            """Append all live actions to the output, wrapped like replay items

            Sleeps for the 'timeoutMs' given by the continuation data (if any)
            before returning.

            @returns  (continuation_id, live_offset, click_tracking_params)
            """
            nonlocal live_offset
            continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                timestamp = self.parse_live_timestamp(action)
                if timestamp is not None:
                    live_offset = timestamp - start_time
                # compatibility with replay format
                # Actions without a timestamp reuse the most recent offset
                pseudo_action = {
                    'replayChatItemAction': {'actions': [action]},
                    'videoOffsetTimeMsec': str(live_offset),
                    'isLive': True,
                }
                processed_fragment.extend(
                    json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
            continuation_data_getters = [
                lambda x: x['continuations'][0]['invalidationContinuationData'],
                lambda x: x['continuations'][0]['timedContinuationData'],
            ]
            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
            if continuation_data:
                continuation_id = continuation_data.get('continuation')
                click_tracking_params = continuation_data.get('clickTrackingParams')
                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
                if timeout_ms is not None:
                    time.sleep(timeout_ms / 1000)
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, live_offset, click_tracking_params

        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
            """Download fragment `frag_index` and hand it to the matching parser

            HTTP errors are retried according to the 'fragment_retries' param.

            @returns  (success, continuation_id, offset, click_tracking_params)
            """
            for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index):
                try:
                    success = dl_fragment(url, request_data, headers)
                    if not success:
                        return False, None, None, None
                    raw_fragment = self._read_fragment(ctx)
                    try:
                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
                        data = None
                    if not data:
                        # No embedded initial data found: treat the fragment as plain JSON
                        data = json.loads(raw_fragment)
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}

                    if info_dict['protocol'] == 'youtube_live_chat':
                        func = parse_actions_live
                    elif frag_index == 1:
                        func = try_refresh_replay_beginning
                    else:
                        func = parse_actions_replay
                    return (True, *func(live_chat_continuation))
                except urllib.error.HTTPError as err:
                    retry.error = err
                    continue
            return False, None, None, None

        self._prepare_and_start_frag_download(ctx, info_dict)

        success = dl_fragment(info_dict['url'])
        if not success:
            return False
        raw_fragment = self._read_fragment(ctx)
        try:
            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
        except RegexNotFoundError:
            return False
        # Require a string: the token is concatenated into the chat page URL below
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'],
            str)
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))

        if not ytcfg:
            return False
        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
        if not api_key or not innertube_context:
            return False
        if not continuation_id:
            # Without the initial token neither the chat page URL nor any API
            # request can be built; fail cleanly instead of raising TypeError
            self.report_warning('Unable to find the initial live chat continuation')
            return False
        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
        protocol = info_dict['protocol']
        if protocol == 'youtube_live_chat_replay':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
        elif protocol == 'youtube_live_chat':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
        else:
            # Previously fell through and raised NameError on the first request
            self.report_warning('Unsupported live chat protocol: %s' % protocol)
            return False

        frag_index = offset = 0
        click_tracking_params = None
        while continuation_id is not None:
            frag_index += 1
            if frag_index > 1:
                # Subsequent fragments are requested from the innertube API
                request_data = {
                    'context': innertube_context,
                    'continuation': continuation_id,
                    'currentPlayerState': {'playerOffsetMs': str(max(offset - 5000, 0))},
                }
                if click_tracking_params:
                    # NOTE: this writes into innertube_context itself, so the
                    # value persists across iterations until overwritten
                    request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
                headers.update({'content-type': 'application/json'})
                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    url, frag_index, fragment_request_data, headers)
            else:
                # The first fragment comes from the chat page itself
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    chat_page_url, frag_index)
            if not success:
                return False
            if test:
                break

        self._finish_frag_download(ctx, info_dict)
        return True

    @staticmethod
    def parse_live_timestamp(action):
        """Extract the timestamp of a live chat action

        Looks for a known renderer inside an add-chat-item, add-ticker-item
        or add-banner action, following ticker/banner entries to the chat
        item they wrap.

        @param action  A single live chat action dict
        @returns       The renderer's 'timestampUsec' divided by 1000
                       (presumably milliseconds, matching start_time in
                       real_download), or None if no timestamp is found
        """
        action_content = dict_get(
            action,
            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
        if not isinstance(action_content, dict):
            return None
        item = dict_get(action_content, ['item', 'bannerRenderer'])
        if not isinstance(item, dict):
            return None
        renderer = dict_get(item, [
            # text
            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            # ticker
            'liveChatTickerPaidMessageItemRenderer',
            'liveChatTickerSponsorItemRenderer',
            # banner
            'liveChatBannerRenderer',
        ])
        if not isinstance(renderer, dict):
            return None
        # Ticker and banner renderers wrap the actual chat item; unwrap it
        parent_item_getters = [
            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
            lambda x: x['contents'],
        ]
        parent_item = try_get(renderer, parent_item_getters, dict)
        if parent_item:
            renderer = dict_get(parent_item, [
                'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
                'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            ])
        if not isinstance(renderer, dict):
            return None
        return int_or_none(renderer.get('timestampUsec'), 1000)