]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/youtube_live_chat.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / downloader / youtube_live_chat.py
CommitLineData
a78e3a57 1import json
c60ee3a2 2import time
a78e3a57 3
4from .fragment import FragmentFD
3d2623a8 5from ..networking.exceptions import HTTPError
be5c1ae8 6from ..utils import (
7 RegexNotFoundError,
8 RetryManager,
9 dict_get,
10 int_or_none,
11 try_get,
12)
3d2623a8 13from ..utils.networking import HTTPHeaderDict
a78e3a57 14
15
class YoutubeLiveChatFD(FragmentFD):
    """ Downloads YouTube live chats fragment by fragment """

    def real_download(self, filename, info_dict):
        """Download a live chat (or live chat replay) into `filename`.

        The first request fetches `info_dict['url']` to obtain the initial
        continuation token and the ytcfg. The chat is then followed one
        continuation at a time, and every chat action is written as one JSON
        document per line.

        Reads `info_dict['video_id']`, `info_dict['protocol']`,
        `info_dict['url']` and optionally `info_dict['http_headers']`.

        Returns False when a download fails or the required page data
        (initial data, ytcfg, API key, innertube context) cannot be extracted;
        otherwise returns the result of `_finish_frag_download`.
        """
        video_id = info_dict['video_id']
        self.to_screen(f'[{self.FD_NAME}] Downloading live chat')
        if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
            self.report_warning('Live chat download runs until the livestream ends. '
                                'If you wish to download the video simultaneously, run a separate yt-dlp instance')

        test = self.params.get('test', False)

        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        from ..extractor.youtube import YoutubeBaseInfoExtractor

        ie = YoutubeBaseInfoExtractor(self.ydl)

        # Wall-clock start in milliseconds; live message offsets are computed relative to it
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            """Download one fragment, layering `headers` over the format's own http_headers."""
            http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
            """Append all replay actions to the output.

            Returns (continuation_id, offset, click_tracking_params).
            `offset` is the `videoOffsetTimeMsec` of the last replay action seen,
            or None if there was none; in that case no continuation is looked up
            and `continuation_id` stays None.
            """
            offset = continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                if 'replayChatItemAction' in action:
                    replay_chat_item_action = action['replayChatItemAction']
                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                processed_fragment.extend(
                    json.dumps(action, ensure_ascii=False).encode() + b'\n')
            if offset is not None:
                continuation = try_get(
                    live_chat_continuation,
                    lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
                if continuation:
                    continuation_id = continuation.get('continuation')
                    click_tracking_params = continuation.get('clickTrackingParams')
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, offset, click_tracking_params

        def try_refresh_replay_beginning(live_chat_continuation):
            """Switch to the second view-selector entry if present, else parse as a normal replay fragment.

            Returns (continuation_id, offset, click_tracking_params).
            """
            # choose the second option that contains the unfiltered live chat replay
            refresh_continuation = try_get(
                live_chat_continuation,
                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
            if refresh_continuation:
                # no data yet but required to call _append_fragment
                self._append_fragment(ctx, b'')
                refresh_continuation_id = refresh_continuation.get('continuation')
                offset = 0
                click_tracking_params = refresh_continuation.get('trackingParams')
                return refresh_continuation_id, offset, click_tracking_params
            return parse_actions_replay(live_chat_continuation)

        # Offset of the most recent timestamped live action; carried across fragments
        live_offset = 0

        def parse_actions_live(live_chat_continuation):
            """Append all live actions to the output, wrapped to resemble the replay format.

            Sleeps for the continuation's `timeoutMs` (when given) before returning.
            Returns (continuation_id, live_offset, click_tracking_params).
            """
            nonlocal live_offset
            continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                timestamp = self.parse_live_timestamp(action)
                if timestamp is not None:
                    live_offset = timestamp - start_time
                # compatibility with replay format
                pseudo_action = {
                    'replayChatItemAction': {'actions': [action]},
                    'videoOffsetTimeMsec': str(live_offset),
                    'isLive': True,
                }
                processed_fragment.extend(
                    json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
            continuation_data_getters = [
                lambda x: x['continuations'][0]['invalidationContinuationData'],
                lambda x: x['continuations'][0]['timedContinuationData'],
            ]
            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
            if continuation_data:
                continuation_id = continuation_data.get('continuation')
                click_tracking_params = continuation_data.get('clickTrackingParams')
                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
                if timeout_ms is not None:
                    time.sleep(timeout_ms / 1000)
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, live_offset, click_tracking_params

        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
            """Download one fragment (retrying on HTTPError) and hand it to the matching parser.

            Returns (success, continuation_id, offset, click_tracking_params);
            the last three are None whenever `success` is False.
            """
            for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index):
                try:
                    success = dl_fragment(url, request_data, headers)
                    if not success:
                        return False, None, None, None
                    raw_fragment = self._read_fragment(ctx)
                    # The fragment is either an HTML page embedding the initial data or a bare JSON response
                    try:
                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
                        data = None
                    if not data:
                        data = json.loads(raw_fragment)
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}

                    if info_dict['protocol'] == 'youtube_live_chat':
                        func = parse_actions_live
                    elif frag_index == 1:
                        func = try_refresh_replay_beginning
                    else:
                        func = parse_actions_replay
                    return (True, *func(live_chat_continuation))
                except HTTPError as err:
                    retry.error = err
                    continue
            return False, None, None, None

        self._prepare_and_start_frag_download(ctx, info_dict)

        success = dl_fragment(info_dict['url'])
        if not success:
            return False
        raw_fragment = self._read_fragment(ctx)
        # Decode once; both the initial data and the ytcfg are extracted from the same page
        webpage = raw_fragment.decode('utf-8', 'replace')
        try:
            data = ie.extract_yt_initial_data(video_id, webpage)
        except RegexNotFoundError:
            return False
        # Require a string so that a missing/malformed token is None and simply skips the loop below
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'],
            str)
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        ytcfg = ie.extract_ytcfg(video_id, webpage)

        if not ytcfg:
            return False
        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
        if not api_key or not innertube_context:
            return False
        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
        # The continuation token is appended to the chat page URL only when the request is made,
        # so a page without a continuation no longer fails on `str + None` here
        if info_dict['protocol'] == 'youtube_live_chat_replay':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
            chat_page_base_url = 'https://www.youtube.com/live_chat_replay?continuation='
        elif info_dict['protocol'] == 'youtube_live_chat':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
            chat_page_base_url = 'https://www.youtube.com/live_chat?continuation='

        frag_index = offset = 0
        click_tracking_params = None
        while continuation_id is not None:
            frag_index += 1
            request_data = {
                'context': innertube_context,
                'continuation': continuation_id,
            }
            if frag_index > 1:
                # Later fragments are requested from the API endpoint with a JSON body
                request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
                if click_tracking_params:
                    request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
                headers.update({'content-type': 'application/json'})
                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    url, frag_index, fragment_request_data, headers)
            else:
                # The first fragment is the chat page itself
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    chat_page_base_url + continuation_id, frag_index)
            if not success:
                return False
            if test:
                break

        return self._finish_frag_download(ctx, info_dict)

    @staticmethod
    def parse_live_timestamp(action):
        """Extract the timestamp of a live chat action.

        Looks through the known action, item and renderer keys (following a
        ticker/banner to the message it wraps, when present) and returns the
        renderer's `timestampUsec` passed through `int_or_none(..., 1000)`.
        NOTE(review): presumably this scales microseconds to milliseconds, since
        the caller subtracts a millisecond start time - confirm against int_or_none.

        Returns None if any level of the expected structure is missing.
        """
        action_content = dict_get(
            action,
            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
        if not isinstance(action_content, dict):
            return None
        item = dict_get(action_content, ['item', 'bannerRenderer'])
        if not isinstance(item, dict):
            return None
        renderer = dict_get(item, [
            # text
            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            # ticker
            'liveChatTickerPaidMessageItemRenderer',
            'liveChatTickerSponsorItemRenderer',
            # banner
            'liveChatBannerRenderer',
        ])
        if not isinstance(renderer, dict):
            return None
        # Tickers and banners wrap the actual message renderer; unwrap it when present
        parent_item_getters = [
            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
            lambda x: x['contents'],
        ]
        parent_item = try_get(renderer, parent_item_getters, dict)
        if parent_item:
            renderer = dict_get(parent_item, [
                'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
                'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            ])
            if not isinstance(renderer, dict):
                return None
        return int_or_none(renderer.get('timestampUsec'), 1000)