]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/youtube_live_chat.py
[compat] Remove more functions
[yt-dlp.git] / yt_dlp / downloader / youtube_live_chat.py
CommitLineData
a78e3a57 1import json
c60ee3a2 2import time
ac668111 3import urllib.error
a78e3a57 4
5from .fragment import FragmentFD
f8271158 6from ..utils import RegexNotFoundError, dict_get, int_or_none, try_get
a78e3a57 7
8
class YoutubeLiveChatFD(FragmentFD):
    """Downloads YouTube live chats fragment by fragment."""

    def real_download(self, filename, info_dict):
        """Download a YouTube live chat (live or replay) into *filename*.

        The page at ``info_dict['url']`` is fetched first; the initial chat
        continuation token and the ytcfg are extracted from it. Chat fragments
        are then requested one after another until a response carries no
        further continuation (or after a single fragment when the ``test``
        param is set). Every chat action is appended to the output as one
        JSON document per line.

        ``info_dict['protocol']`` selects the mode: ``youtube_live_chat_replay``
        or ``youtube_live_chat``.

        Returns True when the download finished, False when the initial page
        could not be parsed, when the continuation token / ytcfg / API key /
        innertube context is missing, or when a fragment download failed.
        """
        video_id = info_dict['video_id']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
        if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
            self.report_warning('Live chat download runs until the livestream ends. '
                                'If you wish to download the video simultaneously, run a separate yt-dlp instance')

        fragment_retries = self.params.get('fragment_retries', 0)
        test = self.params.get('test', False)

        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        # Imported here rather than at module level; presumably to avoid a
        # circular import between downloader and extractor -- TODO confirm.
        from ..extractor.youtube import YoutubeBaseInfoExtractor

        ie = YoutubeBaseInfoExtractor(self.ydl)

        # Wall-clock start in milliseconds; live offsets are computed against it.
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            """Download one fragment, merging *headers* over the format's http_headers."""
            http_headers = info_dict.get('http_headers', {})
            if headers:
                # Copy first so the headers stored in info_dict are not mutated.
                http_headers = http_headers.copy()
                http_headers.update(headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
            """Append all replay actions to the output and locate the next continuation.

            Returns ``(continuation_id, offset, click_tracking_params)``.
            ``offset`` is the ``videoOffsetTimeMsec`` of the last
            ``replayChatItemAction`` seen, or None when there was none, in
            which case no continuation is looked up and the caller's loop ends.
            """
            offset = continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                if 'replayChatItemAction' in action:
                    replay_chat_item_action = action['replayChatItemAction']
                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                processed_fragment.extend(
                    json.dumps(action, ensure_ascii=False).encode() + b'\n')
            if offset is not None:
                continuation = try_get(
                    live_chat_continuation,
                    lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
                if continuation:
                    continuation_id = continuation.get('continuation')
                    click_tracking_params = continuation.get('clickTrackingParams')
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, offset, click_tracking_params

        def try_refresh_replay_beginning(live_chat_continuation):
            """Handle the first replay fragment.

            When the header offers a second view-selector entry, switch to its
            reload continuation (restarting at offset 0) without writing any
            chat data; otherwise parse the fragment as a regular replay one.
            """
            # choose the second option that contains the unfiltered live chat replay
            refresh_continuation = try_get(
                live_chat_continuation,
                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
            if refresh_continuation:
                # no data yet but required to call _append_fragment
                self._append_fragment(ctx, b'')
                refresh_continuation_id = refresh_continuation.get('continuation')
                offset = 0
                click_tracking_params = refresh_continuation.get('trackingParams')
                return refresh_continuation_id, offset, click_tracking_params
            return parse_actions_replay(live_chat_continuation)

        # Offset of the most recent timestamped live action relative to
        # start_time; carried over to actions that have no timestamp.
        live_offset = 0

        def parse_actions_live(live_chat_continuation):
            """Append all live actions (wrapped in a replay-like envelope) to the output.

            Returns ``(continuation_id, live_offset, click_tracking_params)``.
            When the continuation data carries ``timeoutMs``, sleeps for that
            long before returning.
            """
            nonlocal live_offset
            continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                timestamp = self.parse_live_timestamp(action)
                if timestamp is not None:
                    live_offset = timestamp - start_time
                # compatibility with replay format
                pseudo_action = {
                    'replayChatItemAction': {'actions': [action]},
                    'videoOffsetTimeMsec': str(live_offset),
                    'isLive': True,
                }
                processed_fragment.extend(
                    json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
            continuation_data_getters = [
                lambda x: x['continuations'][0]['invalidationContinuationData'],
                lambda x: x['continuations'][0]['timedContinuationData'],
            ]
            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
            if continuation_data:
                continuation_id = continuation_data.get('continuation')
                click_tracking_params = continuation_data.get('clickTrackingParams')
                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
                if timeout_ms is not None:
                    time.sleep(timeout_ms / 1000)
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, live_offset, click_tracking_params

        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
            """Download fragment *frag_index* from *url* and parse it.

            Retries on ``urllib.error.HTTPError`` up to ``fragment_retries``
            times. Returns ``(success, continuation_id, offset,
            click_tracking_params)``; the last three are None on failure.
            """
            count = 0
            while count <= fragment_retries:
                try:
                    success = dl_fragment(url, request_data, headers)
                    if not success:
                        return False, None, None, None
                    raw_fragment = self._read_fragment(ctx)
                    try:
                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
                        data = None
                    if not data:
                        # No embedded initial data found: treat the fragment as plain JSON.
                        data = json.loads(raw_fragment)
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
                    if info_dict['protocol'] == 'youtube_live_chat_replay':
                        if frag_index == 1:
                            continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
                        else:
                            continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
                    elif info_dict['protocol'] == 'youtube_live_chat':
                        continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
                    return True, continuation_id, offset, click_tracking_params
                except urllib.error.HTTPError as err:
                    count += 1
                    if count <= fragment_retries:
                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
            if count > fragment_retries:
                self.report_error('giving up after %s fragment retries' % fragment_retries)
                return False, None, None, None

        self._prepare_and_start_frag_download(ctx, info_dict)

        success = dl_fragment(info_dict['url'])
        if not success:
            return False
        raw_fragment = self._read_fragment(ctx)
        try:
            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
        except RegexNotFoundError:
            return False
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))

        if not ytcfg:
            return False
        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
        if not api_key or not innertube_context:
            return False
        if continuation_id is None:
            # The page carries no live chat continuation. Bail out here instead
            # of raising TypeError when the token is concatenated into the
            # chat page URL below.
            return False
        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
        if info_dict['protocol'] == 'youtube_live_chat_replay':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
        elif info_dict['protocol'] == 'youtube_live_chat':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id

        frag_index = offset = 0
        click_tracking_params = None
        while continuation_id is not None:
            frag_index += 1
            request_data = {
                'context': innertube_context,
                'continuation': continuation_id,
            }
            if frag_index > 1:
                # Later fragments go to the API endpoint with a JSON body; the
                # requested player offset is 5000 ms before the last seen one.
                request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
                if click_tracking_params:
                    request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
                headers.update({'content-type': 'application/json'})
                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    url, frag_index, fragment_request_data, headers)
            else:
                # The first fragment is fetched from the chat page itself.
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    chat_page_url, frag_index)
            if not success:
                return False
            if test:
                break

        self._finish_frag_download(ctx, info_dict)
        return True

    @staticmethod
    def parse_live_timestamp(action):
        """Extract the timestamp of a live chat *action*.

        Looks through the known action / item / renderer wrappers for a
        renderer dict and reads its ``timestampUsec`` field. Ticker and banner
        renderers are unwrapped to the message renderer they contain first.

        Returns the value of ``int_or_none(timestampUsec, 1000)`` (presumably
        microseconds scaled down to milliseconds -- confirm against
        ``int_or_none``), or None when no suitable renderer is found.
        """
        action_content = dict_get(
            action,
            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
        if not isinstance(action_content, dict):
            return None
        item = dict_get(action_content, ['item', 'bannerRenderer'])
        if not isinstance(item, dict):
            return None
        renderer = dict_get(item, [
            # text
            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            # ticker
            'liveChatTickerPaidMessageItemRenderer',
            'liveChatTickerSponsorItemRenderer',
            # banner
            'liveChatBannerRenderer',
        ])
        if not isinstance(renderer, dict):
            return None
        parent_item_getters = [
            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
            lambda x: x['contents'],
        ]
        parent_item = try_get(renderer, parent_item_getters, dict)
        if parent_item:
            # Ticker/banner renderer: the actual message renderer is nested inside.
            renderer = dict_get(parent_item, [
                'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
                'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            ])
            if not isinstance(renderer, dict):
                return None
        return int_or_none(renderer.get('timestampUsec'), 1000)