]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/youtube_live_chat.py
[cleanup] Minor fixes (See desc)
[yt-dlp.git] / yt_dlp / downloader / youtube_live_chat.py
CommitLineData
a78e3a57 1import json
c60ee3a2 2import time
a78e3a57 3
4from .fragment import FragmentFD
82e3f6eb 5from ..compat import compat_urllib_error
82e3f6eb 6from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
f8271158 7from ..utils import RegexNotFoundError, dict_get, int_or_none, try_get
a78e3a57 8
9
class YoutubeLiveChatFD(FragmentFD):
    """ Downloads YouTube live chats fragment by fragment """

    FD_NAME = 'youtube_live_chat'

    def real_download(self, filename, info_dict):
        """Download a live chat (or live chat replay) into ``filename``.

        Reads ``video_id``, ``protocol``, ``url`` and (optionally)
        ``http_headers`` from ``info_dict``. ``protocol`` selects between
        'youtube_live_chat' and 'youtube_live_chat_replay'.

        Every chat action is written as one JSON document per line.

        Returns True when the chat was downloaded (or, in test mode, after
        the first fragment); returns False on any failure.
        """
        video_id = info_dict['video_id']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
        if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
            self.report_warning('Live chat download runs until the livestream ends. '
                                'If you wish to download the video simultaneously, run a separate yt-dlp instance')

        fragment_retries = self.params.get('fragment_retries', 0)
        test = self.params.get('test', False)

        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        ie = YT_BaseIE(self.ydl)

        # Wall-clock time (in milliseconds) at which the download started;
        # live offsets are computed relative to it.
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            """Download one fragment, merging ``headers`` over info_dict's http_headers."""
            http_headers = info_dict.get('http_headers', {})
            if headers:
                # Copy first so the headers stored in info_dict are not mutated
                http_headers = http_headers.copy()
                http_headers.update(headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
            """Append all replay actions to the output.

            Returns ``(continuation_id, offset, click_tracking_params)``.
            ``offset`` is the 'videoOffsetTimeMsec' of the last
            'replayChatItemAction' seen, or None when there was none; in that
            case no continuation is looked up, which ends the download loop.
            """
            offset = continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                if 'replayChatItemAction' in action:
                    replay_chat_item_action = action['replayChatItemAction']
                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                processed_fragment.extend(
                    json.dumps(action, ensure_ascii=False).encode() + b'\n')
            if offset is not None:
                continuation = try_get(
                    live_chat_continuation,
                    lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
                if continuation:
                    continuation_id = continuation.get('continuation')
                    click_tracking_params = continuation.get('clickTrackingParams')
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, offset, click_tracking_params

        def try_refresh_replay_beginning(live_chat_continuation):
            """Handle the first replay fragment.

            If the header offers a reload continuation, return it with offset 0
            and write nothing; otherwise fall back to ``parse_actions_replay``.
            """
            # choose the second option that contains the unfiltered live chat replay
            refresh_continuation = try_get(
                live_chat_continuation,
                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
            if refresh_continuation:
                # no data yet but required to call _append_fragment
                self._append_fragment(ctx, b'')
                refresh_continuation_id = refresh_continuation.get('continuation')
                offset = 0
                click_tracking_params = refresh_continuation.get('trackingParams')
                return refresh_continuation_id, offset, click_tracking_params
            return parse_actions_replay(live_chat_continuation)

        # Offset (ms since start_time) of the most recent timestamped live action;
        # kept across fragments so actions without a timestamp reuse the last one.
        live_offset = 0

        def parse_actions_live(live_chat_continuation):
            """Append all live actions to the output, wrapped in a replay-like envelope.

            Returns ``(continuation_id, live_offset, click_tracking_params)``.
            Sleeps for the continuation's 'timeoutMs' (when present) before
            the fragment is appended.
            """
            nonlocal live_offset
            continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                timestamp = self.parse_live_timestamp(action)
                if timestamp is not None:
                    live_offset = timestamp - start_time
                # compatibility with replay format
                pseudo_action = {
                    'replayChatItemAction': {'actions': [action]},
                    'videoOffsetTimeMsec': str(live_offset),
                    'isLive': True,
                }
                processed_fragment.extend(
                    json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
            continuation_data_getters = [
                lambda x: x['continuations'][0]['invalidationContinuationData'],
                lambda x: x['continuations'][0]['timedContinuationData'],
            ]
            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
            if continuation_data:
                continuation_id = continuation_data.get('continuation')
                click_tracking_params = continuation_data.get('clickTrackingParams')
                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
                if timeout_ms is not None:
                    time.sleep(timeout_ms / 1000)
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, live_offset, click_tracking_params

        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
            """Download one fragment (retrying on HTTP errors) and parse it.

            Returns ``(success, continuation_id, offset, click_tracking_params)``;
            the last three are None whenever ``success`` is False.
            """
            count = 0
            while count <= fragment_retries:
                try:
                    success = dl_fragment(url, request_data, headers)
                    if not success:
                        return False, None, None, None
                    raw_fragment = self._read_fragment(ctx)
                    # The fragment is either a page embedding the initial data
                    # or a bare JSON document; try the former first.
                    try:
                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
                        data = None
                    if not data:
                        data = json.loads(raw_fragment)
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
                    if info_dict['protocol'] == 'youtube_live_chat_replay':
                        if frag_index == 1:
                            continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
                        else:
                            continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
                    elif info_dict['protocol'] == 'youtube_live_chat':
                        continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
                    return True, continuation_id, offset, click_tracking_params
                except compat_urllib_error.HTTPError as err:
                    count += 1
                    if count <= fragment_retries:
                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
            if count > fragment_retries:
                self.report_error('giving up after %s fragment retries' % fragment_retries)
                return False, None, None, None

        self._prepare_and_start_frag_download(ctx, info_dict)

        success = dl_fragment(info_dict['url'])
        if not success:
            return False
        raw_fragment = self._read_fragment(ctx)
        try:
            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
        except RegexNotFoundError:
            return False
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))

        if not ytcfg:
            return False
        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
        if not api_key or not innertube_context:
            return False
        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)

        # The continuation is concatenated into a URL below, so it must be a
        # string; fail cleanly instead of raising TypeError when it is missing.
        if not isinstance(continuation_id, str):
            self.report_error('Unable to extract live chat continuation')
            return False

        if info_dict['protocol'] == 'youtube_live_chat_replay':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
        elif info_dict['protocol'] == 'youtube_live_chat':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
        else:
            # Without this branch `url`/`chat_page_url` would be unbound below
            self.report_error('Unsupported live chat protocol: %s' % info_dict['protocol'])
            return False

        frag_index = offset = 0
        click_tracking_params = None
        while continuation_id is not None:
            frag_index += 1
            request_data = {
                'context': innertube_context,
                'continuation': continuation_id,
            }
            if frag_index > 1:
                # Later fragments are POSTed to the API endpoint as JSON
                request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
                if click_tracking_params:
                    # NOTE: request_data['context'] is innertube_context itself, so
                    # this entry stays in place for later requests until overwritten
                    request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
                headers.update({'content-type': 'application/json'})
                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    url, frag_index, fragment_request_data, headers)
            else:
                # The first fragment is fetched from the chat page URL
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    chat_page_url, frag_index)
            if not success:
                return False
            if test:
                break

        self._finish_frag_download(ctx, info_dict)
        return True

    @staticmethod
    def parse_live_timestamp(action):
        """Return the 'timestampUsec' of a live chat action divided by 1000.

        Returns None when the action does not have one of the known
        action/item/renderer shapes or carries no usable timestamp.
        """
        action_content = dict_get(
            action,
            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
        if not isinstance(action_content, dict):
            return None
        item = dict_get(action_content, ['item', 'bannerRenderer'])
        if not isinstance(item, dict):
            return None
        renderer = dict_get(item, [
            # text
            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            # ticker
            'liveChatTickerPaidMessageItemRenderer',
            'liveChatTickerSponsorItemRenderer',
            # banner
            'liveChatBannerRenderer',
        ])
        if not isinstance(renderer, dict):
            return None
        # Ticker and banner renderers wrap the actual message renderer;
        # unwrap it when present.
        parent_item_getters = [
            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
            lambda x: x['contents'],
        ]
        parent_item = try_get(renderer, parent_item_getters, dict)
        if parent_item:
            renderer = dict_get(parent_item, [
                'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
                'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            ])
        if not isinstance(renderer, dict):
            return None
        return int_or_none(renderer.get('timestampUsec'), 1000)