jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/youtube_live_chat.py
[cleanup, docs] Misc cleanup
[yt-dlp.git] / yt_dlp / downloader / youtube_live_chat.py
Commit Line Data
a78e3a57 1from __future__ import division, unicode_literals
2
a78e3a57 3import json
c60ee3a2 4import time
a78e3a57 5
6from .fragment import FragmentFD
82e3f6eb 7from ..compat import compat_urllib_error
273762c8 8from ..utils import (
9 try_get,
c60ee3a2 10 dict_get,
11 int_or_none,
273762c8 12 RegexNotFoundError,
13)
82e3f6eb 14from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
a78e3a57 15
16
class YoutubeLiveChatFD(FragmentFD):
    """ Downloads YouTube live chats fragment by fragment """

    FD_NAME = 'youtube_live_chat'

    def real_download(self, filename, info_dict):
        """Download a YouTube live chat (or live chat replay) into `filename`.

        The watch page at info_dict['url'] is fetched first to obtain the
        initial continuation token and the ytcfg (API key and innertube
        context). Fragment 1 is then fetched from the chat page URL and
        every later fragment from the innertube API endpoint, until no
        further continuation token is returned.

        info_dict keys read here: 'video_id', 'url', 'protocol'
        ('youtube_live_chat' or 'youtube_live_chat_replay') and, optionally,
        'http_headers'.

        Returns True on success and False if any step fails.
        """
        video_id = info_dict['video_id']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
        if not self.params.get('skip_download'):
            self.report_warning('Live chat download runs until the livestream ends. '
                                'If you wish to download the video simultaneously, run a separate yt-dlp instance')

        fragment_retries = self.params.get('fragment_retries', 0)
        test = self.params.get('test', False)

        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        ie = YT_BaseIE(self.ydl)

        # Reference point (in milliseconds) for the offsets of live messages
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            """Fetch one fragment, merging `headers` over info_dict's http_headers."""
            http_headers = info_dict.get('http_headers', {})
            if headers:
                # copy first so that info_dict['http_headers'] is left untouched
                http_headers = http_headers.copy()
                http_headers.update(headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
            """Append the replay actions to the output and find the next continuation.

            Returns (continuation_id, offset, click_tracking_params). `offset`
            is the 'videoOffsetTimeMsec' of the last 'replayChatItemAction'
            seen; all three values are None if there was no such action.
            """
            offset = continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                if 'replayChatItemAction' in action:
                    replay_chat_item_action = action['replayChatItemAction']
                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                # one JSON document per line
                processed_fragment.extend(
                    json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
            if offset is not None:
                continuation = try_get(
                    live_chat_continuation,
                    lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
                if continuation:
                    continuation_id = continuation.get('continuation')
                    click_tracking_params = continuation.get('clickTrackingParams')
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, offset, click_tracking_params

        def try_refresh_replay_beginning(live_chat_continuation):
            """Handle the first replay fragment.

            If the chat header offers a second view-selector entry, restart
            from its reload continuation at offset 0; otherwise parse the
            fragment as a regular replay fragment.
            """
            # choose the second option that contains the unfiltered live chat replay
            refresh_continuation = try_get(
                live_chat_continuation,
                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
            if refresh_continuation:
                # no data yet but required to call _append_fragment
                self._append_fragment(ctx, b'')
                refresh_continuation_id = refresh_continuation.get('continuation')
                offset = 0
                click_tracking_params = refresh_continuation.get('trackingParams')
                return refresh_continuation_id, offset, click_tracking_params
            return parse_actions_replay(live_chat_continuation)

        # Offset of the most recent timestamped live message relative to
        # start_time; reused for actions that carry no timestamp of their own
        live_offset = 0

        def parse_actions_live(live_chat_continuation):
            """Append the live actions to the output and find the next continuation.

            Returns (continuation_id, live_offset, click_tracking_params).
            Sleeps for the continuation's 'timeoutMs', if given, before
            returning.
            """
            nonlocal live_offset
            continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                timestamp = self.parse_live_timestamp(action)
                if timestamp is not None:
                    live_offset = timestamp - start_time
                # compatibility with replay format
                pseudo_action = {
                    'replayChatItemAction': {'actions': [action]},
                    'videoOffsetTimeMsec': str(live_offset),
                    'isLive': True,
                }
                processed_fragment.extend(
                    json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
            continuation_data_getters = [
                lambda x: x['continuations'][0]['invalidationContinuationData'],
                lambda x: x['continuations'][0]['timedContinuationData'],
            ]
            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
            if continuation_data:
                continuation_id = continuation_data.get('continuation')
                click_tracking_params = continuation_data.get('clickTrackingParams')
                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
                if timeout_ms is not None:
                    # wait for the requested interval before the next poll
                    time.sleep(timeout_ms / 1000)
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, live_offset, click_tracking_params

        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
            """Download and parse one fragment, retrying on HTTP errors.

            Returns (success, continuation_id, offset, click_tracking_params);
            the last three are None whenever success is False.
            """
            count = 0
            while count <= fragment_retries:
                try:
                    success, raw_fragment = dl_fragment(url, request_data, headers)
                    if not success:
                        return False, None, None, None
                    # Try the initial-data extractor first and fall back to
                    # parsing the whole fragment as JSON if it finds nothing
                    try:
                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
                        data = None
                    if not data:
                        data = json.loads(raw_fragment)
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
                    if info_dict['protocol'] == 'youtube_live_chat_replay':
                        if frag_index == 1:
                            continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
                        else:
                            continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
                    elif info_dict['protocol'] == 'youtube_live_chat':
                        continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
                    return True, continuation_id, offset, click_tracking_params
                except compat_urllib_error.HTTPError as err:
                    count += 1
                    if count <= fragment_retries:
                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
            # The loop only ends without returning once the retries are exhausted
            self.report_error('giving up after %s fragment retries' % fragment_retries)
            return False, None, None, None

        self._prepare_and_start_frag_download(ctx, info_dict)

        success, raw_fragment = dl_fragment(info_dict['url'])
        if not success:
            return False
        try:
            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
        except RegexNotFoundError:
            return False
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'],
            str)
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))

        if not ytcfg:
            return False
        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
        if not api_key or not innertube_context:
            return False
        if continuation_id is None:
            # Without an initial continuation token there is nothing to
            # request, and building chat_page_url below would raise TypeError
            return False
        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
        if info_dict['protocol'] == 'youtube_live_chat_replay':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
        elif info_dict['protocol'] == 'youtube_live_chat':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id

        frag_index = offset = 0
        click_tracking_params = None
        while continuation_id is not None:
            frag_index += 1
            request_data = {
                'context': innertube_context,
                'continuation': continuation_id,
            }
            if frag_index > 1:
                # Later fragments are POSTed as JSON to the innertube API endpoint
                request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
                if click_tracking_params:
                    # NOTE: request_data['context'] is innertube_context itself,
                    # so this entry persists across iterations
                    request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
                headers.update({'content-type': 'application/json'})
                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    url, frag_index, fragment_request_data, headers)
            else:
                # The first fragment is a plain request for the chat page
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    chat_page_url, frag_index)
            if not success:
                return False
            if test:
                # a single fragment is enough in test mode
                break

        self._finish_frag_download(ctx, info_dict)
        return True

    @staticmethod
    def parse_live_timestamp(action):
        """Extract the timestamp of a live chat action.

        Looks for a known chat item, ticker item or banner renderer inside
        `action` and returns its 'timestampUsec' converted by
        int_or_none(..., 1000), or None if no such renderer is found.
        """
        action_content = dict_get(
            action,
            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
        if not isinstance(action_content, dict):
            return None
        item = dict_get(action_content, ['item', 'bannerRenderer'])
        if not isinstance(item, dict):
            return None
        # Renderers looked up both at the top level and inside a parent item
        message_renderers = [
            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
        ]
        renderer = dict_get(item, message_renderers + [
            # ticker
            'liveChatTickerPaidMessageItemRenderer',
            'liveChatTickerSponsorItemRenderer',
            # banner
            'liveChatBannerRenderer',
        ])
        if not isinstance(renderer, dict):
            return None
        # Some renderers nest the actual message renderer one level deeper
        parent_item_getters = [
            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
            lambda x: x['contents'],
        ]
        parent_item = try_get(renderer, parent_item_getters, dict)
        if parent_item:
            renderer = dict_get(parent_item, message_renderers)
            if not isinstance(renderer, dict):
                return None
        # presumably microseconds -> milliseconds (the result is compared with
        # the millisecond start_time in real_download) -- TODO confirm the
        # meaning of int_or_none's second argument
        return int_or_none(renderer.get('timestampUsec'), 1000)