jfr.im git - yt-dlp.git/blob - yt_dlp/downloader/youtube_live_chat.py
[youtube] Cleanup authentication code (#786)
[yt-dlp.git] / yt_dlp / downloader / youtube_live_chat.py
1 from __future__ import division, unicode_literals
2
3 import json
4 import time
5
6 from .fragment import FragmentFD
7 from ..compat import compat_urllib_error
8 from ..utils import (
9 try_get,
10 dict_get,
11 int_or_none,
12 RegexNotFoundError,
13 )
14 from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
15
16
class YoutubeLiveChatFD(FragmentFD):
    """ Downloads YouTube live chats fragment by fragment """

    FD_NAME = 'youtube_live_chat'

    def real_download(self, filename, info_dict):
        """Download a live chat, or a live chat replay, into `filename`.

        The page at info_dict['url'] is fetched first; the initial
        continuation token and the ytcfg (API key and innertube context) are
        extracted from it. Fragments are then requested one after another,
        each response supplying the continuation for the next request, until
        a response yields no continuation. Chat actions are appended to the
        output as one JSON document per line.

        info_dict['protocol'] selects the mode and must be either
        'youtube_live_chat_replay' or 'youtube_live_chat'.

        Returns True when the download finished, False on failure.
        """
        video_id = info_dict['video_id']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)

        # Validate the protocol before doing any work: every later branch
        # on it would otherwise leave the request URLs unbound.
        protocol = info_dict['protocol']
        if protocol not in ('youtube_live_chat_replay', 'youtube_live_chat'):
            self.report_error('unsupported live chat protocol: %s' % protocol)
            return False

        fragment_retries = self.params.get('fragment_retries', 0)
        test = self.params.get('test', False)

        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        ie = YT_BaseIE(self.ydl)

        # wall-clock start in milliseconds; live offsets are relative to it
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            """Fetch one fragment, merging `headers` over the info_dict headers."""
            http_headers = info_dict.get('http_headers', {})
            if headers:
                # copy so that the headers stored in info_dict stay untouched
                http_headers = http_headers.copy()
                http_headers.update(headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
            """Append the replay actions of one response to the output.

            Returns (continuation_id, offset, click_tracking_params). `offset`
            is the 'videoOffsetTimeMsec' of the last replay action seen, or
            None when the response held none; the continuation is only looked
            up when at least one replay action was found.
            """
            offset = continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                if 'replayChatItemAction' in action:
                    replay_chat_item_action = action['replayChatItemAction']
                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                processed_fragment.extend(
                    json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
            if offset is not None:
                continuation = try_get(
                    live_chat_continuation,
                    lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
                if continuation:
                    continuation_id = continuation.get('continuation')
                    click_tracking_params = continuation.get('clickTrackingParams')
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, offset, click_tracking_params

        def try_refresh_replay_beginning(live_chat_continuation):
            """Handle the first replay response.

            When the header offers a reload continuation in its second
            sub-menu item, restart from it at offset 0 without writing any
            actions; otherwise parse the response like any other replay
            fragment.
            """
            # choose the second option that contains the unfiltered live chat replay
            refresh_continuation = try_get(
                live_chat_continuation,
                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
            if refresh_continuation:
                # no data yet but required to call _append_fragment
                self._append_fragment(ctx, b'')
                refresh_continuation_id = refresh_continuation.get('continuation')
                offset = 0
                click_tracking_params = refresh_continuation.get('trackingParams')
                return refresh_continuation_id, offset, click_tracking_params
            return parse_actions_replay(live_chat_continuation)

        live_offset = 0

        def parse_actions_live(live_chat_continuation):
            """Append the live actions of one response to the output.

            Each action is wrapped in a replay-style record. `live_offset` is
            updated whenever an action carries a timestamp and keeps its last
            value otherwise. If the continuation data carries 'timeoutMs',
            the function sleeps for that long before returning.

            Returns (continuation_id, live_offset, click_tracking_params).
            """
            nonlocal live_offset
            continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                timestamp = self.parse_live_timestamp(action)
                if timestamp is not None:
                    live_offset = timestamp - start_time
                # compatibility with replay format
                pseudo_action = {
                    'replayChatItemAction': {'actions': [action]},
                    'videoOffsetTimeMsec': str(live_offset),
                    'isLive': True,
                }
                processed_fragment.extend(
                    json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
            continuation_data_getters = [
                lambda x: x['continuations'][0]['invalidationContinuationData'],
                lambda x: x['continuations'][0]['timedContinuationData'],
            ]
            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
            timeout_ms = None
            if continuation_data:
                continuation_id = continuation_data.get('continuation')
                click_tracking_params = continuation_data.get('clickTrackingParams')
                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
            # Write what has been received before waiting, so the data is not
            # held back (or lost on interruption) during the sleep.
            self._append_fragment(ctx, processed_fragment)
            if timeout_ms is not None:
                time.sleep(timeout_ms / 1000)
            return continuation_id, live_offset, click_tracking_params

        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
            """Download fragment `frag_index` and hand it to the matching parser.

            HTTP errors are retried up to `fragment_retries` times.

            Returns (success, continuation_id, offset, click_tracking_params);
            the last three are None when success is False.
            """
            count = 0
            while count <= fragment_retries:
                try:
                    success, raw_fragment = dl_fragment(url, request_data, headers)
                    if not success:
                        return False, None, None, None
                    # The response is either an HTML page embedding the
                    # initial data or a plain JSON document.
                    try:
                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
                        data = None
                    if not data:
                        data = json.loads(raw_fragment)
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
                    if protocol == 'youtube_live_chat_replay':
                        if frag_index == 1:
                            continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
                        else:
                            continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
                    else:
                        # protocol was validated above, so this is 'youtube_live_chat'
                        continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
                    return True, continuation_id, offset, click_tracking_params
                except compat_urllib_error.HTTPError as err:
                    count += 1
                    if count <= fragment_retries:
                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
            # the loop is only left this way once the retries are exhausted
            self.report_error('giving up after %s fragment retries' % fragment_retries)
            return False, None, None, None

        self._prepare_and_start_frag_download(ctx, info_dict)

        success, raw_fragment = dl_fragment(info_dict['url'])
        if not success:
            return False
        webpage = raw_fragment.decode('utf-8', 'replace')
        try:
            data = ie.extract_yt_initial_data(video_id, webpage)
        except RegexNotFoundError:
            return False
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'],
            str)
        if continuation_id is None:
            # Without a continuation the request URLs below cannot be built
            self.report_error('unable to find the live chat continuation')
            return False
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        ytcfg = ie.extract_ytcfg(video_id, webpage)

        if not ytcfg:
            return False
        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
        if not api_key or not innertube_context:
            return False
        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
        if protocol == 'youtube_live_chat_replay':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
        else:
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id

        frag_index = offset = 0
        click_tracking_params = None
        while continuation_id is not None:
            frag_index += 1
            if frag_index > 1:
                request_data = {
                    'context': innertube_context,
                    'continuation': continuation_id,
                    'currentPlayerState': {'playerOffsetMs': str(max(offset - 5000, 0))},
                }
                if click_tracking_params:
                    # NOTE: this writes into the shared innertube_context, so
                    # the entry stays in place for later requests as well.
                    request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
                headers.update({'content-type': 'application/json'})
                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    url, frag_index, fragment_request_data, headers)
            else:
                # the first fragment is the chat page itself, fetched without a request body
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    chat_page_url, frag_index)
            if not success:
                return False
            if test:
                break

        self._finish_frag_download(ctx, info_dict)
        return True

    @staticmethod
    def parse_live_timestamp(action):
        """Extract the timestamp of a live chat action.

        Looks up the message renderer of an add-chat-item, ticker or banner
        action (following the ticker/banner link to the parent message when
        present) and returns its 'timestampUsec' converted by
        int_or_none(..., 1000) -- presumably microseconds scaled to
        milliseconds, matching the millisecond start time it is compared
        against by the caller. Returns None when no renderer with a
        timestamp can be found.
        """
        action_content = dict_get(
            action,
            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
        if not isinstance(action_content, dict):
            return None
        item = dict_get(action_content, ['item', 'bannerRenderer'])
        if not isinstance(item, dict):
            return None
        renderer = dict_get(item, [
            # text
            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            # ticker
            'liveChatTickerPaidMessageItemRenderer',
            'liveChatTickerSponsorItemRenderer',
            # banner
            'liveChatBannerRenderer',
        ])
        if not isinstance(renderer, dict):
            return None
        # tickers and banners wrap the actual message renderer
        parent_item_getters = [
            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
            lambda x: x['contents'],
        ]
        parent_item = try_get(renderer, parent_item_getters, dict)
        if parent_item:
            renderer = dict_get(parent_item, [
                'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
                'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            ])
        if not isinstance(renderer, dict):
            return None
        return int_or_none(renderer.get('timestampUsec'), 1000)