]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/youtube_live_chat.py
[TBS] Support livestreams (#448)
[yt-dlp.git] / yt_dlp / downloader / youtube_live_chat.py
CommitLineData
a78e3a57 1from __future__ import division, unicode_literals
2
a78e3a57 3import json
c60ee3a2 4import time
a78e3a57 5
6from .fragment import FragmentFD
82e3f6eb 7from ..compat import compat_urllib_error
273762c8 8from ..utils import (
9 try_get,
c60ee3a2 10 dict_get,
11 int_or_none,
273762c8 12 RegexNotFoundError,
13)
82e3f6eb 14from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
a78e3a57 15
16
class YoutubeLiveChatFD(FragmentFD):
    """Downloads YouTube live chats fragment by fragment.

    Handles both the ``youtube_live_chat_replay`` and the ``youtube_live_chat``
    protocols.  Every chat action is written to the output as one JSON
    document per line.
    """

    FD_NAME = 'youtube_live_chat'

    def real_download(self, filename, info_dict):
        """Download the live chat described by *info_dict* into *filename*.

        Reads ``video_id``, ``url``, ``protocol`` and (optionally)
        ``http_headers`` from *info_dict*, and ``fragment_retries`` / ``test``
        from ``self.params``.

        Returns True when the download loop finished (or was cut short by the
        ``test`` parameter) and False when a fragment could not be fetched or
        the watch page did not contain the required initial data / ytcfg.
        """
        video_id = info_dict['video_id']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)

        fragment_retries = self.params.get('fragment_retries', 0)
        test = self.params.get('test', False)

        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        ie = YT_BaseIE(self.ydl)

        # Wall-clock reference (milliseconds) used to derive offsets of live messages.
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            """Fetch one fragment, layering *headers* over the info_dict headers."""
            http_headers = info_dict.get('http_headers', {})
            if headers:
                # Copy first so the headers stored in info_dict are not mutated.
                http_headers = http_headers.copy()
                http_headers.update(headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
            """Append all replay actions; return (next continuation id, last offset).

            The continuation id is only looked up when at least one
            ``replayChatItemAction`` was seen; otherwise it stays None, which
            ends the download loop.
            """
            offset = continuation_id = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                if 'replayChatItemAction' in action:
                    replay_chat_item_action = action['replayChatItemAction']
                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                processed_fragment.extend(
                    json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
            if offset is not None:
                continuation_id = try_get(
                    live_chat_continuation,
                    lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, offset

        def try_refresh_replay_beginning(live_chat_continuation):
            """Handle the first replay fragment.

            If the header offers a second sub-menu item, restart from its
            continuation at offset 0; otherwise parse the fragment normally.
            """
            # choose the second option that contains the unfiltered live chat replay
            refresh_continuation_id = try_get(
                live_chat_continuation,
                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData']['continuation'], str)
            if refresh_continuation_id:
                # no data yet but required to call _append_fragment
                self._append_fragment(ctx, b'')
                return refresh_continuation_id, 0
            return parse_actions_replay(live_chat_continuation)

        # Offset of the most recent timestamped live action; carried across fragments.
        live_offset = 0

        def parse_actions_live(live_chat_continuation):
            """Append all live actions; return (next continuation id, latest offset).

            Each action is wrapped in a replay-like envelope.  Actions without
            a parseable timestamp reuse the offset of the previous one.
            """
            nonlocal live_offset
            continuation_id = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                timestamp = self.parse_live_timestamp(action)
                if timestamp is not None:
                    live_offset = timestamp - start_time
                # compatibility with replay format
                pseudo_action = {
                    'replayChatItemAction': {'actions': [action]},
                    'videoOffsetTimeMsec': str(live_offset),
                    'isLive': True,
                }
                processed_fragment.extend(
                    json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
            continuation_data_getters = [
                lambda x: x['continuations'][0]['invalidationContinuationData'],
                lambda x: x['continuations'][0]['timedContinuationData'],
            ]
            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
            if continuation_data:
                continuation_id = continuation_data.get('continuation')
                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
                if timeout_ms is not None:
                    # Wait for the interval carried by the response (ms -> s).
                    time.sleep(timeout_ms / 1000)
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, live_offset

        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
            """Download and parse one fragment, retrying on HTTP errors.

            Returns ``(success, continuation_id, offset)``; on failure the
            last two items are None.
            """
            count = 0
            while count <= fragment_retries:
                try:
                    success, raw_fragment = dl_fragment(url, request_data, headers)
                    if not success:
                        return False, None, None
                    # The fragment is either an HTML page embedding the initial
                    # data or a bare JSON API response.
                    try:
                        data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
                        data = None
                    if not data:
                        data = json.loads(raw_fragment)
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
                    if info_dict['protocol'] == 'youtube_live_chat_replay':
                        if frag_index == 1:
                            continuation_id, offset = try_refresh_replay_beginning(live_chat_continuation)
                        else:
                            continuation_id, offset = parse_actions_replay(live_chat_continuation)
                    elif info_dict['protocol'] == 'youtube_live_chat':
                        continuation_id, offset = parse_actions_live(live_chat_continuation)
                    return True, continuation_id, offset
                except compat_urllib_error.HTTPError as err:
                    count += 1
                    if count <= fragment_retries:
                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
            # The loop is only left without returning once the retries are used up.
            self.report_error('giving up after %s fragment retries' % fragment_retries)
            return False, None, None

        self._prepare_and_start_frag_download(ctx)

        success, raw_fragment = dl_fragment(info_dict['url'])
        if not success:
            return False
        try:
            data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
        except RegexNotFoundError:
            return False
        # Restrict to str (as the other continuation lookups do) so that a
        # missing or malformed value consistently becomes None.
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'],
            str)
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        ytcfg = ie._extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))

        if not ytcfg:
            return False
        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
        if not api_key or not innertube_context:
            return False
        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
        # Only the URL prefix is prepared here: continuation_id may be None, and
        # concatenating it eagerly would raise TypeError before the loop below
        # gets the chance to skip the download.
        if info_dict['protocol'] == 'youtube_live_chat_replay':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
            chat_page_url_prefix = 'https://www.youtube.com/live_chat_replay?continuation='
        elif info_dict['protocol'] == 'youtube_live_chat':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
            chat_page_url_prefix = 'https://www.youtube.com/live_chat?continuation='

        frag_index = offset = 0
        while continuation_id is not None:
            frag_index += 1
            if frag_index > 1:
                # Subsequent fragments are requested from the JSON API.
                request_data = {
                    'context': innertube_context,
                    'continuation': continuation_id,
                    'currentPlayerState': {'playerOffsetMs': str(max(offset - 5000, 0))},
                }
                headers = ie._generate_api_headers(ytcfg, visitor_data=visitor_data)
                headers.update({'content-type': 'application/json'})
                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
                success, continuation_id, offset = download_and_parse_fragment(
                    url, frag_index, fragment_request_data, headers)
            else:
                # The first fragment is the chat page itself.
                success, continuation_id, offset = download_and_parse_fragment(
                    chat_page_url_prefix + continuation_id, frag_index)
            if not success:
                return False
            if test:
                break

        self._finish_frag_download(ctx)
        return True

    @staticmethod
    def parse_live_timestamp(action):
        """Return the timestamp of a live chat *action*, or None if it has none.

        Looks through the known action / item / renderer wrappers for a
        ``timestampUsec`` field.  The value is passed through
        ``int_or_none(..., 1000)`` -- presumably converting microseconds to
        milliseconds, which matches how the caller compares it with a
        millisecond clock; confirm against ``int_or_none``.
        """
        action_content = dict_get(
            action,
            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
        if not isinstance(action_content, dict):
            return None
        item = dict_get(action_content, ['item', 'bannerRenderer'])
        if not isinstance(item, dict):
            return None
        renderer = dict_get(item, [
            # text
            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            # ticker
            'liveChatTickerPaidMessageItemRenderer',
            'liveChatTickerSponsorItemRenderer',
            # banner
            'liveChatBannerRenderer',
        ])
        if not isinstance(renderer, dict):
            return None
        # Ticker and banner renderers wrap the actual message renderer.
        parent_item_getters = [
            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
            lambda x: x['contents'],
        ]
        parent_item = try_get(renderer, parent_item_getters, dict)
        if parent_item:
            renderer = dict_get(parent_item, [
                'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
                'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            ])
        if not isinstance(renderer, dict):
            return None
        return int_or_none(renderer.get('timestampUsec'), 1000)