]>
Commit | Line | Data |
---|---|---|
a78e3a57 | 1 | from __future__ import division, unicode_literals |
2 | ||
a78e3a57 | 3 | import json |
c60ee3a2 | 4 | import time |
a78e3a57 | 5 | |
6 | from .fragment import FragmentFD | |
82e3f6eb | 7 | from ..compat import compat_urllib_error |
273762c8 | 8 | from ..utils import ( |
9 | try_get, | |
c60ee3a2 | 10 | dict_get, |
11 | int_or_none, | |
273762c8 | 12 | RegexNotFoundError, |
13 | ) | |
82e3f6eb | 14 | from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE |
a78e3a57 | 15 | |
16 | ||
class YoutubeLiveChatFD(FragmentFD):
    """ Downloads YouTube live chats fragment by fragment

    Handles both the 'youtube_live_chat' (ongoing stream) and the
    'youtube_live_chat_replay' (finished stream) protocols. Every chat action
    is written to the destination as one JSON document per line.
    """

    FD_NAME = 'youtube_live_chat'

    def real_download(self, filename, info_dict):
        """Download the live chat described by info_dict into filename.

        The chat page at info_dict['url'] is fetched first to obtain the
        initial continuation token and the ytcfg (API key and innertube
        context). After that, fragments are requested one by one, each
        response supplying the continuation token for the next request, until
        no further continuation is returned (or after the first fragment when
        the 'test' param is set).

        Returns True when the download loop finished normally and False when
        a fragment could not be downloaded or the required page data
        (initial data, continuation, ytcfg, API key, context) is missing.
        """
        video_id = info_dict['video_id']
        protocol = info_dict['protocol']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
        if not self.params.get('skip_download'):
            self.report_warning('Live chat download runs until the livestream ends. '
                                'If you wish to download the video simultaneously, run a separate yt-dlp instance')

        fragment_retries = self.params.get('fragment_retries', 0)
        test = self.params.get('test', False)

        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        ie = YT_BaseIE(self.ydl)

        # Wall-clock reference (in milliseconds) used to derive offsets for live messages
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            """Download one fragment, merging extra headers over info_dict's http_headers."""
            http_headers = info_dict.get('http_headers', {})
            if headers:
                # copy first so that info_dict['http_headers'] is left untouched
                http_headers = http_headers.copy()
                http_headers.update(headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
            """Append the replay actions to the output.

            Returns (continuation_id, offset, click_tracking_params); offset is
            the videoOffsetTimeMsec of the last replayChatItemAction seen, and
            all three are None when no such action was present.
            """
            offset = continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                if 'replayChatItemAction' in action:
                    replay_chat_item_action = action['replayChatItemAction']
                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                processed_fragment.extend(
                    json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
            # a continuation is only followed when this fragment carried replay actions
            if offset is not None:
                continuation = try_get(
                    live_chat_continuation,
                    lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
                if continuation:
                    continuation_id = continuation.get('continuation')
                    click_tracking_params = continuation.get('clickTrackingParams')
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, offset, click_tracking_params

        def try_refresh_replay_beginning(live_chat_continuation):
            """Switch to the unfiltered replay if the header offers it.

            Falls back to parsing the fragment as a regular replay fragment
            when the view selector entry is not available.
            """
            # choose the second option that contains the unfiltered live chat replay
            refresh_continuation = try_get(
                live_chat_continuation,
                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
            if refresh_continuation:
                # no data yet but required to call _append_fragment
                self._append_fragment(ctx, b'')
                refresh_continuation_id = refresh_continuation.get('continuation')
                offset = 0
                click_tracking_params = refresh_continuation.get('trackingParams')
                return refresh_continuation_id, offset, click_tracking_params
            return parse_actions_replay(live_chat_continuation)

        # Offset of the most recent timestamped live message; reused for
        # actions that carry no timestamp of their own
        live_offset = 0

        def parse_actions_live(live_chat_continuation):
            """Append the live actions to the output, wrapped as replay-style items.

            Sleeps for the 'timeoutMs' given by the continuation data (when
            present) before returning (continuation_id, live_offset,
            click_tracking_params).
            """
            nonlocal live_offset
            continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                timestamp = self.parse_live_timestamp(action)
                if timestamp is not None:
                    live_offset = timestamp - start_time
                # compatibility with replay format
                pseudo_action = {
                    'replayChatItemAction': {'actions': [action]},
                    'videoOffsetTimeMsec': str(live_offset),
                    'isLive': True,
                }
                processed_fragment.extend(
                    json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
            continuation_data_getters = [
                lambda x: x['continuations'][0]['invalidationContinuationData'],
                lambda x: x['continuations'][0]['timedContinuationData'],
            ]
            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
            if continuation_data:
                continuation_id = continuation_data.get('continuation')
                click_tracking_params = continuation_data.get('clickTrackingParams')
                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
                if timeout_ms is not None:
                    # wait for the interval requested by the response before polling again
                    time.sleep(timeout_ms / 1000)
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, live_offset, click_tracking_params

        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
            """Download and parse one fragment, retrying on HTTP errors.

            Returns (success, continuation_id, offset, click_tracking_params);
            the last three are None whenever success is False.
            """
            count = 0
            while count <= fragment_retries:
                try:
                    success, raw_fragment = dl_fragment(url, request_data, headers)
                    if not success:
                        return False, None, None, None
                    # The fragment is either a chat page with embedded initial
                    # data or a bare JSON API response
                    try:
                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
                        data = None
                    if not data:
                        data = json.loads(raw_fragment)
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
                    if protocol == 'youtube_live_chat_replay':
                        if frag_index == 1:
                            continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
                        else:
                            continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
                    elif protocol == 'youtube_live_chat':
                        continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
                    return True, continuation_id, offset, click_tracking_params
                except compat_urllib_error.HTTPError as err:
                    count += 1
                    if count <= fragment_retries:
                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
            # the loop is only left without returning once the retries are exhausted
            self.report_error('giving up after %s fragment retries' % fragment_retries)
            return False, None, None, None

        self._prepare_and_start_frag_download(ctx, info_dict)

        success, raw_fragment = dl_fragment(info_dict['url'])
        if not success:
            return False
        try:
            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
        except RegexNotFoundError:
            return False
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        if continuation_id is None:
            # The page carries no chat continuation, so there is nothing to
            # request; bail out instead of failing on the URL concatenation below
            return False

        ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))

        if not ytcfg:
            return False
        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
        if not api_key or not innertube_context:
            return False
        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
        if protocol == 'youtube_live_chat_replay':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
        elif protocol == 'youtube_live_chat':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
        else:
            # neither endpoint applies; fail explicitly rather than on an unbound name
            self.report_error('Unsupported live chat protocol: %s' % protocol)
            return False

        frag_index = offset = 0
        click_tracking_params = None
        while continuation_id is not None:
            frag_index += 1
            if frag_index > 1:
                context = innertube_context
                if click_tracking_params:
                    # Work on a per-request copy so that the context stored in
                    # ytcfg is not mutated and stale tracking params are not
                    # carried over into later requests
                    context = dict(innertube_context)
                    context['clickTracking'] = {'clickTrackingParams': click_tracking_params}
                request_data = {
                    'context': context,
                    'continuation': continuation_id,
                    # request from slightly before the last seen offset
                    'currentPlayerState': {'playerOffsetMs': str(max(offset - 5000, 0))},
                }
                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
                headers.update({'content-type': 'application/json'})
                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    url, frag_index, fragment_request_data, headers)
            else:
                # the first fragment is the chat page itself, fetched without a request body
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    chat_page_url, frag_index)
            if not success:
                return False
            if test:
                break

        self._finish_frag_download(ctx, info_dict)
        return True

    @staticmethod
    def parse_live_timestamp(action):
        """Extract the timestamp of a live chat action.

        Looks up the renderer of a chat item, ticker item or banner action
        (descending into the parent item for tickers and banners) and returns
        its 'timestampUsec' scaled down by 1000 through int_or_none
        (presumably microseconds to milliseconds), or None when the action
        has no recognised renderer.
        """
        action_content = dict_get(
            action,
            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
        if not isinstance(action_content, dict):
            return None
        item = dict_get(action_content, ['item', 'bannerRenderer'])
        if not isinstance(item, dict):
            return None
        renderer = dict_get(item, [
            # text
            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            # ticker
            'liveChatTickerPaidMessageItemRenderer',
            'liveChatTickerSponsorItemRenderer',
            # banner
            'liveChatBannerRenderer',
        ])
        if not isinstance(renderer, dict):
            return None
        # Tickers and banners wrap the actual message renderer in a parent item
        parent_item_getters = [
            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
            lambda x: x['contents'],
        ]
        parent_item = try_get(renderer, parent_item_getters, dict)
        if parent_item:
            renderer = dict_get(parent_item, [
                'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
                'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            ])
        if not isinstance(renderer, dict):
            return None
        return int_or_none(renderer.get('timestampUsec'), 1000)