]>
Commit | Line | Data |
---|---|---|
1 | import json | |
2 | import time | |
3 | ||
4 | from .fragment import FragmentFD | |
5 | from ..networking.exceptions import HTTPError | |
6 | from ..utils import ( | |
7 | RegexNotFoundError, | |
8 | RetryManager, | |
9 | dict_get, | |
10 | int_or_none, | |
11 | try_get, | |
12 | ) | |
13 | from ..utils.networking import HTTPHeaderDict | |
14 | ||
15 | ||
class YoutubeLiveChatFD(FragmentFD):
    """Downloads YouTube live chats fragment by fragment.

    Every chat action is written to the output as one JSON document per line.
    """

    def real_download(self, filename, info_dict):
        """Download a live chat or live chat replay into ``filename``.

        The page at ``info_dict['url']`` is fetched first to obtain the
        initial continuation token and the ytcfg (API key and innertube
        context).  The first chat fragment is then requested from the chat
        page URL and every later fragment from the innertube API, following
        continuation tokens until none is returned.

        Keys read from ``info_dict``: ``video_id``, ``protocol``
        (``'youtube_live_chat'`` or ``'youtube_live_chat_replay'``), ``url``
        and, optionally, ``http_headers``.

        Returns ``False`` when a download fails or when required page data
        (initial data, ytcfg, API key, innertube context, initial
        continuation) is missing; otherwise returns the result of
        ``_finish_frag_download``.
        """
        video_id = info_dict['video_id']
        protocol = info_dict['protocol']
        is_live = protocol == 'youtube_live_chat'
        self.to_screen(f'[{self.FD_NAME}] Downloading live chat')
        if not self.params.get('skip_download') and is_live:
            self.report_warning('Live chat download runs until the livestream ends. '
                                'If you wish to download the video simultaneously, run a separate yt-dlp instance')

        test = self.params.get('test', False)

        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        from ..extractor.youtube import YoutubeBaseInfoExtractor

        ie = YoutubeBaseInfoExtractor(self.ydl)

        # Wall-clock start in milliseconds; live message offsets are measured from here
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            """Download one fragment, layering ``headers`` over the info dict's HTTP headers."""
            http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
            """Append all replay actions to the output.

            Returns ``(continuation_id, offset, click_tracking_params)``.
            ``offset`` is the ``videoOffsetTimeMsec`` of the last replay item
            seen, or None if there was none; in that case no continuation is
            looked up and the caller's loop ends.
            """
            offset = continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                if 'replayChatItemAction' in action:
                    replay_chat_item_action = action['replayChatItemAction']
                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                processed_fragment.extend(
                    json.dumps(action, ensure_ascii=False).encode() + b'\n')
            if offset is not None:
                continuation = try_get(
                    live_chat_continuation,
                    lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
                if continuation:
                    continuation_id = continuation.get('continuation')
                    click_tracking_params = continuation.get('clickTrackingParams')
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, offset, click_tracking_params

        def try_refresh_replay_beginning(live_chat_continuation):
            """Switch to the unfiltered replay if the header offers it.

            Used for the first replay fragment only.  When the reload
            continuation is present, nothing is written yet and the replay
            restarts from offset 0 with that continuation; otherwise the
            fragment is parsed as a regular replay fragment.
            """
            # choose the second option that contains the unfiltered live chat replay
            refresh_continuation = try_get(
                live_chat_continuation,
                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
            if refresh_continuation:
                # no data yet but required to call _append_fragment
                self._append_fragment(ctx, b'')
                refresh_continuation_id = refresh_continuation.get('continuation')
                offset = 0
                click_tracking_params = refresh_continuation.get('trackingParams')
                return refresh_continuation_id, offset, click_tracking_params
            return parse_actions_replay(live_chat_continuation)

        live_offset = 0

        def parse_actions_live(live_chat_continuation):
            """Append all live actions to the output, wrapped in a replay-like envelope.

            Returns ``(continuation_id, live_offset, click_tracking_params)``.
            Sleeps for the continuation's ``timeoutMs`` (when given) before
            returning, which paces the polling of the live endpoint.
            """
            nonlocal live_offset
            continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                timestamp = self.parse_live_timestamp(action)
                # Actions without a timestamp reuse the most recent known offset
                if timestamp is not None:
                    live_offset = timestamp - start_time
                # compatibility with replay format
                pseudo_action = {
                    'replayChatItemAction': {'actions': [action]},
                    'videoOffsetTimeMsec': str(live_offset),
                    'isLive': True,
                }
                processed_fragment.extend(
                    json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
            continuation_data_getters = [
                lambda x: x['continuations'][0]['invalidationContinuationData'],
                lambda x: x['continuations'][0]['timedContinuationData'],
            ]
            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
            if continuation_data:
                continuation_id = continuation_data.get('continuation')
                click_tracking_params = continuation_data.get('clickTrackingParams')
                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
                if timeout_ms is not None:
                    time.sleep(timeout_ms / 1000)
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, live_offset, click_tracking_params

        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
            """Download and parse one chat fragment, retrying on ``HTTPError``.

            Returns ``(success, continuation_id, offset, click_tracking_params)``;
            the last three are None when ``success`` is False.
            """
            for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index):
                try:
                    success = dl_fragment(url, request_data, headers)
                    if not success:
                        return False, None, None, None
                    raw_fragment = self._read_fragment(ctx)
                    # The fragment is either an HTML page embedding the initial data or a bare JSON response
                    try:
                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
                        data = None
                    if not data:
                        data = json.loads(raw_fragment)
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}

                    if is_live:
                        parse_actions = parse_actions_live
                    elif frag_index == 1:
                        parse_actions = try_refresh_replay_beginning
                    else:
                        parse_actions = parse_actions_replay
                    return (True, *parse_actions(live_chat_continuation))
                except HTTPError as err:
                    retry.error = err
                    continue
            return False, None, None, None

        self._prepare_and_start_frag_download(ctx, info_dict)

        success = dl_fragment(info_dict['url'])
        if not success:
            return False
        raw_fragment = self._read_fragment(ctx)
        try:
            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
        except RegexNotFoundError:
            return False
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'],
            str)
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))

        if not ytcfg:
            return False
        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str)
        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
        if not api_key or not innertube_context:
            return False
        if continuation_id is None:
            # The chat page URL is built from the continuation token, so
            # without one there is nothing to request
            self.report_warning('Unable to find the initial live chat continuation')
            return False
        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
        if protocol == 'youtube_live_chat_replay':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
        elif protocol == 'youtube_live_chat':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
        else:
            self.report_warning(f'Unsupported live chat protocol: {protocol}')
            return False

        frag_index = offset = 0
        click_tracking_params = None
        while continuation_id is not None:
            frag_index += 1
            request_data = {
                'context': innertube_context,
                'continuation': continuation_id,
            }
            if frag_index > 1:
                # Later fragments come from the innertube API via POST
                request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
                if click_tracking_params:
                    # This writes into innertube_context itself, so the value
                    # stays on later requests until it is overwritten
                    request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
                headers.update({'content-type': 'application/json'})
                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    url, frag_index, fragment_request_data, headers)
            else:
                # The first fragment is fetched from the chat page itself
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    chat_page_url, frag_index)
            if not success:
                return False
            if test:
                break

        return self._finish_frag_download(ctx, info_dict)

    @staticmethod
    def parse_live_timestamp(action):
        """Return the timestamp carried by a live chat ``action``, or None.

        Handles chat item, ticker item and banner actions.  Ticker and banner
        renderers wrap the actual message renderer, which is unwrapped before
        ``timestampUsec`` is read.  The value is passed through
        ``int_or_none`` with a scale of 1000 (presumably microseconds to
        milliseconds, matching the millisecond clock it is compared against
        in ``real_download``).
        """
        message_renderer_keys = [
            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
        ]
        action_content = dict_get(
            action,
            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
        if not isinstance(action_content, dict):
            return None
        item = dict_get(action_content, ['item', 'bannerRenderer'])
        if not isinstance(item, dict):
            return None
        renderer = dict_get(item, [
            # text
            *message_renderer_keys,
            # ticker
            'liveChatTickerPaidMessageItemRenderer',
            'liveChatTickerSponsorItemRenderer',
            # banner
            'liveChatBannerRenderer',
        ])
        if not isinstance(renderer, dict):
            return None
        # Ticker and banner renderers nest the real message renderer one level down
        parent_item_getters = [
            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
            lambda x: x['contents'],
        ]
        parent_item = try_get(renderer, parent_item_getters, dict)
        if parent_item:
            renderer = dict_get(parent_item, message_renderer_keys)
            if not isinstance(renderer, dict):
                return None
        return int_or_none(renderer.get('timestampUsec'), 1000)