import json
import time
import urllib.error

from .fragment import FragmentFD
class YoutubeLiveChatFD(FragmentFD):
    """ Downloads YouTube live chats fragment by fragment """

    # NOTE(review): the text this class was reviewed from was a lossy dump
    # (fused line numbers, split tokens, dropped statements).  Statements that
    # the surviving tokens force (``try:`` for every ``except``, ``if not
    # success`` guards, literal delimiters, ``@staticmethod``) were restored;
    # anything that could not be derived that way carries its own
    # NOTE(review) below and must be confirmed against the canonical file.

    def real_download(self, filename, info_dict):
        """Download a live chat (or live chat replay) into ``filename``.

        The page at ``info_dict['url']`` is fetched first to obtain the
        initial continuation token and the ytcfg (API key and innertube
        context).  Continuation fragments are then requested in a loop until
        no further continuation is returned; each chat action is appended to
        the output as one JSON document per line.

        Reads ``info_dict['video_id']``, ``info_dict['protocol']``
        (``'youtube_live_chat'`` or ``'youtube_live_chat_replay'``),
        ``info_dict['url']`` and optionally ``info_dict['http_headers']``.

        Returns ``False`` when the initial page, its ytcfg, or any fragment
        cannot be obtained; ``True`` otherwise.
        """
        video_id = info_dict['video_id']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
        if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
            self.report_warning('Live chat download runs until the livestream ends. '
                                'If you wish to download the video simultaneously, run a separate yt-dlp instance')

        test = self.params.get('test', False)

        # NOTE(review): the initialisation of ``ctx`` was missing from the
        # reviewed text; these keys are a reconstruction -- confirm against
        # what FragmentFD's fragment helpers expect.
        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        # Imported at function scope, as in the original.
        from ..extractor.youtube import YoutubeBaseInfoExtractor

        ie = YoutubeBaseInfoExtractor(self.ydl)

        # Wall-clock start in milliseconds; live offsets are measured from it.
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            """Download one fragment, merging ``headers`` over the format's own headers."""
            http_headers = info_dict.get('http_headers', {})
            if headers:
                # Copy first so the headers stored in info_dict are not mutated.
                http_headers = http_headers.copy()
                http_headers.update(headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
            """Append replay actions; return (continuation_id, offset, click_tracking_params)."""
            offset = continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                if 'replayChatItemAction' in action:
                    replay_chat_item_action = action['replayChatItemAction']
                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                processed_fragment.extend(
                    json.dumps(action, ensure_ascii=False).encode() + b'\n')
            # A continuation is only looked up when at least one replay action
            # carried an offset; otherwise continuation_id stays None.
            if offset is not None:
                continuation = try_get(
                    live_chat_continuation,
                    lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
                if continuation:
                    continuation_id = continuation.get('continuation')
                    click_tracking_params = continuation.get('clickTrackingParams')
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, offset, click_tracking_params

        def try_refresh_replay_beginning(live_chat_continuation):
            """Switch to the second replay view if offered, else parse as a normal replay fragment."""
            # choose the second option that contains the unfiltered live chat replay
            refresh_continuation = try_get(
                live_chat_continuation,
                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
            if refresh_continuation:
                # no data yet but required to call _append_fragment
                self._append_fragment(ctx, b'')
                refresh_continuation_id = refresh_continuation.get('continuation')
                offset = 0
                click_tracking_params = refresh_continuation.get('trackingParams')
                return refresh_continuation_id, offset, click_tracking_params
            return parse_actions_replay(live_chat_continuation)

        # Last known live offset; kept across fragments so that actions
        # without a timestamp reuse the most recent value.
        live_offset = 0

        def parse_actions_live(live_chat_continuation):
            """Append live actions in replay-compatible form; return (continuation_id, live_offset, click_tracking_params)."""
            nonlocal live_offset
            continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                timestamp = self.parse_live_timestamp(action)
                if timestamp is not None:
                    live_offset = timestamp - start_time
                # compatibility with replay format
                # NOTE(review): the opening of this literal and one entry
                # after 'videoOffsetTimeMsec' were missing from the reviewed
                # text; 'isLive' is a reconstruction -- confirm.
                pseudo_action = {
                    'replayChatItemAction': {'actions': [action]},
                    'videoOffsetTimeMsec': str(live_offset),
                    'isLive': True,
                }
                processed_fragment.extend(
                    json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
            continuation_data_getters = [
                lambda x: x['continuations'][0]['invalidationContinuationData'],
                lambda x: x['continuations'][0]['timedContinuationData'],
            ]
            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
            if continuation_data:
                continuation_id = continuation_data.get('continuation')
                click_tracking_params = continuation_data.get('clickTrackingParams')
                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
                if timeout_ms is not None:
                    # Wait for the interval given in the response before the next poll.
                    time.sleep(timeout_ms / 1000)
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, live_offset, click_tracking_params

        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
            """Fetch and parse one fragment.

            Returns (success, continuation_id, offset, click_tracking_params).
            """
            for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index):
                try:
                    success = dl_fragment(url, request_data, headers)
                    if not success:
                        return False, None, None, None
                    raw_fragment = self._read_fragment(ctx)
                    # The fragment is either an HTML page embedding the
                    # initial data or a bare JSON response.
                    try:
                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
                        data = None
                    if not data:
                        data = json.loads(raw_fragment)
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}

                    if info_dict['protocol'] == 'youtube_live_chat':
                        parse = parse_actions_live
                    elif frag_index == 1:
                        parse = try_refresh_replay_beginning
                    else:
                        parse = parse_actions_replay
                    return (True, *parse(live_chat_continuation))
                except urllib.error.HTTPError as err:
                    # NOTE(review): the handler body was missing from the
                    # reviewed text; handing the error to the retry manager
                    # is a reconstruction -- confirm.
                    retry.error = err
                    continue
            return False, None, None, None

        self._prepare_and_start_frag_download(ctx, info_dict)

        success = dl_fragment(info_dict['url'])
        if not success:
            return False
        raw_fragment = self._read_fragment(ctx)
        try:
            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
        except RegexNotFoundError:
            return False
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))

        if not ytcfg:
            return False
        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
        if not api_key or not innertube_context:
            return False
        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
        # Only the URL prefix is prepared here.  The continuation id is
        # appended inside the loop, where it is known not to be None; joining
        # it eagerly raised TypeError when the page had no chat continuation.
        if info_dict['protocol'] == 'youtube_live_chat_replay':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
            chat_page_prefix = 'https://www.youtube.com/live_chat_replay?continuation='
        elif info_dict['protocol'] == 'youtube_live_chat':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
            chat_page_prefix = 'https://www.youtube.com/live_chat?continuation='

        frag_index = offset = 0
        click_tracking_params = None
        while continuation_id is not None:
            frag_index += 1
            request_data = {
                'context': innertube_context,
                'continuation': continuation_id,
            }
            if frag_index > 1:
                request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
                if click_tracking_params:
                    request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
                headers.update({'content-type': 'application/json'})
                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    url, frag_index, fragment_request_data, headers)
            else:
                # The first fragment is the chat page itself rather than an API call.
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    chat_page_prefix + continuation_id, frag_index)
            if not success:
                return False
            if test:
                break

        self._finish_frag_download(ctx, info_dict)
        # NOTE(review): the statement following the call above was missing
        # from the reviewed text; returning True is a reconstruction.
        return True

    @staticmethod
    def parse_live_timestamp(action):
        """Return the timestamp carried by a live chat ``action``, or ``None``.

        Walks action -> item -> renderer (and, when the renderer wraps a
        parent item, that item's renderer) and reads ``timestampUsec`` from
        it.  ``None`` is returned as soon as any level is not a dict.

        The value is passed through ``int_or_none(..., 1000)``; presumably
        that scales microseconds down to milliseconds, matching the
        millisecond ``start_time`` it is compared with -- confirm against
        ``int_or_none``.
        """
        action_content = dict_get(
            action,
            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
        if not isinstance(action_content, dict):
            return None
        item = dict_get(action_content, ['item', 'bannerRenderer'])
        if not isinstance(item, dict):
            return None
        renderer = dict_get(item, [
            # text
            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            # ticker
            'liveChatTickerPaidMessageItemRenderer',
            'liveChatTickerSponsorItemRenderer',
            # banner
            'liveChatBannerRenderer',
        ])
        if not isinstance(renderer, dict):
            return None
        # Some renderers only wrap the item that actually holds the message;
        # when one is found, read the timestamp from that item instead.
        parent_item_getters = [
            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
            lambda x: x['contents'],
        ]
        parent_item = try_get(renderer, parent_item_getters, dict)
        if parent_item:
            renderer = dict_get(parent_item, [
                'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
                'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            ])
        if not isinstance(renderer, dict):
            return None
        return int_or_none(renderer.get('timestampUsec'), 1000)