1 from __future__
import division
, unicode_literals
6 from .fragment
import FragmentFD
7 from ..compat
import compat_urllib_error
14 from ..extractor
.youtube
import YoutubeBaseInfoExtractor
as YT_BaseIE
class YoutubeLiveChatFD(FragmentFD):
    """ Downloads YouTube live chats fragment by fragment """

    FD_NAME = 'youtube_live_chat'

    def real_download(self, filename, info_dict):
        """Download a live chat (or live chat replay) into ``filename``.

        ``info_dict`` must provide ``video_id``, ``url`` and ``protocol``
        (``'youtube_live_chat'`` or ``'youtube_live_chat_replay'``); its
        optional ``http_headers`` are sent with every request.

        Every chat action is appended to the output as one JSON document per
        line.  Returns True once the chat has no further continuation (or
        after the first fragment when the ``test`` param is set) and False
        when the watch page, its ytcfg/API key or any fragment could not be
        obtained.
        """
        video_id = info_dict['video_id']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)

        fragment_retries = self.params.get('fragment_retries', 0)
        test = self.params.get('test', False)

        # Download state handed to every fragment helper called below.
        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        ie = YT_BaseIE(self.ydl)

        # Wall-clock start in milliseconds; live offsets are relative to it.
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            """Fetch ``url`` as a fragment, merging ``headers`` over the base headers."""
            http_headers = info_dict.get('http_headers', {})
            if headers:
                # Copy first so the headers stored in info_dict are not mutated.
                http_headers = http_headers.copy()
                http_headers.update(headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
            """Append the replay actions of one response to the output.

            Returns ``(continuation_id, offset, click_tracking_params)``.
            ``offset`` is the ``videoOffsetTimeMsec`` of the last replay
            action seen; when there was none all three values are None,
            which ends the download loop.
            """
            offset = continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                if 'replayChatItemAction' in action:
                    replay_chat_item_action = action['replayChatItemAction']
                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                processed_fragment.extend(
                    json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
            if offset is not None:
                continuation = try_get(
                    live_chat_continuation,
                    lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
                # The lookup may find nothing; keep the None defaults then.
                if continuation:
                    continuation_id = continuation.get('continuation')
                    click_tracking_params = continuation.get('clickTrackingParams')
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, offset, click_tracking_params

        def try_refresh_replay_beginning(live_chat_continuation):
            """Switch to the unfiltered replay if the first response offers it.

            Falls back to ``parse_actions_replay`` when the response carries
            no reload continuation for the second view-selector entry.
            """
            # choose the second option that contains the unfiltered live chat replay
            refresh_continuation = try_get(
                live_chat_continuation,
                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
            if refresh_continuation:
                # no data yet but required to call _append_fragment
                self._append_fragment(ctx, b'')
                refresh_continuation_id = refresh_continuation.get('continuation')
                # Nothing has been consumed yet, so restart from the beginning.
                offset = 0
                click_tracking_params = refresh_continuation.get('trackingParams')
                return refresh_continuation_id, offset, click_tracking_params
            return parse_actions_replay(live_chat_continuation)

        # Offset of the most recent timestamped live action; it is reused for
        # actions without a timestamp and therefore persists across fragments.
        live_offset = 0

        def parse_actions_live(live_chat_continuation):
            """Append the live actions of one response to the output.

            Each action is wrapped so that it has the same shape as a replay
            entry.  Sleeps for the ``timeoutMs`` announced by the response, if
            any, before returning
            ``(continuation_id, live_offset, click_tracking_params)``.
            """
            nonlocal live_offset
            continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                timestamp = self.parse_live_timestamp(action)
                if timestamp is not None:
                    live_offset = timestamp - start_time
                # compatibility with replay format
                pseudo_action = {
                    'replayChatItemAction': {'actions': [action]},
                    'videoOffsetTimeMsec': str(live_offset),
                    'isLive': True,
                }
                processed_fragment.extend(
                    json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
            continuation_data_getters = [
                lambda x: x['continuations'][0]['invalidationContinuationData'],
                lambda x: x['continuations'][0]['timedContinuationData'],
            ]
            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
            if continuation_data:
                continuation_id = continuation_data.get('continuation')
                click_tracking_params = continuation_data.get('clickTrackingParams')
                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
                if timeout_ms is not None:
                    time.sleep(timeout_ms / 1000)
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, live_offset, click_tracking_params

        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
            """Download one fragment and dispatch it to the matching parser.

            Retries on HTTP errors up to ``fragment_retries`` times.  Returns
            ``(success, continuation_id, offset, click_tracking_params)``;
            on failure the last three values are None.
            """
            count = 0
            while count <= fragment_retries:
                try:
                    success, raw_fragment = dl_fragment(url, request_data, headers)
                    if not success:
                        return False, None, None, None
                    # The first fragment is an HTML page with embedded initial
                    # data; API responses are plain JSON.
                    try:
                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
                        data = None
                    if not data:
                        data = json.loads(raw_fragment)
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
                    if info_dict['protocol'] == 'youtube_live_chat_replay':
                        if frag_index == 1:
                            continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
                        else:
                            continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
                    elif info_dict['protocol'] == 'youtube_live_chat':
                        continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
                    return True, continuation_id, offset, click_tracking_params
                except compat_urllib_error.HTTPError as err:
                    count += 1
                    if count <= fragment_retries:
                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
            # The loop is only left once every retry has been used up.
            self.report_error('giving up after %s fragment retries' % fragment_retries)
            return False, None, None, None

        self._prepare_and_start_frag_download(ctx, info_dict)

        success, raw_fragment = dl_fragment(info_dict['url'])
        if not success:
            return False
        try:
            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
        except RegexNotFoundError:
            return False
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))

        if not ytcfg:
            return False
        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
        if not api_key or not innertube_context:
            return False
        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
        if info_dict['protocol'] == 'youtube_live_chat_replay':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
        elif info_dict['protocol'] == 'youtube_live_chat':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id

        frag_index = offset = 0
        click_tracking_params = None
        while continuation_id is not None:
            frag_index += 1
            request_data = {
                'context': innertube_context,
                'continuation': continuation_id,
            }
            if frag_index > 1:
                # Later fragments are POSTed to the API endpoint; the requested
                # player offset is the last seen offset minus 5000 ms.
                request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
                if click_tracking_params:
                    request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
                headers.update({'content-type': 'application/json'})
                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    url, frag_index, fragment_request_data, headers)
            else:
                # The first fragment is fetched from the chat page itself.
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    chat_page_url, frag_index)
            if not success:
                return False
            if test:
                break

        self._finish_frag_download(ctx, info_dict)
        return True

    @staticmethod
    def parse_live_timestamp(action):
        """Return the timestamp of a live chat ``action``, or None.

        Looks for a known chat item, ticker item or banner renderer inside
        ``action`` and reads its ``timestampUsec`` field.  The value is passed
        through ``int_or_none`` with a scale of 1000, presumably converting
        microseconds to milliseconds (confirm against ``int_or_none``).
        None is returned whenever an expected level is missing or is not a
        dict.
        """
        action_content = dict_get(
            action,
            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
        if not isinstance(action_content, dict):
            return None
        item = dict_get(action_content, ['item', 'bannerRenderer'])
        if not isinstance(item, dict):
            return None
        renderer = dict_get(item, [
            # text
            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            # ticker
            'liveChatTickerPaidMessageItemRenderer',
            'liveChatTickerSponsorItemRenderer',
            # banner
            'liveChatBannerRenderer',
        ])
        if not isinstance(renderer, dict):
            return None
        # Ticker and banner renderers wrap the actual message renderer.
        parent_item_getters = [
            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
            lambda x: x['contents'],
        ]
        parent_item = try_get(renderer, parent_item_getters, dict)
        if parent_item:
            renderer = dict_get(parent_item, [
                'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
                'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            ])
        if not isinstance(renderer, dict):
            return None
        return int_or_none(renderer.get('timestampUsec'), 1000)