import json
from .fragment import FragmentFD
+from ..compat import compat_urllib_error
+from ..utils import try_get
+from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
class YoutubeLiveChatReplayFD(FragmentFD):
video_id = info_dict['video_id']
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+ fragment_retries = self.params.get('fragment_retries', 0)
test = self.params.get('test', False)
ctx = {
return self._download_fragment(ctx, url, info_dict, headers)
def parse_yt_initial_data(data):
- window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?)(?<=});'
- var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?)(?<=});'
- for patt in window_patt, var_patt:
+ patterns = (
+ r'%s\\s*%s' % (YT_BaseIE._YT_INITIAL_DATA_RE, YT_BaseIE._YT_INITIAL_BOUNDARY_RE),
+ r'%s' % YT_BaseIE._YT_INITIAL_DATA_RE)
+ data = data.decode('utf-8', 'replace')
+ for patt in patterns:
try:
raw_json = re.search(patt, data).group(1)
return json.loads(raw_json)
except AttributeError:
continue
+ def download_and_parse_fragment(url, frag_index):
+ count = 0
+ while count <= fragment_retries:
+ try:
+ success, raw_fragment = dl_fragment(url)
+ if not success:
+ return False, None, None
+ data = parse_yt_initial_data(raw_fragment) or json.loads(raw_fragment)['response']
+
+ live_chat_continuation = try_get(
+ data,
+ lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
+ offset = continuation_id = None
+ processed_fragment = bytearray()
+ for action in live_chat_continuation.get('actions', []):
+ if 'replayChatItemAction' in action:
+ replay_chat_item_action = action['replayChatItemAction']
+ offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
+ processed_fragment.extend(
+ json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+ if offset is not None:
+ continuation_id = try_get(
+ live_chat_continuation,
+ lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
+ self._append_fragment(ctx, processed_fragment)
+
+ return True, continuation_id, offset
+ except compat_urllib_error.HTTPError as err:
+ count += 1
+ if count <= fragment_retries:
+ self.report_retry_fragment(err, frag_index, count, fragment_retries)
+ if count > fragment_retries:
+ self.report_error('giving up after %s fragment retries' % fragment_retries)
+ return False, None, None
+
self._prepare_and_start_frag_download(ctx)
success, raw_fragment = dl_fragment(
if not success:
return False
data = parse_yt_initial_data(raw_fragment)
- continuation_id = data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
+ continuation_id = try_get(
+ data,
+ lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
# no data yet but required to call _append_fragment
self._append_fragment(ctx, b'')
- first = True
- offset = None
+ frag_index = offset = 0
while continuation_id is not None:
- data = None
- if first:
- url = 'https://www.youtube.com/live_chat_replay?continuation={}'.format(continuation_id)
- success, raw_fragment = dl_fragment(url)
- if not success:
- return False
- data = parse_yt_initial_data(raw_fragment)
- else:
- url = ('https://www.youtube.com/live_chat_replay/get_live_chat_replay'
- + '?continuation={}'.format(continuation_id)
- + '&playerOffsetMs={}'.format(max(offset - 5000, 0))
- + '&hidden=false'
- + '&pbj=1')
- success, raw_fragment = dl_fragment(url)
- if not success:
- return False
- data = json.loads(raw_fragment)['response']
-
- first = False
- continuation_id = None
-
- live_chat_continuation = data['continuationContents']['liveChatContinuation']
- offset = None
- processed_fragment = bytearray()
- if 'actions' in live_chat_continuation:
- for action in live_chat_continuation['actions']:
- if 'replayChatItemAction' in action:
- replay_chat_item_action = action['replayChatItemAction']
- offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
- processed_fragment.extend(
- json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
- try:
- continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
- except KeyError:
- continuation_id = None
-
- self._append_fragment(ctx, processed_fragment)
-
- if test or offset is None:
+ frag_index += 1
+ url = 'https://www.youtube.com/live_chat_replay?continuation=%s' % continuation_id
+ if frag_index > 1:
+ url += '&playerOffsetMs=%d&hidden=false&pbj=1' % max(offset - 5000, 0)
+ success, continuation_id, offset = download_and_parse_fragment(url, frag_index)
+ if not success:
+ return False
+ if test:
break
self._finish_frag_download(ctx)
-
return True
return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
*args, **compat_kwargs(kwargs))
- def _get_yt_initial_data(self, video_id, webpage):
- config = self._search_regex(
- (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
- r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
- webpage, 'ytInitialData', default=None)
- if config:
- return self._parse_json(
- uppercase_escape(config), video_id, fatal=False)
-
def _real_initialize(self):
if self._downloader is None:
return
has_live_chat_replay = False
if not is_live:
- yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
+ yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
try:
yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
has_live_chat_replay = True