determine_ext,
error_to_compat_str,
float_or_none,
+ format_field,
get_element_by_id,
get_first,
int_or_none,
)\?(?:.*?)(?:v|video_id|story_fbid)=|
[^/]+/videos/(?:[^/]+/)?|
[^/]+/posts/|
- groups/[^/]+/permalink/|
+ groups/[^/]+/(?:permalink|posts)/|
watchparty/
)|
facebook:
'uploader_id': '100013949973717',
},
'skip': 'Requires logging in',
+ }, {
+ # data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
+ 'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
+ 'info_dict': {
+ 'id': '1569199726448814',
+ 'ext': 'mp4',
+ 'title': 'Pence MUST GO!',
+ 'description': 'Vickie Gentry shared a memory.',
+ 'timestamp': 1511548260,
+ 'upload_date': '20171124',
+ 'uploader': 'Vickie Gentry',
+ 'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 148.435,
+ },
}, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True,
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
post = traverse_obj(post_data, (
..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
+
+ automatic_captions, subtitles = {}, {}
+ subs_data = traverse_obj(post, (..., 'video', ..., 'attachments', ..., lambda k, v: (
+ k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')))
+ is_video_broadcast = get_first(subs_data, 'is_video_broadcast', expected_type=bool)
+ captions = get_first(subs_data, 'video_available_captions_locales', 'captions_url')
+ if url_or_none(captions): # if subs_data only had a 'captions_url'
+ locale = self._html_search_meta(['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
+ subtitles[locale] = [{'url': captions}]
+ # otherwise `captions` is the 'video_available_captions_locales' value: a list of dicts, each with a 'locale' and a 'captions_url'
+ for caption in traverse_obj(captions, (
+ {lambda x: sorted(x, key=lambda c: c['locale'])}, lambda _, v: v['captions_url'])
+ ):
+ lang = caption.get('localized_language') or ''
+ subs = {
+ 'url': caption['captions_url'],
+ 'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
+ }
+ if caption.get('localized_creation_method') or is_video_broadcast:
+ automatic_captions.setdefault(caption['locale'], []).append(subs)
+ else:
+ subtitles.setdefault(caption['locale'], []).append(subs)
+
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
title = get_first(media, ('title', 'text'))
webpage, 'view count', default=None)),
'concurrent_view_count': get_first(post, (
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
+ 'automatic_captions': automatic_captions,
+ 'subtitles': subtitles,
}
info_json_ld = self._search_json_ld(webpage, video_id, default={})
nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
attachments = traverse_obj(nodes, (
..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
- ..., ('styles', 'style_type_renderer'), 'attachment'), expected_type=dict) or []
+ ..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')),
+ 'attachment', {dict}))
for attachment in attachments:
- ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
+ ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}),
+ ('target', 'attachments', ..., 'styles', 'attachment', {dict}))
for n in ns:
parse_attachment(n)
parse_attachment(attachment)
if len(entries) > 1:
return self.playlist_result(entries, video_id)
- video_info = entries[0]
+ video_info = entries[0] if entries else {'id': video_id}
webpage_info = extract_metadata(webpage)
# honor precise duration in video info
if video_info.get('duration'):