[yt-dlp.git] / youtube_dl / extractor / ted.py

from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor

from ..compat import compat_str
from ..utils import (
    int_or_none,
    try_get,
)


class TEDIE(InfoExtractor):
    '''Information extractor for ted.com.

    Handles three kinds of pages, told apart by _VALID_URL: single talks
    (/talks/...), playlists (/playlists/...) and "watch" pages
    (/watch/<section>/<subsection>/<name>). URLs on the embed / embed-ssl
    hosts are re-dispatched to the same path on the www host.
    '''
    IE_NAME = 'ted'
    # Verbose-mode pattern. Named groups read by _real_extract:
    #   proto, urlmain - used to rebuild the www URL for embed hosts
    #   type - host prefix ('www', 'embed' or 'embed-ssl')
    #   type_playlist / type_talk / type_watch - which page kind matched
    #   name - the slug handed to the per-kind extraction method
    _VALID_URL = r'''(?x)
        (?P<proto>https?://)
        (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
        (
            (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
            |
            ((?P<type_talk>talks)) # We have a simple talk
            |
            (?P<type_watch>watch)/[^/]+/[^/]+
        )
        (/lang/(.*?))? # The url may contain the language
        /(?P<name>[\w-]+) # Here goes the name and then ".html"
        .*)$
        '''
    # Sample URLs together with the metadata expected for each of them:
    # natively hosted talks, a "watch" page, a playlist and talks whose
    # video is hosted on YouTube.
    _TESTS = [{
        'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
        'md5': '0de43ac406aa3e4ea74b66c9c7789b13',
        'info_dict': {
            'id': '102',
            'ext': 'mp4',
            'title': 'The illusion of consciousness',
            'description': ('Philosopher Dan Dennett makes a compelling '
                            'argument that not only don\'t we understand our own '
                            'consciousness, but that half the time our brains are '
                            'actively fooling us.'),
            'uploader': 'Dan Dennett',
            'width': 853,
            'duration': 1308,
        }
    }, {
        'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
        'md5': 'b899ac15e345fb39534d913f7606082b',
        'info_dict': {
            'id': 'tSVI8ta_P4w',
            'ext': 'mp4',
            'title': 'Vishal Sikka: The beauty and power of algorithms',
            'thumbnail': r're:^https?://.+\.jpg',
            'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4',
            'upload_date': '20140122',
            'uploader_id': 'TEDInstitute',
            'uploader': 'TED Institute',
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
        'md5': '71b3ab2f4233012dce09d515c9c39ce2',
        'info_dict': {
            'id': '1972',
            'ext': 'mp4',
            'title': 'Be passionate. Be courageous. Be your best.',
            'uploader': 'Gabby Giffords and Mark Kelly',
            'description': 'md5:5174aed4d0f16021b704120360f72b92',
            'duration': 1128,
        },
    }, {
        'url': 'http://www.ted.com/playlists/who_are_the_hackers',
        'info_dict': {
            'id': '10',
            'title': 'Who are the hackers?',
        },
        'playlist_mincount': 6,
    }, {
        # contains a youtube video
        'url': 'https://www.ted.com/talks/douglas_adams_parrots_the_universe_and_everything',
        'add_ie': ['Youtube'],
        'info_dict': {
            'id': '_ZG8HBuDjgc',
            'ext': 'webm',
            'title': 'Douglas Adams: Parrots the Universe and Everything',
            'description': 'md5:01ad1e199c49ac640cb1196c0e9016af',
            'uploader': 'University of California Television (UCTV)',
            'uploader_id': 'UCtelevision',
            'upload_date': '20080522',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # YouTube video
        'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
        'add_ie': ['Youtube'],
        'info_dict': {
            'id': 'aFBIPO-P7LM',
            'ext': 'mp4',
            'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
            'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
            'uploader': 'TEDx Talks',
            'uploader_id': 'TEDxTalks',
            'upload_date': '20111216',
        },
        'params': {
            'skip_download': True,
        },
    }]

    # Frame size merged into each "nativeDownloads" format by _talk_info,
    # keyed by the format id used in the page data.
    _NATIVE_FORMATS = {
        'low': {'width': 320, 'height': 180},
        'medium': {'width': 512, 'height': 288},
        'high': {'width': 854, 'height': 480},
    }

    def _extract_info(self, webpage):
        '''Return the JSON object the page passes to q("<something>.init", {...}).

        webpage -- HTML source of a talk or playlist page

        Returns the decoded JSON value. The regex search is fatal by
        default, and json.loads raises ValueError on malformed data.
        '''
        # The capture is non-greedy so that it stops at the first
        # ")</script>" after the init call; with "(?s)" a greedy match would
        # run on to the last script block of the page and yield invalid JSON.
        # The dot before "init" is escaped so it only matches a literal ".".
        info_json = self._search_regex(
            r'(?s)q\(\s*"\w+\.init"\s*,\s*({.+?})\)\s*</script>',
            webpage, 'info json')
        return json.loads(info_json)

    def _real_extract(self, url):
        '''Dispatch a matched URL to the extraction method for its page kind.'''
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)

        if mobj.group('type').startswith('embed'):
            # Embed hosts are not parsed directly: hand the equivalent
            # www URL back to this extractor instead.
            return self.url_result(
                mobj.group('proto') + 'www' + mobj.group('urlmain'), 'TED')

        name = mobj.group('name')
        if mobj.group('type_talk'):
            handler = self._talk_info
        elif mobj.group('type_watch'):
            handler = self._watch_info
        else:
            # Neither a talk nor a watch page, so the playlist branch matched
            handler = self._playlist_videos_info
        return handler(url, name)

    def _playlist_videos_info(self, url, name):
        '''Returns the videos of the playlist

        url -- address of the playlist page
        name -- playlist slug taken from the URL, passed to the downloader

        The result is a playlist whose entries are url results pointing at
        the individual talk pages, to be handled by this extractor again.
        '''

        webpage = self._download_webpage(url, name,
                                         'Downloading playlist webpage')
        info = self._extract_info(webpage)

        # Page data is looked up under '__INITIAL_DATA__' first, then at the
        # top level of the init JSON.
        playlist_info = try_get(
            info, lambda x: x['__INITIAL_DATA__']['playlist'],
            dict) or info['playlist']

        # 'talks' is a sequence of talk dicts (each one is indexed by 'slug'
        # below), so it has to be type-checked as a list. Checking for dict
        # always rejected the '__INITIAL_DATA__' value and fell through to
        # info['talks'], which fails when only the nested layout is present.
        talks = try_get(
            info, lambda x: x['__INITIAL_DATA__']['talks'],
            list) or info['talks']

        playlist_entries = [
            self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
            for talk in talks
        ]
        return self.playlist_result(
            playlist_entries,
            playlist_id=compat_str(playlist_info['id']),
            playlist_title=playlist_info['title'])

    def _talk_info(self, url, video_name):
        '''Extract a single talk from its ted.com page.

        url -- address of the talk page
        video_name -- talk slug taken from the URL, passed to the download
                      helpers as the identifier of the item being processed

        Returns a plain 'url' result when the page data marks the talk as
        hosted on an external service, otherwise an info dict with the
        native download, h264, rtmp, hls (plus derived http) and audio
        formats, subtitles and basic metadata.
        '''
        webpage = self._download_webpage(url, video_name)

        info = self._extract_info(webpage)

        # Talk data is looked up under '__INITIAL_DATA__' first, then at the
        # top level of the init JSON.
        talk_info = try_get(
            info, lambda x: x['__INITIAL_DATA__']['talks'][0],
            dict) or info['talks'][0]

        title = talk_info['title'].strip()

        external = talk_info.get('external')
        if external:
            service = external['service']
            self.to_screen('Found video from %s' % service)
            ext_url = None
            if service.lower() == 'youtube':
                ext_url = external.get('code')
            return {
                '_type': 'url',
                'url': ext_url or external['uri'],
            }

        # A talk without native downloads can still be playable through the
        # player resources handled below, so a missing key must not abort
        # the extraction.
        native_downloads = try_get(
            talk_info, lambda x: x['downloads']['nativeDownloads'],
            dict) or talk_info.get('nativeDownloads') or {}

        formats = []
        for format_id, format_url in native_downloads.items():
            if format_url is None:
                continue
            native_format = {
                'url': format_url,
                'format_id': format_id,
                'format': format_id,
            }
            # Add the known frame size for the low/medium/high downloads
            native_format.update(self._NATIVE_FORMATS.get(format_id) or {})
            formats.append(native_format)

        player_talk = talk_info['player_talks'][0]

        # Fall back to an empty mapping so that a talk without any player
        # resources does not fail on .items().
        resources_ = player_talk.get('resources') or talk_info.get('resources') or {}

        http_url = None
        for format_id, resources in resources_.items():
            if format_id == 'h264':
                for resource in resources:
                    h264_url = resource.get('file')
                    if not h264_url:
                        continue
                    bitrate = int_or_none(resource.get('bitrate'))
                    formats.append({
                        'url': h264_url,
                        'format_id': '%s-%sk' % (format_id, bitrate),
                        'tbr': bitrate,
                    })
                    # Remember an URL that embeds its bitrate ("<digits>k");
                    # it is the template for the http variants built below.
                    if re.search(r'\d+k', h264_url):
                        http_url = h264_url
            elif format_id == 'rtmp':
                streamer = talk_info.get('streamer')
                if not streamer:
                    continue
                for resource in resources:
                    formats.append({
                        'format_id': '%s-%s' % (format_id, resource.get('name')),
                        'url': streamer,
                        'play_path': resource['file'],
                        'ext': 'flv',
                        'width': int_or_none(resource.get('width')),
                        'height': int_or_none(resource.get('height')),
                        'tbr': int_or_none(resource.get('bitrate')),
                    })
            elif format_id == 'hls':
                # Skip malformed entries instead of handing a missing
                # manifest URL to the m3u8 helper.
                stream_url = resources.get('stream') if isinstance(resources, dict) else None
                if not stream_url:
                    continue
                formats.extend(self._extract_m3u8_formats(
                    stream_url, video_name, 'mp4', m3u8_id=format_id, fatal=False))

        # Snapshot the hls video formats first: the loop below appends to
        # `formats` while walking over them.
        m3u8_formats = [
            fmt for fmt in formats
            if fmt.get('protocol') == 'm3u8' and fmt.get('vcodec') != 'none']
        if http_url:
            for m3u8_format in m3u8_formats:
                bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
                if not bitrate:
                    continue
                # Derive a progressive http format by substituting this hls
                # variant's bitrate into the h264 URL template.
                f = m3u8_format.copy()
                f.update({
                    'url': re.sub(r'\d+k', bitrate, http_url),
                    'format_id': m3u8_format['format_id'].replace('hls', 'http'),
                    'protocol': 'http',
                })
                formats.append(f)

        audio_download = talk_info.get('audioDownload')
        if audio_download:
            formats.append({
                'url': audio_download,
                'format_id': 'audio',
                'vcodec': 'none',
            })

        self._sort_formats(formats)

        video_id = compat_str(talk_info['id'])

        return {
            'id': video_id,
            'title': title,
            'uploader': player_talk.get('speaker') or talk_info.get('speaker'),
            'thumbnail': player_talk.get('thumb') or talk_info.get('thumb'),
            'description': self._og_search_description(webpage),
            'subtitles': self._get_subtitles(video_id, talk_info),
            'formats': formats,
            'duration': talk_info.get('duration'),
        }

    def _get_subtitles(self, video_id, talk_info):
        languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
        if languages:
            sub_lang_list = {}
            for l in languages:
                sub_lang_list[l] = [
                    {
                        'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
                        'ext': ext,
                    }
                    for ext in ['ted', 'srt']
                ]
            return sub_lang_list
        else:
            return {}

    def _watch_info(self, url, name):
        '''Extract a video from a ted.com "watch" page.

        url -- address of the watch page
        name -- last path component of the URL; also used as the result id

        When the page carries a "pages.jwplayer" configuration, the video
        URL and thumbnail come from it and the title and description from
        the page markup. Otherwise the embedded iframe's address is returned
        as a url result for another extractor to handle.
        '''
        page = self._download_webpage(url, name)

        jwplayer_json = self._html_search_regex(
            r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>',
            page, 'config', default=None)
        if not jwplayer_json:
            # No inline player configuration: defer to the embedded iframe
            iframe_src = self._search_regex(
                r"<iframe[^>]+class='pages-video-embed__video__object'[^>]+src='([^']+)'", page, 'embed url')
            return self.url_result(self._proto_relative_url(iframe_src))

        player_config = json.loads(jwplayer_json)['config']
        result = {
            'id': name,
            'url': player_config['video']['url'],
            'thumbnail': player_config.get('image', {}).get('url'),
        }

        result['title'] = self._html_search_regex(
            r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", page, 'title')
        # The description is optional; two page layouts are tried in turn
        result['description'] = self._html_search_regex(
            [
                r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
                r'(?s)<p><strong>About this talk:</strong>\s+(.*?)</p>',
            ],
            page, 'description', fatal=False)
        return result
Commit	Line	Data
f853f859 PH	1	from __future__ import unicode_literals
f853f859 PH	2
9fd5ce0c PH	3	import json
	4	import re
	5
a504ced0	6	from .common import InfoExtractor
9fd5ce0c	7
66ee7b32	8	from ..compat import compat_str
49174788 S	9	from ..utils import (
	10	int_or_none,
	11	try_get,
	12	)
4ed3e510	13
f853f859	14
a504ced0	15	class TEDIE(InfoExtractor):
cfbee8a4	16	IE_NAME = 'ted'
aab74fa1 PH	17	_VALID_URL = r'''(?x)
aab74fa1 PH	18	(?P<proto>https?://)
cd791a5e	19	(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
bacac173 JMF	20	(
	21	(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
	22	|
	23	((?P<type_talk>talks)) # We have a simple talk
ac6c1048 PH	24	|
ac6c1048 PH	25	(?P<type_watch>watch)/[^/]+/[^/]+
bacac173 JMF	26	)
bacac173 JMF	27	(/lang/(.*?))? # The url may contain the language
ac6c1048	28	/(?P<name>[\w-]+) # Here goes the name and then ".html"
aab74fa1	29	.*)$
bacac173	30	'''
ac6c1048	31	_TESTS = [{
f853f859	32	'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
f628d800	33	'md5': '0de43ac406aa3e4ea74b66c9c7789b13',
f853f859	34	'info_dict': {
7b9965ea JMF	35	'id': '102',
7b9965ea JMF	36	'ext': 'mp4',
652bee05	37	'title': 'The illusion of consciousness',
bacac173	38	'description': ('Philosopher Dan Dennett makes a compelling '
9e1a5b84 JW	39	'argument that not only don\'t we understand our own '
	40	'consciousness, but that half the time our brains are '
	41	'actively fooling us.'),
652bee05	42	'uploader': 'Dan Dennett',
f628d800	43	'width': 853,
eb4cb42a	44	'duration': 1308,
6f5ac90c	45	}
ac6c1048 PH	46	}, {
ac6c1048 PH	47	'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
f628d800	48	'md5': 'b899ac15e345fb39534d913f7606082b',
ac6c1048	49	'info_dict': {
f628d800	50	'id': 'tSVI8ta_P4w',
ac6c1048 PH	51	'ext': 'mp4',
ac6c1048 PH	52	'title': 'Vishal Sikka: The beauty and power of algorithms',
ec85ded8	53	'thumbnail': r're:^https?://.+\.jpg',
f628d800	54	'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4',
	55	'upload_date': '20140122',
	56	'uploader_id': 'TEDInstitute',
	57	'uploader': 'TED Institute',
	58	},
	59	'add_ie': ['Youtube'],
2d4c98db JMF	60	}, {
2d4c98db JMF	61	'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
f628d800	62	'md5': '71b3ab2f4233012dce09d515c9c39ce2',
2d4c98db JMF	63	'info_dict': {
2d4c98db JMF	64	'id': '1972',
5bec5748	65	'ext': 'mp4',
2d4c98db JMF	66	'title': 'Be passionate. Be courageous. Be your best.',
2d4c98db JMF	67	'uploader': 'Gabby Giffords and Mark Kelly',
5bec5748	68	'description': 'md5:5174aed4d0f16021b704120360f72b92',
eb4cb42a	69	'duration': 1128,
2d4c98db	70	},
22a6f150 PH	71	}, {
	72	'url': 'http://www.ted.com/playlists/who_are_the_hackers',
	73	'info_dict': {
	74	'id': '10',
	75	'title': 'Who are the hackers?',
	76	},
	77	'playlist_mincount': 6,
a72cbfac JMF	78	}, {
	79	# contains a youtube video
	80	'url': 'https://www.ted.com/talks/douglas_adams_parrots_the_universe_and_everything',
	81	'add_ie': ['Youtube'],
	82	'info_dict': {
	83	'id': '_ZG8HBuDjgc',
f22ba4bd	84	'ext': 'webm',
a72cbfac JMF	85	'title': 'Douglas Adams: Parrots the Universe and Everything',
	86	'description': 'md5:01ad1e199c49ac640cb1196c0e9016af',
	87	'uploader': 'University of California Television (UCTV)',
	88	'uploader_id': 'UCtelevision',
	89	'upload_date': '20080522',
	90	},
	91	'params': {
	92	'skip_download': True,
	93	},
a461a119 S	94	}, {
	95	# YouTube video
	96	'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
	97	'add_ie': ['Youtube'],
	98	'info_dict': {
	99	'id': 'aFBIPO-P7LM',
	100	'ext': 'mp4',
	101	'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
	102	'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
	103	'uploader': 'TEDx Talks',
	104	'uploader_id': 'TEDxTalks',
	105	'upload_date': '20111216',
	106	},
	107	'params': {
	108	'skip_download': True,
	109	},
ac6c1048	110	}]
9fd5ce0c	111
0ba77818	112	_NATIVE_FORMATS = {
11fa3d7f	113	'low': {'width': 320, 'height': 180},
	114	'medium': {'width': 512, 'height': 288},
	115	'high': {'width': 854, 'height': 480},
652bee05	116	}
9fd5ce0c	117
ca1fee34	118	def _extract_info(self, webpage):
49174788 S	119	info_json = self._search_regex(
	120	r'(?s)q\(\s*"\w+.init"\s*,\s*({.+})\)\s*</script>',
	121	webpage, 'info json')
ca1fee34 JMF	122	return json.loads(info_json)
ca1fee34 JMF	123
9fd5ce0c	124	def _real_extract(self, url):
bacac173	125	m = re.match(self._VALID_URL, url, re.VERBOSE)
cd791a5e	126	if m.group('type').startswith('embed'):
aab74fa1 PH	127	desktop_url = m.group('proto') + 'www' + m.group('urlmain')
aab74fa1 PH	128	return self.url_result(desktop_url, 'TED')
bacac173	129	name = m.group('name')
9fd5ce0c	130	if m.group('type_talk'):
bacac173	131	return self._talk_info(url, name)
ac6c1048 PH	132	elif m.group('type_watch'):
ac6c1048 PH	133	return self._watch_info(url, name)
bacac173	134	else:
ca1fee34	135	return self._playlist_videos_info(url, name)
9fd5ce0c	136
ca1fee34	137	def _playlist_videos_info(self, url, name):
9fd5ce0c	138	'''Returns the videos of the playlist'''
fc2ef392	139
ca1fee34	140	webpage = self._download_webpage(url, name,
9e1a5b84	141	'Downloading playlist webpage')
ca1fee34	142	info = self._extract_info(webpage)
49174788 S	143
	144	playlist_info = try_get(
	145	info, lambda x: x['__INITIAL_DATA__']['playlist'],
	146	dict) or info['playlist']
9fd5ce0c	147
fc2ef392	148	playlist_entries = [
f07a9f6f	149	self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
49174788 S	150	for talk in try_get(
	151	info, lambda x: x['__INITIAL_DATA__']['talks'],
	152	dict) or info['talks']
fc2ef392 PH	153	]
fc2ef392 PH	154	return self.playlist_result(
ca1fee34 JMF	155	playlist_entries,
	156	playlist_id=compat_str(playlist_info['id']),
	157	playlist_title=playlist_info['title'])
9fd5ce0c	158
bacac173 JMF	159	def _talk_info(self, url, video_name):
bacac173 JMF	160	webpage = self._download_webpage(url, video_name)
a9a3876d	161
49174788 S	162	info = self._extract_info(webpage)
	163
	164	talk_info = try_get(
	165	info, lambda x: x['__INITIAL_DATA__']['talks'][0],
	166	dict) or info['talks'][0]
	167
	168	title = talk_info['title'].strip()
a9a3876d	169
a461a119 S	170	external = talk_info.get('external')
	171	if external:
	172	service = external['service']
	173	self.to_screen('Found video from %s' % service)
	174	ext_url = None
	175	if service.lower() == 'youtube':
	176	ext_url = external.get('code')
a72cbfac JMF	177	return {
a72cbfac JMF	178	'_type': 'url',
a461a119	179	'url': ext_url or external['uri'],
a72cbfac JMF	180	}
a72cbfac JMF	181
49174788 S	182	native_downloads = try_get(
	183	talk_info, lambda x: x['downloads']['nativeDownloads'],
	184	dict) or talk_info['nativeDownloads']
	185
652bee05	186	formats = [{
652bee05 JMF	187	'url': format_url,
	188	'format_id': format_id,
	189	'format': format_id,
49174788	190	} for (format_id, format_url) in native_downloads.items() if format_url is not None]
2d4c98db JMF	191	if formats:
	192	for f in formats:
	193	finfo = self._NATIVE_FORMATS.get(f['format_id'])
	194	if finfo:
	195	f.update(finfo)
66ee7b32	196
49174788 S	197	player_talk = talk_info['player_talks'][0]
	198
	199	resources_ = player_talk.get('resources') or talk_info.get('resources')
	200
11fa3d7f	201	http_url = None
49174788	202	for format_id, resources in resources_.items():
66ee7b32 S	203	if format_id == 'h264':
66ee7b32 S	204	for resource in resources:
11fa3d7f	205	h264_url = resource.get('file')
	206	if not h264_url:
	207	continue
66ee7b32 S	208	bitrate = int_or_none(resource.get('bitrate'))
66ee7b32 S	209	formats.append({
11fa3d7f	210	'url': h264_url,
66ee7b32 S	211	'format_id': '%s-%sk' % (format_id, bitrate),
	212	'tbr': bitrate,
	213	})
ec85ded8	214	if re.search(r'\d+k', h264_url):
11fa3d7f	215	http_url = h264_url
66ee7b32 S	216	elif format_id == 'rtmp':
	217	streamer = talk_info.get('streamer')
	218	if not streamer:
	219	continue
	220	for resource in resources:
	221	formats.append({
	222	'format_id': '%s-%s' % (format_id, resource.get('name')),
	223	'url': streamer,
	224	'play_path': resource['file'],
	225	'ext': 'flv',
	226	'width': int_or_none(resource.get('width')),
	227	'height': int_or_none(resource.get('height')),
	228	'tbr': int_or_none(resource.get('bitrate')),
	229	})
	230	elif format_id == 'hls':
11fa3d7f	231	formats.extend(self._extract_m3u8_formats(
	232	resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
	233
	234	m3u8_formats = list(filter(
ff99fe52	235	lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
11fa3d7f	236	formats))
	237	if http_url:
	238	for m3u8_format in m3u8_formats:
	239	bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
	240	if not bitrate:
	241	continue
	242	f = m3u8_format.copy()
	243	f.update({
	244	'url': re.sub(r'\d+k', bitrate, http_url),
	245	'format_id': m3u8_format['format_id'].replace('hls', 'http'),
	246	'protocol': 'http',
	247	})
	248	formats.append(f)
66ee7b32 S	249
	250	audio_download = talk_info.get('audioDownload')
	251	if audio_download:
	252	formats.append({
	253	'url': audio_download,
	254	'format_id': 'audio',
736785ab	255	'vcodec': 'none',
66ee7b32 S	256	})
66ee7b32 S	257
f628d800	258	self._sort_formats(formats)
652bee05	259
7b9965ea	260	video_id = compat_str(talk_info['id'])
a9a3876d	261
463a9087	262	return {
a9a3876d	263	'id': video_id,
49174788 S	264	'title': title,
	265	'uploader': player_talk.get('speaker') or talk_info.get('speaker'),
	266	'thumbnail': player_talk.get('thumb') or talk_info.get('thumb'),
652bee05	267	'description': self._og_search_description(webpage),
03091e37	268	'subtitles': self._get_subtitles(video_id, talk_info),
0d8cb1cc	269	'formats': formats,
eb4cb42a	270	'duration': talk_info.get('duration'),
0d8cb1cc PH	271	}
0d8cb1cc PH	272
a504ced0	273	def _get_subtitles(self, video_id, talk_info):
652bee05 JMF	274	languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
	275	if languages:
	276	sub_lang_list = {}
	277	for l in languages:
a504ced0 JMF	278	sub_lang_list[l] = [
	279	{
	280	'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
	281	'ext': ext,
	282	}
	283	for ext in ['ted', 'srt']
	284	]
652bee05 JMF	285	return sub_lang_list
652bee05 JMF	286	else:
652bee05	287	return {}
ac6c1048 PH	288
	289	def _watch_info(self, url, name):
	290	webpage = self._download_webpage(url, name)
	291
	292	config_json = self._html_search_regex(
de9bd74b	293	r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>',
f628d800	294	webpage, 'config', default=None)
	295	if not config_json:
	296	embed_url = self._search_regex(
	297	r"<iframe[^>]+class='pages-video-embed__video__object'[^>]+src='([^']+)'", webpage, 'embed url')
	298	return self.url_result(self._proto_relative_url(embed_url))
de9bd74b	299	config = json.loads(config_json)['config']
ac6c1048 PH	300	video_url = config['video']['url']
	301	thumbnail = config.get('image', {}).get('url')
	302
	303	title = self._html_search_regex(
	304	r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", webpage, 'title')
	305	description = self._html_search_regex(
621f33c9 PH	306	[
	307	r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
	308	r'(?s)<p><strong>About this talk:</strong>\s+(.*?)</p>',
	309	],
ac6c1048 PH	310	webpage, 'description', fatal=False)
	311
	312	return {
	313	'id': name,
	314	'url': video_url,
	315	'title': title,
	316	'thumbnail': thumbnail,
	317	'description': description,
	318	}