]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/digg.py
[extractor/youtube] Ignore incomplete data for comment threads by default (#7475)
[yt-dlp.git] / yt_dlp / extractor / digg.py
1 from .common import InfoExtractor
2 from ..utils import js_to_json
3
4
class DiggIE(InfoExtractor):
    # Extractor for digg.com video pages. The page does not host media itself;
    # it embeds a third-party player, so extraction always ends by delegating
    # to another extractor via url_result().
    _VALID_URL = r'https?://(?:www\.)?digg\.com/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        # JWPlatform via provider
        'url': 'http://digg.com/video/sci-fi-short-jonah-daniel-kaluuya-get-out',
        'info_dict': {
            'id': 'LcqvmS0b',
            'ext': 'mp4',
            'title': "'Get Out' Star Daniel Kaluuya Goes On 'Moby Dick'-Like Journey In Sci-Fi Short 'Jonah'",
            'description': 'md5:541bb847648b6ee3d6514bc84b82efda',
            'upload_date': '20180109',
            'timestamp': 1515530551,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Youtube via provider
        'url': 'http://digg.com/video/dog-boat-seal-play',
        'only_matching': True,
    }, {
        # vimeo as regular embed
        'url': 'http://digg.com/video/dream-girl-short-film',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a Digg video page to the extractor of its embedded player.

        The page's inline ``video_info`` JavaScript object is read for
        ``video_id`` and ``provider_name``. Known providers ('youtube',
        'jwplayer') are handed to their dedicated extractors; anything else,
        including a missing or unparsable ``video_info`` object, falls back
        to the Generic extractor on the original URL.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # default='{}' keeps the regex search non-fatal when the page has no
        # video_info object at all.
        raw_info = self._search_regex(
            r'(?s)video_info\s*=\s*({.+?});\n', webpage, 'video info',
            default='{}')

        # With fatal=False, _parse_json yields None on malformed input; coerce
        # to an empty dict so the .get() lookups below cannot raise
        # AttributeError and the Generic fallback is still reached.
        info = self._parse_json(
            raw_info, display_id, transform_source=js_to_json,
            fatal=False) or {}

        video_id = info.get('video_id')
        provider = info.get('provider_name') if video_id else None

        if provider == 'youtube':
            return self.url_result(
                video_id, ie='Youtube', video_id=video_id)

        if provider == 'jwplayer':
            return self.url_result(
                'jwplatform:%s' % video_id, ie='JWPlatform',
                video_id=video_id)

        # Unknown provider or no usable video_info: let the Generic extractor
        # look for embeds on the page.
        return self.url_result(url, 'Generic')