]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/indavideo.py
[extractors] Use new framework for existing embeds (#4307)
[yt-dlp.git] / yt_dlp / extractor / indavideo.py
1 from .common import InfoExtractor
2 from ..compat import compat_str
3 from ..utils import (
4 int_or_none,
5 parse_age_limit,
6 parse_iso8601,
7 update_url_query,
8 )
9
10
class IndavideoEmbedIE(InfoExtractor):
    """Extractor for indavideo.hu player/embed URLs.

    Matches the HTML player URLs (``indavideo.hu/player/video/<hash>`` and the
    ``embed.`` variant) as well as the legacy SWF player URL that carries the
    video hash in its ``vID``/``vid`` query parameter.
    """

    _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
    # Some example URLs covered by generic extractor:
    #   http://indavideo.hu/video/Vicces_cica_1
    #   http://index.indavideo.hu/video/2015_0728_beregszasz
    #   http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
    #   http://erotika.indavideo.hu/video/Amator_tini_punci
    #   http://film.indavideo.hu/video/f_hrom_nagymamm_volt
    #   http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)']
    _TESTS = [{
        'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
        'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
        'info_dict': {
            'id': '1837039',
            'ext': 'mp4',
            'title': 'Cicatánc',
            'description': '',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'cukiajanlo',
            'uploader_id': '83729',
            'timestamp': 1439193826,
            'upload_date': '20150810',
            'duration': 72,
            'age_limit': 0,
            'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
        },
    }, {
        'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
        'only_matching': True,
    }, {
        'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the player JSON for the video hash in *url* and build the info dict.

        Returns a dict with the video metadata and a ``formats`` list. A
        missing ``data`` or ``title`` key in the API response raises
        ``KeyError``, as those are treated as mandatory.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
            video_id)['data']

        title = video['title']

        # Collect candidate URLs from both the multi-file and the single-file
        # fields; 'video_files' has been observed as either a list or a dict.
        candidates = []

        video_files = video.get('video_files')
        if isinstance(video_files, list):
            candidates.extend(video_files)
        elif isinstance(video_files, dict):
            candidates.extend(video_files.values())

        # Only add the single-file URL when the API actually supplied one;
        # otherwise None would end up among the URLs and break the string
        # handling below.
        video_file = video.get('video_file')
        if video_file:
            candidates.append(video_file)

        # Drop empty entries and de-duplicate while keeping the API's order,
        # so the prefix chosen below is deterministic.
        video_urls = list(dict.fromkeys(
            candidate for candidate in candidates if candidate))

        # 'flv_files' holds bare file names; resolve them against the
        # directory of the first known URL. Without any known URL there is
        # nothing to resolve them against, so they are skipped.
        if video_urls:
            video_prefix = video_urls[0].rsplit('/', 1)[0]
            for flv_file in video.get('flv_files') or []:
                flv_url = '%s/%s' % (video_prefix, flv_file)
                if flv_url not in video_urls:
                    video_urls.append(flv_url)

        # Optional mapping of height (as a string) to an access token.
        filesh = video.get('filesh')

        formats = []
        for video_url in video_urls:
            height = int_or_none(self._search_regex(
                r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
            if filesh:
                # When tokens are in use, a URL is only usable if its height
                # is known and a token exists for that height.
                if not height:
                    continue
                token = filesh.get(compat_str(height))
                if token is None:
                    continue
                video_url = update_url_query(video_url, {'token': token})
            formats.append({
                'url': video_url,
                'height': height,
            })
        self._sort_formats(formats)

        timestamp = video.get('date')
        if timestamp:
            # upload date is in CEST
            timestamp = parse_iso8601(timestamp + ' +0200', ' ')

        thumbnails = [{
            'url': self._proto_relative_url(thumbnail),
        } for thumbnail in video.get('thumbnails') or []]

        tags = [tag['title'] for tag in video.get('tags') or []]

        return {
            'id': video.get('id') or video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnails': thumbnails,
            'uploader': video.get('user_name'),
            'uploader_id': video.get('user_id'),
            'timestamp': timestamp,
            'duration': int_or_none(video.get('length')),
            'age_limit': parse_age_limit(video.get('age_limit')),
            'tags': tags,
            'formats': formats,
        }