]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/veoh.py
[dplay] Add extractors for site changes (#2401)
[yt-dlp.git] / yt_dlp / extractor / veoh.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import (
5 int_or_none,
6 parse_duration,
7 qualities,
8 try_get
9 )
10
11
class VeohIE(InfoExtractor):
    """Extractor for veoh.com watch, videos, embed and iphone URLs."""

    # The captured id keeps its leading 'v' / 'e' / 'yapi-' marker; it is
    # appended verbatim to the getVideo endpoint in _real_extract.
    _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|videos|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'

    _TESTS = [{
        'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
        'md5': '620e68e6a3cff80086df3348426c9ca3',
        'info_dict': {
            'id': 'v56314296nk7Zdmz3',
            'ext': 'mp4',
            'title': 'Straight Backs Are Stronger',
            'description': 'md5:203f976279939a6dc664d4001e13f5f4',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th56314296\\.jpg(\\?.*)?',
            'uploader': 'LUMOback',
            'duration': 46,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 0,
            'categories': ['technology_and_gaming'],
            'tags': ['posture', 'posture', 'sensor', 'back', 'pain', 'wearable', 'tech', 'lumo'],
        },
    }, {
        'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
        'only_matching': True,
    }, {
        'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
        'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
        'info_dict': {
            'id': '27701988',
            'ext': 'mp4',
            'title': 'Chile workers cover up to avoid skin damage',
            'description': 'md5:2bd151625a60a32822873efc246ba20d',
            'uploader': 'afp-news',
            'duration': 123,
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
        'md5': '4fde7b9e33577bab2f2f8f260e30e979',
        'note': 'Embedded ooyala video',
        'info_dict': {
            'id': '69525809',
            'ext': 'mp4',
            'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
            'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
            'uploader': 'newsy-videos',
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
        'only_matching': True,
    }, {
        'url': 'https://www.veoh.com/videos/v16374379WA437rMH',
        'md5': 'cceb73f3909063d64f4b93d4defca1b3',
        'info_dict': {
            'id': 'v16374379WA437rMH',
            'ext': 'mp4',
            'title': 'Phantasmagoria 2, pt. 1-3',
            'description': 'Phantasmagoria: a Puzzle of Flesh',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th16374379\\.jpg(\\?.*)?',
            'uploader': 'davidspackage',
            'duration': 968,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 18,
            'categories': ['technology_and_gaming', 'gaming'],
            'tags': ['puzzle', 'of', 'flesh'],
        }
    }]

    def _real_extract(self, url):
        """Fetch the getVideo JSON for *url* and build the info dict.

        Raises KeyError when the response lacks 'video' or the video entry
        lacks 'title'; every other field is optional and falls back to None
        (or 0 for 'age_limit').
        """
        video_id = self._match_id(url)
        metadata = self._download_json(
            'https://www.veoh.com/watch/getVideo/' + video_id,
            video_id)
        video = metadata['video']
        title = video['title']

        thumbnail_url = None
        q = qualities(['Regular', 'HQ'])
        formats = []
        # 'src' maps names to URLs; the 'poster' entry is the thumbnail and
        # every other non-empty entry is treated as a media format.
        # `or {}` also covers an explicit JSON null, which a .get() default
        # would not.
        for f_id, f_url in (video.get('src') or {}).items():
            if not f_url:
                continue
            if f_id == 'poster':
                thumbnail_url = f_url
            else:
                formats.append({
                    'format_id': f_id,
                    'quality': q(f_id),
                    'url': f_url,
                })
        self._sort_formats(formats)

        categories = metadata.get('categoryPath')
        if not categories:
            # Fall back to the single 'category' field, dropping its
            # 'category_' prefix. str.removeprefix is avoided on purpose: it
            # only exists on Python 3.9+, and inside try_get the resulting
            # AttributeError would be swallowed, silently losing the category.
            category = try_get(video, lambda x: x['category'].strip())
            if category and category.startswith('category_'):
                category = category[len('category_'):]
            categories = [category] if category else None
        tags = video.get('tags')

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': thumbnail_url,
            # 'author' may be missing or null; both yield no uploader.
            'uploader': (video.get('author') or {}).get('nickname'),
            'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
            'view_count': int_or_none(video.get('views')),
            'formats': formats,
            'average_rating': int_or_none(video.get('rating')),
            'comment_count': int_or_none(video.get('numOfComments')),
            # NOTE(review): contentRatingId == 2 is taken to mean adult
            # content, per the age_limit: 18 test case above - confirm.
            'age_limit': 18 if video.get('contentRatingId') == 2 else 0,
            'categories': categories,
            'tags': tags.split(', ') if tags else None,
        }