]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/veoh.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / veoh.py
1 import functools
2 import json
3
4 from .common import InfoExtractor
5 from ..utils import (
6 ExtractorError,
7 OnDemandPagedList,
8 int_or_none,
9 parse_duration,
10 qualities,
11 try_get,
12 )
13
14
class VeohIE(InfoExtractor):
    # Extractor for single Veoh videos. Matches watch, videos, embed and
    # iphone URLs whose id starts with "v", "e" or "yapi-".
    _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|videos|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'

    _TESTS = [{
        'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
        'md5': '620e68e6a3cff80086df3348426c9ca3',
        'info_dict': {
            'id': 'v56314296nk7Zdmz3',
            'ext': 'mp4',
            'title': 'Straight Backs Are Stronger',
            'description': 'md5:203f976279939a6dc664d4001e13f5f4',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th56314296\\.jpg(\\?.*)?',
            'uploader': 'LUMOback',
            'duration': 46,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 0,
            'categories': ['technology_and_gaming'],
            'tags': ['posture', 'posture', 'sensor', 'back', 'pain', 'wearable', 'tech', 'lumo'],
        },
    }, {
        'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
        'only_matching': True,
    }, {
        'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
        'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
        'info_dict': {
            'id': '27701988',
            'ext': 'mp4',
            'title': 'Chile workers cover up to avoid skin damage',
            'description': 'md5:2bd151625a60a32822873efc246ba20d',
            'uploader': 'afp-news',
            'duration': 123,
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
        'md5': '4fde7b9e33577bab2f2f8f260e30e979',
        'note': 'Embedded ooyala video',
        'info_dict': {
            'id': '69525809',
            'ext': 'mp4',
            'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
            'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
            'uploader': 'newsy-videos',
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
        'only_matching': True,
    }, {
        'url': 'https://www.veoh.com/videos/v16374379WA437rMH',
        'md5': 'cceb73f3909063d64f4b93d4defca1b3',
        'info_dict': {
            'id': 'v16374379WA437rMH',
            'ext': 'mp4',
            'title': 'Phantasmagoria 2, pt. 1-3',
            'description': 'Phantasmagoria: a Puzzle of Flesh',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th16374379\\.jpg(\\?.*)?',
            'uploader': 'davidspackage',
            'duration': 968,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 18,
            'categories': ['technology_and_gaming', 'gaming'],
            'tags': ['puzzle', 'of', 'flesh'],
        },
    }]

    def _real_extract(self, url):
        """Download the ``getVideo`` JSON for the URL's id and build the info dict.

        ``metadata['video']`` and its ``title`` are mandatory (a missing key
        raises ``KeyError``); every other field is optional and falls back to
        ``None`` or a default when absent.
        """
        video_id = self._match_id(url)
        metadata = self._download_json(
            'https://www.veoh.com/watch/getVideo/' + video_id,
            video_id)
        video = metadata['video']
        title = video['title']

        thumbnail_url = None
        # NOTE(review): presumably ranks 'HQ' above 'Regular' (list order) - confirm
        q = qualities(['Regular', 'HQ'])
        formats = []
        # `or {}` also covers a `src` key that is present but null, which
        # `video.get('src', {})` would not.
        for f_id, f_url in (video.get('src') or {}).items():
            if not f_url:
                continue
            # The 'poster' entry is the thumbnail; every other entry is a media URL.
            if f_id == 'poster':
                thumbnail_url = f_url
            else:
                formats.append({
                    'format_id': f_id,
                    'quality': q(f_id),
                    'url': f_url,
                })

        categories = metadata.get('categoryPath')
        if not categories:
            # Fall back to the video's single category, minus its 'category_' prefix.
            category = try_get(video, lambda x: x['category'].strip())
            # Sliced by hand because str.removeprefix() only exists on
            # Python 3.9+; inside the try_get lambda its AttributeError would
            # presumably be swallowed and the category silently dropped.
            if category and category.startswith('category_'):
                category = category[len('category_'):]
            categories = [category] if category else None
        tags = video.get('tags')

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': thumbnail_url,
            # Tolerates a missing, null or non-dict `author` instead of raising.
            'uploader': try_get(video, lambda x: x['author']['nickname']),
            'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
            'view_count': int_or_none(video.get('views')),
            'formats': formats,
            'average_rating': int_or_none(video.get('rating')),
            'comment_count': int_or_none(video.get('numOfComments')),
            # contentRatingId 2 is mapped to an 18+ age limit; anything else to 0.
            'age_limit': 18 if video.get('contentRatingId') == 2 else 0,
            'categories': categories,
            # `tags` is a single ', '-separated string.
            'tags': tags.split(', ') if tags else None,
        }
130
131
class VeohUserIE(VeohIE):  # XXX: Do not subclass from concrete IE
    # Playlist extractor for the videos published by a Veoh user
    # (https://www.veoh.com/users/<name>).
    _VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)'
    IE_NAME = 'veoh:user'

    _TESTS = [
        {
            'url': 'https://www.veoh.com/users/valentinazoe',
            'info_dict': {
                'id': 'valentinazoe',
                'title': 'valentinazoe (Uploads)',
            },
            'playlist_mincount': 75,
        },
        {
            'url': 'https://www.veoh.com/users/PiensaLibre',
            'info_dict': {
                'id': 'PiensaLibre',
                'title': 'PiensaLibre (Uploads)',
            },
            'playlist_mincount': 2,
        }]

    # Number of videos requested per page.
    _PAGE_SIZE = 16
    # CSRF token sent with every listing request; filled in by _real_initialize().
    _TOKEN = None

    def _fetch_page(self, uploader, page):
        """Yield one ``url_result`` per video on the given page of *uploader*.

        *page* is zero-based; the endpoint is sent ``page + 1``.
        Raises ExtractorError when the response is not flagged ``success``.
        """
        response = self._download_json(
            'https://www.veoh.com/users/published/videos', uploader,
            note=f'Downloading videos page {page + 1}',
            headers={
                'x-csrf-token': self._TOKEN,
                'content-type': 'application/json;charset=UTF-8',
            },
            data=json.dumps({
                'username': uploader,
                'maxResults': self._PAGE_SIZE,
                'page': page + 1,
                'requestName': 'userPage',
            }).encode('utf-8'))
        if not response.get('success'):
            # A failure payload without 'message' must still surface as an
            # ExtractorError, not as an unrelated KeyError.
            raise ExtractorError(
                response.get('message') or 'Unable to download user videos')

        # A successful response without a 'videos' list is treated as an empty page.
        for video in response.get('videos') or []:
            permalink_id = video['permalinkId']
            yield self.url_result(
                f'https://www.veoh.com/watch/{permalink_id}', VeohIE,
                permalink_id, video.get('title'))

    def _real_initialize(self):
        """Scrape the 40-character CSRF token from the Veoh front page into ``_TOKEN``."""
        webpage = self._download_webpage(
            'https://www.veoh.com', None, note='Downloading authorization token')
        self._TOKEN = self._search_regex(
            r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', webpage,
            'request token', group='token')

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's uploads."""
        uploader = self._match_id(url)
        return self.playlist_result(OnDemandPagedList(
            functools.partial(self._fetch_page, uploader),
            self._PAGE_SIZE), uploader, f'{uploader} (Uploads)')