jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/cloudy.py
[ie/box] Fix formats extraction (#8649)
[yt-dlp.git] / yt_dlp / extractor / cloudy.py
1 from .common import InfoExtractor
2 from ..utils import (
3 str_to_int,
4 unified_strdate,
5 )
6
7
class CloudyIE(InfoExtractor):
    # Extractor for cloudy.ec videos, addressed either through the watch page
    # (/v/<id>) or through the embed player (/embed.php?...id=<id>).
    # NOTE(review): the description attribute is spelled with a leading
    # underscore here; confirm whether the base class reads `_IE_DESC` or
    # `IE_DESC` before relying on it.
    _IE_DESC = 'cloudy.ec'
    _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
    _TESTS = [{
        'url': 'https://www.cloudy.ec/v/af511e2527aac',
        'md5': '29832b05028ead1b58be86bf319397ca',
        'info_dict': {
            'id': 'af511e2527aac',
            'ext': 'mp4',
            'title': 'Funny Cats and Animals Compilation june 2013',
            'upload_date': '20130913',
            'view_count': int,
        }
    }, {
        'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract media info for a cloudy.ec video URL.

        The embed player page is fetched first and parsed for HTML5 media
        entries; the first entry becomes the result. The watch page is then
        fetched on a best-effort basis and, when available, is scraped for
        the title, upload date and view count.

        Args:
            url: A URL matching ``_VALID_URL``.

        Returns:
            The info dict of the first parsed media entry, with ``id`` set to
            the video id and ``title`` always populated (falling back to the
            video id when no title could be scraped).
        """
        video_id = self._match_id(url)

        embed_page = self._download_webpage(
            'https://www.cloudy.ec/embed.php', video_id, query={
                'id': video_id,
                'playerPage': 1,
                'autoplay': 1,
            })

        info = self._parse_html5_media_entries(url, embed_page, video_id)[0]

        # The watch page only contributes optional metadata, so a failed
        # download must not abort the extraction.
        watch_page = self._download_webpage(
            'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False)

        if watch_page:
            info.update({
                # Non-fatal like the other fields of this optional page: a
                # missing heading falls through to the video id fallback
                # below instead of discarding the formats already found.
                'title': self._search_regex(
                    r'<h\d[^>]*>([^<]+)<', watch_page, 'title', fatal=False),
                'upload_date': unified_strdate(self._search_regex(
                    r'>Published at (\d{4}-\d{1,2}-\d{1,2})', watch_page,
                    'upload date', fatal=False)),
                'view_count': str_to_int(self._search_regex(
                    r'([\d,.]+) views<', watch_page, 'view count', fatal=False)),
            })

        # Covers both an unavailable watch page and one without a heading.
        if not info.get('title'):
            info['title'] = video_id

        info['id'] = video_id

        return info