]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/cloudy.py
[ccc] Improve extraction (closes #14601, closes #20355)
[yt-dlp.git] / youtube_dl / extractor / cloudy.py
CommitLineData
20ff802c 1# coding: utf-8
2from __future__ import unicode_literals
3
20ff802c 4from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    str_to_int,
    unified_strdate,
)
20ff802c 9
10
class CloudyIE(InfoExtractor):
    """Extractor for cloudy.ec watch pages (``/v/<id>``) and embed URLs."""

    _IE_DESC = 'cloudy.ec'
    _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
    _TESTS = [{
        'url': 'https://www.cloudy.ec/v/af511e2527aac',
        'md5': '29832b05028ead1b58be86bf319397ca',
        'info_dict': {
            'id': 'af511e2527aac',
            'ext': 'mp4',
            'title': 'Funny Cats and Animals Compilation june 2013',
            'upload_date': '20130913',
            'view_count': int,
        }
    }, {
        'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        The media entry is taken from the embed player page; title, upload
        date and view count are then read from the watch page on a
        best-effort basis.

        Raises ExtractorError if the embed page yields no HTML5 media entry.
        """
        video_id = self._match_id(url)

        embed_page = self._download_webpage(
            'https://www.cloudy.ec/embed.php', video_id, query={
                'id': video_id,
                'playerPage': 1,
                'autoplay': 1,
            })

        entries = self._parse_html5_media_entries(url, embed_page, video_id)
        if not entries:
            # Report a proper extractor error instead of a bare IndexError
            # when the player page contains no media element.
            raise ExtractorError('Unable to find any HTML5 media entries')
        info = entries[0]

        # The watch page only supplies metadata, so a failed download must
        # not abort the extraction.
        watch_page = self._download_webpage(
            'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False)

        if watch_page:
            info.update({
                # Non-fatal: a missing heading falls through to the
                # video_id fallback below, like a failed page download.
                'title': self._search_regex(
                    r'<h\d[^>]*>([^<]+)<', watch_page, 'title',
                    fatal=False),
                'upload_date': unified_strdate(self._search_regex(
                    r'>Published at (\d{4}-\d{1,2}-\d{1,2})', watch_page,
                    'upload date', fatal=False)),
                'view_count': str_to_int(self._search_regex(
                    r'([\d,.]+) views<', watch_page, 'view count',
                    fatal=False)),
            })

        # Fall back to the video id when no title could be determined.
        if not info.get('title'):
            info['title'] = video_id

        info['id'] = video_id

        return info