jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/popcorntv.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / popcorntv.py
CommitLineData
9c2a17f2
S
1from .common import InfoExtractor
2from ..utils import (
3 extract_attributes,
4 int_or_none,
5 unified_timestamp,
6)
7
8
class PopcornTVIE(InfoExtractor):
    """Extractor for ``/guarda/<display_id>/<id>`` pages on popcorntv.it subdomains."""

    # Any subdomain of popcorntv.it is accepted; the slug and the numeric id
    # are captured as the ``display_id`` and ``id`` groups respectively.
    _VALID_URL = r'https?://[^/]+\.popcorntv\.it/guarda/(?P<display_id>[^/]+)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183',
            'md5': '47d65a48d147caf692ab8562fe630b45',
            'info_dict': {
                'id': '9183',
                'display_id': 'food-wars-battaglie-culinarie-episodio-01',
                'ext': 'mp4',
                'title': 'Food Wars, Battaglie Culinarie | Episodio 01',
                'description': 'md5:b8bea378faae4651d3b34c6e112463d0',
                'thumbnail': r're:^https?://.*\.jpg$',
                'timestamp': 1497610857,
                'upload_date': '20170616',
                'duration': 1440,
                'view_count': int,
            },
        },
        {
            'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Download the watch page at *url* and build the info dict from its markup.

        The HLS manifest URL is read from the ``href`` of the ``<link>`` tag
        whose ``itemprop`` is ``contentUrl`` or ``embedUrl``; the remaining
        fields come from the page's heading, article, Open Graph and meta tags.
        """
        match = self._match_valid_url(url)
        display_id = match.group('display_id')
        video_id = match.group('id')

        webpage = self._download_webpage(url, display_id)

        # Grab the whole <link ...> tag first, then parse its attributes.
        link_tag = self._search_regex(
            r'(<link[^>]+itemprop=["\'](?:content|embed)Url[^>]*>)',
            webpage, 'content')
        manifest_url = extract_attributes(link_tag)['href']

        formats = self._extract_m3u8_formats(
            manifest_url, display_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')

        # Prefer the <h1 itemprop="name"> heading; fall back to og:title when
        # the heading is absent or empty.
        heading = self._search_regex(
            r'<h1[^>]+itemprop=["\']name[^>]*>([^<]+)', webpage,
            'title', default=None)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': heading or self._og_search_title(webpage),
            'description': self._html_search_regex(
                r'(?s)<article[^>]+itemprop=["\']description[^>]*>(.+?)</article>',
                webpage, 'description', fatal=False),
            'thumbnail': self._og_search_thumbnail(webpage),
            'timestamp': unified_timestamp(
                self._html_search_meta('uploadDate', webpage, 'timestamp')),
            # NOTE(review): invscale=60 presumably converts a value given in
            # minutes to seconds (the test expects 1440) - confirm.
            'duration': int_or_none(
                self._html_search_meta('duration', webpage), invscale=60),
            'view_count': int_or_none(
                self._html_search_meta('interactionCount', webpage, 'view count')),
            'formats': formats,
        }