]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/noodlemagazine.py
[ie/SVTPlay] Fix extraction (#7789)
[yt-dlp.git] / yt_dlp / extractor / noodlemagazine.py
CommitLineData
e88e1feb 1from .common import InfoExtractor
2from ..utils import (
bae48342
R
3 extract_attributes,
4 get_element_html_by_id,
5 int_or_none,
e88e1feb 6 parse_count,
bae48342
R
7 parse_duration,
8 unified_strdate,
9 urljoin,
e88e1feb 10)
bae48342 11from ..utils.traversal import traverse_obj
e88e1feb 12
13
class NoodleMagazineIE(InfoExtractor):
    """Extractor for noodlemagazine.com ``/watch/<id>`` pages.

    Metadata is read from the watch page's meta tags; the formats come from
    a JSON playlist whose URL is embedded in the player iframe page.
    """

    # The subdomain is optional as a whole, so that the bare domain,
    # ``www.`` and ``adult.`` are all accepted. (Previously only the
    # ``adult`` alternative carried the dot, so ``www.`` URLs never matched.)
    _VALID_URL = r'https?://(?:(?:www|adult)\.)?noodlemagazine\.com/watch/(?P<id>[0-9_-]+)'
    _TEST = {
        'url': 'https://adult.noodlemagazine.com/watch/-67421364_456239604',
        'md5': '9e02aa763612929d0b4b850591a9248b',
        'info_dict': {
            'id': '-67421364_456239604',
            'title': 'Aria alexander manojob',
            'thumbnail': r're:^https://.*\.jpg',
            'ext': 'mp4',
            'duration': 903,
            'view_count': int,
            'like_count': int,
            'description': 'Aria alexander manojob',
            'tags': ['aria', 'alexander', 'manojob'],
            'upload_date': '20190218',
            'age_limit': 18,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video at ``url``.

        Downloads the watch page, then the player iframe page, then the JSON
        playlist referenced from the iframe, and returns a dict with the
        video's id, formats and metadata.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Metadata from the watch page's meta tags.
        title = self._og_search_title(webpage)
        duration = parse_duration(self._html_search_meta('video:duration', webpage, 'duration', default=None))
        # Strip the boilerplate suffix found in the og:description (the
        # misspelling "hight" is part of the text being matched).
        description = self._og_search_property('description', webpage, default='').replace(' watch online hight quality video', '')
        # ''.split(', ') yields [''], so drop empty entries to avoid reporting
        # a single blank tag when the meta tag is absent.
        tags = [tag for tag in self._html_search_meta('video:tag', webpage, default='').split(', ') if tag]
        view_count = parse_count(self._html_search_meta('ya:ovs:views_total', webpage, default=None))
        like_count = parse_count(self._html_search_meta('ya:ovs:likes', webpage, default=None))
        upload_date = unified_strdate(self._html_search_meta('ya:ovs:upload_date', webpage, default=''))

        # The element with id "iplayer" points at the player page.
        # NOTE(review): the ['src'] lookup raises KeyError if that element or
        # its src attribute is missing - confirm whether a dedicated
        # extractor error is preferred here.
        player_path = extract_attributes(get_element_html_by_id('iplayer', webpage) or '')['src']
        player_iframe = self._download_webpage(
            urljoin('https://adult.noodlemagazine.com', player_path), video_id, 'Downloading iframe page')
        playlist_url = self._search_regex(
            r'window\.playlistUrl\s*=\s*["\']([^"\']+)["\']', player_iframe, 'playlist url')
        # The playlist request is sent with the watch page as Referer.
        playlist_info = self._download_json(
            urljoin('https://adult.noodlemagazine.com', playlist_url), video_id, headers={'Referer': url})

        # Prefer the og:image of the watch page; fall back to the playlist's image.
        thumbnail = self._og_search_property('image', webpage, default=None) or playlist_info.get('image')
        # Map every playlist source that has a truthy 'file' to a format dict.
        formats = traverse_obj(playlist_info, ('sources', lambda _, v: v['file'], {
            'url': 'file',
            'format_id': 'label',
            'height': ('label', {int_or_none}),
            'ext': 'type',
        }))

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'description': description,
            'tags': tags,
            'view_count': view_count,
            'like_count': like_count,
            'upload_date': upload_date,
            'age_limit': 18,
        }