]>
Commit | Line | Data |
---|---|---|
e88e1feb | 1 | from .common import InfoExtractor |
2 | from ..utils import ( | |
bae48342 | 3 | int_or_none, |
e88e1feb | 4 | parse_count, |
bae48342 R |
5 | parse_duration, |
6 | unified_strdate, | |
7 | urljoin, | |
e88e1feb | 8 | ) |
bae48342 | 9 | from ..utils.traversal import traverse_obj |
e88e1feb | 10 | |
11 | ||
class NoodleMagazineIE(InfoExtractor):
    """Extractor for noodlemagazine.com ``/watch/<id>`` pages.

    Metadata is read from the watch page's meta tags; the media sources come
    from a JSON playlist whose URL is embedded in the player iframe.
    """

    # The subdomain group is optional as a whole and includes its trailing
    # dot, so the bare host, "www." and "adult." are all accepted.
    _VALID_URL = r'https?://(?:(?:www|adult)\.)?noodlemagazine\.com/watch/(?P<id>[0-9_-]+)'
    _TEST = {
        'url': 'https://adult.noodlemagazine.com/watch/-67421364_456239604',
        'md5': '9e02aa763612929d0b4b850591a9248b',
        'info_dict': {
            'id': '-67421364_456239604',
            'title': 'Aria alexander manojob',
            'thumbnail': r're:^https://.*\.jpg',
            'ext': 'mp4',
            'duration': 903,
            'view_count': int,
            'like_count': int,
            'description': 'Aria alexander manojob',
            'tags': ['aria', 'alexander', 'manojob'],
            'upload_date': '20190218',
            'age_limit': 18,
        },
    }

    def _real_extract(self, url):
        """Extract formats and metadata for a single watch page.

        Downloads the watch page, the player iframe it references, and the
        JSON playlist referenced by the iframe, then returns the info dict.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        duration = parse_duration(self._html_search_meta('video:duration', webpage, 'duration', default=None))
        # The og:description carries a fixed site suffix; strip it.
        description = self._og_search_property('description', webpage, default='').replace(' watch online hight quality video', '')
        # ''.split(', ') yields [''], so drop empty entries to avoid reporting
        # a single blank tag when the meta tag is missing or empty.
        tags = [
            tag for tag in self._html_search_meta('video:tag', webpage, default='').split(', ')
            if tag
        ]
        view_count = parse_count(self._html_search_meta('ya:ovs:views_total', webpage, default=None))
        like_count = parse_count(self._html_search_meta('ya:ovs:likes', webpage, default=None))
        upload_date = unified_strdate(self._html_search_meta('ya:ovs:upload_date', webpage, default=''))

        def build_url(url_or_path):
            # Join the (possibly relative) value onto the site's base URL.
            return urljoin('https://adult.noodlemagazine.com', url_or_path)

        # The same Referer is sent with both the iframe and playlist requests
        # (presumably required by the site -- not verifiable from this file).
        headers = {'Referer': url}
        player_path = self._html_search_regex(
            r'<iframe[^>]+\bid="iplayer"[^>]+\bsrc="([^"]+)"', webpage, 'player path')
        player_iframe = self._download_webpage(
            build_url(player_path), video_id, 'Downloading iframe page', headers=headers)
        playlist_url = self._search_regex(
            r'window\.playlistUrl\s*=\s*["\']([^"\']+)["\']', player_iframe, 'playlist url')
        playlist_info = self._download_json(build_url(playlist_url), video_id, headers=headers)

        formats = []
        # Only sources that have a truthy 'file' entry are considered.
        for source in traverse_obj(playlist_info, ('sources', lambda _, v: v['file'])):
            if source.get('type') == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    build_url(source['file']), video_id, 'mp4', fatal=False, m3u8_id='hls'))
            else:
                # Direct source: 'label' is used both as the format id and,
                # when it parses as an integer, as the height.
                formats.append(traverse_obj(source, {
                    'url': ('file', {build_url}),
                    'format_id': 'label',
                    'height': ('label', {int_or_none}),
                    'ext': 'type',
                }))

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            # Prefer the page's og:image; fall back to the playlist's image.
            'thumbnail': self._og_search_property('image', webpage, default=None) or playlist_info.get('image'),
            'duration': duration,
            'description': description,
            'tags': tags,
            'view_count': view_count,
            'like_count': like_count,
            'upload_date': upload_date,
            'age_limit': 18,
        }