]>
Commit | Line | Data |
---|---|---|
db182c63 YCH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..compat import compat_urlparse | |
8 | from ..utils import ( | |
8fffac69 | 9 | extract_attributes, |
db182c63 YCH |
10 | get_element_by_class, |
11 | urlencode_postdata, | |
12 | ) | |
13 | ||
14 | ||
class NJPWWorldIE(InfoExtractor):
    """Extractor for njpwworld.com video pages (``/p/<id>`` URLs).

    When credentials are available, a login is attempted once during
    initialization. Formats are collected from the player pages linked
    from the video page.
    """

    _VALID_URL = r'https?://njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
    IE_DESC = '新日本プロレスワールド'
    _NETRC_MACHINE = 'njpwworld'

    _TEST = {
        'url': 'http://njpwworld.com/p/s_series_00155_1_9/',
        'info_dict': {
            'id': 's_series_00155_1_9',
            'ext': 'mp4',
            'title': '第9試合 ランディ・サベージ vs リック・スタイナー',
            'tags': list,
        },
        'params': {
            'skip_download': True,  # AES-encrypted m3u8
        },
        'skip': 'Requires login',
    }

    _LOGIN_URL = 'https://front.njpwworld.com/auth/login'

    def _real_initialize(self):
        """Attempt to log in before any extraction takes place."""
        self._login()

    def _login(self):
        """Log in with the configured credentials, if any.

        Returns True when no credentials are configured or when the login
        request was redirected away from the login URL; returns False
        (after emitting a warning) when the final URL is still the login
        URL.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if not username:
            return True

        # Setup session (will set necessary cookies)
        self._request_webpage(
            'https://njpwworld.com/', None, note='Setting up session')

        # Only the response handle is needed; the page body is discarded.
        _, urlh = self._download_webpage_handle(
            self._LOGIN_URL, None,
            note='Logging in', errnote='Unable to login',
            data=urlencode_postdata({'login_id': username, 'pw': password}),
            headers={'Referer': 'https://front.njpwworld.com/auth'})
        # /auth/login will return 302 for successful logins
        if urlh.geturl() == self._LOGIN_URL:
            self.report_warning('unable to login')
            return False

        return True

    def _real_extract(self, url):
        """Build the info dict (id, title, formats, tags) for a video page."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        formats = []
        # Every <a href="/player..."> link on the page leads to a player page.
        for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
            player = extract_attributes(mobj.group(0))
            player_path = player.get('href')
            if not player_path:
                continue
            # The link's class attribute is expected to end in "low" or
            # "high"; fall back to "low" when it does not.
            kind = self._search_regex(
                r'(low|high)$', player.get('class') or '', 'kind',
                default='low')
            player_url = compat_urlparse.urljoin(url, player_path)
            player_page = self._download_webpage(
                player_url, video_id, note='Downloading player page')
            entries = self._parse_html5_media_entries(
                player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
                m3u8_entry_protocol='m3u8_native')
            # A player page without any parsable media element yields no
            # entries; skip it instead of failing with an IndexError so
            # that the remaining player links can still be processed.
            if not entries:
                continue
            kind_formats = entries[0]['formats']
            # Rank "high" formats above "low" ones.
            quality = 2 if kind == 'high' else 1
            for f in kind_formats:
                f['quality'] = quality
            formats.extend(kind_formats)

        self._sort_formats(formats)

        # Tags are the link texts of <li class="tag-..."> items inside the
        # "post-content" element; None when that element is absent.
        post_content = get_element_by_class('post-content', webpage)
        tags = re.findall(
            r'<li[^>]+class="tag-[^"]+"><a[^>]*>([^<]+)</a></li>', post_content
        ) if post_content else None

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'tags': tags,
        }