]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/wwe.py
[wwe] Add extractor
[yt-dlp.git] / youtube_dl / extractor / wwe.py
CommitLineData
11d19ff5 1from __future__ import unicode_literals
2
3from .common import InfoExtractor
4from ..compat import compat_str
5from ..utils import urljoin
6
7
class WWEIE(InfoExtractor):
    """Information extractor for video pages hosted on wwe.com.

    The video metadata is read from the JSON object that the page passes
    alongside ``Drupal.settings``; the first playlist entry found under
    ``WWEVideoLanding.initialVideo`` describes the video.
    """

    # The dot in "wwe.com" is escaped so that only the literal host matches;
    # an optional single subdomain label (e.g. "www", "de") is accepted.
    _VALID_URL = r'https?://(?:\w+\.)?wwe\.com/(?:.*/)?videos/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.wwe.com/videos/daniel-bryan-vs-andrade-cien-almas-smackdown-live-sept-4-2018',
        'md5': '30cbc824b51f4010ea885bfcaec76972',
        'info_dict': {
            'id': '40048199',
            'ext': 'mp4',
            'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 4, 2018',
            'description': 'Still fuming after he and his wife Brie Bella were attacked by The Miz and Maryse last week, Daniel Bryan takes care of some unfinished business with Andrade "Cien" Almas.',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'https://de.wwe.com/videos/gran-metalik-vs-tony-nese-wwe-205-live-sept-4-2018',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page at *url* and build its info dict.

        Returns a dict with the keys ``id``, ``title``, ``formats``, ``url``,
        ``display_id``, ``thumbnail`` and ``description``.

        Raises ``KeyError``/``IndexError`` when the embedded settings lack
        the ``WWEVideoLanding`` -> ``initialVideo`` -> ``playlist[0]`` path
        or the mandatory ``nid``/``file`` fields.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page embeds its configuration as a JSON object literal that
        # follows "Drupal.settings," and is terminated by ");".
        drupal_settings = self._parse_json(
            self._html_search_regex(
                r'(?s)Drupal\.settings\s*,\s*({.+?})\);',
                webpage, 'drupal settings'),
            display_id)

        player = drupal_settings['WWEVideoLanding']['initialVideo']
        metadata = player['playlist'][0]

        # Named video_id rather than id to avoid shadowing the builtin.
        video_id = compat_str(metadata['nid'])
        title = metadata.get('title') or self._og_search_title(webpage)

        # Only protocol-relative URLs ("//host/path") need a scheme; adding
        # one unconditionally would corrupt an already absolute URL.
        video_url = metadata['file']
        if video_url.startswith('//'):
            video_url = 'https:' + video_url

        # Resolve the image against the page URL only when one is provided.
        image = metadata.get('image')
        thumbnail = urljoin(url, image) if image is not None else None
        description = metadata.get('description')

        formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            # Kept so the returned dict has the same keys as before.
            'url': video_url,
            'display_id': display_id,
            'thumbnail': thumbnail,
            'description': description,
        }