]>
Commit | Line | Data |
---|---|---|
5f3e0b69 E |
1 | from __future__ import unicode_literals |
2 | ||
90e3f18f S |
3 | import re |
4 | ||
5f3e0b69 E |
5 | from .common import InfoExtractor |
6 | from .ooyala import OoyalaIE | |
7 | ||
8 | ||
class TheSunIE(InfoExtractor):
    """Extractor for thesun.co.uk article pages.

    Each Ooyala content id found in the article markup becomes one entry of
    a playlist that is keyed by the numeric article id taken from the URL.
    """

    _VALID_URL = r'https://(?:www\.)?thesun\.co\.uk/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
        'info_dict': {
            'id': '2261604',
            'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
        },
        'playlist_count': 2,
    }

    def _real_extract(self, url):
        """Build a playlist from the Ooyala players referenced by an article.

        The page is downloaded once and scanned for tags that carry either
        an ``id="thesun-ooyala-player-<id>"`` attribute or a
        ``data-content-id="<id>"`` attribute; each captured id is handed to
        ``OoyalaIE._build_url_result``.  The playlist title comes from the
        page's OpenGraph title lookup, requested as non-fatal.
        """
        article_id = self._match_id(url)
        webpage = self._download_webpage(url, article_id)

        # Ids are kept in document order, exactly as the regex finds them.
        ooyala_ids = re.findall(
            r'<[^>]+\b(?:id\s*=\s*"thesun-ooyala-player-|data-content-id\s*=\s*")([^"]+)',
            webpage)
        entries = [
            OoyalaIE._build_url_result(video_id) for video_id in ooyala_ids]

        playlist_title = self._og_search_title(webpage, fatal=False)
        return self.playlist_result(entries, article_id, playlist_title)