]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | ||
8 | from .brightcove import BrightcoveNewIE | |
9 | ||
10 | ||
class VrakIE(InfoExtractor):
    """Extractor for vrak.tv video pages.

    The page itself does not host the media: it embeds a Brightcove player.
    This extractor downloads the page, locates the publisher (account) id,
    the player id and the Brightcove video id, and hands the resulting
    Brightcove player URL over to ``BrightcoveNewIE``.
    """

    _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?target=(?P<id>[0-9\.]+).*'
    _TEST = {
        'url': 'http://www.vrak.tv/videos?target=1.2240923&filtre=emission&id=1.1806721',
        'md5': 'c5d5ce237bca3b1e990ce1b48d1f0948',
        'info_dict': {
            'id': '5231040869001',
            'ext': 'mp4',
            'title': 'Référendums américains, animés japonais et hooligans russes',
            'upload_date': '20161201',
            'description': 'This video file has been uploaded automatically using Oprah. It should be updated with real description soon.',
            'timestamp': 1480628425,
            'uploader_id': '2890187628001',
        }
    }

    # Inspired from BrightcoveNewIE._extract_url().
    #
    # Groups, in order: account id, player id, refId, video id.
    #
    # The account id and player id come from the player element:
    #     <div class="video-player [...]
    #         data-publisher-id="2890187628001"
    #         data-player-id="VkSnGw3cx"
    # The video id comes from the CMS Java/Javascript-like notation:
    #     RW java.lang.String value = '5231040869001';
    # The page shows several videos in a grid layout, so the refId captured
    # from the player element (group 3) is used as a backreference to pin the
    # match to the CMS entry that belongs to that same player.
    #
    # NOTE(review): the value class [^&]+ on the refId line suggests the
    # attribute may hold HTML-escaped JSON -- confirm that the quote
    # characters around refId match the actual page markup.
    _BRIGHTCOVE_EMBED_RE = r'''(?sx)
        <div[^>]+
        data-publisher-id=["\'](\d+)["\']
        [^>]*
        data-player-id=["\']([^"\']+)["\']
        [^>]*
        refId":"([^&]+)"
        [^>]*
        >.*?
        </div>.*?
        RW\ java\.lang\.String\ value\ =\ \'brightcove\.article\.\d+\.\3\'
        [^>]*
        RW\ java\.lang\.String\ value\ =\ \'(\d+)\'
        '''

    def _real_extract(self, url):
        """Resolve a vrak.tv video page to its Brightcove player URL.

        :param url: page URL matching ``_VALID_URL``.
        :returns: the result of ``self.url_result()`` for the Brightcove
            player URL, to be processed by ``BrightcoveNewIE``.
        :raises ExtractorError: if no Brightcove embed is found in the page.
        """
        url_id = self._match_id(url)
        webpage = self._download_webpage(url, url_id)

        # Only the first embed on the page is used.
        mobj = re.search(self._BRIGHTCOVE_EMBED_RE, webpage)
        if mobj is None:
            # Fail explicitly here: without an embed there is nothing to
            # hand over, and a partial result (title only, no id and no
            # media URL) is not usable by the caller.
            from ..utils import ExtractorError
            raise ExtractorError(
                'Unable to find Brightcove video for %s' % url_id)

        # The refId group only serves as the regex backreference.
        account_id, player_id, _, video_id = mobj.groups()

        return self.url_result(
            'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
            % (account_id, player_id, 'default', video_id),
            BrightcoveNewIE.ie_key())