]>
Commit | Line | Data |
---|---|---|
1 | from .common import InfoExtractor | |
2 | from ..utils import ExtractorError | |
3 | ||
4 | ||
class SmotrimIE(InfoExtractor):
    """Extractor for smotrim.ru ``brand``, ``video``, ``article`` and ``live`` pages.

    The extractor does not build formats itself: it resolves the player id
    and returns a ``url_result`` pointing at the ``player.vgtrk.com`` iframe
    URL (the tests below expect the ``RUTV`` extractor to pick that up).
    """

    _VALID_URL = r'https?://smotrim\.ru/(?P<type>brand|video|article|live)/(?P<id>[0-9]+)'
    _TESTS = [{  # video
        'url': 'https://smotrim.ru/video/1539617',
        'md5': 'b1923a533c8cab09679789d720d0b1c5',
        'info_dict': {
            'id': '1539617',
            'ext': 'mp4',
            'title': 'Полиглот. Китайский с нуля за 16 часов! Урок №16',
            'description': '',
        },
        'add_ie': ['RUTV'],
    }, {  # article (geo-restricted? plays fine from the US and JP)
        'url': 'https://smotrim.ru/article/2813445',
        'md5': 'e0ac453952afbc6a2742e850b4dc8e77',
        'info_dict': {
            'id': '2431846',
            'ext': 'mp4',
            'title': 'Новости культуры. Съёмки первой программы "Большие и маленькие"',
            'description': 'md5:94a4a22472da4252bf5587a4ee441b99',
        },
        'add_ie': ['RUTV'],
    }, {  # brand, redirect
        'url': 'https://smotrim.ru/brand/64356',
        'md5': '740472999ccff81d7f6df79cecd91c18',
        'info_dict': {
            'id': '2354523',
            'ext': 'mp4',
            'title': 'Большие и маленькие. Лучшее. 4-й выпуск',
            'description': 'md5:84089e834429008371ea41ea3507b989',
        },
        'add_ie': ['RUTV'],
    }, {  # live
        'url': 'https://smotrim.ru/live/19201',
        'info_dict': {
            'id': '19201',
            'ext': 'mp4',
            # this looks like a TV channel name
            'title': 'Россия Культура. Прямой эфир',
            'description': '',
        },
        'add_ie': ['RUTV'],
    }]

    def _real_extract(self, url):
        """Resolve a smotrim.ru URL to the matching player iframe URL.

        ``video`` and ``live`` URLs already carry the player id, so it is
        used as-is.  For ``brand`` and ``article`` URLs the page is
        downloaded and the id is read from the embedded player URL.

        Raises:
            ExtractorError: (``expected=True``) when a ``brand``/``article``
                page contains no embedded player URL.
        """
        video_id, typ = self._match_valid_url(url).group('id', 'type')
        # Only live pages use the "live" player path; everything else,
        # including resolved brand/article pages, uses "video".
        rutv_type = 'live' if typ == 'live' else 'video'

        if typ not in ('video', 'live'):
            webpage = self._download_webpage(url, video_id, f'Resolving {typ} link')
            # there are two cases matching regex:
            # 1. "embedUrl" in JSON LD (/brand/)
            # 2. "src" attribute from iframe (/article/)
            # Dots in the host name are escaped so that only the literal
            # player.smotrim.ru host matches, not any character in their place.
            video_id = self._search_regex(
                r'"https://player\.smotrim\.ru/iframe/video/id/(?P<video_id>\d+)/',
                webpage, 'video_id', default=None)
            if not video_id:
                raise ExtractorError('There are no video in this page.', expected=True)

        return self.url_result(f'https://player.vgtrk.com/iframe/{rutv_type}/id/{video_id}')