]>
Commit | Line | Data |
---|---|---|
fc6861b1 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
3a738295 | 7 | from ..compat import compat_urlparse |
fc6861b1 | 8 | from ..utils import ( |
260c7036 | 9 | js_to_json, |
77d9cb2f | 10 | unified_strdate, |
fc6861b1 S |
11 | ) |
12 | ||
13 | ||
class SportBoxIE(InfoExtractor):
    """Extractor for video article pages on news.sportbox.ru.

    The article page only embeds a player; this extractor scrapes the
    page-level metadata and hands the player URL over as a
    ``url_transparent`` result.
    """

    _VALID_URL = r'https?://news\.sportbox\.ru/(?:[^/]+/)+spbvideo_NI\d+_(?P<display_id>.+)'
    _TESTS = [{
        'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S',
        'md5': 'ff56a598c2cf411a9a38a69709e97079',
        'info_dict': {
            'id': '80822',
            'ext': 'mp4',
            'title': 'Гонка 2 заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн',
            'description': 'md5:3d72dc4a006ab6805d82f037fdc637ad',
            # Raw string: '\.' is an invalid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20140928',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4',
        'only_matching': True,
    }, {
        'url': 'http://news.sportbox.ru/video/no_ads/spbvideo_NI536574_V_Novorossijske_proshel_detskij_turnir_Pole_slavy_bojevoj?ci=211355',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the article page and delegate to the embedded player.

        Returns a ``url_transparent`` info dict whose ``url`` is the
        absolute player URL (resolved against ``url``), accompanied by the
        display id, title, description, thumbnail and upload date found on
        the article page.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        # Path of the embedded player; the optional leading slash is
        # dropped by the regex and re-added when building the URL below.
        player = self._search_regex(
            r'src="/?(vdl/player/[^"]+)"', webpage, 'player')

        # The title is looked up in the page's "nodetitle" JSON field
        # first, then in the node header markup.
        title = self._html_search_regex(
            [r'"nodetitle"\s*:\s*"([^"]+)"', r'class="node-header_{1,2}title">([^<]+)'],
            webpage, 'title')
        # Prefer the OpenGraph description, fall back to the meta tag.
        description = self._og_search_description(webpage) or self._html_search_meta(
            'description', webpage, 'description')
        thumbnail = self._og_search_thumbnail(webpage)
        upload_date = unified_strdate(self._html_search_meta(
            'dateCreated', webpage, 'upload date'))

        return {
            '_type': 'url_transparent',
            'url': compat_urlparse.urljoin(url, '/%s' % player),
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
66 | ||
67 | ||
class SportBoxEmbedIE(InfoExtractor):
    """Extractor for the embeddable news.sportbox.ru ``/vdl/player`` pages.

    Formats and basic metadata are read from the settings object passed to
    ``player.setup(...)`` on the player page.
    """

    _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
        'info_dict': {
            'id': '211355',
            'ext': 'mp4',
            'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
            # Raw string: '\.' is an invalid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return every sportbox player URL used as an ``<iframe>`` src.

        Only absolute ``http(s)://news.sportbox.ru/vdl/player...`` URLs in
        double-quoted ``src`` attributes are matched; the result is a list
        of strings (empty when nothing matches).
        """
        return re.findall(
            r'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"',
            webpage)

    def _real_extract(self, url):
        """Extract formats, title and thumbnail from a player page.

        Returns an info dict with ``id``, ``title``, ``thumbnail`` and
        ``formats``. ``node_title`` is required in the player settings; a
        missing key raises ``KeyError``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        formats = []

        def cleanup_js(code):
            # desktop_advert_config contains complex JavaScript that we do
            # not need; drop it (through the end of its line) before
            # converting the remaining settings object to JSON.
            return js_to_json(re.sub(r'desktop_advert_config.*', '', code))

        jwplayer_data = self._parse_json(self._search_regex(
            r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id,
            transform_source=cleanup_js)

        # Both stream kinds are optional in the settings object.
        hls_url = jwplayer_data.get('hls_url')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(
                hls_url, video_id, ext='mp4', m3u8_id='hls'))

        rtsp_url = jwplayer_data.get('rtsp_url')
        if rtsp_url:
            formats.append({
                'url': rtsp_url,
                'format_id': 'rtsp',
            })

        self._sort_formats(formats)

        title = jwplayer_data['node_title']
        thumbnail = jwplayer_data.get('image_url')

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }