]>
Commit | Line | Data |
---|---|---|
a572ae61 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..compat import compat_str | |
8 | from ..utils import ( | |
9 | int_or_none, | |
10 | unescapeHTML, | |
11 | ) | |
12 | ||
13 | ||
class TVNetIE(InfoExtractor):
    """Extractor for tvnet.gov.vn pages.

    The test cases below cover four page flavours: on-demand video,
    on-demand audio, a live TV stream and a live radio stream.
    """

    # Any subdomain of tvnet.gov.vn, one path segment (the section), then
    # the numeric item id captured as ``id``.
    _VALID_URL = r'https?://(?:[^/]+)\.tvnet\.gov\.vn/[^/]+/(?P<id>[0-9]+)'
    _TESTS = [{
        # video
        'url': 'http://de.tvnet.gov.vn/video/109788/vtv1---bac-tuyet-tai-lao-cai-va-ha-giang/tin-nong-24h',
        'md5': 'b4d7abe0252c9b47774760b7519c7558',
        'info_dict': {
            'id': '109788',
            'ext': 'mp4',
            'title': 'VTV1 - Bắc tuyết tại Lào Cai và Hà Giang',
            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
            'is_live': False,
            'view_count': int,
        },
    }, {
        # audio
        'url': 'http://vn.tvnet.gov.vn/radio/27017/vov1---ban-tin-chieu-10062018/doi-song-va-xa-hoi',
        'md5': 'b5875ce9b0a2eecde029216d0e6db2ae',
        'info_dict': {
            'id': '27017',
            'ext': 'm4a',
            'title': 'VOV1 - Bản tin chiều (10/06/2018)',
            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
            'is_live': False,
        },
    }, {
        # live stream
        'url': 'http://us.tvnet.gov.vn/kenh-truyen-hinh/1011/vtv1',
        'info_dict': {
            'id': '1011',
            'ext': 'mp4',
            'title': r're:^VTV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # radio live stream
        'url': 'http://vn.tvnet.gov.vn/kenh-truyen-hinh/1014',
        'info_dict': {
            'id': '1014',
            'ext': 'm4a',
            'title': r're:VOV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a single tvnet.gov.vn page.

        Downloads the page at ``url``, reads the stream list referenced by
        the page's ``data-file`` attribute and collects HLS formats for
        every distinct stream URL found there.

        Returns a dict with the keys ``id``, ``title``, ``thumbnail``,
        ``is_live``, ``view_count`` and ``formats``.

        NOTE(review): the final title lookup and the ``data-file`` lookup
        are made without a ``default``; presumably the base-class helpers
        abort the extraction when nothing matches -- confirm against
        InfoExtractor.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Title sources in order of preference: OpenGraph title, the
        # ``title`` meta tag, then the raw <title> element.
        title = self._og_search_title(
            webpage, default=None) or self._html_search_meta(
            'title', webpage, default=None) or self._search_regex(
            r'<title>([^<]+)<', webpage, 'title')
        # Drop the trailing "- TV Net" site suffix.
        title = re.sub(r'\s*-\s*TV Net\s*$', '', title)

        # Liveness is decided purely from the URL path; anything that is
        # neither an on-demand nor a channel path stays undetermined.
        if '/video/' in url or '/radio/' in url:
            is_live = False
        elif '/kenh-truyen-hinh/' in url:
            is_live = True
        else:
            is_live = None

        data_file = unescapeHTML(self._search_regex(
            r'data-file=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
            'data file', group='url'))

        stream_urls = set()
        formats = []
        for stream in self._download_json(data_file, video_id):
            if not isinstance(stream, dict):
                continue
            stream_url = stream.get('url')
            # Validate the type *before* the set lookup: an unhashable
            # value (e.g. a list or dict) would otherwise raise TypeError
            # in ``in stream_urls`` instead of being skipped.
            if not stream_url or not isinstance(stream_url, compat_str):
                continue
            # Skip URLs already processed.
            if stream_url in stream_urls:
                continue
            stream_urls.add(stream_url)
            formats.extend(self._extract_m3u8_formats(
                stream_url, video_id, 'mp4',
                entry_protocol='m3u8' if is_live else 'm3u8_native',
                m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        # better support for radio streams
        # (titles starting with "VOV" are treated as audio-only)
        if title.startswith('VOV'):
            for f in formats:
                f.update({
                    'ext': 'm4a',
                    'vcodec': 'none',
                })

        # Prefer the OpenGraph thumbnail, falling back to the page's
        # ``data-image`` attribute.
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or unescapeHTML(
            self._search_regex(
                r'data-image=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
                'thumbnail', default=None, group='url'))

        # Must happen after the ``startswith('VOV')`` check above, which
        # inspects the unmodified title.
        if is_live:
            title = self._live_title(title)

        view_count = int_or_none(self._search_regex(
            r'(?s)<div[^>]+\bclass=["\'].*?view-count[^>]+>.*?(\d+).*?</div>',
            webpage, 'view count', default=None))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'is_live': is_live,
            'view_count': view_count,
            'formats': formats,
        }