]>
Commit | Line | Data |
---|---|---|
a572ae61 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..compat import compat_str | |
8 | from ..utils import ( | |
9 | int_or_none, | |
10 | unescapeHTML, | |
11 | ) | |
12 | ||
13 | ||
class TVNetIE(InfoExtractor):
    """Extractor for video, radio and live channel pages on *.tvnet.gov.vn.

    The numeric id is taken from the URL path (the last of up to two
    consecutive numeric segments, see ``_VALID_URL``).  Stream URLs are read
    from a JSON document referenced by the page's ``data-file`` attribute and
    are expanded as HLS (m3u8) formats.
    """

    _VALID_URL = r'https?://(?:[^/]+)\.tvnet\.gov\.vn/[^/]+/(?:\d+/)?(?P<id>\d+)(?:/|$)'
    _TESTS = [{
        # video
        'url': 'http://de.tvnet.gov.vn/video/109788/vtv1---bac-tuyet-tai-lao-cai-va-ha-giang/tin-nong-24h',
        'md5': 'b4d7abe0252c9b47774760b7519c7558',
        'info_dict': {
            'id': '109788',
            'ext': 'mp4',
            'title': 'VTV1 - Bắc tuyết tại Lào Cai và Hà Giang',
            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
            'is_live': False,
            'view_count': int,
        },
    }, {
        # audio
        'url': 'http://vn.tvnet.gov.vn/radio/27017/vov1---ban-tin-chieu-10062018/doi-song-va-xa-hoi',
        'md5': 'b5875ce9b0a2eecde029216d0e6db2ae',
        'info_dict': {
            'id': '27017',
            'ext': 'm4a',
            'title': 'VOV1 - Bản tin chiều (10/06/2018)',
            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
            'is_live': False,
        },
    }, {
        'url': 'http://us.tvnet.gov.vn/video/118023/129999/ngay-0705',
        'info_dict': {
            'id': '129999',
            'ext': 'mp4',
            'title': 'VTV1 - Quốc hội với cử tri (11/06/2018)',
            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
            'is_live': False,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # live stream
        'url': 'http://us.tvnet.gov.vn/kenh-truyen-hinh/1011/vtv1',
        'info_dict': {
            'id': '1011',
            'ext': 'mp4',
            'title': r're:^VTV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # radio live stream
        'url': 'http://vn.tvnet.gov.vn/kenh-truyen-hinh/1014',
        'info_dict': {
            'id': '1014',
            'ext': 'm4a',
            'title': r're:VOV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://us.tvnet.gov.vn/phim/6136/25510/vtv3---ca-mot-doi-an-oan-tap-1-50/phim-truyen-hinh',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single tvnet.gov.vn page.

        Downloads the page at ``url``, resolves the title, locates the JSON
        stream list referenced by ``data-file`` and collects the HLS formats
        for every distinct stream URL found in it.

        Returns a dict with the keys ``id``, ``title``, ``thumbnail``,
        ``is_live``, ``view_count`` and ``formats``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Title fallbacks, in order: og:title, <meta name="title">, <title>.
        title = self._og_search_title(
            webpage, default=None) or self._html_search_meta(
            'title', webpage, default=None) or self._search_regex(
            r'<title>([^<]+)<', webpage, 'title')
        # Drop the trailing site-name suffix.
        title = re.sub(r'\s*-\s*TV Net\s*$', '', title)

        # Liveness is decided from the URL path alone; any other section
        # (e.g. /phim/) is left undetermined.
        if '/video/' in url or '/radio/' in url:
            is_live = False
        elif '/kenh-truyen-hinh/' in url:
            is_live = True
        else:
            is_live = None

        # The data-file attribute points at a JSON document listing streams.
        data_file = unescapeHTML(self._search_regex(
            r'data-file=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
            'data file', group='url'))

        stream_urls = set()
        formats = []
        for stream in self._download_json(data_file, video_id):
            if not isinstance(stream, dict):
                continue
            stream_url = stream.get('url')
            # Validate emptiness and type BEFORE the set membership test:
            # an unhashable JSON value (list/dict) under 'url' would make
            # `stream_url in stream_urls` raise TypeError and abort the
            # whole extraction instead of just skipping the bad entry.
            if not stream_url or not isinstance(stream_url, compat_str):
                continue
            if stream_url in stream_urls:
                # Same URL already processed for this page.
                continue
            stream_urls.add(stream_url)
            formats.extend(self._extract_m3u8_formats(
                stream_url, video_id, 'mp4',
                entry_protocol='m3u8' if is_live else 'm3u8_native',
                m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        # better support for radio streams: titles starting with 'VOV' are
        # treated as audio-only, so every format is marked m4a / no video.
        if title.startswith('VOV'):
            for f in formats:
                f.update({
                    'ext': 'm4a',
                    'vcodec': 'none',
                })

        # Prefer og:image; otherwise fall back to the data-image attribute.
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or unescapeHTML(
            self._search_regex(
                r'data-image=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
                'thumbnail', default=None, group='url'))

        # Applied only after the 'VOV' check above, which inspects the
        # unmodified title.
        if is_live:
            title = self._live_title(title)

        view_count = int_or_none(self._search_regex(
            r'(?s)<div[^>]+\bclass=["\'].*?view-count[^>]+>.*?(\d+).*?</div>',
            webpage, 'view count', default=None))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'is_live': is_live,
            'view_count': view_count,
            'formats': formats,
        }