]>
Commit | Line | Data |
---|---|---|
659aa21b | 1 | # encoding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
75427031 | 7 | from ..compat import compat_urlparse |
bcb891e8 | 8 | from ..utils import ( |
75427031 | 9 | determine_ext, |
bcb891e8 | 10 | int_or_none, |
fc26f3b4 S |
11 | unified_strdate, |
12 | ExtractorError, | |
bcb891e8 | 13 | ) |
659aa21b | 14 | |
15 | ||
class LifeNewsIE(InfoExtractor):
    """Extractor for article pages on lifenews.ru.

    A page may carry its media either as one or more inline ``<video>``
    tags or as an ``<iframe>`` pointing at an external player.  When an
    iframe is present it takes precedence and extraction of the media
    itself is delegated to whichever extractor handles the iframe URL.
    """

    IE_NAME = 'lifenews'
    IE_DESC = 'LIFE | NEWS'
    _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://lifenews.ru/news/126342',
        'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
        'info_dict': {
            'id': '126342',
            'ext': 'mp4',
            'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
            'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
            # Raw string: a plain literal would contain the invalid
            # escape sequence "\." and trigger a warning on Python 3.6+.
            'thumbnail': r're:http://.*\.jpg',
            'upload_date': '20140130',
        }
    }, {
        # video in <iframe>
        'url': 'http://lifenews.ru/news/152125',
        'md5': '77d19a6f0886cd76bdbf44b4d971a273',
        'info_dict': {
            'id': '152125',
            'ext': 'mp4',
            'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
            'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
            'upload_date': '20150402',
            'uploader': 'embed.life.ru',
        }
    }, {
        'url': 'http://lifenews.ru/news/153461',
        'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
        'info_dict': {
            'id': '153461',
            'ext': 'mp4',
            'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
            'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
            'upload_date': '20150505',
            'uploader': 'embed.life.ru',
        }
    }, {
        'url': 'http://lifenews.ru/video/13035',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract media information from a lifenews.ru page.

        Returns:
            * a ``url_transparent`` dict when the page embeds an iframe;
            * a single info dict when exactly one ``<video>`` tag is found;
            * a list of info dicts (one per ``<video>`` tag) otherwise.

        Raises:
            ExtractorError: if the page has neither a ``<video>`` tag nor
                an iframe.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        section = mobj.group('section')

        # The URL is rebuilt from its section and id, so an optional
        # "mobile/" prefix in the input is dropped before downloading.
        webpage = self._download_webpage(
            'http://lifenews.ru/%s/%s' % (section, video_id),
            video_id, 'Downloading page')

        # Each match is a (poster, video) tuple, in pattern group order.
        videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
        iframe_link = self._html_search_regex(
            '<iframe[^>]+src=["\']([^"\']+)["\']', webpage, 'iframe link', default=None)
        if not videos and not iframe_link:
            raise ExtractorError('No media links available for %s' % video_id)

        title = self._og_search_title(webpage)
        # Strip the site-wide suffix appended to every page title.
        TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
        if title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)]

        description = self._og_search_description(webpage)

        # The remaining fields are optional: a failed match is non-fatal.
        view_count = self._html_search_regex(
            r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
        comment_count = self._html_search_regex(
            r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
            webpage, 'comment count', fatal=False)

        upload_date = self._html_search_regex(
            r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False)
        if upload_date is not None:
            upload_date = unified_strdate(upload_date)

        # Metadata shared by every result built from this page.
        common_info = {
            'description': description,
            'view_count': int_or_none(view_count),
            'comment_count': int_or_none(comment_count),
            'upload_date': upload_date,
        }

        def make_entry(video_id, media, video_number=None):
            """Build the info dict for one (poster, video) tuple.

            When ``video_number`` is given it is appended to the title so
            that several videos from the same page can be told apart.
            """
            cur_info = dict(common_info)
            cur_info.update({
                'id': video_id,
                'url': media[1],
                'thumbnail': media[0],
                'title': title if video_number is None else '%s-video%s' % (title, video_number),
            })
            return cur_info

        if iframe_link:
            # The iframe src may be protocol-relative ("//host/..."), so a
            # scheme is supplied before handing the URL on.
            iframe_link = self._proto_relative_url(iframe_link, 'http:')
            cur_info = dict(common_info)
            cur_info.update({
                '_type': 'url_transparent',
                'id': video_id,
                'title': title,
                'url': iframe_link,
            })
            return cur_info

        if len(videos) == 1:
            return make_entry(video_id, videos[0])
        else:
            # Two or more inline videos: number them starting from 1.
            return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)]
75427031 S |
125 | |
126 | ||
class LifeEmbedIE(InfoExtractor):
    """Extractor for the embed.life.ru player pages.

    The player page lists its media sources as ``"file": "..."`` entries;
    each one becomes a format (HLS playlists are expanded into their
    individual variants).
    """

    IE_NAME = 'life:embed'
    _VALID_URL = r'http://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'

    _TEST = {
        'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',
        'md5': 'b889715c9e49cb1981281d0e5458fbbe',
        'info_dict': {
            'id': 'e50c2dec2867350528e2574c899b8291',
            'ext': 'mp4',
            'title': 'e50c2dec2867350528e2574c899b8291',
            # Raw string: a plain literal would contain the invalid
            # escape sequence "\." and trigger a warning on Python 3.6+.
            'thumbnail': r're:http://.*\.jpg',
        }
    }

    def _real_extract(self, url):
        """Collect all formats and the thumbnail from an embed page.

        Returns an info dict whose ``title`` is the video id itself, since
        no title is read from the embed page.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        formats = []
        for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
            # Source entries may be relative; resolve them against the page URL.
            video_url = compat_urlparse.urljoin(url, video_url)
            ext = determine_ext(video_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', m3u8_id='m3u8'))
            else:
                # Direct file link: labelled by its extension and given an
                # explicit preference value for format sorting.
                formats.append({
                    'url': video_url,
                    'format_id': ext,
                    'preference': 1,
                })
        self._sort_formats(formats)

        thumbnail = self._search_regex(
            r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None)

        return {
            'id': video_id,
            'title': video_id,
            'thumbnail': thumbnail,
            'formats': formats,
        }