]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/fivetv.py
[cleanup] Use `_html_extract_title`
[yt-dlp.git] / yt_dlp / extractor / fivetv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4
5 from .common import InfoExtractor
6 from ..utils import int_or_none
7
8
class FiveTVIE(InfoExtractor):
    # Extractor for pages hosted on 5-tv.ru (with or without the "www." prefix).
    #
    # The verbose pattern below accepts two URL shapes:
    #   * one or more path segments followed by digits, captured as "id";
    #   * a single leading path segment, captured as "path", optionally
    #     followed by "/", "?" or "#".
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?5-tv\.ru/
                        (?:
                            (?:[^/]+/)+(?P<id>\d+)|
                            (?P<path>[^/?#]+)(?:[/?#])?
                        )
                    '''

    # Sample URLs with the metadata expected for them; entries marked
    # 'only_matching' carry a URL only and no expected metadata.
    _TESTS = [{
        'url': 'http://5-tv.ru/news/96814/',
        'md5': 'bbff554ad415ecf5416a2f48c22d9283',
        'info_dict': {
            'id': '96814',
            'ext': 'mp4',
            'title': 'Россияне выбрали имя для общенациональной платежной системы',
            'description': 'md5:a8aa13e2b7ad36789e9f77a74b6de660',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 180,
        },
    }, {
        'url': 'http://5-tv.ru/video/1021729/',
        'info_dict': {
            'id': '1021729',
            'ext': 'mp4',
            'title': '3D принтер',
            'description': 'md5:d76c736d29ef7ec5c0cf7d7c65ffcb41',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 180,
        },
    }, {
        # redirect to https://www.5-tv.ru/projects/1000095/izvestia-glavnoe/
        'url': 'http://www.5-tv.ru/glavnoe/#itemDetails',
        'info_dict': {
            'id': 'glavnoe',
            'ext': 'mp4',
            'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'skip': 'redirect to «Известия. Главное» project page',
    }, {
        'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/',
        'only_matching': True,
    }, {
        'url': 'http://5-tv.ru/films/1507502/',
        'only_matching': True,
    }, {
        'url': 'http://5-tv.ru/programs/broadcast/508713/',
        'only_matching': True,
    }, {
        'url': 'http://5-tv.ru/angel/',
        'only_matching': True,
    }, {
        'url': 'http://www.5-tv.ru/schedule/?iframe=true&width=900&height=450',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page at *url* and return an info dict for its video.

        The returned dict has the keys 'id', 'url', 'title', 'description',
        'thumbnail' and 'duration'.
        """
        # Prefer the numeric id; fall back to the path segment when the URL
        # matched the second alternative of _VALID_URL.
        numeric_id, path_slug = self._match_valid_url(url).group('id', 'path')
        video_id = numeric_id or path_slug

        webpage = self._download_webpage(url, video_id)

        # Two known page layouts: a player <div> carrying the media link in
        # data-href, or an <a class="videoplayer"> carrying it in href.
        player_patterns = [
            r'<div[^>]+?class="(?:flow)?player[^>]+?data-href="([^"]+)"',
            r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"',
        ]
        video_url = self._search_regex(player_patterns, webpage, 'video url')

        # Open Graph title first; use the HTML title only when it is absent.
        title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._html_extract_title(webpage)

        raw_duration = self._og_search_property(
            'video:duration', webpage, 'duration', default=None)
        duration = int_or_none(raw_duration)

        # Description and thumbnail are looked up with default=None.
        info = {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'duration': duration,
        }
        return info