jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/alsace20tv.py
Don't download entire video when no matching `--download-sections`
[yt-dlp.git] / yt_dlp / extractor / alsace20tv.py
1 from .common import InfoExtractor
2 from ..utils import (
3 clean_html,
4 dict_get,
5 get_element_by_class,
6 int_or_none,
7 unified_strdate,
8 url_or_none,
9 )
10
11
class Alsace20TVBaseIE(InfoExtractor):
    """Shared extraction logic for the alsace20.tv extractors."""

    def _extract_video(self, video_id, url=None):
        """Build the info dict for ``video_id``.

        The player JSON endpoint provides the title, the stream manifests,
        the thumbnail and the view count. When ``url`` is given, that page is
        additionally downloaded (non-fatally) and used for the description,
        the duration and a fallback thumbnail.

        Every field other than the id, title and formats is optional, so a
        missing value yields ``None`` instead of aborting the extraction.
        """
        info = self._download_json(
            'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
            video_id) or {}
        title = info.get('titre')

        formats = []
        for res, fmt_url in (info.get('files') or {}).items():
            # Skip null / malformed entries rather than crashing on the
            # substring test below.
            if not isinstance(fmt_url, str):
                continue
            formats.extend(
                self._extract_smil_formats(fmt_url, video_id, fatal=False)
                if '/smil:_' in fmt_url
                else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
        self._sort_formats(formats)

        # The embed path passes no URL; an empty page keeps the lookups below uniform.
        webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''

        # default=None: the OpenGraph thumbnail is only a fallback, so its
        # absence should not produce a warning.
        thumbnail = url_or_none(
            dict_get(info, ('image', 'preview', ))
            or self._og_search_thumbnail(webpage, default=None))

        # The upload date is taken from a six-digit "/YYMMDD_" component of
        # the thumbnail URL; without a thumbnail there is nothing to parse.
        upload_date = None
        if thumbnail:
            date_digits = self._search_regex(
                r'/(\d{6})_', thumbnail, 'upload_date', default=None)
            if date_digits:
                upload_date = unified_strdate(
                    '20%s-%s-%s' % (date_digits[:2], date_digits[2:4], date_digits[4:]))

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': clean_html(get_element_by_class('wysiwyg', webpage)),
            'upload_date': upload_date,
            'thumbnail': thumbnail,
            # Optional field: must not raise when the page has no
            # og:video:duration tag (or when there is no page at all).
            'duration': int_or_none(
                self._og_search_property('video:duration', webpage, default=None)),
            'view_count': int_or_none(info.get('nb_vues')),
        }
41
42
class Alsace20TVIE(Alsace20TVBaseIE):
    """Extractor for regular alsace20.tv video pages."""

    # The video key is the run of word characters after the last hyphen of
    # the final path segment.
    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P<id>[\w]+)'
    _TESTS = [
        {
            'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html',
            'info_dict': {
                'id': 'lyNHCXpYJh',
                'ext': 'mp4',
                'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7',
                'title': 'Votre JT du jeudi 3 février',
                'upload_date': '20220203',
                'thumbnail': r're:https?://.+\.jpg',
                'duration': 1073,
                'view_count': int,
            },
        },
    ]

    def _real_extract(self, url):
        """Extract the video, passing the page URL along for page-level metadata."""
        return self._extract_video(self._match_id(url), url)
62
63
class Alsace20TVEmbedIE(Alsace20TVBaseIE):
    """Extractor for alsace20.tv embed URLs (``/emb/<key>``)."""

    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P<id>[\w]+)'
    _TESTS = [
        {
            'url': 'https://www.alsace20.tv/emb/lyNHCXpYJh',
            # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
            'info_dict': {
                'id': 'lyNHCXpYJh',
                'ext': 'mp4',
                'title': 'Votre JT du jeudi 3 février',
                'upload_date': '20220203',
                'thumbnail': r're:https?://.+\.jpg',
                'view_count': int,
            },
            'params': {
                'format': 'bestvideo',
            },
        },
    ]

    def _real_extract(self, url):
        """Extract the video from the player JSON only; no page URL is passed on."""
        return self._extract_video(self._match_id(url))