]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/lci.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / lci.py
1 from .common import InfoExtractor
2
3
class LCIIE(InfoExtractor):
    """Extractor for article pages on lci.fr and tf1info.fr.

    The article page itself is not parsed for media: the extractor only
    looks up the embedded ``watId`` and hands the result over to the
    ``Wat`` extractor through a ``wat:<id>`` URL.
    """

    # Matches both hosts; the trailing number of the article slug is the id.
    _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html'
    _TESTS = [
        {
            'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html',
            'info_dict': {
                'id': '13875948',
                'ext': 'mp4',
                'title': 'md5:660df5481fd418bc3bbb0d070e6fdb5a',
                'thumbnail': 'https://photos.tf1.fr/1280/720/presidentielle-2022-marine-le-pen-et-emmanuel-macron-invites-de-lci-ce-vendredi-9c0e73-e1a036-0@1x.jpg',
                'upload_date': '20220422',
                'duration': 33,
            },
            'params': {'skip_download': True},
        },
        {
            'url': 'https://www.lci.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Resolve an article URL to a ``wat:<id>`` URL result.

        The numeric id taken from the URL is only used as the id passed to
        the page download; the id handed to ``url_result`` is the wat id
        found in the page.
        """
        article_id = self._match_id(url)
        page = self._download_webpage(url, article_id)
        # The pattern tolerates an optional quote after the key and before
        # the numeric value (e.g. `watId: 123` or `"watId":"123"`).
        wat_id = self._search_regex(
            r'watId["\']?\s*:\s*["\']?(\d+)', page, 'wat id')
        return self.url_result(f'wat:{wat_id}', 'Wat', wat_id)