]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/lefigaro.py
[extractor/youtube] Misc cleanup
[yt-dlp.git] / yt_dlp / extractor / lefigaro.py
CommitLineData
eb8fd6d0
E
1import json
2import math
3
4from .common import InfoExtractor
5from ..utils import (
6 InAdvancePagedList,
7 traverse_obj,
8)
9
10
class LeFigaroVideoEmbedIE(InfoExtractor):
    """Extractor for video.lefigaro.fr embed pages.

    The page's Next.js data carries a ``playerData`` object; its ``videoId``
    is handed off as a ``jwplatform:`` URL result.
    """

    _VALID_URL = r'https?://video\.lefigaro\.fr/embed/[^?#]+/(?P<id>[\w-]+)'

    # Direct embed URLs
    _TESTS = [{
        'url': 'https://video.lefigaro.fr/embed/figaro/video/les-francais-ne-veulent-ils-plus-travailler-suivez-en-direct-le-club-le-figaro-idees/',
        'md5': 'e94de44cd80818084352fcf8de1ce82c',
        'info_dict': {
            'id': 'g9j7Eovo',
            'title': 'Les Français ne veulent-ils plus travailler ? Retrouvez Le Club Le Figaro Idées',
            'description': 'md5:862b8813148ba4bf10763a65a69dfe41',
            'upload_date': '20230216',
            'timestamp': 1676581615,
            'duration': 3076,
            'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://video.lefigaro.fr/embed/figaro/video/intelligence-artificielle-faut-il-sen-mefier/',
        'md5': '0b3f10332b812034b3a3eda1ef877c5f',
        'info_dict': {
            'id': 'LeAgybyc',
            'title': 'Intelligence artificielle : faut-il s’en méfier ?',
            'description': 'md5:249d136e3e5934a67c8cb704f8abf4d2',
            'upload_date': '20230124',
            'timestamp': 1674584477,
            'duration': 860,
            'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
            'ext': 'mp4',
        },
    }]

    # Regular video pages (non-embed URLs) exercised as webpage tests
    _WEBPAGE_TESTS = [{
        'url': 'https://video.lefigaro.fr/figaro/video/suivez-en-direct-le-club-le-figaro-international-avec-philippe-gelie-9/',
        'md5': '3972ddf2d5f8b98699f191687258e2f9',
        'info_dict': {
            'id': 'QChnbPYA',
            'title': 'Où en est le couple franco-allemand ? Retrouvez Le Club Le Figaro International',
            'description': 'md5:6f47235b7e7c93b366fd8ebfa10572ac',
            'upload_date': '20230123',
            'timestamp': 1674503575,
            'duration': 3153,
            'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
            'age_limit': 0,
            'ext': 'mp4',
        },
    }, {
        'url': 'https://video.lefigaro.fr/figaro/video/la-philosophe-nathalie-sarthou-lajus-est-linvitee-du-figaro-live/',
        'md5': '3ac0a0769546ee6be41ab52caea5d9a9',
        'info_dict': {
            'id': 'QJzqoNbf',
            'title': 'La philosophe Nathalie Sarthou-Lajus est l’invitée du Figaro Live',
            'description': 'md5:c586793bb72e726c83aa257f99a8c8c4',
            'upload_date': '20230217',
            'timestamp': 1676661986,
            'duration': 1558,
            'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
            'age_limit': 0,
            'ext': 'mp4',
        },
    }]

    def _real_extract(self, url):
        """Download the embed page and delegate to the ``jwplatform:`` video it references."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Player metadata is nested inside the page's Next.js data blob;
        # a missing key along this path raises KeyError.
        nextjs_data = self._search_nextjs_data(webpage, display_id)
        player_data = nextjs_data['props']['pageProps']['pageData']['playerData']
        video_id = player_data['videoId']

        # title/description/poster are optional and passed along as-is (None when absent)
        return self.url_result(
            f'jwplatform:{video_id}',
            title=player_data.get('title'),
            description=player_data.get('description'),
            thumbnail=player_data.get('poster'))
81
82
class LeFigaroVideoSectionIE(InfoExtractor):
    """Playlist extractor for section pages under video.lefigaro.fr/figaro/.

    Entries are listed page by page through the site's GraphQL endpoint and
    each one is delegated to LeFigaroVideoEmbedIE via its embed URL.
    """

    _VALID_URL = r'https?://video\.lefigaro\.fr/figaro/(?P<id>[\w-]+)/?(?:[#?]|$)'

    _TESTS = [{
        'url': 'https://video.lefigaro.fr/figaro/le-club-le-figaro-idees/',
        'info_dict': {
            'id': 'le-club-le-figaro-idees',
            'title': 'Le Club Le Figaro Idées',
        },
        'playlist_mincount': 14,
    }, {
        'url': 'https://video.lefigaro.fr/figaro/factu/',
        'info_dict': {
            'id': 'factu',
            'title': 'Factu',
        },
        'playlist_mincount': 519,
    }]

    # Number of videos requested per API page (sent as `videosLimit`)
    _PAGE_SIZE = 20

    def _get_api_response(self, display_id, page_num, note=None):
        """Fetch one page of a section's video list from the GraphQL endpoint.

        @param display_id  section slug taken from the URL; sent as `slug`
        @param page_num    page number sent to the API as `page`
        @param note        optional progress note forwarded to `_download_json`
        @returns           the result of `_download_json` for the request
        """
        return self._download_json(
            'https://api-graphql.lefigaro.fr/graphql', display_id, note=note,
            query={
                # NOTE(review): presumably a persisted-query identifier - confirm
                'id': 'flive-website_UpdateListPage_1fb260f996bca2d78960805ac382544186b3225f5bedb43ad08b9b8abef79af6',
                'variables': json.dumps({
                    'slug': display_id,
                    'videosLimit': self._PAGE_SIZE,
                    'sort': 'DESC',
                    'order': 'PUBLISHED_AT',
                    'page': page_num,
                }).encode(),
            })

    def _real_extract(self, url):
        """Build a paged playlist of all videos in the section."""
        display_id = self._match_id(url)
        # Page 1 is needed up front for the playlist title and total video count
        first_response = self._get_api_response(display_id, page_num=1)
        initial_response = first_response['data']['playlist']

        def page_func(page_num):
            # The paged-list index is one lower than the API page number, so
            # index 0 is API page 1, which was already downloaded above.
            # Reuse it instead of issuing the same request a second time.
            if page_num == 0:
                api_response = first_response
            else:
                api_response = self._get_api_response(
                    display_id, page_num + 1, note=f'Downloading page {page_num + 1}')

            return [self.url_result(
                video['embedUrl'], LeFigaroVideoEmbedIE, **traverse_obj(video, {
                    'title': 'name',
                    'description': 'description',
                    'thumbnail': 'thumbnailUrl',
                })) for video in api_response['data']['playlist']['jsonLd'][0]['itemListElement']]

        # Page count is known in advance from the reported total number of videos
        entries = InAdvancePagedList(
            page_func, math.ceil(initial_response['videoCount'] / self._PAGE_SIZE), self._PAGE_SIZE)

        return self.playlist_result(entries, playlist_id=display_id, playlist_title=initial_response.get('title'))