]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/lego.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / lego.py
1 import uuid
2
3 from .common import InfoExtractor
4 from ..networking.exceptions import HTTPError
5 from ..utils import (
6 ExtractorError,
7 int_or_none,
8 join_nonempty,
9 qualities,
10 )
11
12
class LEGOIE(InfoExtractor):
    """Extractor for video pages on lego.com.

    The page URL carries a locale (e.g. ``en-us``) and a 32-hex-digit video
    id; both are combined into the ``videoId`` query parameter sent to the
    media player JSON endpoint, whose response supplies the metadata,
    formats and (optionally) a subtitle file.
    """

    _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P<locale>[a-z]{2}-[a-z]{2})/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]{32})'
    _TESTS = [{
        'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1',
        'md5': 'f34468f176cfd76488767fc162c405fa',
        'info_dict': {
            'id': '55492d82-3b1b-4d5e-9857-87fa8c2973b1_en-US',
            'ext': 'mp4',
            'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
            'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
        },
    }, {
        # geo-restricted, but the contentUrl contains a valid url
        'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399',
        'md5': 'c7420221f7ffd03ff056f9db7f8d807c',
        'info_dict': {
            'id': '13bdc229-9ab2-4d96-8570-1a915b3d71e7_nl-NL',
            'ext': 'mp4',
            'title': 'Aflevering 20: Helden van het koninkrijk',
            'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941',
            'age_limit': 5,
        },
    }, {
        # with subtitle
        'url': 'https://www.lego.com/nl-nl/kids/videos/classic/creative-storytelling-the-little-puppy-aa24f27c7d5242bc86102ebdc0f24cba',
        'info_dict': {
            'id': 'aa24f27c-7d52-42bc-8610-2ebdc0f24cba_nl-NL',
            'ext': 'mp4',
            'title': 'De kleine puppy',
            'description': 'md5:5b725471f849348ac73f2e12cfb4be06',
            'age_limit': 1,
            'subtitles': {
                'nl': [{
                    'ext': 'srt',
                    'url': r're:^https://.+\.srt$',
                }],
            },
        },
        'params': {
            'skip_download': True,
        },
    }]
    # Quality label -> (abr, height, width); the tuple order is the one
    # _real_extract unpacks into the 'abr', 'height' and 'width' format fields.
    _QUALITIES = {
        'Lowest': (64, 180, 320),
        'Low': (64, 270, 480),
        'Medium': (96, 360, 640),
        'High': (128, 540, 960),
        'Highest': (128, 720, 1280),
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        An HTTP 451 response from the media player endpoint is reported
        through ``raise_geo_restricted``; any other ``ExtractorError`` from
        the download is re-raised unchanged. A response without the
        ``Video`` object, or a video without ``Id``/``Title``, raises
        ``KeyError``.
        """
        locale, video_id = self._match_valid_url(url).groups()
        # The second half of the locale (e.g. "us" in "en-us") is used as the
        # country code for geo bypass and for the geo-restriction message.
        countries = [locale.split('-')[1].upper()]
        self._initialize_geo_bypass({
            'countries': countries,
        })

        try:
            item = self._download_json(
                # https://contentfeed.services.lego.com/api/v2/item/[VIDEO_ID]?culture=[LOCALE]&contentType=Video
                'https://services.slingshot.lego.com/mediaplayer/v2',
                video_id, query={
                    # uuid.UUID() renders the 32 hex digits in dashed form.
                    'videoId': '%s_%s' % (uuid.UUID(video_id), locale),
                }, headers=self.geo_verification_headers())
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 451:
                self.raise_geo_restricted(countries=countries)
            raise

        video = item['Video']
        # From here on the id reported by the API replaces the URL-derived one.
        video_id = video['Id']
        title = video['Title']

        q = qualities(['Lowest', 'Low', 'Medium', 'High', 'Highest'])
        formats = []
        # `or []` also covers an explicit null value, which the default
        # argument of dict.get() would not.
        for video_source in item.get('VideoFormats') or []:
            video_source_url = video_source.get('Url')
            if not video_source_url:
                continue
            video_source_format = video_source.get('Format')
            if video_source_format == 'F4M':
                formats.extend(self._extract_f4m_formats(
                    video_source_url, video_id,
                    f4m_id=video_source_format, fatal=False))
            elif video_source_format == 'M3U8':
                formats.extend(self._extract_m3u8_formats(
                    video_source_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=video_source_format, fatal=False))
            else:
                # Any other format value is treated as a direct media URL.
                video_source_quality = video_source.get('Quality')
                f = {
                    'format_id': join_nonempty(video_source_format, video_source_quality),
                    'quality': q(video_source_quality),
                    'url': video_source_url,
                }
                quality = self._QUALITIES.get(video_source_quality)
                if quality:
                    f.update({
                        'abr': quality[0],
                        'height': quality[1],
                        'width': quality[2],
                    })
                formats.append(f)

        subtitles = {}
        sub_file_id = video.get('SubFileId')
        # An all-zero UUID is treated the same as a missing subtitle file id.
        if sub_file_id and sub_file_id != '00000000-0000-0000-0000-000000000000':
            net_storage_path = video.get('NetstoragePath')
            invariant_id = video.get('InvariantId')
            video_file_id = video.get('VideoFileId')
            video_version = video.get('VideoVersion')
            # The subtitle URL is only built when every component is present.
            if net_storage_path and invariant_id and video_file_id and video_version:
                subtitles.setdefault(locale[:2], []).append({
                    'url': 'https://lc-mediaplayerns-live-s.legocdn.com/public/%s/%s_%s_%s_%s_sub.srt' % (net_storage_path, invariant_id, video_file_id, locale, video_version),
                })

        return {
            'id': video_id,
            'title': title,
            'description': video.get('Description'),
            'thumbnail': video.get('GeneratedCoverImage') or video.get('GeneratedThumbnail'),
            'duration': int_or_none(video.get('Length')),
            'formats': formats,
            'subtitles': subtitles,
            'age_limit': int_or_none(video.get('AgeFrom')),
            'season': video.get('SeasonTitle'),
            # `or None` turns a 0 season/episode number into a missing value.
            'season_number': int_or_none(video.get('Season')) or None,
            'episode_number': int_or_none(video.get('Episode')) or None,
        }