]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/imdb.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / extractor / imdb.py
CommitLineData
5d9f6cbc
S
1import base64
2import json
d8d61486 3import re
d8d61486
JMF
4
5from .common import InfoExtractor
dd67702a 6from ..utils import (
7dd6ab4a 7 determine_ext,
18eac302 8 int_or_none,
96c2e3e9 9 mimetype2ext,
dd67702a 10 qualities,
18eac302 11 traverse_obj,
5d9f6cbc 12 try_get,
3052a30d 13 url_or_none,
d8d61486
JMF
14)
15
16
class ImdbIE(InfoExtractor):
    """Extractor for a single IMDb video identified by its numeric ``vi`` id."""

    IE_NAME = 'imdb'
    IE_DESC = 'Internet Movie Database trailers'
    _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).*?[/-]vi(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.imdb.com/video/imdb/vi2524815897',
        'info_dict': {
            'id': '2524815897',
            'ext': 'mp4',
            'title': 'No. 2',
            'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
            'duration': 152,
            'thumbnail': r're:^https?://.+\.jpg',
        },
    }, {
        'url': 'https://www.imdb.com/video/vi3516832537',
        'info_dict': {
            'id': '3516832537',
            'ext': 'mp4',
            'title': 'Paul: U.S. Trailer #1',
            'description': 'md5:17fcc4fe11ec29b4399be9d4c5ef126c',
            'duration': 153,
            'thumbnail': r're:^https?://.+\.jpg',
        },
    }, {
        'url': 'http://www.imdb.com/video/_/vi2524815897',
        'only_matching': True,
    }, {
        'url': 'http://www.imdb.com/title/tt1667889/?ref_=ext_shr_eml_vi#lb-vi2524815897',
        'only_matching': True,
    }, {
        'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897',
        'only_matching': True,
    }, {
        'url': 'http://www.imdb.com/videoplayer/vi1562949145',
        'only_matching': True,
    }, {
        'url': 'http://www.imdb.com/title/tt4218696/videoplayer/vi2608641561',
        'only_matching': True,
    }, {
        'url': 'https://www.imdb.com/list/ls009921623/videoplayer/vi260482329',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        The video page is always fetched from its ``/video/vi<id>`` address,
        whichever URL form matched. Encodings come from the page's embedded
        data when it carries ``playbackURLs``; otherwise they are requested
        from the ``VIDEO_PLAYBACK_DATA`` endpoint.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(f'https://www.imdb.com/video/vi{video_id}', video_id)
        info = self._search_nextjs_data(webpage, video_id)
        video_info = traverse_obj(info, ('props', 'pageProps', 'videoPlaybackData', 'video'), default={})
        title = (traverse_obj(video_info, ('name', 'value'), ('primaryTitle', 'titleText', 'text'))
                 or self._html_search_meta(('og:title', 'twitter:title'), webpage, default=None)
                 or self._html_extract_title(webpage))
        # try_get() yields None when the fallback response does not have the
        # expected shape; normalise that to an empty list so the loop below
        # does not fail with a TypeError on a non-iterable.
        data = video_info.get('playbackURLs') or try_get(self._download_json(
            'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id,
            query={
                'key': base64.b64encode(json.dumps({
                    'type': 'VIDEO_PLAYER',
                    'subType': 'FORCE_LEGACY',
                    'id': f'vi{video_id}',
                }).encode()).decode(),
            }), lambda x: x[0]['videoLegacyEncodings']) or []
        quality = qualities(('SD', '480p', '720p', '1080p'))
        formats, subtitles = [], {}
        for encoding in data:
            # Skip anything that is not a non-empty mapping with a usable URL.
            if not encoding or not isinstance(encoding, dict):
                continue
            video_url = url_or_none(encoding.get('url'))
            if not video_url:
                continue
            # Prefer the declared MIME type; fall back to the URL's extension.
            ext = mimetype2ext(encoding.get(
                'mimeType')) or determine_ext(video_url)
            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    preference=1, m3u8_id='hls', fatal=False)
                subtitles = self._merge_subtitles(subtitles, subs)
                formats.extend(fmts)
                continue
            format_id = traverse_obj(encoding, ('displayName', 'value'), 'definition')
            formats.append({
                'format_id': format_id,
                'url': video_url,
                'ext': ext,
                'quality': quality(format_id),
            })

        return {
            'id': video_id,
            'title': title,
            'alt_title': info.get('videoSubTitle'),
            'formats': formats,
            'description': try_get(video_info, lambda x: x['description']['value']),
            'thumbnail': url_or_none(try_get(video_info, lambda x: x['thumbnail']['url'])),
            'duration': int_or_none(try_get(video_info, lambda x: x['runtime']['value'])),
            'subtitles': subtitles,
        }
c645c765 114
ecfef3e5 115
class ImdbListIE(InfoExtractor):
    """Extractor for an IMDb list page, returned as a playlist of its videos."""

    IE_NAME = 'imdb:list'
    IE_DESC = 'Internet Movie Database lists'
    _VALID_URL = r'https?://(?:www\.)?imdb\.com/list/ls(?P<id>\d{9})(?!/videoplayer/vi\d+)'
    _TEST = {
        'url': 'https://www.imdb.com/list/ls009921623/',
        'info_dict': {
            'id': '009921623',
            'title': 'The Bourne Legacy',
            'description': 'A list of trailers, clips, and more from The Bourne Legacy, starring Jeremy Renner and Rachel Weisz.',
        },
        'playlist_count': 8,
    }

    def _real_extract(self, url):
        """Return a playlist result for the list referenced by *url*.

        Each entry is a URL result for one ``/list/ls<id>/videoplayer/vi...``
        link found in the page, delegated to the ``Imdb`` extractor.
        """
        list_id = self._match_id(url)
        webpage = self._download_webpage(url, list_id)
        entries = [
            self.url_result('http://www.imdb.com' + m, 'Imdb')
            for m in re.findall(rf'href="(/list/ls{list_id}/videoplayer/vi[^"]+)"', webpage)]

        list_title = self._html_search_regex(
            r'<h1[^>]+class="[^"]*header[^"]*"[^>]*>(.*?)</h1>',
            webpage, 'list title')
        # The description is optional metadata: a list page without one
        # should still yield its entries instead of aborting the extraction.
        list_description = self._html_search_regex(
            r'<div[^>]+class="[^"]*list-description[^"]*"[^>]*><p>(.*?)</p>',
            webpage, 'list description', fatal=False)

        return self.playlist_result(entries, list_id, list_title, list_description)