]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/cartoonnetwork.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / cartoonnetwork.py
1 from .turner import TurnerBaseIE
2 from ..utils import int_or_none
3
4
class CartoonNetworkIE(TurnerBaseIE):
    """Extractor for cartoonnetwork.com ``-clip.html`` / ``-episode.html`` video pages.

    Metadata is scraped from JavaScript string assignments embedded in the
    page. The media id found there is handed to ``_extract_ngtv_info``
    (not defined in this class; presumably inherited from ``TurnerBaseIE``),
    and the info it returns is then completed with page-level metadata.
    """

    _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
    _TEST = {
        'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
        'info_dict': {
            'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
            'ext': 'mp4',
            'title': 'How to Draw Upgrade',
            'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
        },
        'params': {
            # m3u8 download, so the test skips fetching the media itself
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The media id and title are mandatory (their lookups are fatal);
        every other field is looked up non-fatally, with an HTML meta tag as
        a fallback for series, season number and episode number.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        def page_var(global_key, label, content_key=None, value_re='[^"]+', fatal=False):
            # Matches `_cnglobal.currentVideo.<global_key> = "<value>";` and,
            # when content_key is given, alternatively
            # `video_metadata.content_<content_key> = "<value>";`.
            alternative = rf'|video_metadata\.content_{content_key}' if content_key else ''
            pattern = rf'(?:_cnglobal\.currentVideo\.{global_key}{alternative})\s*=\s*"({value_re})";'
            return self._search_regex(pattern, webpage, label, fatal=fatal)

        media_id = page_var(
            'mediaId', 'media id', content_key='id', value_re='[0-9a-f]{40}', fatal=True)
        title = page_var(
            'episodeTitle', 'title', content_key='(?:episodeName|name)', fatal=True)

        # Anything other than an explicit "unauth" (including a missing
        # value, which comes back as None) is treated as requiring auth.
        auth_type = page_var('authType', 'auth type')
        info = self._extract_ngtv_info(
            media_id,
            {'networkId': 'cartoonnetwork'},
            {
                'url': url,
                'site_name': 'CartoonNetwork',
                'auth_required': auth_type != 'unauth',
            })

        series = page_var('propertyName', 'series', content_key='showName')
        if not series:
            series = self._html_search_meta('partOfSeries', webpage)
        description = self._html_search_meta('description', webpage)

        # The page title doubles as the episode name.
        info.update({
            'id': media_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'series': series,
            'episode': title,
        })

        for kind in ('season', 'episode'):
            source_key = f'{kind}Number'
            raw_number = page_var(source_key, f'{kind} number', value_re=r'\d+')
            if not raw_number:
                # Fall back to the <meta> tag of the same name.
                raw_number = self._html_search_meta(source_key, webpage)
            info[f'{kind}_number'] = int_or_none(raw_number)

        return info