yt_dlp/extractor/cartoonnetwork.py

   1 from .turner import TurnerBaseIE
   2 from ..utils import int_or_none
   3
   4
   5 class CartoonNetworkIE(TurnerBaseIE):
   6     _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
   7     _TEST = {
   8         'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
   9         'info_dict': {
  10             'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
  11             'ext': 'mp4',
  12             'title': 'How to Draw Upgrade',
  13             'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
  14         },
  15         'params': {
  16             # m3u8 download
  17             'skip_download': True,
  18         },
  19     }
  20
  21     def _real_extract(self, url):
  22         display_id = self._match_id(url)
  23         webpage = self._download_webpage(url, display_id)
  24
  25         def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
  26             metadata_re = ''
  27             if content_re:
  28                 metadata_re = r'|video_metadata\.content_' + content_re
  29             return self._search_regex(
  30                 rf'(?:_cnglobal\.currentVideo\.{global_re}{metadata_re})\s*=\s*"({value_re})";',
  31                 webpage, name, fatal=fatal)
  32
  33         media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
  34         title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)
  35
  36         info = self._extract_ngtv_info(
  37             media_id, {'networkId': 'cartoonnetwork'}, {
  38                 'url': url,
  39                 'site_name': 'CartoonNetwork',
  40                 'auth_required': find_field('authType', 'auth type') != 'unauth',
  41             })
  42
  43         series = find_field(
  44             'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
  45         info.update({
  46             'id': media_id,
  47             'display_id': display_id,
  48             'title': title,
  49             'description': self._html_search_meta('description', webpage),
  50             'series': series,
  51             'episode': title,
  52         })
  53
  54         for field in ('season', 'episode'):
  55             field_name = field + 'Number'
  56             info[field + '_number'] = int_or_none(find_field(
  57                 field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))
  58
  59         return info