yt_dlp/extractor/freetv.py

   1 import itertools
   2 import re
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     int_or_none,
   7     traverse_obj,
   8     urlencode_postdata,
   9 )
  10
  11
  12 class FreeTvBaseIE(InfoExtractor):
  13     def _get_api_response(self, content_id, resource_type, postdata):
  14         return self._download_json(
  15             'https://www.freetv.com/wordpress/wp-admin/admin-ajax.php',
  16             content_id, data=urlencode_postdata(postdata),
  17             note=f'Downloading {content_id} {resource_type} JSON')['data']
  18
  19
  20 class FreeTvMoviesIE(FreeTvBaseIE):
  21     _VALID_URL = r'https?://(?:www\.)?freetv\.com/peliculas/(?P<id>[^/]+)'
  22     _TESTS = [{
  23         'url': 'https://www.freetv.com/peliculas/atrapame-si-puedes/',
  24         'md5': 'dc62d5abf0514726640077cd1591aa92',
  25         'info_dict': {
  26             'id': '428021',
  27             'title': 'Atrápame Si Puedes',
  28             'description': 'md5:ca63bc00898aeb2f64ec87c6d3a5b982',
  29             'ext': 'mp4',
  30         }
  31     }, {
  32         'url': 'https://www.freetv.com/peliculas/monstruoso/',
  33         'md5': '509c15c68de41cb708d1f92d071f20aa',
  34         'info_dict': {
  35             'id': '377652',
  36             'title': 'Monstruoso',
  37             'description': 'md5:333fc19ee327b457b980e54a911ea4a3',
  38             'ext': 'mp4',
  39         }
  40     }]
  41
  42     def _extract_video(self, content_id, action='olyott_video_play'):
  43         api_response = self._get_api_response(content_id, 'video', {
  44             'action': action,
  45             'contentID': content_id,
  46         })
  47
  48         video_id, video_url = api_response['displayMeta']['contentID'], api_response['displayMeta']['streamURLVideo']
  49         formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4')
  50         self._sort_formats(formats)
  51
  52         return {
  53             'id': video_id,
  54             'title': traverse_obj(api_response, ('displayMeta', 'title')),
  55             'description': traverse_obj(api_response, ('displayMeta', 'desc')),
  56             'formats': formats,
  57             'subtitles': subtitles,
  58         }
  59
  60     def _real_extract(self, url):
  61         display_id = self._match_id(url)
  62         webpage = self._download_webpage(url, display_id)
  63
  64         return self._extract_video(
  65             self._search_regex((
  66                 r'class=["\'][^>]+postid-(?P<video_id>\d+)',
  67                 r'<link[^>]+freetv.com/\?p=(?P<video_id>\d+)',
  68                 r'<div[^>]+data-params=["\'][^>]+post_id=(?P<video_id>\d+)',
  69             ), webpage, 'video id', group='video_id'))
  70
  71
  72 class FreeTvIE(FreeTvBaseIE):
  73     IE_NAME = 'freetv:series'
  74     _VALID_URL = r'https?://(?:www\.)?freetv\.com/series/(?P<id>[^/]+)'
  75     _TESTS = [{
  76         'url': 'https://www.freetv.com/series/el-detective-l/',
  77         'info_dict': {
  78             'id': 'el-detective-l',
  79             'title': 'El Detective L',
  80             'description': 'md5:f9f1143bc33e9856ecbfcbfb97a759be'
  81         },
  82         'playlist_count': 24,
  83     }, {
  84         'url': 'https://www.freetv.com/series/esmeraldas/',
  85         'info_dict': {
  86             'id': 'esmeraldas',
  87             'title': 'Esmeraldas',
  88             'description': 'md5:43d7ec45bd931d8268a4f5afaf4c77bf'
  89         },
  90         'playlist_count': 62,
  91     }, {
  92         'url': 'https://www.freetv.com/series/las-aventuras-de-leonardo/',
  93         'info_dict': {
  94             'id': 'las-aventuras-de-leonardo',
  95             'title': 'Las Aventuras de Leonardo',
  96             'description': 'md5:0c47130846c141120a382aca059288f6'
  97         },
  98         'playlist_count': 13,
  99     },
 100     ]
 101
 102     def _extract_series_season(self, season_id, series_title):
 103         episodes = self._get_api_response(season_id, 'series', {
 104             'contentID': season_id,
 105             'action': 'olyott_get_dynamic_series_content',
 106             'type': 'list',
 107             'perPage': '1000',
 108         })['1']
 109
 110         for episode in episodes:
 111             video_id = str(episode['contentID'])
 112             formats, subtitles = self._extract_m3u8_formats_and_subtitles(episode['streamURL'], video_id, 'mp4')
 113             self._sort_formats(formats)
 114
 115             yield {
 116                 'id': video_id,
 117                 'title': episode.get('fullTitle'),
 118                 'description': episode.get('description'),
 119                 'formats': formats,
 120                 'subtitles': subtitles,
 121                 'thumbnail': episode.get('thumbnail'),
 122                 'series': series_title,
 123                 'series_id': traverse_obj(episode, ('contentMeta', 'displayMeta', 'seriesID')),
 124                 'season_id': traverse_obj(episode, ('contentMeta', 'displayMeta', 'seasonID')),
 125                 'season_number': traverse_obj(
 126                     episode, ('contentMeta', 'displayMeta', 'seasonNum'), expected_type=int_or_none),
 127                 'episode_number': traverse_obj(
 128                     episode, ('contentMeta', 'displayMeta', 'episodeNum'), expected_type=int_or_none),
 129             }
 130
 131     def _real_extract(self, url):
 132         display_id = self._match_id(url)
 133         webpage = self._download_webpage(url, display_id)
 134
 135         title = self._html_search_regex(
 136             r'<h1[^>]+class=["\']synopis[^>]>(?P<title>[^<]+)', webpage, 'title', group='title', fatal=False)
 137         description = self._html_search_regex(
 138             r'<div[^>]+class=["\']+synopis content[^>]><p>(?P<description>[^<]+)',
 139             webpage, 'description', group='description', fatal=False)
 140
 141         return self.playlist_result(
 142             itertools.chain.from_iterable(
 143                 self._extract_series_season(season_id, title)
 144                 for season_id in re.findall(r'<option[^>]+value=["\'](\d+)["\']', webpage)),
 145             display_id, title, description)