jfr.im git - yt-dlp.git/commitdiff
Update to ytdl-commit-3be0980
authorpukkandan <redacted>
Sun, 14 Mar 2021 23:22:06 +0000 (04:52 +0530)
committerpukkandan <redacted>
Sun, 14 Mar 2021 23:22:06 +0000 (04:52 +0530)
https://github.com/ytdl-org/youtube-dl/commit/3be098010f667b14075e3dfad1e74e5e2becc8ea

17 files changed:
yt_dlp/YoutubeDL.py
yt_dlp/extractor/applepodcasts.py
yt_dlp/extractor/bandcamp.py
yt_dlp/extractor/bilibili.py
yt_dlp/extractor/cbs.py
yt_dlp/extractor/common.py
yt_dlp/extractor/fujitv.py
yt_dlp/extractor/lbry.py
yt_dlp/extractor/peertube.py
yt_dlp/extractor/pinterest.py
yt_dlp/extractor/pornhub.py
yt_dlp/extractor/rtve.py
yt_dlp/extractor/shahid.py
yt_dlp/extractor/southpark.py
yt_dlp/extractor/sportdeutschland.py
yt_dlp/extractor/tver.py
yt_dlp/extractor/voxmedia.py

index 397d405033725960487777279c57d40ee76d9692..0979252c987c24e9283dae6c2416b911e0cd5a7f 100644 (file)
@@ -1794,14 +1794,18 @@ def sanitize_numeric_fields(info):
         if 'display_id' not in info_dict and 'id' in info_dict:
             info_dict['display_id'] = info_dict['id']
 
-        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
-            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
-            # see http://bugs.python.org/issue1646728)
-            try:
-                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
-                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
-            except (ValueError, OverflowError, OSError):
-                pass
+        for ts_key, date_key in (
+                ('timestamp', 'upload_date'),
+                ('release_timestamp', 'release_date'),
+        ):
+            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
+                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+                # see http://bugs.python.org/issue1646728)
+                try:
+                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
+                except (ValueError, OverflowError, OSError):
+                    pass
 
         # Auto generate title fields corresponding to the *_number fields when missing
         # in order to always have clean titles. This is very common for TV series.
index 95758feced6d10844e1dbae28c04ea1fe3624306..6a74de75822eed3ab54f6455824f996d2e1c9ff5 100644 (file)
@@ -42,6 +42,7 @@ def _real_extract(self, url):
         ember_data = self._parse_json(self._search_regex(
             r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
             webpage, 'ember data'), episode_id)
+        ember_data = ember_data.get(episode_id) or ember_data
         episode = ember_data['data']['attributes']
         description = episode.get('description') or {}
 
index 69e673a260e615cc87e8c95f516dc1630753d204..006aab3b4489fbf6a43cad31ea30b457aeff80d4 100644 (file)
@@ -49,6 +49,7 @@ class BandcampIE(InfoExtractor):
             'uploader': 'Ben Prunty',
             'timestamp': 1396508491,
             'upload_date': '20140403',
+            'release_timestamp': 1396483200,
             'release_date': '20140403',
             'duration': 260.877,
             'track': 'Lanius (Battle)',
@@ -69,6 +70,7 @@ class BandcampIE(InfoExtractor):
             'uploader': 'Mastodon',
             'timestamp': 1322005399,
             'upload_date': '20111122',
+            'release_timestamp': 1076112000,
             'release_date': '20040207',
             'duration': 120.79,
             'track': 'Hail to Fire',
@@ -197,7 +199,7 @@ def _real_extract(self, url):
             'thumbnail': thumbnail,
             'uploader': artist,
             'timestamp': timestamp,
-            'release_date': unified_strdate(tralbum.get('album_release_date')),
+            'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
             'duration': duration,
             'track': track,
             'track_number': track_number,
index c3e0a92624a252972bc90e27e5b4664728c62c38..6fcc4ac9323153d533330225cc2a92d48f6a95ec 100644 (file)
@@ -138,11 +138,6 @@ def _real_extract(self, url):
         anime_id = mobj.group('anime_id')
         page_id = mobj.group('page')
         webpage = self._download_webpage(url, video_id)
-        headers = {
-            'Referer': url,
-            'Accept': '*/*'
-        }
-        headers.update(self.geo_verification_headers())
 
         if 'anime/' not in url:
             cid = self._search_regex(
@@ -160,8 +155,12 @@ def _real_extract(self, url):
             if 'no_bangumi_tip' not in smuggled_data:
                 self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run yt-dlp with %s' % (
                     video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
+            headers = {
+                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+                'Referer': url
+            }
+            headers.update(self.geo_verification_headers())
 
-            headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
             js = self._download_json(
                 'http://bangumi.bilibili.com/web_api/get_source', video_id,
                 data=urlencode_postdata({'episode_id': video_id}),
@@ -170,6 +169,12 @@ def _real_extract(self, url):
                 self._report_error(js)
             cid = js['result']['cid']
 
+        headers = {
+            'Accept': 'application/json',
+            'Referer': url
+        }
+        headers.update(self.geo_verification_headers())
+
         entries = []
 
         RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
index 503d2e6a6ce7a40fa2a292a821eea0db9701c891..38c8bbc80561c5d82b9bfd4dca36ef5fd1326624 100644 (file)
@@ -27,7 +27,7 @@ def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
 
 
 class CBSIE(CBSBaseIE):
-    _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs\.com|paramountplus\.com)/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
+    _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs|paramountplus)\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
 
     _TESTS = [{
         'url': 'https://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
@@ -53,7 +53,7 @@ class CBSIE(CBSBaseIE):
         'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
         'only_matching': True,
     }, {
-        'url': 'https://www.paramountplus.com/shows/star-trek-discovery/video/l5ANMH9wM7kxwV1qr4u1xn88XOhYMlZX/star-trek-discovery-the-vulcan-hello/',
+        'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
         'only_matching': True,
     }]
 
index b74a5dc011c6b0011c89934d0b423186cf089112..65fcfcbf59b8a11f7455aaeca60db87ac71548ce 100644 (file)
@@ -231,8 +231,9 @@ class InfoExtractor(object):
     uploader:       Full name of the video uploader.
     license:        License name the video is licensed under.
     creator:        The creator of the video.
+    release_timestamp: UNIX timestamp of the moment the video was released.
     release_date:   The date (YYYYMMDD) when the video was released.
-    timestamp:      UNIX timestamp of the moment the video became available.
+    timestamp:      UNIX timestamp of the moment the video was uploaded
     upload_date:    Video upload date (YYYYMMDD).
                     If not explicitly set, calculated from timestamp.
     uploader_id:    Nickname or id of the video uploader.
index 39685e0754339e8b197d5cf01deee15b58c945d4..a02a943742f7cfd8bfee6a908c39ec0542e0079a 100644 (file)
@@ -17,7 +17,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         formats = self._extract_m3u8_formats(
-            self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id)
+            self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id, 'mp4')
         for f in formats:
             wh = self._BITRATE_MAP.get(f.get('tbr'))
             if wh:
index 051d94873d8c42db643abef5662592a9385a5a52..865cda7618d1d910ef79ce17011e0f3700aa228a 100644 (file)
@@ -6,8 +6,10 @@
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_parse_qs,
     compat_str,
     compat_urllib_parse_unquote,
+    compat_urllib_parse_urlparse,
 )
 from ..utils import (
     determine_ext,
@@ -62,6 +64,7 @@ def _parse_stream(self, stream, url):
             'description': stream_value.get('description'),
             'license': stream_value.get('license'),
             'timestamp': int_or_none(stream.get('timestamp')),
+            'release_timestamp': int_or_none(stream_value.get('release_time')),
             'tags': stream_value.get('tags'),
             'duration': int_or_none(media.get('duration')),
             'channel': try_get(signing_channel, lambda x: x['value']['title']),
@@ -94,6 +97,8 @@ class LBRYIE(LBRYBaseIE):
             'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
             'timestamp': 1595694354,
             'upload_date': '20200725',
+            'release_timestamp': 1595340697,
+            'release_date': '20200721',
             'width': 1280,
             'height': 720,
         }
@@ -108,6 +113,8 @@ class LBRYIE(LBRYBaseIE):
             'description': 'md5:661ac4f1db09f31728931d7b88807a61',
             'timestamp': 1591312601,
             'upload_date': '20200604',
+            'release_timestamp': 1591312421,
+            'release_date': '20200604',
             'tags': list,
             'duration': 2570,
             'channel': 'The LBRY Foundation',
@@ -189,17 +196,18 @@ class LBRYChannelIE(LBRYBaseIE):
     }]
     _PAGE_SIZE = 50
 
-    def _fetch_page(self, claim_id, url, page):
+    def _fetch_page(self, claim_id, url, params, page):
         page += 1
+        page_params = {
+            'channel_ids': [claim_id],
+            'claim_type': 'stream',
+            'no_totals': True,
+            'page': page,
+            'page_size': self._PAGE_SIZE,
+        }
+        page_params.update(params)
         result = self._call_api_proxy(
-            'claim_search', claim_id, {
-                'channel_ids': [claim_id],
-                'claim_type': 'stream',
-                'no_totals': True,
-                'page': page,
-                'page_size': self._PAGE_SIZE,
-                'stream_types': self._SUPPORTED_STREAM_TYPES,
-            }, 'page %d' % page)
+            'claim_search', claim_id, page_params, 'page %d' % page)
         for item in (result.get('items') or []):
             stream_claim_name = item.get('name')
             stream_claim_id = item.get('claim_id')
@@ -220,8 +228,31 @@ def _real_extract(self, url):
         result = self._resolve_url(
             'lbry://' + display_id, display_id, 'channel')
         claim_id = result['claim_id']
+        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        content = qs.get('content', [None])[0]
+        params = {
+            'fee_amount': qs.get('fee_amount', ['>=0'])[0],
+            'order_by': {
+                'new': ['release_time'],
+                'top': ['effective_amount'],
+                'trending': ['trending_group', 'trending_mixed'],
+            }[qs.get('order', ['new'])[0]],
+            'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
+        }
+        duration = qs.get('duration', [None])[0]
+        if duration:
+            params['duration'] = {
+                'long': '>=1200',
+                'short': '<=240',
+            }[duration]
+        language = qs.get('language', ['all'])[0]
+        if language != 'all':
+            languages = [language]
+            if language == 'en':
+                languages.append('none')
+            params['any_languages'] = languages
         entries = OnDemandPagedList(
-            functools.partial(self._fetch_page, claim_id, url),
+            functools.partial(self._fetch_page, claim_id, url, params),
             self._PAGE_SIZE)
         result_value = result.get('value') or {}
         return self.playlist_result(
index 32ff51653e4add61d7db4a220da5350dc7bd54b0..d9b13adc211d3623d3594e16ccdb0f4454fe4d4c 100644 (file)
@@ -599,11 +599,13 @@ def channel_data(field, type_):
         else:
             age_limit = None
 
+        webpage_url = 'https://%s/videos/watch/%s' % (host, video_id)
+
         return {
             'id': video_id,
             'title': title,
             'description': description,
-            'thumbnail': urljoin(url, video.get('thumbnailPath')),
+            'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')),
             'timestamp': unified_timestamp(video.get('publishedAt')),
             'uploader': account_data('displayName', compat_str),
             'uploader_id': str_or_none(account_data('id', int)),
@@ -621,5 +623,6 @@ def channel_data(field, type_):
             'tags': try_get(video, lambda x: x['tags'], list),
             'categories': categories,
             'formats': formats,
-            'subtitles': subtitles
+            'subtitles': subtitles,
+            'webpage_url': webpage_url,
         }
index 15c11a755aca40318a24412c0bea98841c44f0fe..09aeea3400aa94b4272f5ca6327bd1b93d195ab2 100644 (file)
@@ -31,6 +31,7 @@ def _extract_video(self, data, extract_formats=True):
 
         title = (data.get('title') or data.get('grid_title') or video_id).strip()
 
+        urls = []
         formats = []
         duration = None
         if extract_formats:
@@ -38,8 +39,9 @@ def _extract_video(self, data, extract_formats=True):
                 if not isinstance(format_dict, dict):
                     continue
                 format_url = url_or_none(format_dict.get('url'))
-                if not format_url:
+                if not format_url or format_url in urls:
                     continue
+                urls.append(format_url)
                 duration = float_or_none(format_dict.get('duration'), scale=1000)
                 ext = determine_ext(format_url)
                 if 'hls' in format_id.lower() or ext == 'm3u8':
index b7631e4e1548472a8ad8fccdd782f10350a85541..2a7818e41bcc63ae1876f350e28c4e1c14732c88 100644 (file)
@@ -167,6 +167,7 @@ class PornHubIE(PornHubBaseIE):
         'params': {
             'skip_download': True,
         },
+        'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
     }, {
         # subtitles
         'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
@@ -265,7 +266,8 @@ def dl_webpage(platform):
         webpage = dl_webpage('pc')
 
         error_msg = self._html_search_regex(
-            r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+            (r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+             r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
             webpage, 'error message', default=None, group='error')
         if error_msg:
             error_msg = re.sub(r'\s+', ' ', error_msg)
@@ -394,6 +396,21 @@ def parse_quality_items(quality_items):
 
         upload_date = None
         formats = []
+
+        def add_format(format_url, height=None):
+            tbr = None
+            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
+            if mobj:
+                if not height:
+                    height = int(mobj.group('height'))
+                tbr = int(mobj.group('tbr'))
+            formats.append({
+                'url': format_url,
+                'format_id': '%dp' % height if height else None,
+                'height': height,
+                'tbr': tbr,
+            })
+
         for video_url, height in video_urls:
             if not upload_date:
                 upload_date = self._search_regex(
@@ -410,18 +427,19 @@ def parse_quality_items(quality_items):
                     video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                     m3u8_id='hls', fatal=False))
                 continue
-            tbr = None
-            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
-            if mobj:
-                if not height:
-                    height = int(mobj.group('height'))
-                tbr = int(mobj.group('tbr'))
-            formats.append({
-                'url': video_url,
-                'format_id': '%dp' % height if height else None,
-                'height': height,
-                'tbr': tbr,
-            })
+            if '/video/get_media' in video_url:
+                medias = self._download_json(video_url, video_id, fatal=False)
+                if isinstance(medias, list):
+                    for media in medias:
+                        if not isinstance(media, dict):
+                            continue
+                        video_url = url_or_none(media.get('videoUrl'))
+                        if not video_url:
+                            continue
+                        height = int_or_none(media.get('quality'))
+                        add_format(video_url, height)
+                continue
+            add_format(video_url)
         self._sort_formats(formats)
 
         video_uploader = self._html_search_regex(
index ce9db0629459d23ea0cc870a1bde348d21e99e9a..d2fb754cf57e09b2e3c0957da1e59078b456fc57 100644 (file)
@@ -2,8 +2,9 @@
 from __future__ import unicode_literals
 
 import base64
+import io
 import re
-import time
+import sys
 
 from .common import InfoExtractor
 from ..compat import (
     determine_ext,
     ExtractorError,
     float_or_none,
+    qualities,
     remove_end,
     remove_start,
-    sanitized_Request,
     std_headers,
 )
 
-
-def _decrypt_url(png):
-    encrypted_data = compat_b64decode(png)
-    text_index = encrypted_data.find(b'tEXt')
-    text_chunk = encrypted_data[text_index - 4:]
-    length = compat_struct_unpack('!I', text_chunk[:4])[0]
-    # Use bytearray to get integers when iterating in both python 2.x and 3.x
-    data = bytearray(text_chunk[8:8 + length])
-    data = [chr(b) for b in data if b != 0]
-    hash_index = data.index('#')
-    alphabet_data = data[:hash_index]
-    url_data = data[hash_index + 1:]
-    if url_data[0] == 'H' and url_data[3] == '%':
-        # remove useless HQ%% at the start
-        url_data = url_data[4:]
-
-    alphabet = []
-    e = 0
-    d = 0
-    for l in alphabet_data:
-        if d == 0:
-            alphabet.append(l)
-            d = e = (e + 1) % 4
-        else:
-            d -= 1
-    url = ''
-    f = 0
-    e = 3
-    b = 1
-    for letter in url_data:
-        if f == 0:
-            l = int(letter) * 10
-            f = 1
-        else:
-            if e == 0:
-                l += int(letter)
-                url += alphabet[l]
-                e = (b + 3) % 4
-                f = 0
-                b += 1
-            else:
-                e -= 1
-
-    return url
+_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x))
 
 
 class RTVEALaCartaIE(InfoExtractor):
@@ -79,28 +37,31 @@ class RTVEALaCartaIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
             'duration': 5024.566,
+            'series': 'Balonmano',
         },
+        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
     }, {
         'note': 'Live stream',
         'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
         'info_dict': {
             'id': '1694255',
-            'ext': 'flv',
-            'title': 'TODO',
+            'ext': 'mp4',
+            'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': 'live stream',
         },
-        'skip': 'The f4m manifest can\'t be used yet',
     }, {
         'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
-        'md5': 'e55e162379ad587e9640eda4f7353c0f',
+        'md5': 'd850f3c8731ea53952ebab489cf81cbf',
         'info_dict': {
             'id': '4236788',
             'ext': 'mp4',
-            'title': 'Servir y proteger - Capítulo 104 ',
+            'title': 'Servir y proteger - Capítulo 104',
             'duration': 3222.0,
         },
-        'params': {
-            'skip_download': True,  # requires ffmpeg
-        },
+        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
     }, {
         'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
         'only_matching': True,
@@ -111,58 +72,102 @@ class RTVEALaCartaIE(InfoExtractor):
 
     def _real_initialize(self):
         user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
-        manager_info = self._download_json(
+        self._manager = self._download_json(
             'http://www.rtve.es/odin/loki/' + user_agent_b64,
-            None, 'Fetching manager info')
-        self._manager = manager_info['manager']
+            None, 'Fetching manager info')['manager']
+
+    @staticmethod
+    def _decrypt_url(png):
+        encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
+        while True:
+            length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
+            chunk_type = encrypted_data.read(4)
+            if chunk_type == b'IEND':
+                break
+            data = encrypted_data.read(length)
+            if chunk_type == b'tEXt':
+                alphabet_data, text = data.split(b'\0')
+                quality, url_data = text.split(b'%%')
+                alphabet = []
+                e = 0
+                d = 0
+                for l in _bytes_to_chr(alphabet_data):
+                    if d == 0:
+                        alphabet.append(l)
+                        d = e = (e + 1) % 4
+                    else:
+                        d -= 1
+                url = ''
+                f = 0
+                e = 3
+                b = 1
+                for letter in _bytes_to_chr(url_data):
+                    if f == 0:
+                        l = int(letter) * 10
+                        f = 1
+                    else:
+                        if e == 0:
+                            l += int(letter)
+                            url += alphabet[l]
+                            e = (b + 3) % 4
+                            f = 0
+                            b += 1
+                        else:
+                            e -= 1
+
+                yield quality.decode(), url
+            encrypted_data.read(4)  # CRC
+
+    def _extract_png_formats(self, video_id):
+        png = self._download_webpage(
+            'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id),
+            video_id, 'Downloading url information', query={'q': 'v2'})
+        q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
+        formats = []
+        for quality, video_url in self._decrypt_url(png):
+            ext = determine_ext(video_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+            elif ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    video_url, video_id, 'dash', fatal=False))
+            else:
+                formats.append({
+                    'format_id': quality,
+                    'quality': q(quality),
+                    'url': video_url,
+                })
+        self._sort_formats(formats)
+        return formats
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         info = self._download_json(
             'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
             video_id)['page']['items'][0]
         if info['state'] == 'DESPU':
             raise ExtractorError('The video is no longer available', expected=True)
-        title = info['title']
-        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
-        png_request = sanitized_Request(png_url)
-        png_request.add_header('Referer', url)
-        png = self._download_webpage(png_request, video_id, 'Downloading url information')
-        video_url = _decrypt_url(png)
-        ext = determine_ext(video_url)
-
-        formats = []
-        if not video_url.endswith('.f4m') and ext != 'm3u8':
-            if '?' not in video_url:
-                video_url = video_url.replace('resources/', 'auth/resources/')
-            video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
-
-        if ext == 'm3u8':
-            formats.extend(self._extract_m3u8_formats(
-                video_url, video_id, ext='mp4', entry_protocol='m3u8_native',
-                m3u8_id='hls', fatal=False))
-        elif ext == 'f4m':
-            formats.extend(self._extract_f4m_formats(
-                video_url, video_id, f4m_id='hds', fatal=False))
-        else:
-            formats.append({
-                'url': video_url,
-            })
-        self._sort_formats(formats)
+        title = info['title'].strip()
+        formats = self._extract_png_formats(video_id)
 
         subtitles = None
-        if info.get('sbtFile') is not None:
-            subtitles = self.extract_subtitles(video_id, info['sbtFile'])
+        sbt_file = info.get('sbtFile')
+        if sbt_file:
+            subtitles = self.extract_subtitles(video_id, sbt_file)
+
+        is_live = info.get('live') is True
 
         return {
             'id': video_id,
-            'title': title,
+            'title': self._live_title(title) if is_live else title,
             'formats': formats,
             'thumbnail': info.get('image'),
-            'page_url': url,
             'subtitles': subtitles,
-            'duration': float_or_none(info.get('duration'), scale=1000),
+            'duration': float_or_none(info.get('duration'), 1000),
+            'is_live': is_live,
+            'series': info.get('programTitle'),
         }
 
     def _get_subtitles(self, video_id, sub_file):
@@ -174,48 +179,26 @@ def _get_subtitles(self, video_id, sub_file):
             for s in subs)
 
 
-class RTVEInfantilIE(InfoExtractor):
+class RTVEInfantilIE(RTVEALaCartaIE):
     IE_NAME = 'rtve.es:infantil'
     IE_DESC = 'RTVE infantil'
-    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
 
     _TESTS = [{
         'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
-        'md5': '915319587b33720b8e0357caaa6617e6',
+        'md5': '5747454717aedf9f9fdf212d1bcfc48d',
         'info_dict': {
             'id': '3040283',
             'ext': 'mp4',
             'title': 'Maneras de vivir',
-            'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
+            'thumbnail': r're:https?://.+/1426182947956\.JPG',
             'duration': 357.958,
         },
+        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
     }]
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        info = self._download_json(
-            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
-            video_id)['page']['items'][0]
-
-        webpage = self._download_webpage(url, video_id)
-        vidplayer_id = self._search_regex(
-            r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
 
-        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
-        png = self._download_webpage(png_url, video_id, 'Downloading url information')
-        video_url = _decrypt_url(png)
-
-        return {
-            'id': video_id,
-            'ext': 'mp4',
-            'title': info['title'],
-            'url': video_url,
-            'thumbnail': info.get('image'),
-            'duration': float_or_none(info.get('duration'), scale=1000),
-        }
-
-
-class RTVELiveIE(InfoExtractor):
+class RTVELiveIE(RTVEALaCartaIE):
     IE_NAME = 'rtve.es:live'
     IE_DESC = 'RTVE.es live streams'
     _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
@@ -225,7 +208,7 @@ class RTVELiveIE(InfoExtractor):
         'info_dict': {
             'id': 'la-1',
             'ext': 'mp4',
-            'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
+            'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
         },
         'params': {
             'skip_download': 'live stream',
@@ -234,29 +217,22 @@ class RTVELiveIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        start_time = time.gmtime()
         video_id = mobj.group('id')
 
         webpage = self._download_webpage(url, video_id)
         title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
         title = remove_start(title, 'Estoy viendo ')
-        title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
 
         vidplayer_id = self._search_regex(
             (r'playerId=player([0-9]+)',
              r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
              r'data-id=["\'](\d+)'),
             webpage, 'internal video ID')
-        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
-        png = self._download_webpage(png_url, video_id, 'Downloading url information')
-        m3u8_url = _decrypt_url(png)
-        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': title,
-            'formats': formats,
+            'title': self._live_title(title),
+            'formats': self._extract_png_formats(vidplayer_id),
             'is_live': True,
         }
 
index c1d6aba2c962cb2b869c0c75606f6942926ec80e..5768199bcc87ffb418e2cd477c85924212665d62 100644 (file)
@@ -51,13 +51,16 @@ class ShahidIE(ShahidBaseIE):
     _NETRC_MACHINE = 'shahid'
     _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
     _TESTS = [{
-        'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AC%D9%84%D8%B3-%D8%A7%D9%84%D8%B4%D8%A8%D8%A7%D8%A8-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-275286',
+        'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924',
         'info_dict': {
-            'id': '275286',
+            'id': '816924',
             'ext': 'mp4',
-            'title': 'مجلس الشباب الموسم 1 كليب 1',
-            'timestamp': 1506988800,
-            'upload_date': '20171003',
+            'title': 'متحف الدحيح الموسم 1 كليب 1',
+            'timestamp': 1602806400,
+            'upload_date': '20201016',
+            'description': 'برومو',
+            'duration': 22,
+            'categories': ['كوميديا'],
         },
         'params': {
             # m3u8 download
@@ -109,12 +112,15 @@ def _real_extract(self, url):
             page_type = 'episode'
 
         playout = self._call_api(
-            'playout/url/' + video_id, video_id)['playout']
+            'playout/new/url/' + video_id, video_id)['playout']
 
         if not self._downloader.params.get('allow_unplayable_formats') and playout.get('drm'):
             raise ExtractorError('This video is DRM protected.', expected=True)
 
-        formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
+        formats = self._extract_m3u8_formats(re.sub(
+            # https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
+            r'aws\.manifestfilter=[\w:;,-]+&?',
+            '', playout['url']), video_id, 'mp4')
         self._sort_formats(formats)
 
         # video = self._call_api(
index 95e6d289041081361a6a76e93f2c5f17c07d650a..9aedaa04a465268c60784d865b833e6f8760fbb5 100644 (file)
@@ -6,9 +6,9 @@
 
 class SouthParkIE(MTVServicesInfoExtractor):
     IE_NAME = 'southpark.cc.com'
-    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
 
-    _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
+    _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
 
     _TESTS = [{
         'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured',
@@ -23,8 +23,20 @@ class SouthParkIE(MTVServicesInfoExtractor):
     }, {
         'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1',
         'only_matching': True,
+    }, {
+        'url': 'https://www.southparkstudios.com/episodes/h4o269/south-park-stunning-and-brave-season-19-ep-1',
+        'only_matching': True,
     }]
 
+    def _get_feed_query(self, uri):
+        return {
+            'accountOverride': 'intl.mtvi.com',
+            'arcEp': 'shared.southpark.global',
+            'ep': '90877963',
+            'imageEp': 'shared.southpark.global',
+            'mgid': uri,
+        }
+
 
 class SouthParkEsIE(SouthParkIE):
     IE_NAME = 'southpark.cc.com:español'
index 378fc75686313f92a846aaa30579049e9a29eccc..3e497a9393ea99dc1962848ce1020e0ac5b59de3 100644 (file)
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
+)
 from ..utils import (
+    clean_html,
+    float_or_none,
+    int_or_none,
     parse_iso8601,
-    sanitized_Request,
+    strip_or_none,
+    try_get,
 )
 
 
 class SportDeutschlandIE(InfoExtractor):
-    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
+    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
     _TESTS = [{
         'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
         'info_dict': {
-            'id': 're-live-deutsche-meisterschaften-2020-halbfinals',
+            'id': '5318cac0275701382770543d7edaf0a0',
             'ext': 'mp4',
-            'title': 're:Re-live: Deutsche Meisterschaften 2020.*Halbfinals',
-            'categories': ['Badminton-Deutschland'],
-            'view_count': int,
-            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
-            'timestamp': int,
-            'upload_date': '20200201',
-            'description': 're:.*',  # meaningless description for THIS video
+            'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1',
+            'duration': 16106.36,
         },
+        'params': {
+            'noplaylist': True,
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
+        'info_dict': {
+            'id': 'c6e2fdd01f63013854c47054d2ab776f',
+            'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals',
+            'description': 'md5:5263ff4c31c04bb780c9f91130b48530',
+            'duration': 31397,
+        },
+        'playlist_count': 2,
+    }, {
+        'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        sport_id = mobj.group('sport')
-
-        api_url = 'https://proxy.vidibusdynamic.net/ssl/backend.sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
-            sport_id, video_id)
-        req = sanitized_Request(api_url, headers={
-            'Accept': 'application/vnd.vidibus.v2.html+json',
-            'Referer': url,
-        })
-        data = self._download_json(req, video_id)
-
+        display_id = self._match_id(url)
+        data = self._download_json(
+            'https://backend.sportdeutschland.tv/api/permalinks/' + display_id,
+            display_id, query={'access_token': 'true'})
         asset = data['asset']
-        categories = [data['section']['title']]
-
-        formats = []
-        smil_url = asset['video']
-        if '.smil' in smil_url:
-            m3u8_url = smil_url.replace('.smil', '.m3u8')
-            formats.extend(
-                self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'))
+        title = (asset.get('title') or asset['label']).strip()
+        asset_id = asset.get('id') or asset.get('uuid')
+        info = {
+            'id': asset_id,
+            'title': title,
+            'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'),
+            'duration': int_or_none(asset.get('seconds')),
+        }
+        videos = asset.get('videos') or []
+        if len(videos) > 1:
+            playlist_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('playlistId', [None])[0]
+            if playlist_id:
+                if self._downloader.params.get('noplaylist'):
+                    videos = [videos[int(playlist_id)]]
+                    self.to_screen('Downloading just a single video because of --no-playlist')
+                else:
+                    self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % asset_id)
 
-            smil_doc = self._download_xml(
-                smil_url, video_id, note='Downloading SMIL metadata')
-            base_url_el = smil_doc.find('./head/meta')
-            if base_url_el:
-                base_url = base_url_el.attrib['base']
-            formats.extend([{
-                'format_id': 'rmtp',
-                'url': base_url if base_url_el else n.attrib['src'],
-                'play_path': n.attrib['src'],
-                'ext': 'flv',
-                'preference': -100,
-                'format_note': 'Seems to fail at example stream',
-            } for n in smil_doc.findall('./body/video')])
+            def entries():
+                for i, video in enumerate(videos, 1):
+                    video_id = video.get('uuid')
+                    video_url = video.get('url')
+                    if not (video_id and video_url):
+                        continue
+                    formats = self._extract_m3u8_formats(
+                        video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False)
+                    if not formats:
+                        continue
+                    yield {
+                        'id': video_id,
+                        'formats': formats,
+                        'title': title + ' - ' + (video.get('label') or 'Teil %d' % i),
+                        'duration': float_or_none(video.get('duration')),
+                    }
+            info.update({
+                '_type': 'multi_video',
+                'entries': entries(),
+            })
         else:
-            formats.append({'url': smil_url})
-
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'formats': formats,
-            'title': asset['title'],
-            'thumbnail': asset.get('image'),
-            'description': asset.get('teaser'),
-            'duration': asset.get('duration'),
-            'categories': categories,
-            'view_count': asset.get('views'),
-            'rtmp_live': asset.get('live'),
-            'timestamp': parse_iso8601(asset.get('date')),
-        }
+            formats = self._extract_m3u8_formats(
+                videos[0]['url'].replace('.smil', '.m3u8'), asset_id, 'mp4')
+            section_title = strip_or_none(try_get(data, lambda x: x['section']['title']))
+            info.update({
+                'formats': formats,
+                'display_id': asset.get('permalink'),
+                'thumbnail': try_get(asset, lambda x: x['images'][0]),
+                'categories': [section_title] if section_title else None,
+                'view_count': int_or_none(asset.get('views')),
+                'is_live': asset.get('is_live') is True,
+                'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')),
+            })
+        return info
index 931d4d6507611bd8432bc5b86ec33cfdd31f00b3..a54f49319a41ece1864c132dce53683102b626ea 100644 (file)
@@ -9,6 +9,7 @@
     int_or_none,
     remove_start,
     smuggle_url,
+    strip_or_none,
     try_get,
 )
 
@@ -25,6 +26,10 @@ class TVerIE(InfoExtractor):
     }, {
         'url': 'https://tver.jp/episode/79622438',
         'only_matching': True,
+    }, {
+        # subtitle = ' '
+        'url': 'https://tver.jp/corner/f0068870',
+        'only_matching': True,
     }]
     _TOKEN = None
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
@@ -47,8 +52,12 @@ def _real_extract(self, url):
         }
 
         if service == 'cx':
+            title = main['title']
+            subtitle = strip_or_none(main.get('subtitle'))
+            if subtitle:
+                title += ' - ' + subtitle
             info.update({
-                'title': main.get('subtitle') or main['title'],
+                'title': title,
                 'url': 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (p_id[:4], p_id),
                 'ie_key': 'FujiTVFODPlus7',
             })
index b318e15d4b4da53fe0b42f3537d0fde66182299c..6612081258ebfce7da3ef2148819774cc5702701 100644 (file)
@@ -7,6 +7,8 @@
 from ..utils import (
     ExtractorError,
     int_or_none,
+    try_get,
+    unified_timestamp,
 )
 
 
@@ -19,14 +21,17 @@ def _real_extract(self, url):
 
         setup = self._parse_json(self._search_regex(
             r'setup\s*=\s*({.+});', webpage, 'setup'), video_id)
-        video_data = setup.get('video') or {}
+        player_setup = setup.get('player_setup') or setup
+        video_data = player_setup.get('video') or {}
+        formatted_metadata = video_data.get('formatted_metadata') or {}
         info = {
             'id': video_id,
-            'title': video_data.get('title_short'),
+            'title': player_setup.get('title') or video_data.get('title_short'),
             'description': video_data.get('description_long') or video_data.get('description_short'),
-            'thumbnail': video_data.get('brightcove_thumbnail')
+            'thumbnail': formatted_metadata.get('thumbnail') or video_data.get('brightcove_thumbnail'),
+            'timestamp': unified_timestamp(formatted_metadata.get('video_publish_date')),
         }
-        asset = setup.get('asset') or setup.get('params') or {}
+        asset = try_get(setup, lambda x: x['embed_assets']['chorus'], dict) or {}
 
         formats = []
         hls_url = asset.get('hls_url')
@@ -47,6 +52,7 @@ def _real_extract(self, url):
         if formats:
             self._sort_formats(formats)
             info['formats'] = formats
+            info['duration'] = int_or_none(asset.get('duration'))
             return info
 
         for provider_video_type in ('ooyala', 'youtube', 'brightcove'):
@@ -84,7 +90,7 @@ class VoxMediaIE(InfoExtractor):
     }, {
         # Volume embed, Youtube
         'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
-        'md5': '4c8f4a0937752b437c3ebc0ed24802b5',
+        'md5': 'fd19aa0cf3a0eea515d4fd5c8c0e9d68',
         'info_dict': {
             'id': 'Gy8Md3Eky38',
             'ext': 'mp4',
@@ -93,6 +99,7 @@ class VoxMediaIE(InfoExtractor):
             'uploader_id': 'TheVerge',
             'upload_date': '20141021',
             'uploader': 'The Verge',
+            'timestamp': 1413907200,
         },
         'add_ie': ['Youtube'],
         'skip': 'similar to the previous test',
@@ -100,13 +107,13 @@ class VoxMediaIE(InfoExtractor):
         # Volume embed, Youtube
         'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
         'info_dict': {
-            'id': 'YCjDnX-Xzhg',
+            'id': '22986359b',
             'ext': 'mp4',
             'title': "Mississippi's laws are so bad that its anti-LGBTQ law isn't needed to allow discrimination",
             'description': 'md5:fc1317922057de31cd74bce91eb1c66c',
-            'uploader_id': 'voxdotcom',
             'upload_date': '20150915',
-            'uploader': 'Vox',
+            'timestamp': 1442332800,
+            'duration': 285,
         },
         'add_ie': ['Youtube'],
         'skip': 'similar to the previous test',
@@ -160,6 +167,9 @@ class VoxMediaIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella',
             'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.',
+            'timestamp': 1402938000,
+            'upload_date': '20140616',
+            'duration': 4114,
         },
         'add_ie': ['VoxMediaVolume'],
     }]