jfr.im git - yt-dlp.git/commitdiff
Update to ytdl-2021.01.24.1
author Remita Amine <redacted>
Sat, 16 Jan 2021 17:12:05 +0000 (18:12 +0100)
committer pukkandan <redacted>
Sun, 24 Jan 2021 14:58:44 +0000 (20:28 +0530)
23 files changed:
README.md
test/test_YoutubeDL.py
youtube_dlc/YoutubeDL.py
youtube_dlc/__init__.py
youtube_dlc/extractor/aenetworks.py
youtube_dlc/extractor/aljazeera.py
youtube_dlc/extractor/americastestkitchen.py
youtube_dlc/extractor/aol.py
youtube_dlc/extractor/ard.py
youtube_dlc/extractor/comedycentral.py
youtube_dlc/extractor/extractors.py
youtube_dlc/extractor/franceculture.py
youtube_dlc/extractor/lbry.py
youtube_dlc/extractor/minds.py [new file with mode: 0644]
youtube_dlc/extractor/mtv.py
youtube_dlc/extractor/ninegag.py
youtube_dlc/extractor/njpwworld.py
youtube_dlc/extractor/spike.py
youtube_dlc/extractor/spotify.py [new file with mode: 0644]
youtube_dlc/extractor/trovo.py [new file with mode: 0644]
youtube_dlc/extractor/wat.py
youtube_dlc/extractor/yahoo.py
youtube_dlc/options.py

index 59999245bf74f61726ba844292a74684844a9ff4..59886a26666d8fe5fdfe32d54b802341da7d284f 100644 (file)
--- a/README.md
+++ b/README.md
@@ -814,7 +814,7 @@ # OUTPUT TEMPLATE
  - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
  - `release_year` (numeric): Year (YYYY) when the album was released
 
-Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
+Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with the placeholder value provided with `--output-na-placeholder` (`NA` by default).
 
 For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dlc test video` and id `BaW_jenozKcj`, this will result in a `youtube-dlc test video-BaW_jenozKcj.mp4` file created in the current directory.
 
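The new `--output-na-placeholder` option maps to the `outtmpl_na_placeholder` parameter added below. A minimal sketch of the Python-API equivalent, assuming the usual `youtube_dlc` package layout (the `info` dict here is a stand-in for real extractor output):

    import youtube_dlc

    info = {'id': '1234', 'ext': 'mp4'}  # no 'width' field available
    ydl = youtube_dlc.YoutubeDL({
        'outtmpl': '%(id)s-%(width)s.%(ext)s',
        'outtmpl_na_placeholder': 'none',  # default placeholder is 'NA'
    })
    print(ydl.prepare_filename(info))  # -> 1234-none.mp4

The test changes below exercise exactly this behaviour, including the empty-string placeholder case.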
index bacab60a4f836f3f0b6a11cba5ef569f78d808a0..43a5dcd74958fee6c92982e21f5c84c7be4fa97e 100644 (file)
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -637,13 +637,20 @@ def test_prepare_filename(self):
             'title2': '%PATH%',
         }
 
-        def fname(templ):
-            ydl = YoutubeDL({'outtmpl': templ})
+        def fname(templ, na_placeholder='NA'):
+            params = {'outtmpl': templ}
+            if na_placeholder != 'NA':
+                params['outtmpl_na_placeholder'] = na_placeholder
+            ydl = YoutubeDL(params)
             return ydl.prepare_filename(info)
         self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
         self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
-        # Replace missing fields with 'NA'
-        self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
+        NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(id)s.%(ext)s'
+        # Replace missing fields with 'NA' by default
+        self.assertEqual(fname(NA_TEST_OUTTMPL), 'NA-NA-1234.mp4')
+        # Or by provided placeholder
+        self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4')
+        self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4')
         self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
         self.assertEqual(fname('%(height)6d.%(ext)s'), '  1080.mp4')
         self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080  .mp4')
index 02cc97625933fb260d932650f19cd37ab03995fb..ce990507ca04f4ae0d66b9f5beff3d7602e986f8 100644 (file)
--- a/youtube_dlc/YoutubeDL.py
+++ b/youtube_dlc/YoutubeDL.py
@@ -181,9 +181,12 @@ class YoutubeDL(object):
     allow_multiple_video_streams:   Allow multiple video streams to be merged into a single file
     allow_multiple_audio_streams:   Allow multiple audio streams to be merged into a single file
     outtmpl:           Template for output names.
-    restrictfilenames: Do not allow "&" and spaces in file names.
-    trim_file_name:    Limit length of filename (extension excluded).
-    ignoreerrors:      Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
+    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
+    restrictfilenames: Do not allow "&" and spaces in file names
+    trim_file_name:    Limit length of filename (extension excluded)
+    ignoreerrors:      Do not stop on download errors
+                       (Default True when running youtube-dlc,
+                       but False when directly accessing YoutubeDL class)
     force_generic_extractor: Force downloader to use the generic extractor
     overwrites:        Overwrite all video and metadata files if True,
                        overwrite only non-video files if None
@@ -741,7 +744,7 @@ def prepare_filename(self, info_dict, warn=False):
             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                  for k, v in template_dict.items()
                                  if v is not None and not isinstance(v, (list, tuple, dict)))
-            template_dict = collections.defaultdict(lambda: 'NA', template_dict)
+            template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
 
             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 
@@ -761,8 +764,8 @@ def prepare_filename(self, info_dict, warn=False):
 
             # Missing numeric fields used together with integer presentation types
             # in format specification will break the argument substitution since
-            # string 'NA' is returned for missing fields. We will patch output
-            # template for missing fields to meet string presentation type.
+            # string NA placeholder is returned for missing fields. We will patch
+            # output template for missing fields to meet string presentation type.
             for numeric_field in self._NUMERIC_FIELDS:
                 if numeric_field not in template_dict:
                     # As of [1] format syntax is:
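The comment above deserves a concrete illustration: %-substituting a string placeholder through an integer conversion fails, which is why the template is rewritten from `%(field)d` to `%(field)s` for missing fields. A stdlib-only sketch:

    import collections

    d = collections.defaultdict(lambda: 'NA', {'id': '1234'})
    print('%(id)s' % d)   # -> 1234; defaultdict supplies 'NA' for missing keys
    try:
        '%(width)d' % d   # the placeholder is a str, so %d raises
    except TypeError as e:
        print(e)          # e.g. "%d format: a number is required, not str"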
index ee61203959bf87c806e4efac2a7abb99bf753c16..e2db6626650e549f76723b53677562eded95b015 100644 (file)
--- a/youtube_dlc/__init__.py
+++ b/youtube_dlc/__init__.py
@@ -373,6 +373,7 @@ def parse_retries(retries):
         'listformats': opts.listformats,
         'listformats_table': opts.listformats_table,
         'outtmpl': outtmpl,
+        'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
         'paths': opts.paths,
         'autonumber_size': opts.autonumber_size,
         'autonumber_start': opts.autonumber_start,
index 8e4963131731d31e8210550792e7667b8143f145..a5d88ebbea03843977cba9850d48fd1f03b7904d 100644 (file)
--- a/youtube_dlc/extractor/aenetworks.py
+++ b/youtube_dlc/extractor/aenetworks.py
@@ -256,7 +256,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
             'title': 'Ancient Aliens',
             'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
         },
-        'playlist_mincount': 168,
+        'playlist_mincount': 150,
     }]
     _RESOURCE = 'series'
     _ITEMS_KEY = 'episodes'
index c68be31340296c5b4a40fa36a7b17bc50f598f4b..c4f915a3c17cfb073e2e6018f866b746231f5d8d 100644 (file)
--- a/youtube_dlc/extractor/aljazeera.py
+++ b/youtube_dlc/extractor/aljazeera.py
@@ -1,13 +1,16 @@
 from __future__ import unicode_literals
 
+import json
+import re
+
 from .common import InfoExtractor
 
 
 class AlJazeeraIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?P<type>program/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
 
     _TESTS = [{
-        'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
+        'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance',
         'info_dict': {
             'id': '3792260579001',
             'ext': 'mp4',
@@ -20,14 +23,34 @@ class AlJazeeraIE(InfoExtractor):
         'add_ie': ['BrightcoveNew'],
         'skip': 'Not accessible from Travis CI server',
     }, {
-        'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
+        'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art',
         'only_matching': True,
     }]
-    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
 
     def _real_extract(self, url):
-        program_name = self._match_id(url)
-        webpage = self._download_webpage(url, program_name)
-        brightcove_id = self._search_regex(
-            r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
-        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
+        post_type, name = re.match(self._VALID_URL, url).groups()
+        post_type = {
+            'features': 'post',
+            'program': 'episode',
+            'videos': 'video',
+        }[post_type.split('/')[0]]
+        video = self._download_json(
+            'https://www.aljazeera.com/graphql', name, query={
+                'operationName': 'SingleArticleQuery',
+                'variables': json.dumps({
+                    'name': name,
+                    'postType': post_type,
+                }),
+            }, headers={
+                'wp-site': 'aje',
+            })['data']['article']['video']
+        video_id = video['id']
+        account_id = video.get('accountId') or '665003303001'
+        player_id = video.get('playerId') or 'BkeSH5BDb'
+        return self.url_result(
+            self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
+            'BrightcoveNew', video_id)
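For orientation, the GraphQL lookup performed by the rewritten extractor can be reproduced standalone. A sketch with the endpoint, query parameters and `wp-site` header taken from the diff above; the response shape is assumed to match what the extractor reads:

    import json
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen

    def aljazeera_video(name, post_type):
        # post_type is 'post', 'episode' or 'video', per the mapping above
        qs = urlencode({
            'operationName': 'SingleArticleQuery',
            'variables': json.dumps({'name': name, 'postType': post_type}),
        })
        req = Request('https://www.aljazeera.com/graphql?' + qs,
                      headers={'wp-site': 'aje'})
        return json.load(urlopen(req))['data']['article']['video']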
index e20f00fc3efabf7ed5c12892aeeafa986bfb803e..be960c0f93b4950559c761f1bcd1f60dd4792d69 100644 (file)
--- a/youtube_dlc/extractor/americastestkitchen.py
+++ b/youtube_dlc/extractor/americastestkitchen.py
@@ -1,13 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
+    int_or_none,
     try_get,
     unified_strdate,
+    unified_timestamp,
 )
 
 
@@ -22,8 +25,8 @@ class AmericasTestKitchenIE(InfoExtractor):
             'ext': 'mp4',
             'description': 'md5:64e606bfee910627efc4b5f050de92b3',
             'thumbnail': r're:^https?://',
-            'timestamp': 1523664000,
-            'upload_date': '20180414',
+            'timestamp': 1523318400,
+            'upload_date': '20180410',
             'release_date': '20180410',
             'series': "America's Test Kitchen",
             'season_number': 18,
@@ -33,6 +36,27 @@ class AmericasTestKitchenIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
+        'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
+        'md5': '06451608c57651e985a498e69cec17e5',
+        'info_dict': {
+            'id': '5fbe8c61bda2010001c6763b',
+            'title': 'Simple Chicken Dinner',
+            'ext': 'mp4',
+            'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
+            'thumbnail': r're:^https?://',
+            'timestamp': 1610755200,
+            'upload_date': '20210116',
+            'release_date': '20210116',
+            'series': "America's Test Kitchen",
+            'season_number': 21,
+            'episode': 'Simple Chicken Dinner',
+            'episode_number': 3,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
         'only_matching': True,
@@ -60,7 +84,76 @@ def _real_extract(self, url):
             'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
             'ie_key': 'Zype',
             'description': clean_html(video.get('description')),
+            'timestamp': unified_timestamp(video.get('publishDate')),
             'release_date': unified_strdate(video.get('publishDate')),
+            'episode_number': int_or_none(episode.get('number')),
+            'season_number': int_or_none(episode.get('season')),
             'series': try_get(episode, lambda x: x['show']['title']),
             'episode': episode.get('title'),
         }
+
+
+class AmericasTestKitchenSeasonIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
+    _TESTS = [{
+        # ATK Season
+        'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
+        'info_dict': {
+            'id': 'season_1',
+            'title': 'Season 1',
+        },
+        'playlist_count': 13,
+    }, {
+        # Cooks Country Season
+        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+        'info_dict': {
+            'id': 'season_12',
+            'title': 'Season 12',
+        },
+        'playlist_count': 13,
+    }]
+
+    def _real_extract(self, url):
+        show_name, season_number = re.match(self._VALID_URL, url).groups()
+        season_number = int(season_number)
+
+        slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
+
+        season = 'Season %d' % season_number
+
+        season_search = self._download_json(
+            'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
+            season, headers={
+                'Origin': 'https://www.%s.com' % show_name,
+                'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
+                'X-Algolia-Application-Id': 'Y1FNZXUI30',
+            }, query={
+                'facetFilters': json.dumps([
+                    'search_season_list:' + season,
+                    'search_document_klass:episode',
+                    'search_show_slug:' + slug,
+                ]),
+                'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
+                'attributesToHighlight': '',
+                'hitsPerPage': 1000,
+            })
+
+        def entries():
+            for episode in (season_search.get('hits') or []):
+                search_url = episode.get('search_url')
+                if not search_url:
+                    continue
+                yield {
+                    '_type': 'url',
+                    'url': 'https://www.%s.com%s' % (show_name, search_url),
+                    'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
+                    'title': episode.get('title'),
+                    'description': episode.get('description'),
+                    'timestamp': unified_timestamp(episode.get('search_document_date')),
+                    'season_number': season_number,
+                    'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
+                    'ie_key': AmericasTestKitchenIE.ie_key(),
+                }
+
+        return self.playlist_result(
+            entries(), 'season_%d' % season_number, season)
index e87994a6aaacea39f3e6997b541d321ec665a3d7..f6ecb8438fdaeafe365c35d93640c6197ed32089 100644 (file)
--- a/youtube_dlc/extractor/aol.py
+++ b/youtube_dlc/extractor/aol.py
@@ -3,7 +3,7 @@
 
 import re
 
-from .common import InfoExtractor
+from .yahoo import YahooIE
 from ..compat import (
     compat_parse_qs,
     compat_urllib_parse_urlparse,
@@ -15,9 +15,9 @@
 )
 
 
-class AolIE(InfoExtractor):
+class AolIE(YahooIE):
     IE_NAME = 'aol.com'
-    _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
+    _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
 
     _TESTS = [{
         # video with 5min ID
@@ -76,10 +76,16 @@ class AolIE(InfoExtractor):
     }, {
         'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
         'only_matching': True,
+    }, {
+        # Yahoo video
+        'url': 'https://www.aol.com/video/play/991e6700-ac02-11ea-99ff-357400036f61/24bbc846-3e30-3c46-915e-fe8ccd7fcc46/',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        if '-' in video_id:
+            return self._extract_yahoo_video(video_id, 'us')
 
         response = self._download_json(
             'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
index 6f1e477a90322980e4e8368f9e0e5b1eb921d7b3..73379314523bb0dfaf2f7f828236a13f7b1824f8 100644 (file)
--- a/youtube_dlc/extractor/ard.py
+++ b/youtube_dlc/extractor/ard.py
@@ -226,13 +226,13 @@ def _real_extract(self, url):
             if doc.tag == 'rss':
                 return GenericIE()._extract_rss(url, video_id, doc)
 
-        title = self._html_search_regex(
+        title = self._og_search_title(webpage, default=None) or self._html_search_regex(
             [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
              r'<meta name="dcterms\.title" content="(.*?)"/>',
              r'<h4 class="headline">(.*?)</h4>',
              r'<title[^>]*>(.*?)</title>'],
             webpage, 'title')
-        description = self._html_search_meta(
+        description = self._og_search_description(webpage, default=None) or self._html_search_meta(
             'dcterms.abstract', webpage, 'description', default=None)
         if description is None:
             description = self._html_search_meta(
@@ -289,18 +289,18 @@ def _real_extract(self, url):
 
 
 class ARDIE(InfoExtractor):
-    _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
+    _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?:video-?)?(?P<id>[0-9]+))\.html'
     _TESTS = [{
-        # available till 14.02.2019
-        'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
-        'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
+        # available till 7.01.2022
+        'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
+        'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
         'info_dict': {
-            'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
-            'id': '102',
+            'display_id': 'maischberger-die-woche',
+            'id': '100',
             'ext': 'mp4',
-            'duration': 4435.0,
-            'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
-            'upload_date': '20180214',
+            'duration': 3687.0,
+            'title': 'maischberger. die woche vom 7. Januar 2021',
+            'upload_date': '20210107',
             'thumbnail': r're:^https?://.*\.jpg$',
         },
     }, {
@@ -355,17 +355,17 @@ def _real_extract(self, url):
 class ARDBetaMediathekIE(ARDMediathekBaseIE):
     _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?P<mode>player|live|video|sendung|sammlung)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
     _TESTS = [{
-        'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
-        'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
+        'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
+        'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
         'info_dict': {
             'display_id': 'die-robuste-roswita',
-            'id': '70153354',
+            'id': '78566716',
             'title': 'Die robuste Roswita',
-            'description': r're:^Der Mord.*trüber ist als die Ilm.',
+            'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
             'duration': 5316,
-            'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
-            'timestamp': 1577047500,
-            'upload_date': '20191222',
+            'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
+            'timestamp': 1596658200,
+            'upload_date': '20200805',
             'ext': 'mp4',
         },
     }, {
index f54c4adeb9fe82b1f2b5392bcb7c42cdafa483cf..1bfa912be40e9e5181ca4d2f5ab4f17d87291b1a 100644 (file)
--- a/youtube_dlc/extractor/comedycentral.py
+++ b/youtube_dlc/extractor/comedycentral.py
 from __future__ import unicode_literals
 
 from .mtv import MTVServicesInfoExtractor
-from .common import InfoExtractor
 
 
 class ComedyCentralIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
-        (video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
-        /(?P<title>.*)'''
+    _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
     _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
 
     _TESTS = [{
-        'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
-        'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
+        'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
+        'md5': 'b8acb347177c680ff18a292aa2166f80',
         'info_dict': {
-            'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
+            'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
             'ext': 'mp4',
-            'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
-            'description': 'After a certain point, breastfeeding becomes c**kblocking.',
-            'timestamp': 1376798400,
-            'upload_date': '20130818',
+            'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
+            'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
+            'timestamp': 1598670000,
+            'upload_date': '20200829',
         },
     }, {
-        'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
+        'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
         'only_matching': True,
-    }]
-
-
-class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
-        (?:full-episodes|shows(?=/[^/]+/full-episodes))
-        /(?P<id>[^?]+)'''
-    _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
-
-    _TESTS = [{
-        'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
-        'info_dict': {
-            'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
-            'title': 'November 28, 2016 - Ryan Speedo Green',
-        },
-        'playlist_count': 4,
     }, {
-        'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-        webpage = self._download_webpage(url, playlist_id)
-        mgid = self._extract_mgid(webpage, url, data_zone='t2_lc_promo1')
-        videos_info = self._get_videos_info(mgid)
-        return videos_info
-
-
-class ToshIE(MTVServicesInfoExtractor):
-    IE_DESC = 'Tosh.0'
-    _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
-    _FEED_URL = 'http://tosh.cc.com/feeds/mrss'
-
-    _TESTS = [{
-        'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
-        'info_dict': {
-            'description': 'Tosh asked fans to share their summer plans.',
-            'title': 'Twitter Users Share Summer Plans',
-        },
-        'playlist': [{
-            'md5': 'f269e88114c1805bb6d7653fecea9e06',
-            'info_dict': {
-                'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
-                'ext': 'mp4',
-                'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
-                'description': 'Tosh asked fans to share their summer plans.',
-                'thumbnail': r're:^https?://.*\.jpg',
-                # It's really reported to be published on year 2077
-                'upload_date': '20770610',
-                'timestamp': 3390510600,
-                'subtitles': {
-                    'en': 'mincount:3',
-                },
-            },
-        }]
-    }, {
-        'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
+        'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
         'only_matching': True,
     }]
 
 
 class ComedyCentralTVIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
     _TESTS = [{
-        'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
+        'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
         'info_dict': {
-            'id': 'local_playlist-f99b626bdfe13568579a',
-            'ext': 'flv',
-            'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
+            'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
+            'ext': 'mp4',
+            'title': 'Josh Investigates',
+            'description': 'Steht uns das Ende der Welt bevor?',
         },
-    }, {
-        'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
-        'only_matching': True,
     }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        mrss_url = self._search_regex(
-            r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
-            webpage, 'mrss url', group='url')
-
-        return self._get_videos_info_from_url(mrss_url, video_id)
-
-
-class ComedyCentralShortnameIE(InfoExtractor):
-    _VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
-    _TESTS = [{
-        'url': ':tds',
-        'only_matching': True,
-    }, {
-        'url': ':thedailyshow',
-        'only_matching': True,
-    }, {
-        'url': ':theopposition',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        shortcut_map = {
-            'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
-            'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
-            'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
+    _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+    _GEO_COUNTRIES = ['DE']
+
+    def _get_feed_query(self, uri):
+        return {
+            'accountOverride': 'intl.mtvi.com',
+            'arcEp': 'web.cc.tv',
+            'ep': 'b9032c3a',
+            'imageEp': 'web.cc.tv',
+            'mgid': uri,
         }
-        return self.url_result(shortcut_map[video_id])
index 6ea86c0973e5de391e3f5bf3f004f9170a1c217b..10fd4a0b57c9fa34758a28c3cc3ba73a869270a6 100644 (file)
--- a/youtube_dlc/extractor/extractors.py
+++ b/youtube_dlc/extractor/extractors.py
     AnimeLabIE,
     AnimeLabShowsIE,
 )
-from .americastestkitchen import AmericasTestKitchenIE
+from .americastestkitchen import (
+    AmericasTestKitchenIE,
+    AmericasTestKitchenSeasonIE,
+)
 from .animeondemand import AnimeOnDemandIE
 from .anvato import AnvatoIE
 from .aol import AolIE
 )
 from .coub import CoubIE
 from .comedycentral import (
-    ComedyCentralFullEpisodesIE,
     ComedyCentralIE,
-    ComedyCentralShortnameIE,
     ComedyCentralTVIE,
-    ToshIE,
 )
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .commonprotocols import (
     MildomVodIE,
     MildomUserVodIE,
 )
+from .minds import (
+    MindsIE,
+    MindsChannelIE,
+    MindsGroupIE,
+)
 from .ministrygrid import MinistryGridIE
 from .minoto import MinotoIE
 from .miomio import MioMioIE
 from .sport5 import Sport5IE
 from .sportbox import SportBoxIE
 from .sportdeutschland import SportDeutschlandIE
+from .spotify import (
+    SpotifyIE,
+    SpotifyShowIE,
+)
 from .spreaker import (
     SpreakerIE,
     SpreakerPageIE,
 from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
-from .trovolive import TrovoLiveIE
+from .trovo import (
+    TrovoIE,
+    TrovoVodIE,
+)
 from .trunews import TruNewsIE
 from .trutv import TruTVIE
 from .tube8 import Tube8IE
index 306b45fc99a4c3495a233d8fb3c649032641d87a..14f4cb48905426c82f638b43dc905058cd4edefe 100644 (file)
--- a/youtube_dlc/extractor/franceculture.py
+++ b/youtube_dlc/extractor/franceculture.py
@@ -11,7 +11,7 @@
 
 class FranceCultureIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
         'info_dict': {
             'id': 'rendez-vous-au-pays-des-geeks',
@@ -20,10 +20,14 @@ class FranceCultureIE(InfoExtractor):
             'title': 'Rendez-vous au pays des geeks',
             'thumbnail': r're:^https?://.*\.jpg$',
             'upload_date': '20140301',
-            'timestamp': 1393642916,
+            'timestamp': 1393700400,
             'vcodec': 'none',
         }
-    }
+    }, {
+        # no thumbnail
+        'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
@@ -36,19 +40,19 @@ def _real_extract(self, url):
                     </h1>|
                     <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
                 ).*?
-                (<button[^>]+data-asset-source="[^"]+"[^>]+>)
+                (<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
             ''',
             webpage, 'video data'))
 
-        video_url = video_data['data-asset-source']
-        title = video_data.get('data-asset-title') or self._og_search_title(webpage)
+        video_url = video_data.get('data-url') or video_data['data-asset-source']
+        title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
 
         description = self._html_search_regex(
             r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
             webpage, 'description', default=None)
         thumbnail = self._search_regex(
             r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
-            webpage, 'thumbnail', fatal=False)
+            webpage, 'thumbnail', default=None)
         uploader = self._html_search_regex(
             r'(?s)<span class="author">(.*?)</span>',
             webpage, 'uploader', default=None)
@@ -64,6 +68,6 @@ def _real_extract(self, url):
             'ext': ext,
             'vcodec': 'none' if ext == 'mp3' else None,
             'uploader': uploader,
-            'timestamp': int_or_none(video_data.get('data-asset-created-date')),
+            'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
             'duration': int_or_none(video_data.get('data-duration')),
         }
index 41cc245ebdccabd0b00c288b8914a19aa72a256d..413215a992782952801430eb602697cba8332b13 100644 (file)
--- a/youtube_dlc/extractor/lbry.py
+++ b/youtube_dlc/extractor/lbry.py
@@ -5,7 +5,10 @@
 import json
 
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+    compat_str,
+    compat_urllib_parse_unquote,
+)
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -131,6 +134,9 @@ class LBRYIE(LBRYBaseIE):
     }, {
         'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
         'only_matching': True,
+    }, {
+        'url': 'https://lbry.tv/@lacajadepandora:a/TRUMP-EST%C3%81-BIEN-PUESTO-con-Pilar-Baselga,-Carlos-Senra,-Luis-Palacios-(720p_30fps_H264-192kbit_AAC):1',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -139,6 +145,7 @@ def _real_extract(self, url):
             display_id = display_id.split('/', 2)[-1].replace('/', ':')
         else:
             display_id = display_id.replace(':', '#')
+        display_id = compat_urllib_parse_unquote(display_id)
         uri = 'lbry://' + display_id
         result = self._resolve_url(uri, display_id, 'stream')
         result_value = result['value']
diff --git a/youtube_dlc/extractor/minds.py b/youtube_dlc/extractor/minds.py
new file mode 100644 (file)
index 0000000..8e9f0f8
--- /dev/null
+++ b/youtube_dlc/extractor/minds.py
@@ -0,0 +1,196 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    clean_html,
+    int_or_none,
+    str_or_none,
+    strip_or_none,
+)
+
+
+class MindsBaseIE(InfoExtractor):
+    _VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'
+
+    def _call_api(self, path, video_id, resource, query=None):
+        api_url = 'https://www.minds.com/api/' + path
+        token = self._get_cookies(api_url).get('XSRF-TOKEN')
+        return self._download_json(
+            api_url, video_id, 'Downloading %s JSON metadata' % resource, headers={
+                'Referer': 'https://www.minds.com/',
+                'X-XSRF-TOKEN': token.value if token else '',
+            }, query=query)
+
+
+class MindsIE(MindsBaseIE):
+    IE_NAME = 'minds'
+    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://www.minds.com/media/100000000000086822',
+        'md5': '215a658184a419764852239d4970b045',
+        'info_dict': {
+            'id': '100000000000086822',
+            'ext': 'mp4',
+            'title': 'Minds intro sequence',
+            'thumbnail': r're:https?://.+\.png',
+            'uploader_id': 'ottman',
+            'upload_date': '20130524',
+            'timestamp': 1369404826,
+            'uploader': 'Bill Ottman',
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'tags': ['animation'],
+            'comment_count': int,
+            'license': 'attribution-cc',
+        },
+    }, {
+        # entity.type == 'activity' and empty title
+        'url': 'https://www.minds.com/newsfeed/798025111988506624',
+        'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
+        'info_dict': {
+            'id': '798022190320226304',
+            'ext': 'mp4',
+            'title': '798022190320226304',
+            'uploader': 'ColinFlaherty',
+            'upload_date': '20180111',
+            'timestamp': 1515639316,
+            'uploader_id': 'ColinFlaherty',
+        },
+    }, {
+        'url': 'https://www.minds.com/archive/view/715172106794442752',
+        'only_matching': True,
+    }, {
+        # youtube perma_url
+        'url': 'https://www.minds.com/newsfeed/1197131838022602752',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        entity_id = self._match_id(url)
+        entity = self._call_api(
+            'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']
+        if entity.get('type') == 'activity':
+            if entity.get('custom_type') == 'video':
+                video_id = entity['entity_guid']
+            else:
+                return self.url_result(entity['perma_url'])
+        else:
+            assert(entity['subtype'] == 'video')
+            video_id = entity_id
+        # 1080p and webm formats available only on the sources array
+        video = self._call_api(
+            'v2/media/video/' + video_id, video_id, 'video')
+
+        formats = []
+        for source in (video.get('sources') or []):
+            src = source.get('src')
+            if not src:
+                continue
+            formats.append({
+                'format_id': source.get('label'),
+                'height': int_or_none(source.get('size')),
+                'url': src,
+            })
+        self._sort_formats(formats)
+
+        entity = video.get('entity') or entity
+        owner = entity.get('ownerObj') or {}
+        uploader_id = owner.get('username')
+
+        tags = entity.get('tags')
+        if tags and isinstance(tags, compat_str):
+            tags = [tags]
+
+        thumbnail = None
+        poster = video.get('poster') or entity.get('thumbnail_src')
+        if poster:
+            urlh = self._request_webpage(poster, video_id, fatal=False)
+            if urlh:
+                thumbnail = urlh.geturl()
+
+        return {
+            'id': video_id,
+            'title': entity.get('title') or video_id,
+            'formats': formats,
+            'description': clean_html(entity.get('description')) or None,
+            'license': str_or_none(entity.get('license')),
+            'timestamp': int_or_none(entity.get('time_created')),
+            'uploader': strip_or_none(owner.get('name')),
+            'uploader_id': uploader_id,
+            'uploader_url': 'https://www.minds.com/' + uploader_id if uploader_id else None,
+            'view_count': int_or_none(entity.get('play:count')),
+            'like_count': int_or_none(entity.get('thumbs:up:count')),
+            'dislike_count': int_or_none(entity.get('thumbs:down:count')),
+            'tags': tags,
+            'comment_count': int_or_none(entity.get('comments:count')),
+            'thumbnail': thumbnail,
+        }
+
+
+class MindsFeedBaseIE(MindsBaseIE):
+    _PAGE_SIZE = 150
+
+    def _entries(self, feed_id):
+        query = {'limit': self._PAGE_SIZE, 'sync': 1}
+        i = 1
+        while True:
+            data = self._call_api(
+                'v2/feeds/container/%s/videos' % feed_id,
+                feed_id, 'page %s' % i, query)
+            entities = data.get('entities') or []
+            for entity in entities:
+                guid = entity.get('guid')
+                if not guid:
+                    continue
+                yield self.url_result(
+                    'https://www.minds.com/newsfeed/' + guid,
+                    MindsIE.ie_key(), guid)
+            query['from_timestamp'] = data['load-next']
+            if not (query['from_timestamp'] and len(entities) == self._PAGE_SIZE):
+                break
+            i += 1
+
+    def _real_extract(self, url):
+        feed_id = self._match_id(url)
+        feed = self._call_api(
+            'v1/%s/%s' % (self._FEED_PATH, feed_id),
+            feed_id, self._FEED_TYPE)[self._FEED_TYPE]
+
+        return self.playlist_result(
+            self._entries(feed['guid']), feed_id,
+            strip_or_none(feed.get('name')),
+            feed.get('briefdescription'))
+
+
+class MindsChannelIE(MindsFeedBaseIE):
+    _FEED_TYPE = 'channel'
+    IE_NAME = 'minds:' + _FEED_TYPE
+    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'
+    _FEED_PATH = 'channel'
+    _TEST = {
+        'url': 'https://www.minds.com/ottman',
+        'info_dict': {
+            'id': 'ottman',
+            'title': 'Bill Ottman',
+            'description': 'Co-creator & CEO @minds',
+        },
+        'playlist_mincount': 54,
+    }
+
+
+class MindsGroupIE(MindsFeedBaseIE):
+    _FEED_TYPE = 'group'
+    IE_NAME = 'minds:' + _FEED_TYPE
+    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'
+    _FEED_PATH = 'groups/group'
+    _TEST = {
+        'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
+        'info_dict': {
+            'id': '785582576369672204',
+            'title': 'Cooking Videos',
+        },
+        'playlist_mincount': 1,
+    }
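The feed classes above share one cursor-style pagination scheme; reduced to its core it looks like the sketch below, where `fetch` stands in for the `_call_api` request:

    def paged(fetch, page_size=150):
        query = {'limit': page_size, 'sync': 1}
        while True:
            data = fetch(query)
            entities = data.get('entities') or []
            for entity in entities:
                yield entity
            # 'load-next' is the timestamp cursor for the next page
            query['from_timestamp'] = data.get('load-next')
            if not (query['from_timestamp'] and len(entities) == page_size):
                break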
index d31f53137a84f2fcde19b5b2f03e0768e65d89f1..68e81ad47acf19d3f60b5e1ef5ea8a2b9928a76e 100644 (file)
--- a/youtube_dlc/extractor/mtv.py
+++ b/youtube_dlc/extractor/mtv.py
@@ -255,6 +255,10 @@ def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
 
         return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
 
+    @staticmethod
+    def _extract_child_with_type(parent, t):
+        return next(c for c in parent['children'] if c.get('type') == t)
+
     def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
         if url == '':
             return
@@ -332,6 +336,13 @@ def _extract_mgid(self, webpage, url, title=None, data_zone=None):
         if not mgid:
             mgid = self._extract_triforce_mgid(webpage, data_zone)
 
+        if not mgid:
+            data = self._parse_json(self._search_regex(
+                r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
+            main_container = self._extract_child_with_type(data, 'MainContainer')
+            video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
+            mgid = video_player['props']['media']['video']['config']['uri']
+
         return mgid
 
     def _real_extract(self, url):
@@ -403,18 +414,6 @@ class MTVIE(MTVServicesInfoExtractor):
         'only_matching': True,
     }]
 
-    @staticmethod
-    def extract_child_with_type(parent, t):
-        children = parent['children']
-        return next(c for c in children if c.get('type') == t)
-
-    def _extract_mgid(self, webpage):
-        data = self._parse_json(self._search_regex(
-            r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
-        main_container = self.extract_child_with_type(data, 'MainContainer')
-        video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
-        return video_player['props']['media']['video']['config']['uri']
-
 
 class MTVJapanIE(MTVServicesInfoExtractor):
     IE_NAME = 'mtvjapan'
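The `_extract_child_with_type` helper promoted to the base class above walks the page's `__DATA__` JSON tree by node type. A toy example of the traversal; the nesting mirrors the lookup path in the diff:

    def child_with_type(parent, t):
        return next(c for c in parent['children'] if c.get('type') == t)

    data = {'children': [{
        'type': 'MainContainer',
        'children': [{
            'type': 'VideoPlayer',
            'props': {'media': {'video': {'config': {'uri': 'mgid:demo'}}}},
        }],
    }]}

    player = child_with_type(child_with_type(data, 'MainContainer'), 'VideoPlayer')
    print(player['props']['media']['video']['config']['uri'])  # -> mgid:demo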
index dc6a27d3643d335a69a75d494f7673ecf5b43a7d..440f865bcee89c7c70f7bde7e67966af3bd2f44d 100644 (file)
--- a/youtube_dlc/extractor/ninegag.py
+++ b/youtube_dlc/extractor/ninegag.py
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import str_to_int
+from ..utils import (
+    determine_ext,
+    ExtractorError,
+    int_or_none,
+    try_get,
+    url_or_none,
+)
 
 
 class NineGagIE(InfoExtractor):
     IE_NAME = '9gag'
-    _VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
+    _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
 
-    _TESTS = [{
-        'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
-        'info_dict': {
-            'id': 'kXzwOKyGlSA',
-            'ext': 'mp4',
-            'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
-            'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
-            'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
-            'uploader': 'CompilationChannel',
-            'upload_date': '20131110',
-            'view_count': int,
-        },
-        'add_ie': ['Youtube'],
-    }, {
-        'url': 'http://9gag.com/tv/p/aKolP3',
+    _TEST = {
+        'url': 'https://9gag.com/gag/ae5Ag7B',
         'info_dict': {
-            'id': 'aKolP3',
+            'id': 'ae5Ag7B',
             'ext': 'mp4',
-            'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
-            'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
-            'uploader_id': 'rickmereki',
-            'uploader': 'Rick Mereki',
-            'upload_date': '20110803',
-            'view_count': int,
-        },
-        'add_ie': ['Vimeo'],
-    }, {
-        'url': 'http://9gag.com/tv/p/KklwM',
-        'only_matching': True,
-    }, {
-        'url': 'http://9gag.tv/p/Kk2X5',
-        'only_matching': True,
-    }, {
-        'url': 'http://9gag.com/tv/embed/a5Dmvl',
-        'only_matching': True,
-    }]
-
-    _EXTERNAL_VIDEO_PROVIDER = {
-        '1': {
-            'url': '%s',
-            'ie_key': 'Youtube',
-        },
-        '2': {
-            'url': 'http://player.vimeo.com/video/%s',
-            'ie_key': 'Vimeo',
-        },
-        '3': {
-            'url': 'http://instagram.com/p/%s',
-            'ie_key': 'Instagram',
-        },
-        '4': {
-            'url': 'http://vine.co/v/%s',
-            'ie_key': 'Vine',
-        },
+            'title': 'Capybara Agility Training',
+            'upload_date': '20191108',
+            'timestamp': 1573237208,
+            'categories': ['Awesome'],
+            'tags': ['Weimaraner', 'American Pit Bull Terrier'],
+            'duration': 44,
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+        }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id') or video_id
+        post_id = self._match_id(url)
+        post = self._download_json(
+            'https://9gag.com/v1/post', post_id, query={
+                'id': post_id
+            })['data']['post']
+
+        if post.get('type') != 'Animated':
+            raise ExtractorError(
+                'The given url does not contain a video',
+                expected=True)
+
+        title = post['title']
+
+        duration = None
+        formats = []
+        thumbnails = []
+        for key, image in (post.get('images') or {}).items():
+            image_url = url_or_none(image.get('url'))
+            if not image_url:
+                continue
+            ext = determine_ext(image_url)
+            image_id = key.strip('image')
+            common = {
+                'url': image_url,
+                'width': int_or_none(image.get('width')),
+                'height': int_or_none(image.get('height')),
+            }
+            if ext in ('jpg', 'png'):
+                webp_url = image.get('webpUrl')
+                if webp_url:
+                    t = common.copy()
+                    t.update({
+                        'id': image_id + '-webp',
+                        'url': webp_url,
+                    })
+                    thumbnails.append(t)
+                common.update({
+                    'id': image_id,
+                    'ext': ext,
+                })
+                thumbnails.append(common)
+            elif ext in ('webm', 'mp4'):
+                if not duration:
+                    duration = int_or_none(image.get('duration'))
+                common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
+                for vcodec in ('vp8', 'vp9', 'h265'):
+                    c_url = image.get(vcodec + 'Url')
+                    if not c_url:
+                        continue
+                    c_f = common.copy()
+                    c_f.update({
+                        'format_id': image_id + '-' + vcodec,
+                        'url': c_url,
+                        'vcodec': vcodec,
+                    })
+                    formats.append(c_f)
+                common.update({
+                    'ext': ext,
+                    'format_id': image_id,
+                })
+                formats.append(common)
+        self._sort_formats(formats)
 
-        webpage = self._download_webpage(url, display_id)
+        section = try_get(post, lambda x: x['postSection']['name'])
 
-        post_view = self._parse_json(
-            self._search_regex(
-                r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
-                webpage, 'post view'),
-            display_id)
+        tags = None
+        post_tags = post.get('tags')
+        if post_tags:
+            tags = []
+            for tag in post_tags:
+                tag_key = tag.get('key')
+                if not tag_key:
+                    continue
+                tags.append(tag_key)
 
-        ie_key = None
-        source_url = post_view.get('sourceUrl')
-        if not source_url:
-            external_video_id = post_view['videoExternalId']
-            external_video_provider = post_view['videoExternalProvider']
-            source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
-            ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
-        title = post_view['title']
-        description = post_view.get('description')
-        view_count = str_to_int(post_view.get('externalView'))
-        thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
+        get_count = lambda x: int_or_none(post.get(x + 'Count'))
 
         return {
-            '_type': 'url_transparent',
-            'url': source_url,
-            'ie_key': ie_key,
-            'id': video_id,
-            'display_id': display_id,
+            'id': post_id,
             'title': title,
-            'description': description,
-            'view_count': view_count,
-            'thumbnail': thumbnail,
+            'timestamp': int_or_none(post.get('creationTs')),
+            'duration': duration,
+            'formats': formats,
+            'thumbnails': thumbnails,
+            'like_count': get_count('upVote'),
+            'dislike_count': get_count('downVote'),
+            'comment_count': get_count('comments'),
+            'age_limit': 18 if post.get('nsfw') == 1 else None,
+            'categories': [section] if section else None,
+            'tags': tags,
         }
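The rewritten 9gag extractor above reads everything from a single JSON endpoint. A standalone sketch, with the endpoint and `id` parameter taken from the diff (extra headers may be required in practice; only posts of type 'Animated' carry video renditions):

    import json
    from urllib.parse import urlencode
    from urllib.request import urlopen

    def ninegag_post(post_id):
        url = 'https://9gag.com/v1/post?' + urlencode({'id': post_id})
        post = json.load(urlopen(url))['data']['post']
        if post.get('type') != 'Animated':
            raise ValueError('the given post does not contain a video')
        # post['images'] maps keys to jpg/png thumbnails and webm/mp4
        # entries with per-codec URLs (vp8Url, vp9Url, h265Url)
        return post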
index 025c5d249c4a0daa2032cf639b64c5c27f2974fb..3639d142ff461a541664a92ff19035799d5e91ee 100644 (file)
--- a/youtube_dlc/extractor/njpwworld.py
+++ b/youtube_dlc/extractor/njpwworld.py
@@ -6,30 +6,40 @@
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
-    extract_attributes,
     get_element_by_class,
     urlencode_postdata,
 )
 
 
 class NJPWWorldIE(InfoExtractor):
-    _VALID_URL = r'https?://njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
+    _VALID_URL = r'https?://(front\.)?njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
     IE_DESC = '新日本プロレスワールド'
     _NETRC_MACHINE = 'njpwworld'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://njpwworld.com/p/s_series_00155_1_9/',
         'info_dict': {
             'id': 's_series_00155_1_9',
             'ext': 'mp4',
-            'title': '第9試合 ランディ・サベージ vs リック・スタイナー',
+            'title': '闘強導夢2000 2000年1月4日 東京ドーム 第9試合 ランディ・サベージ VS リック・スタイナー',
             'tags': list,
         },
         'params': {
             'skip_download': True,  # AES-encrypted m3u8
         },
         'skip': 'Requires login',
-    }
+    }, {
+        'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
+        'info_dict': {
+            'id': 's_series_00563_16_bs',
+            'ext': 'mp4',
+            'title': 'WORLD TAG LEAGUE 2020 & BEST OF THE SUPER Jr.27 2020年12月6日 福岡・福岡国際センター バックステージコメント(字幕あり)',
+            'tags': ["福岡・福岡国際センター", "バックステージコメント", "2020", "20年代"],
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
 
     _LOGIN_URL = 'https://front.njpwworld.com/auth/login'
 
@@ -64,35 +74,27 @@ def _real_extract(self, url):
         webpage = self._download_webpage(url, video_id)
 
         formats = []
-        for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
-            player = extract_attributes(mobj.group(0))
-            player_path = player.get('href')
-            if not player_path:
-                continue
-            kind = self._search_regex(
-                r'(low|high)$', player.get('class') or '', 'kind',
-                default='low')
+        for kind, vid in re.findall(r'if\s+\(\s*imageQualityType\s*==\s*\'([^\']+)\'\s*\)\s*{\s*video_id\s*=\s*"(\d+)"', webpage):
+            player_path = '/intent?id=%s&type=url' % vid
             player_url = compat_urlparse.urljoin(url, player_path)
-            player_page = self._download_webpage(
-                player_url, video_id, note='Downloading player page')
-            entries = self._parse_html5_media_entries(
-                player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
-                m3u8_entry_protocol='m3u8_native')
-            kind_formats = entries[0]['formats']
-            for f in kind_formats:
-                f['quality'] = 2 if kind == 'high' else 1
-            formats.extend(kind_formats)
+            formats.append({
+                'url': player_url,
+                'format_id': kind,
+                'ext': 'mp4',
+                'protocol': 'm3u8',
+                'quality': 2 if kind == 'high' else 1,
+            })
 
         self._sort_formats(formats)
 
-        post_content = get_element_by_class('post-content', webpage)
+        tag_block = get_element_by_class('tag-block', webpage)
         tags = re.findall(
-            r'<li[^>]+class="tag-[^"]+"><a[^>]*>([^<]+)</a></li>', post_content
-        ) if post_content else None
+            r'<a[^>]+class="tag-[^"]+"[^>]*>([^<]+)</a>', tag_block
+        ) if tag_block else None
 
         return {
             'id': video_id,
-            'title': self._og_search_title(webpage),
+            'title': get_element_by_class('article-title', webpage) or self._og_search_title(webpage),
             'formats': formats,
             'tags': tags,
         }
index 4180e71efa5eb045b25be3b45258dbb2ad4b73dd..5805f3d4454030e0111178876607765321cca912 100644 (file)
--- a/youtube_dlc/extractor/spike.py
+++ b/youtube_dlc/extractor/spike.py
@@ -20,19 +20,6 @@ class BellatorIE(MTVServicesInfoExtractor):
     _FEED_URL = 'http://www.bellator.com/feeds/mrss/'
     _GEO_COUNTRIES = ['US']
 
-    def _extract_mgid(self, webpage, url):
-        mgid = None
-
-        if not mgid:
-            mgid = self._extract_triforce_mgid(webpage)
-
-        if not mgid:
-            mgid = self._extract_new_triforce_mgid(webpage, url)
-
-        return mgid
-
-# TODO Remove - Reason: Outdated Site
-
 
 class ParamountNetworkIE(MTVServicesInfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
@@ -56,16 +43,6 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
     def _get_feed_query(self, uri):
         return {
             'arcEp': 'paramountnetwork.com',
+            'imageEp': 'paramountnetwork.com',
             'mgid': uri,
         }
-
-    def _extract_mgid(self, webpage, url):
-        root_data = self._parse_json(self._search_regex(
-            r'window\.__DATA__\s*=\s*({.+})',
-            webpage, 'data'), None)
-
-        def find_sub_data(data, data_type):
-            return next(c for c in data['children'] if c.get('type') == data_type)
-
-        c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
-        return c['props']['media']['video']['config']['uri']
diff --git a/youtube_dlc/extractor/spotify.py b/youtube_dlc/extractor/spotify.py
new file mode 100644 (file)
index 0000000..826f98c
--- /dev/null
+++ b/youtube_dlc/extractor/spotify.py
@@ -0,0 +1,156 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_podcast_url,
+    float_or_none,
+    int_or_none,
+    strip_or_none,
+    try_get,
+    unified_strdate,
+)
+
+
+class SpotifyBaseIE(InfoExtractor):
+    _ACCESS_TOKEN = None
+    _OPERATION_HASHES = {
+        'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
+        'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
+        'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
+    }
+    _VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)'
+
+    def _real_initialize(self):
+        self._ACCESS_TOKEN = self._download_json(
+            'https://open.spotify.com/get_access_token', None)['accessToken']
+
+    def _call_api(self, operation, video_id, variables):
+        return self._download_json(
+            'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={
+                'operationName': 'query' + operation,
+                'variables': json.dumps(variables),
+                'extensions': json.dumps({
+                    'persistedQuery': {
+                        'sha256Hash': self._OPERATION_HASHES[operation],
+                    },
+                })
+            }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN})['data']
+
+    def _extract_episode(self, episode, series):
+        episode_id = episode['id']
+        title = episode['name'].strip()
+
+        formats = []
+        audio_preview = episode.get('audioPreview') or {}
+        audio_preview_url = audio_preview.get('url')
+        if audio_preview_url:
+            f = {
+                'url': audio_preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'),
+                'vcodec': 'none',
+            }
+            audio_preview_format = audio_preview.get('format')
+            if audio_preview_format:
+                f['format_id'] = audio_preview_format
+                mobj = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', audio_preview_format)
+                if mobj:
+                    f.update({
+                        'abr': int(mobj.group(2)),
+                        'ext': mobj.group(1).lower(),
+                    })
+            formats.append(f)
+
+        for item in (try_get(episode, lambda x: x['audio']['items']) or []):
+            item_url = item.get('url')
+            if not (item_url and item.get('externallyHosted')):
+                continue
+            formats.append({
+                'url': clean_podcast_url(item_url),
+                'vcodec': 'none',
+            })
+
+        thumbnails = []
+        for source in (try_get(episode, lambda x: x['coverArt']['sources']) or []):
+            source_url = source.get('url')
+            if not source_url:
+                continue
+            thumbnails.append({
+                'url': source_url,
+                'width': int_or_none(source.get('width')),
+                'height': int_or_none(source.get('height')),
+            })
+
+        return {
+            'id': episode_id,
+            'title': title,
+            'formats': formats,
+            'thumbnails': thumbnails,
+            'description': strip_or_none(episode.get('description')),
+            'duration': float_or_none(try_get(
+                episode, lambda x: x['duration']['totalMilliseconds']), 1000),
+            'release_date': unified_strdate(try_get(
+                episode, lambda x: x['releaseDate']['isoString'])),
+            'series': series,
+        }
+
+
+class SpotifyIE(SpotifyBaseIE):
+    IE_NAME = 'spotify'
+    _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
+    _TEST = {
+        'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
+        'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
+        'info_dict': {
+            'id': '4Z7GAJ50bgctf6uclHlWKo',
+            'ext': 'mp3',
+            'title': 'From the archive: Why time management is ruining our lives',
+            'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935',
+            'duration': 2083.605,
+            'release_date': '20201217',
+            'series': "The Guardian's Audio Long Reads",
+        }
+    }
+
+    def _real_extract(self, url):
+        episode_id = self._match_id(url)
+        episode = self._call_api('Episode', episode_id, {
+            'uri': 'spotify:episode:' + episode_id
+        })['episode']
+        return self._extract_episode(
+            episode, try_get(episode, lambda x: x['podcast']['name']))
+
+
+class SpotifyShowIE(SpotifyBaseIE):
+    IE_NAME = 'spotify:show'
+    _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
+    _TEST = {
+        'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
+        'info_dict': {
+            'id': '4PM9Ke6l66IRNpottHKV9M',
+            'title': 'The Story from the Guardian',
+            'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories',
+        },
+        'playlist_mincount': 36,
+    }
+
+    def _real_extract(self, url):
+        show_id = self._match_id(url)
+        podcast = self._call_api('ShowEpisodes', show_id, {
+            'limit': 1000000000,
+            'offset': 0,
+            'uri': 'spotify:show:' + show_id,
+        })['podcast']
+        podcast_name = podcast.get('name')
+
+        entries = []
+        for item in (try_get(podcast, lambda x: x['episodes']['items']) or []):
+            episode = item.get('episode')
+            if not episode:
+                continue
+            entries.append(self._extract_episode(episode, podcast_name))
+
+        return self.playlist_result(
+            entries, show_id, podcast_name, podcast.get('description'))
diff --git a/youtube_dlc/extractor/trovo.py b/youtube_dlc/extractor/trovo.py
new file mode 100644 (file)
index 0000000..4374521
--- /dev/null
@@ -0,0 +1,193 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    str_or_none,
+    try_get,
+)
+
+
+class TrovoBaseIE(InfoExtractor):
+    _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'
+
+    def _extract_streamer_info(self, data):
+        streamer_info = data.get('streamerInfo') or {}
+        username = streamer_info.get('userName')
+        return {
+            'uploader': streamer_info.get('nickName'),
+            'uploader_id': str_or_none(streamer_info.get('uid')),
+            'uploader_url': 'https://trovo.live/' + username if username else None,
+        }
+
+
+class TrovoIE(TrovoBaseIE):
+    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'
+
+    def _real_extract(self, url):
+        username = self._match_id(url)
+        live_info = self._download_json(
+            'https://gql.trovo.live/', username, query={
+                'query': '''{
+  getLiveInfo(params: {userName: "%s"}) {
+    isLive
+    programInfo {
+      coverUrl
+      id
+      streamInfo {
+        desc
+        playUrl
+      }
+      title
+    }
+    streamerInfo {
+      nickName
+      uid
+      userName
+    }
+  }
+}''' % username,
+            })['data']['getLiveInfo']
+        if live_info.get('isLive') == 0:
+            raise ExtractorError('%s is offline' % username, expected=True)
+        program_info = live_info['programInfo']
+        program_id = program_info['id']
+        title = self._live_title(program_info['title'])
+
+        formats = []
+        for stream_info in (program_info.get('streamInfo') or []):
+            play_url = stream_info.get('playUrl')
+            if not play_url:
+                continue
+            format_id = stream_info.get('desc')
+            formats.append({
+                'format_id': format_id,
+                'height': int_or_none(format_id[:-1]) if format_id else None,
+                'url': play_url,
+            })
+        self._sort_formats(formats)
+
+        info = {
+            'id': program_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': program_info.get('coverUrl'),
+            'is_live': True,
+        }
+        info.update(self._extract_streamer_info(live_info))
+        return info
+
+
+class TrovoVodIE(TrovoBaseIE):
+    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
+    _TESTS = [{
+        'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
+        'info_dict': {
+            'id': 'ltv-100095501_100095501_1609596043',
+            'ext': 'mp4',
+            'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
+            'uploader': 'Exsl',
+            'timestamp': 1609640305,
+            'upload_date': '20210103',
+            'uploader_id': '100095501',
+            'duration': 43977,
+            'view_count': int,
+            'like_count': int,
+            'comment_count': int,
+            'comments': 'mincount:8',
+            'categories': ['Grand Theft Auto V'],
+        },
+    }, {
+        'url': 'https://trovo.live/clip/lc-5285890810184026005',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        vid = self._match_id(url)
+        resp = self._download_json(
+            'https://gql.trovo.live/', vid, data=json.dumps([{
+                'query': '''{
+  batchGetVodDetailInfo(params: {vids: ["%s"]}) {
+    VodDetailInfos
+  }
+}''' % vid,
+            }, {
+                'query': '''{
+  getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
+    commentList {
+      author {
+        nickName
+        uid
+      }
+      commentID
+      content
+      createdAt
+      parentID
+    }
+  }
+}''' % vid,
+            }]).encode(), headers={
+                'Content-Type': 'application/json',
+            })
+        vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
+        vod_info = vod_detail_info['vodInfo']
+        title = vod_info['title']
+
+        language = vod_info.get('languageName')
+        formats = []
+        for play_info in (vod_info.get('playInfos') or []):
+            play_url = play_info.get('playUrl')
+            if not play_url:
+                continue
+            format_id = play_info.get('desc')
+            formats.append({
+                'ext': 'mp4',
+                'filesize': int_or_none(play_info.get('fileSize')),
+                'format_id': format_id,
+                'height': int_or_none(format_id[:-1]) if format_id else None,
+                'language': language,
+                'protocol': 'm3u8_native',
+                'tbr': int_or_none(play_info.get('bitrate')),
+                'url': play_url,
+            })
+        self._sort_formats(formats)
+
+        category = vod_info.get('categoryName')
+        get_count = lambda x: int_or_none(vod_info.get(x + 'Num'))
+
+        comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or []
+        comments = []
+        for comment in comment_list:
+            content = comment.get('content')
+            if not content:
+                continue
+            author = comment.get('author') or {}
+            parent = comment.get('parentID')
+            comments.append({
+                'author': author.get('nickName'),
+                'author_id': str_or_none(author.get('uid')),
+                'id': str_or_none(comment.get('commentID')),
+                'text': content,
+                'timestamp': int_or_none(comment.get('createdAt')),
+                'parent': 'root' if parent == 0 else str_or_none(parent),
+            })
+
+        info = {
+            'id': vid,
+            'title': title,
+            'formats': formats,
+            'thumbnail': vod_info.get('coverUrl'),
+            'timestamp': int_or_none(vod_info.get('publishTs')),
+            'duration': int_or_none(vod_info.get('duration')),
+            'view_count': get_count('watch'),
+            'like_count': get_count('like'),
+            'comment_count': get_count('comment'),
+            'comments': comments,
+            'categories': [category] if category else None,
+        }
+        info.update(self._extract_streamer_info(vod_detail_info))
+        return info
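
TrovoVodIE batches two GraphQL operations into a single POST: one for the VOD metadata, one for the comment list. A minimal sketch of that payload, with the comment query trimmed to a single field (the vid is the test VOD above):

    import json

    vid = 'ltv-100095501_100095501_1609596043'
    payload = json.dumps([
        {'query': '{ batchGetVodDetailInfo(params: {vids: ["%s"]}) { VodDetailInfos } }' % vid},
        {'query': '{ getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) { commentList { content } } }' % vid},
    ]).encode()
    # resp[0] carries the VOD metadata keyed by vid; resp[1] a flat comment list in which
    # parentID == 0 marks a root comment (mapped to 'root' in the extractor).
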
index 8ef3e0906436b3a13e1bc368173e7f7c81ba6c22..f6940b371bfc60e268be2c1e1be2be9e3f843e35 100644 (file)
@@ -1,12 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
-    ExtractorError,
     unified_strdate,
     HEADRequest,
     int_or_none,
@@ -46,15 +43,6 @@ class WatIE(InfoExtractor):
         },
     ]
 
-    _FORMATS = (
-        (200, 416, 234),
-        (400, 480, 270),
-        (600, 640, 360),
-        (1200, 640, 360),
-        (1800, 960, 540),
-        (2500, 1280, 720),
-    )
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
@@ -97,46 +85,20 @@ def extract_url(path_template, url_type):
                     return red_url
             return None
 
-        def remove_bitrate_limit(manifest_url):
-            return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url)
-
         formats = []
-        try:
-            alt_urls = lambda manifest_url: [re.sub(r'(?:wdv|ssm)?\.ism/', repl + '.ism/', manifest_url) for repl in ('', 'ssm')]
-            manifest_urls = self._download_json(
-                'http://www.wat.tv/get/webhtml/' + video_id, video_id)
-            m3u8_url = manifest_urls.get('hls')
-            if m3u8_url:
-                m3u8_url = remove_bitrate_limit(m3u8_url)
-                for m3u8_alt_url in alt_urls(m3u8_url):
-                    formats.extend(self._extract_m3u8_formats(
-                        m3u8_alt_url, video_id, 'mp4',
-                        'm3u8_native', m3u8_id='hls', fatal=False))
-                    formats.extend(self._extract_f4m_formats(
-                        m3u8_alt_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
-                        video_id, f4m_id='hds', fatal=False))
-            mpd_url = manifest_urls.get('mpd')
-            if mpd_url:
-                mpd_url = remove_bitrate_limit(mpd_url)
-                for mpd_alt_url in alt_urls(mpd_url):
-                    formats.extend(self._extract_mpd_formats(
-                        mpd_alt_url, video_id, mpd_id='dash', fatal=False))
-            self._sort_formats(formats)
-        except ExtractorError:
-            abr = 64
-            for vbr, width, height in self._FORMATS:
-                tbr = vbr + abr
-                format_id = 'http-%s' % tbr
-                fmt_url = 'http://dnl.adv.tf1.fr/2/USP-0x0/%s/%s/%s/ssm/%s-%s-64k.mp4' % (video_id[-4:-2], video_id[-2:], video_id, video_id, vbr)
-                if self._is_valid_url(fmt_url, video_id, format_id):
-                    formats.append({
-                        'format_id': format_id,
-                        'url': fmt_url,
-                        'vbr': vbr,
-                        'abr': abr,
-                        'width': width,
-                        'height': height,
-                    })
+        manifest_urls = self._download_json(
+            'http://www.wat.tv/get/webhtml/' + video_id, video_id)
+        m3u8_url = manifest_urls.get('hls')
+        if m3u8_url:
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4',
+                'm3u8_native', m3u8_id='hls', fatal=False))
+        mpd_url = manifest_urls.get('mpd')
+        if mpd_url:
+            formats.extend(self._extract_mpd_formats(
+                mpd_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
+                video_id, mpd_id='dash', fatal=False))
+        self._sort_formats(formats)
 
         date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4')
         upload_date = unified_strdate(date_diffusion) if date_diffusion else None
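
The wat.py rewrite drops the hard-coded HTTP format table and the bitrate-limit stripping; the extractor now relies solely on the webhtml manifest endpoint. A sketch of the response shape the new code assumes (URLs illustrative):

    # GET http://www.wat.tv/get/webhtml/<video_id>
    # {"hls": "https://.../master.m3u8",
    #  "mpd": "http://das-q1.tf1.fr/.../manifest.mpd"}
    # The mpd host is rewritten to das-q1-ssl.tf1.fr before DASH extraction.
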
index e4615376c428432f7035c2141d1cbecc738496cc..a17b10d6eb867a5db42755aa45dde14e35f5c6fa 100644 (file)
@@ -177,46 +177,9 @@ class YahooIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    def _real_extract(self, url):
-        url, country, display_id = re.match(self._VALID_URL, url).groups()
-        if not country:
-            country = 'us'
-        else:
-            country = country.split('-')[0]
-        api_base = 'https://%s.yahoo.com/_td/api/resource/' % country
-
-        for i, uuid in enumerate(['url=' + url, 'ymedia-alias=' + display_id]):
-            content = self._download_json(
-                api_base + 'content;getDetailView=true;uuids=["%s"]' % uuid,
-                display_id, 'Downloading content JSON metadata', fatal=i == 1)
-            if content:
-                item = content['items'][0]
-                break
-
-        if item.get('type') != 'video':
-            entries = []
-
-            cover = item.get('cover') or {}
-            if cover.get('type') == 'yvideo':
-                cover_url = cover.get('url')
-                if cover_url:
-                    entries.append(self.url_result(
-                        cover_url, 'Yahoo', cover.get('uuid')))
-
-            for e in item.get('body', []):
-                if e.get('type') == 'videoIframe':
-                    iframe_url = e.get('url')
-                    if not iframe_url:
-                        continue
-                    entries.append(self.url_result(iframe_url))
-
-            return self.playlist_result(
-                entries, item.get('uuid'),
-                item.get('title'), item.get('summary'))
-
-        video_id = item['uuid']
+    def _extract_yahoo_video(self, video_id, country):
         video = self._download_json(
-            api_base + 'VideoService.videos;view=full;video_ids=["%s"]' % video_id,
+            'https://%s.yahoo.com/_td/api/resource/VideoService.videos;view=full;video_ids=["%s"]' % (country, video_id),
             video_id, 'Downloading video JSON metadata')[0]
         title = video['title']
 
@@ -298,7 +261,6 @@ def _real_extract(self, url):
             'id': video_id,
             'title': self._live_title(title) if is_live else title,
             'formats': formats,
-            'display_id': display_id,
             'thumbnails': thumbnails,
             'description': clean_html(video.get('description')),
             'timestamp': parse_iso8601(video.get('publish_time')),
@@ -311,6 +273,44 @@ def _real_extract(self, url):
             'episode_number': int_or_none(series_info.get('episode_number')),
         }
 
+    def _real_extract(self, url):
+        url, country, display_id = re.match(self._VALID_URL, url).groups()
+        if not country:
+            country = 'us'
+        else:
+            country = country.split('-')[0]
+
+        item = self._download_json(
+            'https://%s.yahoo.com/caas/content/article' % country, display_id,
+            'Downloading content JSON metadata', query={
+                'url': url
+            })['items'][0]['data']['partnerData']
+
+        if item.get('type') != 'video':
+            entries = []
+
+            cover = item.get('cover') or {}
+            if cover.get('type') == 'yvideo':
+                cover_url = cover.get('url')
+                if cover_url:
+                    entries.append(self.url_result(
+                        cover_url, 'Yahoo', cover.get('uuid')))
+
+            for e in (item.get('body') or []):
+                if e.get('type') == 'videoIframe':
+                    iframe_url = e.get('url')
+                    if not iframe_url:
+                        continue
+                    entries.append(self.url_result(iframe_url))
+
+            return self.playlist_result(
+                entries, item.get('uuid'),
+                item.get('title'), item.get('summary'))
+
+        info = self._extract_yahoo_video(item['uuid'], country)
+        info['display_id'] = display_id
+        return info
+
 
 class YahooSearchIE(SearchInfoExtractor):
     IE_DESC = 'Yahoo screen search'
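
The Yahoo rewrite fetches page metadata from the caas/content/article endpoint, passing the original page URL as a query parameter, and factors the video lookup into _extract_yahoo_video. A sketch of the exchange the new _real_extract assumes (shape inferred from the code above):

    # GET https://us.yahoo.com/caas/content/article?url=<page URL>
    # -> {"items": [{"data": {"partnerData": {"type": "video", "uuid": "...", ...}}}]}
    # Non-video items take the playlist branch; video items are resolved via
    # _extract_yahoo_video(item['uuid'], country) against VideoService.videos.
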
index 97e8964d610b1400662d973513bcdf8efbecee2a..8b8c81c35c00ab5e69245dc6007fb3e146447b6c 100644 (file)
@@ -842,6 +842,10 @@ def _dict_from_multiple_values_options_callback(
         '-o', '--output',
         dest='outtmpl', metavar='TEMPLATE',
         help='Output filename template, see "OUTPUT TEMPLATE" for details')
+    filesystem.add_option(
+        '--output-na-placeholder',
+        dest='outtmpl_na_placeholder', metavar='PLACEHOLDER', default='NA',
+        help=('Placeholder value for unavailable meta fields in output filename template (default is "%default")'))
     filesystem.add_option(
         '--autonumber-size',
         dest='autonumber_size', metavar='NUMBER', type=int,
@@ -997,7 +1001,7 @@ def _dict_from_multiple_values_options_callback(
     postproc.add_option(
         '-x', '--extract-audio',
         action='store_true', dest='extractaudio', default=False,
-        help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
+        help='Convert video files to audio-only files (requires ffmpeg/avconv and ffprobe/avprobe)')
     postproc.add_option(
         '--audio-format', metavar='FORMAT', dest='audioformat', default='best',
         help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')
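
With the new --output-na-placeholder option, unavailable metadata fields in the output template render as the given placeholder instead of the default "NA". A usage sketch (URL illustrative):

    $ youtube-dlc -o '%(title)s-%(width)s.%(ext)s' --output-na-placeholder unknown 'https://example.com/watch?v=xyz'
    # a video with no width metadata would be saved as e.g. 'Some title-unknown.mp4'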