- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
- `release_year` (numeric): Year (YYYY) when the album was released
-Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
+Each aforementioned sequence, when referenced in an output template, will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with the placeholder value provided with `--output-na-placeholder` (`NA` by default).
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dlc test video` and id `BaW_jenozKcj`, this will result in a `youtube-dlc test video-BaW_jenozKcj.mp4` file created in the current directory.
'title2': '%PATH%',
}
- def fname(templ):
- ydl = YoutubeDL({'outtmpl': templ})
+ def fname(templ, na_placeholder='NA'):
+ params = {'outtmpl': templ}
+ if na_placeholder != 'NA':
+ params['outtmpl_na_placeholder'] = na_placeholder
+ ydl = YoutubeDL(params)
return ydl.prepare_filename(info)
self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
- # Replace missing fields with 'NA'
- self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
+ NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(id)s.%(ext)s'
+ # Replace missing fields with 'NA' by default
+ self.assertEqual(fname(NA_TEST_OUTTMPL), 'NA-NA-1234.mp4')
+ # Or with the provided placeholder
+ self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4')
+ self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4')
self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4')
self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4')
allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
outtmpl: Template for output names.
- restrictfilenames: Do not allow "&" and spaces in file names.
- trim_file_name: Limit length of filename (extension excluded).
- ignoreerrors: Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
+ outtmpl_na_placeholder: Placeholder for unavailable meta fields.
+ restrictfilenames: Do not allow "&" and spaces in file names
+ trim_file_name: Limit length of filename (extension excluded)
+ ignoreerrors: Do not stop on download errors
+ (Default True when running youtube-dlc,
+ but False when directly accessing YoutubeDL class)
force_generic_extractor: Force downloader to use the generic extractor
overwrites: Overwrite all video and metadata files if True,
overwrite only non-video files if None
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
for k, v in template_dict.items()
if v is not None and not isinstance(v, (list, tuple, dict)))
- template_dict = collections.defaultdict(lambda: 'NA', template_dict)
+ template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
# Missing numeric fields used together with integer presentation types
# in format specification will break the argument substitution since
- # string 'NA' is returned for missing fields. We will patch output
- # template for missing fields to meet string presentation type.
+ # the NA placeholder string is returned for missing fields. We will patch
+ # the output template for missing fields to meet string presentation type.
for numeric_field in self._NUMERIC_FIELDS:
if numeric_field not in template_dict:
# As of [1] format syntax is:
'listformats': opts.listformats,
'listformats_table': opts.listformats_table,
'outtmpl': outtmpl,
+ 'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
'paths': opts.paths,
'autonumber_size': opts.autonumber_size,
'autonumber_start': opts.autonumber_start,
'title': 'Ancient Aliens',
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
},
- 'playlist_mincount': 168,
+ 'playlist_mincount': 150,
}]
_RESOURCE = 'series'
_ITEMS_KEY = 'episodes'
from __future__ import unicode_literals
+import json
+import re
+
from .common import InfoExtractor
class AlJazeeraIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
+ _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?P<type>program/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
_TESTS = [{
- 'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
+ 'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance',
'info_dict': {
'id': '3792260579001',
'ext': 'mp4',
'add_ie': ['BrightcoveNew'],
'skip': 'Not accessible from Travis CI server',
}, {
- 'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
+ 'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art',
'only_matching': True,
}]
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
def _real_extract(self, url):
- program_name = self._match_id(url)
- webpage = self._download_webpage(url, program_name)
- brightcove_id = self._search_regex(
- r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
- return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
+ post_type, name = re.match(self._VALID_URL, url).groups()
+ post_type = {
+ 'features': 'post',
+ 'program': 'episode',
+ 'videos': 'video',
+ }[post_type.split('/')[0]]
+ video = self._download_json(
+ 'https://www.aljazeera.com/graphql', name, query={
+ 'operationName': 'SingleArticleQuery',
+ 'variables': json.dumps({
+ 'name': name,
+ 'postType': post_type,
+ }),
+ }, headers={
+ 'wp-site': 'aje',
+ })['data']['article']['video']
+ video_id = video['id']
+ account_id = video.get('accountId') or '665003303001'
+ player_id = video.get('playerId') or 'BkeSH5BDb'
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
+ 'BrightcoveNew', video_id)
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
+ int_or_none,
try_get,
unified_strdate,
+ unified_timestamp,
)
'ext': 'mp4',
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
'thumbnail': r're:^https?://',
- 'timestamp': 1523664000,
- 'upload_date': '20180414',
+ 'timestamp': 1523318400,
+ 'upload_date': '20180410',
'release_date': '20180410',
'series': "America's Test Kitchen",
'season_number': 18,
'params': {
'skip_download': True,
},
+ }, {
+ # Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
+ 'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
+ 'md5': '06451608c57651e985a498e69cec17e5',
+ 'info_dict': {
+ 'id': '5fbe8c61bda2010001c6763b',
+ 'title': 'Simple Chicken Dinner',
+ 'ext': 'mp4',
+ 'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
+ 'thumbnail': r're:^https?://',
+ 'timestamp': 1610755200,
+ 'upload_date': '20210116',
+ 'release_date': '20210116',
+ 'series': "America's Test Kitchen",
+ 'season_number': 21,
+ 'episode': 'Simple Chicken Dinner',
+ 'episode_number': 3,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}, {
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
'only_matching': True,
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
'ie_key': 'Zype',
'description': clean_html(video.get('description')),
+ 'timestamp': unified_timestamp(video.get('publishDate')),
'release_date': unified_strdate(video.get('publishDate')),
+ 'episode_number': int_or_none(episode.get('number')),
+ 'season_number': int_or_none(episode.get('season')),
'series': try_get(episode, lambda x: x['show']['title']),
'episode': episode.get('title'),
}
+
+
+class AmericasTestKitchenSeasonIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
+ _TESTS = [{
+ # ATK Season
+ 'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
+ 'info_dict': {
+ 'id': 'season_1',
+ 'title': 'Season 1',
+ },
+ 'playlist_count': 13,
+ }, {
+ # Cooks Country Season
+ 'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+ 'info_dict': {
+ 'id': 'season_12',
+ 'title': 'Season 12',
+ },
+ 'playlist_count': 13,
+ }]
+
+ def _real_extract(self, url):
+ show_name, season_number = re.match(self._VALID_URL, url).groups()
+ season_number = int(season_number)
+
+ slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
+
+ season = 'Season %d' % season_number
+
+ season_search = self._download_json(
+ 'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
+ season, headers={
+ 'Origin': 'https://www.%s.com' % show_name,
+ 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
+ 'X-Algolia-Application-Id': 'Y1FNZXUI30',
+ }, query={
+ 'facetFilters': json.dumps([
+ 'search_season_list:' + season,
+ 'search_document_klass:episode',
+ 'search_show_slug:' + slug,
+ ]),
+ 'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
+ 'attributesToHighlight': '',
+ 'hitsPerPage': 1000,
+ })
+
+ def entries():
+ for episode in (season_search.get('hits') or []):
+ search_url = episode.get('search_url')
+ if not search_url:
+ continue
+ yield {
+ '_type': 'url',
+ 'url': 'https://www.%s.com%s' % (show_name, search_url),
+ 'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
+ 'title': episode.get('title'),
+ 'description': episode.get('description'),
+ 'timestamp': unified_timestamp(episode.get('search_document_date')),
+ 'season_number': season_number,
+ 'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
+ 'ie_key': AmericasTestKitchenIE.ie_key(),
+ }
+
+ return self.playlist_result(
+ entries(), 'season_%d' % season_number, season)
import re
-from .common import InfoExtractor
+from .yahoo import YahooIE
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
-class AolIE(InfoExtractor):
+class AolIE(YahooIE):
IE_NAME = 'aol.com'
- _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
+ _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
_TESTS = [{
# video with 5min ID
}, {
'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
'only_matching': True,
+ }, {
+ # Yahoo video
+ 'url': 'https://www.aol.com/video/play/991e6700-ac02-11ea-99ff-357400036f61/24bbc846-3e30-3c46-915e-fe8ccd7fcc46/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
+ if '-' in video_id:
+ return self._extract_yahoo_video(video_id, 'us')
response = self._download_json(
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
if doc.tag == 'rss':
return GenericIE()._extract_rss(url, video_id, doc)
- title = self._html_search_regex(
+ title = self._og_search_title(webpage, default=None) or self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
r'<meta name="dcterms\.title" content="(.*?)"/>',
r'<h4 class="headline">(.*?)</h4>',
r'<title[^>]*>(.*?)</title>'],
webpage, 'title')
- description = self._html_search_meta(
+ description = self._og_search_description(webpage, default=None) or self._html_search_meta(
'dcterms.abstract', webpage, 'description', default=None)
if description is None:
description = self._html_search_meta(
class ARDIE(InfoExtractor):
- _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
+ _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?:video-?)?(?P<id>[0-9]+))\.html'
_TESTS = [{
- # available till 14.02.2019
- 'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
- 'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
+ # available till 7.01.2022
+ 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
+ 'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
'info_dict': {
- 'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
- 'id': '102',
+ 'display_id': 'maischberger-die-woche',
+ 'id': '100',
'ext': 'mp4',
- 'duration': 4435.0,
- 'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
- 'upload_date': '20180214',
+ 'duration': 3687.0,
+ 'title': 'maischberger. die woche vom 7. Januar 2021',
+ 'upload_date': '20210107',
'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
class ARDBetaMediathekIE(ARDMediathekBaseIE):
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?P<mode>player|live|video|sendung|sammlung)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
_TESTS = [{
- 'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
- 'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
+ 'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
+ 'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
'info_dict': {
'display_id': 'die-robuste-roswita',
- 'id': '70153354',
+ 'id': '78566716',
'title': 'Die robuste Roswita',
- 'description': r're:^Der Mord.*trüber ist als die Ilm.',
+ 'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
'duration': 5316,
- 'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
- 'timestamp': 1577047500,
- 'upload_date': '20191222',
+ 'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
+ 'timestamp': 1596658200,
+ 'upload_date': '20200805',
'ext': 'mp4',
},
}, {
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
-from .common import InfoExtractor
class ComedyCentralIE(MTVServicesInfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
- (video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
- /(?P<title>.*)'''
+ _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
_TESTS = [{
- 'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
- 'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
+ 'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
+ 'md5': 'b8acb347177c680ff18a292aa2166f80',
'info_dict': {
- 'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
+ 'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
'ext': 'mp4',
- 'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
- 'description': 'After a certain point, breastfeeding becomes c**kblocking.',
- 'timestamp': 1376798400,
- 'upload_date': '20130818',
+ 'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
+ 'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
+ 'timestamp': 1598670000,
+ 'upload_date': '20200829',
},
}, {
- 'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
+ 'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
'only_matching': True,
- }]
-
-
-class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
- (?:full-episodes|shows(?=/[^/]+/full-episodes))
- /(?P<id>[^?]+)'''
- _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
-
- _TESTS = [{
- 'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
- 'info_dict': {
- 'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
- 'title': 'November 28, 2016 - Ryan Speedo Green',
- },
- 'playlist_count': 4,
}, {
- 'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- webpage = self._download_webpage(url, playlist_id)
- mgid = self._extract_mgid(webpage, url, data_zone='t2_lc_promo1')
- videos_info = self._get_videos_info(mgid)
- return videos_info
-
-
-class ToshIE(MTVServicesInfoExtractor):
- IE_DESC = 'Tosh.0'
- _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
- _FEED_URL = 'http://tosh.cc.com/feeds/mrss'
-
- _TESTS = [{
- 'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
- 'info_dict': {
- 'description': 'Tosh asked fans to share their summer plans.',
- 'title': 'Twitter Users Share Summer Plans',
- },
- 'playlist': [{
- 'md5': 'f269e88114c1805bb6d7653fecea9e06',
- 'info_dict': {
- 'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
- 'ext': 'mp4',
- 'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
- 'description': 'Tosh asked fans to share their summer plans.',
- 'thumbnail': r're:^https?://.*\.jpg',
- # It's really reported to be published on year 2077
- 'upload_date': '20770610',
- 'timestamp': 3390510600,
- 'subtitles': {
- 'en': 'mincount:3',
- },
- },
- }]
- }, {
- 'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
+ 'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
'only_matching': True,
}]
class ComedyCentralTVIE(MTVServicesInfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
_TESTS = [{
- 'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
+ 'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
'info_dict': {
- 'id': 'local_playlist-f99b626bdfe13568579a',
- 'ext': 'flv',
- 'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
+ 'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
+ 'ext': 'mp4',
+ 'title': 'Josh Investigates',
+ 'description': 'Steht uns das Ende der Welt bevor?',
},
- }, {
- 'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
- 'only_matching': True,
- }, {
- 'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
- 'only_matching': True,
}]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- mrss_url = self._search_regex(
- r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
- webpage, 'mrss url', group='url')
-
- return self._get_videos_info_from_url(mrss_url, video_id)
-
-
-class ComedyCentralShortnameIE(InfoExtractor):
- _VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
- _TESTS = [{
- 'url': ':tds',
- 'only_matching': True,
- }, {
- 'url': ':thedailyshow',
- 'only_matching': True,
- }, {
- 'url': ':theopposition',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- shortcut_map = {
- 'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
- 'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
- 'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+ _GEO_COUNTRIES = ['DE']
+
+ def _get_feed_query(self, uri):
+ return {
+ 'accountOverride': 'intl.mtvi.com',
+ 'arcEp': 'web.cc.tv',
+ 'ep': 'b9032c3a',
+ 'imageEp': 'web.cc.tv',
+ 'mgid': uri,
}
- return self.url_result(shortcut_map[video_id])
AnimeLabIE,
AnimeLabShowsIE,
)
-from .americastestkitchen import AmericasTestKitchenIE
+from .americastestkitchen import (
+ AmericasTestKitchenIE,
+ AmericasTestKitchenSeasonIE,
+)
from .animeondemand import AnimeOnDemandIE
from .anvato import AnvatoIE
from .aol import AolIE
)
from .coub import CoubIE
from .comedycentral import (
- ComedyCentralFullEpisodesIE,
ComedyCentralIE,
- ComedyCentralShortnameIE,
ComedyCentralTVIE,
- ToshIE,
)
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .commonprotocols import (
MildomVodIE,
MildomUserVodIE,
)
+from .minds import (
+ MindsIE,
+ MindsChannelIE,
+ MindsGroupIE,
+)
from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE
from .miomio import MioMioIE
from .sport5 import Sport5IE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
+from .spotify import (
+ SpotifyIE,
+ SpotifyShowIE,
+)
from .spreaker import (
SpreakerIE,
SpreakerPageIE,
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
-from .trovolive import TrovoLiveIE
+from .trovo import (
+ TrovoIE,
+ TrovoVodIE,
+)
from .trunews import TruNewsIE
from .trutv import TruTVIE
from .tube8 import Tube8IE
class FranceCultureIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
'info_dict': {
'id': 'rendez-vous-au-pays-des-geeks',
'title': 'Rendez-vous au pays des geeks',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140301',
- 'timestamp': 1393642916,
+ 'timestamp': 1393700400,
'vcodec': 'none',
}
- }
+ }, {
+ # no thumbnail
+ 'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
display_id = self._match_id(url)
</h1>|
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
).*?
- (<button[^>]+data-asset-source="[^"]+"[^>]+>)
+ (<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
''',
webpage, 'video data'))
- video_url = video_data['data-asset-source']
- title = video_data.get('data-asset-title') or self._og_search_title(webpage)
+ video_url = video_data.get('data-url') or video_data['data-asset-source']
+ title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
description = self._html_search_regex(
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
webpage, 'description', default=None)
thumbnail = self._search_regex(
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
- webpage, 'thumbnail', fatal=False)
+ webpage, 'thumbnail', default=None)
uploader = self._html_search_regex(
r'(?s)<span class="author">(.*?)</span>',
webpage, 'uploader', default=None)
'ext': ext,
'vcodec': 'none' if ext == 'mp3' else None,
'uploader': uploader,
- 'timestamp': int_or_none(video_data.get('data-asset-created-date')),
+ 'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
'duration': int_or_none(video_data.get('data-duration')),
}
import json
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
from ..utils import (
determine_ext,
ExtractorError,
}, {
'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/@lacajadepandora:a/TRUMP-EST%C3%81-BIEN-PUESTO-con-Pilar-Baselga,-Carlos-Senra,-Luis-Palacios-(720p_30fps_H264-192kbit_AAC):1',
+ 'only_matching': True,
}]
def _real_extract(self, url):
display_id = display_id.split('/', 2)[-1].replace('/', ':')
else:
display_id = display_id.replace(':', '#')
+ display_id = compat_urllib_parse_unquote(display_id)
uri = 'lbry://' + display_id
result = self._resolve_url(uri, display_id, 'stream')
result_value = result['value']
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ int_or_none,
+ str_or_none,
+ strip_or_none,
+)
+
+
+class MindsBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'
+
+ def _call_api(self, path, video_id, resource, query=None):
+ api_url = 'https://www.minds.com/api/' + path
+ token = self._get_cookies(api_url).get('XSRF-TOKEN')
+ return self._download_json(
+ api_url, video_id, 'Downloading %s JSON metadata' % resource, headers={
+ 'Referer': 'https://www.minds.com/',
+ 'X-XSRF-TOKEN': token.value if token else '',
+ }, query=query)
+
+
+class MindsIE(MindsBaseIE):
+ IE_NAME = 'minds'
+ _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.minds.com/media/100000000000086822',
+ 'md5': '215a658184a419764852239d4970b045',
+ 'info_dict': {
+ 'id': '100000000000086822',
+ 'ext': 'mp4',
+ 'title': 'Minds intro sequence',
+ 'thumbnail': r're:https?://.+\.png',
+ 'uploader_id': 'ottman',
+ 'upload_date': '20130524',
+ 'timestamp': 1369404826,
+ 'uploader': 'Bill Ottman',
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'tags': ['animation'],
+ 'comment_count': int,
+ 'license': 'attribution-cc',
+ },
+ }, {
+ # entity.type == 'activity' and empty title
+ 'url': 'https://www.minds.com/newsfeed/798025111988506624',
+ 'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
+ 'info_dict': {
+ 'id': '798022190320226304',
+ 'ext': 'mp4',
+ 'title': '798022190320226304',
+ 'uploader': 'ColinFlaherty',
+ 'upload_date': '20180111',
+ 'timestamp': 1515639316,
+ 'uploader_id': 'ColinFlaherty',
+ },
+ }, {
+ 'url': 'https://www.minds.com/archive/view/715172106794442752',
+ 'only_matching': True,
+ }, {
+ # youtube perma_url
+ 'url': 'https://www.minds.com/newsfeed/1197131838022602752',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ entity_id = self._match_id(url)
+ entity = self._call_api(
+ 'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']
+ if entity.get('type') == 'activity':
+ if entity.get('custom_type') == 'video':
+ video_id = entity['entity_guid']
+ else:
+ return self.url_result(entity['perma_url'])
+ else:
+ assert(entity['subtype'] == 'video')
+ video_id = entity_id
+ # 1080p and webm formats are available only in the sources array
+ video = self._call_api(
+ 'v2/media/video/' + video_id, video_id, 'video')
+
+ formats = []
+ for source in (video.get('sources') or []):
+ src = source.get('src')
+ if not src:
+ continue
+ formats.append({
+ 'format_id': source.get('label'),
+ 'height': int_or_none(source.get('size')),
+ 'url': src,
+ })
+ self._sort_formats(formats)
+
+ entity = video.get('entity') or entity
+ owner = entity.get('ownerObj') or {}
+ uploader_id = owner.get('username')
+
+ tags = entity.get('tags')
+ if tags and isinstance(tags, compat_str):
+ tags = [tags]
+
+ thumbnail = None
+ poster = video.get('poster') or entity.get('thumbnail_src')
+ if poster:
+ urlh = self._request_webpage(poster, video_id, fatal=False)
+ if urlh:
+ thumbnail = urlh.geturl()
+
+ return {
+ 'id': video_id,
+ 'title': entity.get('title') or video_id,
+ 'formats': formats,
+ 'description': clean_html(entity.get('description')) or None,
+ 'license': str_or_none(entity.get('license')),
+ 'timestamp': int_or_none(entity.get('time_created')),
+ 'uploader': strip_or_none(owner.get('name')),
+ 'uploader_id': uploader_id,
+ 'uploader_url': 'https://www.minds.com/' + uploader_id if uploader_id else None,
+ 'view_count': int_or_none(entity.get('play:count')),
+ 'like_count': int_or_none(entity.get('thumbs:up:count')),
+ 'dislike_count': int_or_none(entity.get('thumbs:down:count')),
+ 'tags': tags,
+ 'comment_count': int_or_none(entity.get('comments:count')),
+ 'thumbnail': thumbnail,
+ }
+
+
+class MindsFeedBaseIE(MindsBaseIE):
+ _PAGE_SIZE = 150
+
+ def _entries(self, feed_id):
+ query = {'limit': self._PAGE_SIZE, 'sync': 1}
+ i = 1
+ while True:
+ data = self._call_api(
+ 'v2/feeds/container/%s/videos' % feed_id,
+ feed_id, 'page %s' % i, query)
+ entities = data.get('entities') or []
+ for entity in entities:
+ guid = entity.get('guid')
+ if not guid:
+ continue
+ yield self.url_result(
+ 'https://www.minds.com/newsfeed/' + guid,
+ MindsIE.ie_key(), guid)
+ query['from_timestamp'] = data['load-next']
+ if not (query['from_timestamp'] and len(entities) == self._PAGE_SIZE):
+ break
+ i += 1
+
+ def _real_extract(self, url):
+ feed_id = self._match_id(url)
+ feed = self._call_api(
+ 'v1/%s/%s' % (self._FEED_PATH, feed_id),
+ feed_id, self._FEED_TYPE)[self._FEED_TYPE]
+
+ return self.playlist_result(
+ self._entries(feed['guid']), feed_id,
+ strip_or_none(feed.get('name')),
+ feed.get('briefdescription'))
+
+
+class MindsChannelIE(MindsFeedBaseIE):
+ _FEED_TYPE = 'channel'
+ IE_NAME = 'minds:' + _FEED_TYPE
+ _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'
+ _FEED_PATH = 'channel'
+ _TEST = {
+ 'url': 'https://www.minds.com/ottman',
+ 'info_dict': {
+ 'id': 'ottman',
+ 'title': 'Bill Ottman',
+ 'description': 'Co-creator & CEO @minds',
+ },
+ 'playlist_mincount': 54,
+ }
+
+
+class MindsGroupIE(MindsFeedBaseIE):
+ _FEED_TYPE = 'group'
+ IE_NAME = 'minds:' + _FEED_TYPE
+ _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'
+ _FEED_PATH = 'groups/group'
+ _TEST = {
+ 'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
+ 'info_dict': {
+ 'id': '785582576369672204',
+ 'title': 'Cooking Videos',
+ },
+ 'playlist_mincount': 1,
+ }
return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
+ @staticmethod
+ def _extract_child_with_type(parent, t):
+ return next(c for c in parent['children'] if c.get('type') == t)
+
def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
if url == '':
return
if not mgid:
mgid = self._extract_triforce_mgid(webpage, data_zone)
+ if not mgid:
+ data = self._parse_json(self._search_regex(
+ r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
+ main_container = self._extract_child_with_type(data, 'MainContainer')
+ video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
+ mgid = video_player['props']['media']['video']['config']['uri']
+
return mgid
def _real_extract(self, url):
'only_matching': True,
}]
- @staticmethod
- def extract_child_with_type(parent, t):
- children = parent['children']
- return next(c for c in children if c.get('type') == t)
-
- def _extract_mgid(self, webpage):
- data = self._parse_json(self._search_regex(
- r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
- main_container = self.extract_child_with_type(data, 'MainContainer')
- video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
- return video_player['props']['media']['video']['config']['uri']
-
class MTVJapanIE(MTVServicesInfoExtractor):
IE_NAME = 'mtvjapan'
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..utils import str_to_int
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ try_get,
+ url_or_none,
+)
class NineGagIE(InfoExtractor):
IE_NAME = '9gag'
- _VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
+ _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
- _TESTS = [{
- 'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
- 'info_dict': {
- 'id': 'kXzwOKyGlSA',
- 'ext': 'mp4',
- 'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
- 'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
- 'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
- 'uploader': 'CompilationChannel',
- 'upload_date': '20131110',
- 'view_count': int,
- },
- 'add_ie': ['Youtube'],
- }, {
- 'url': 'http://9gag.com/tv/p/aKolP3',
+ _TEST = {
+ 'url': 'https://9gag.com/gag/ae5Ag7B',
'info_dict': {
- 'id': 'aKolP3',
+ 'id': 'ae5Ag7B',
'ext': 'mp4',
- 'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
- 'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
- 'uploader_id': 'rickmereki',
- 'uploader': 'Rick Mereki',
- 'upload_date': '20110803',
- 'view_count': int,
- },
- 'add_ie': ['Vimeo'],
- }, {
- 'url': 'http://9gag.com/tv/p/KklwM',
- 'only_matching': True,
- }, {
- 'url': 'http://9gag.tv/p/Kk2X5',
- 'only_matching': True,
- }, {
- 'url': 'http://9gag.com/tv/embed/a5Dmvl',
- 'only_matching': True,
- }]
-
- _EXTERNAL_VIDEO_PROVIDER = {
- '1': {
- 'url': '%s',
- 'ie_key': 'Youtube',
- },
- '2': {
- 'url': 'http://player.vimeo.com/video/%s',
- 'ie_key': 'Vimeo',
- },
- '3': {
- 'url': 'http://instagram.com/p/%s',
- 'ie_key': 'Instagram',
- },
- '4': {
- 'url': 'http://vine.co/v/%s',
- 'ie_key': 'Vine',
- },
+ 'title': 'Capybara Agility Training',
+ 'upload_date': '20191108',
+ 'timestamp': 1573237208,
+ 'categories': ['Awesome'],
+ 'tags': ['Weimaraner', 'American Pit Bull Terrier'],
+ 'duration': 44,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ }
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id') or video_id
+ # Fetch the post JSON from the 9gag v1 API and build the info dict
+ # (formats, thumbnails, counters, tags) from it.
+ post_id = self._match_id(url)
+ post = self._download_json(
+ 'https://9gag.com/v1/post', post_id, query={
+ 'id': post_id
+ })['data']['post']
+
+ # Only posts of type 'Animated' are treated as videos; anything else is
+ # reported as an expected (user-facing) error.
+ if post.get('type') != 'Animated':
+ raise ExtractorError(
+ 'The given url does not contain a video',
+ expected=True)
+
+ title = post['title']
+
+ duration = None
+ formats = []
+ thumbnails = []
+ for key, image in (post.get('images') or {}).items():
+ image_url = url_or_none(image.get('url'))
+ if not image_url:
+ continue
+ ext = determine_ext(image_url)
+ # Drop the literal 'image' prefix from the key. str.strip('image')
+ # would treat the argument as a character set and also eat matching
+ # trailing characters (e.g. 'image460svwm' -> '460svw').
+ image_id = key[len('image'):] if key.startswith('image') else key
+ common = {
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ }
+ # Still images become thumbnails, plus a WebP variant when one is offered.
+ if ext in ('jpg', 'png'):
+ webp_url = image.get('webpUrl')
+ if webp_url:
+ t = common.copy()
+ t.update({
+ 'id': image_id + '-webp',
+ 'url': webp_url,
+ })
+ thumbnails.append(t)
+ common.update({
+ 'id': image_id,
+ 'ext': ext,
+ })
+ thumbnails.append(common)
+ # Video files become formats; each per-codec alternative URL gets its
+ # own format entry next to the default one.
+ elif ext in ('webm', 'mp4'):
+ # The first non-empty duration found is kept for the whole post.
+ if not duration:
+ duration = int_or_none(image.get('duration'))
+ # hasAudio == 0 marks a silent clip; otherwise the audio codec is left unset.
+ common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
+ for vcodec in ('vp8', 'vp9', 'h265'):
+ c_url = image.get(vcodec + 'Url')
+ if not c_url:
+ continue
+ c_f = common.copy()
+ c_f.update({
+ 'format_id': image_id + '-' + vcodec,
+ 'url': c_url,
+ 'vcodec': vcodec,
+ })
+ formats.append(c_f)
+ common.update({
+ 'ext': ext,
+ 'format_id': image_id,
+ })
+ formats.append(common)
+ self._sort_formats(formats)
- webpage = self._download_webpage(url, display_id)
+ section = try_get(post, lambda x: x['postSection']['name'])
- post_view = self._parse_json(
- self._search_regex(
- r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
- webpage, 'post view'),
- display_id)
+ # Collect the non-empty 'key' of every tag; tags stays None when the
+ # post carries no tag list.
+ tags = None
+ post_tags = post.get('tags')
+ if post_tags:
+ tags = []
+ for tag in post_tags:
+ tag_key = tag.get('key')
+ if not tag_key:
+ continue
+ tags.append(tag_key)
- ie_key = None
- source_url = post_view.get('sourceUrl')
- if not source_url:
- external_video_id = post_view['videoExternalId']
- external_video_provider = post_view['videoExternalProvider']
- source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
- ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
- title = post_view['title']
- description = post_view.get('description')
- view_count = str_to_int(post_view.get('externalView'))
- thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
+ # Reads the '<name>Count' field of the post and passes it through int_or_none.
+ get_count = lambda x: int_or_none(post.get(x + 'Count'))
return {
- '_type': 'url_transparent',
- 'url': source_url,
- 'ie_key': ie_key,
- 'id': video_id,
- 'display_id': display_id,
+ 'id': post_id,
'title': title,
- 'description': description,
- 'view_count': view_count,
- 'thumbnail': thumbnail,
+ 'timestamp': int_or_none(post.get('creationTs')),
+ 'duration': duration,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'like_count': get_count('upVote'),
+ 'dislike_count': get_count('downVote'),
+ 'comment_count': get_count('comments'),
+ 'age_limit': 18 if post.get('nsfw') == 1 else None,
+ 'categories': [section] if section else None,
+ 'tags': tags,
}
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
- extract_attributes,
get_element_by_class,
urlencode_postdata,
)
class NJPWWorldIE(InfoExtractor):
- _VALID_URL = r'https?://njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
+ _VALID_URL = r'https?://(front\.)?njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
IE_DESC = '新日本プロレスワールド'
_NETRC_MACHINE = 'njpwworld'
- _TEST = {
+ _TESTS = [{
'url': 'http://njpwworld.com/p/s_series_00155_1_9/',
'info_dict': {
'id': 's_series_00155_1_9',
'ext': 'mp4',
- 'title': '第9試合 ランディ・サベージ vs リック・スタイナー',
+ 'title': '闘強導夢2000 2000年1月4日 東京ドーム 第9試合 ランディ・サベージ VS リック・スタイナー',
'tags': list,
},
'params': {
'skip_download': True, # AES-encrypted m3u8
},
'skip': 'Requires login',
- }
+ }, {
+ 'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
+ 'info_dict': {
+ 'id': 's_series_00563_16_bs',
+ 'ext': 'mp4',
+ 'title': 'WORLD TAG LEAGUE 2020 & BEST OF THE SUPER Jr.27 2020年12月6日 福岡・福岡国際センター バックステージコメント(字幕あり)',
+ 'tags': ["福岡・福岡国際センター", "バックステージコメント", "2020", "20年代"],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
_LOGIN_URL = 'https://front.njpwworld.com/auth/login'
webpage = self._download_webpage(url, video_id)
formats = []
- for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
- player = extract_attributes(mobj.group(0))
- player_path = player.get('href')
- if not player_path:
- continue
- kind = self._search_regex(
- r'(low|high)$', player.get('class') or '', 'kind',
- default='low')
+ for kind, vid in re.findall(r'if\s+\(\s*imageQualityType\s*==\s*\'([^\']+)\'\s*\)\s*{\s*video_id\s*=\s*"(\d+)"', webpage):
+ player_path = '/intent?id=%s&type=url' % vid
player_url = compat_urlparse.urljoin(url, player_path)
- player_page = self._download_webpage(
- player_url, video_id, note='Downloading player page')
- entries = self._parse_html5_media_entries(
- player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
- m3u8_entry_protocol='m3u8_native')
- kind_formats = entries[0]['formats']
- for f in kind_formats:
- f['quality'] = 2 if kind == 'high' else 1
- formats.extend(kind_formats)
+ formats.append({
+ 'url': player_url,
+ 'format_id': kind,
+ 'ext': 'mp4',
+ 'protocol': 'm3u8',
+ 'quality': 2 if kind == 'high' else 1,
+ })
self._sort_formats(formats)
- post_content = get_element_by_class('post-content', webpage)
+ tag_block = get_element_by_class('tag-block', webpage)
tags = re.findall(
- r'<li[^>]+class="tag-[^"]+"><a[^>]*>([^<]+)</a></li>', post_content
- ) if post_content else None
+ r'<a[^>]+class="tag-[^"]+"[^>]*>([^<]+)</a>', tag_block
+ ) if tag_block else None
return {
'id': video_id,
- 'title': self._og_search_title(webpage),
+ 'title': get_element_by_class('article-title', webpage) or self._og_search_title(webpage),
'formats': formats,
'tags': tags,
}
_FEED_URL = 'http://www.bellator.com/feeds/mrss/'
_GEO_COUNTRIES = ['US']
- def _extract_mgid(self, webpage, url):
- mgid = None
-
- if not mgid:
- mgid = self._extract_triforce_mgid(webpage)
-
- if not mgid:
- mgid = self._extract_new_triforce_mgid(webpage, url)
-
- return mgid
-
-# TODO Remove - Reason: Outdated Site
-
class ParamountNetworkIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
def _get_feed_query(self, uri):
return {
'arcEp': 'paramountnetwork.com',
+ 'imageEp': 'paramountnetwork.com',
'mgid': uri,
}
-
- def _extract_mgid(self, webpage, url):
- root_data = self._parse_json(self._search_regex(
- r'window\.__DATA__\s*=\s*({.+})',
- webpage, 'data'), None)
-
- def find_sub_data(data, data_type):
- return next(c for c in data['children'] if c.get('type') == data_type)
-
- c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
- return c['props']['media']['video']['config']['uri']
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_podcast_url,
+ float_or_none,
+ int_or_none,
+ strip_or_none,
+ try_get,
+ unified_strdate,
+)
+
+
+class SpotifyBaseIE(InfoExtractor):
+ # Shared plumbing for the Spotify extractors: access-token retrieval,
+ # the persisted-query API call and episode parsing.
+ _ACCESS_TOKEN = None
+ # sha256 hashes sent in the `persistedQuery` extension, keyed by operation name.
+ _OPERATION_HASHES = {
+ 'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
+ 'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
+ 'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
+ }
+ # The '%s' placeholder is filled in by subclasses ('episode' and 'show' in this file).
+ _VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)'
+
+ def _real_initialize(self):
+ # Fetch an access token and keep it for the authorization header
+ # that _call_api sends.
+ self._ACCESS_TOKEN = self._download_json(
+ 'https://open.spotify.com/get_access_token', None)['accessToken']
+
+ def _call_api(self, operation, video_id, variables):
+ # Run the persisted query named 'query' + operation with the given
+ # variables (JSON-encoded into the query string) and return the
+ # 'data' member of the JSON response. `operation` must be a key of
+ # _OPERATION_HASHES.
+ return self._download_json(
+ 'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={
+ 'operationName': 'query' + operation,
+ 'variables': json.dumps(variables),
+ 'extensions': json.dumps({
+ 'persistedQuery': {
+ 'sha256Hash': self._OPERATION_HASHES[operation],
+ },
+ })
+ }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN})['data']
+
+ def _extract_episode(self, episode, series):
+ # Build the info dict for one episode object as returned by the API;
+ # `series` is stored unchanged under the 'series' key.
+ # 'id' and 'name' are required; every other field is optional.
+ episode_id = episode['id']
+ title = episode['name'].strip()
+
+ formats = []
+ audio_preview = episode.get('audioPreview') or {}
+ audio_preview_url = audio_preview.get('url')
+ if audio_preview_url:
+ f = {
+ # The preview host/path prefix is swapped for the anon-podcast host.
+ 'url': audio_preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'),
+ 'vcodec': 'none',
+ }
+ audio_preview_format = audio_preview.get('format')
+ if audio_preview_format:
+ f['format_id'] = audio_preview_format
+ # Format names matching '<3 chars>_[<WORD>_]<digits>' yield the
+ # extension (lower-cased first group) and the audio bitrate.
+ mobj = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', audio_preview_format)
+ if mobj:
+ f.update({
+ 'abr': int(mobj.group(2)),
+ 'ext': mobj.group(1).lower(),
+ })
+ formats.append(f)
+
+ # Only externally hosted audio items are added; their URLs are passed
+ # through clean_podcast_url.
+ for item in (try_get(episode, lambda x: x['audio']['items']) or []):
+ item_url = item.get('url')
+ if not (item_url and item.get('externallyHosted')):
+ continue
+ formats.append({
+ 'url': clean_podcast_url(item_url),
+ 'vcodec': 'none',
+ })
+
+ # Every cover art source with a URL becomes a thumbnail.
+ thumbnails = []
+ for source in (try_get(episode, lambda x: x['coverArt']['sources']) or []):
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ thumbnails.append({
+ 'url': source_url,
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+
+ return {
+ 'id': episode_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': strip_or_none(episode.get('description')),
+ # totalMilliseconds goes through float_or_none with a scale of 1000.
+ 'duration': float_or_none(try_get(
+ episode, lambda x: x['duration']['totalMilliseconds']), 1000),
+ 'release_date': unified_strdate(try_get(
+ episode, lambda x: x['releaseDate']['isoString'])),
+ 'series': series,
+ }
+
+
+class SpotifyIE(SpotifyBaseIE):
+ # Extractor for single episodes (open.spotify.com/episode/<id>).
+ IE_NAME = 'spotify'
+ _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
+ _TEST = {
+ 'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
+ 'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
+ 'info_dict': {
+ 'id': '4Z7GAJ50bgctf6uclHlWKo',
+ 'ext': 'mp3',
+ 'title': 'From the archive: Why time management is ruining our lives',
+ 'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935',
+ 'duration': 2083.605,
+ 'release_date': '20201217',
+ 'series': "The Guardian's Audio Long Reads",
+ }
+ }
+
+ def _real_extract(self, url):
+ # Query the 'Episode' operation with the episode URI and hand the
+ # result to _extract_episode; the series name is taken from the
+ # episode's podcast entry when present.
+ episode_id = self._match_id(url)
+ episode = self._call_api('Episode', episode_id, {
+ 'uri': 'spotify:episode:' + episode_id
+ })['episode']
+ return self._extract_episode(
+ episode, try_get(episode, lambda x: x['podcast']['name']))
+
+
+class SpotifyShowIE(SpotifyBaseIE):
+ # Extractor for whole shows (open.spotify.com/show/<id>); returns a playlist.
+ IE_NAME = 'spotify:show'
+ _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
+ _TEST = {
+ 'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
+ 'info_dict': {
+ 'id': '4PM9Ke6l66IRNpottHKV9M',
+ 'title': 'The Story from the Guardian',
+ 'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories',
+ },
+ 'playlist_mincount': 36,
+ }
+
+ def _real_extract(self, url):
+ # Fetch the show through the 'ShowEpisodes' operation and convert
+ # each listed episode with _extract_episode.
+ show_id = self._match_id(url)
+ podcast = self._call_api('ShowEpisodes', show_id, {
+ # NOTE(review): very large limit, presumably so that all episodes
+ # arrive in a single request — confirm the API honours it.
+ 'limit': 1000000000,
+ 'offset': 0,
+ 'uri': 'spotify:show:' + show_id,
+ })['podcast']
+ podcast_name = podcast.get('name')
+
+ entries = []
+ for item in (try_get(podcast, lambda x: x['episodes']['items']) or []):
+ # Items without an 'episode' member are skipped.
+ episode = item.get('episode')
+ if not episode:
+ continue
+ entries.append(self._extract_episode(episode, podcast_name))
+
+ return self.playlist_result(
+ entries, show_id, podcast_name, podcast.get('description'))
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+)
+
+
+class TrovoBaseIE(InfoExtractor):
+ # Common base of the Trovo extractors: shared URL prefix and uploader parsing.
+ _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'
+
+ def _extract_streamer_info(self, data):
+ # Map data['streamerInfo'] (a missing or empty value is tolerated) onto
+ # the uploader fields; the channel URL is only built when a user name
+ # is present.
+ info = data.get('streamerInfo') or {}
+ user_name = info.get('userName')
+ uploader_url = 'https://trovo.live/' + user_name if user_name else None
+ return {
+ 'uploader': info.get('nickName'),
+ 'uploader_id': str_or_none(info.get('uid')),
+ 'uploader_url': uploader_url,
+ }
+
+
+class TrovoIE(TrovoBaseIE):
+ # Live stream extractor: matches any first path segment except 'clip/' and
+ # 'video/' (those are handled by the VOD pattern) and uses it as the user name.
+ _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+ # NOTE(review): username is interpolated into the GraphQL query text
+ # as-is — confirm it cannot contain characters that break the query.
+ live_info = self._download_json(
+ 'https://gql.trovo.live/', username, query={
+ 'query': '''{
+ getLiveInfo(params: {userName: "%s"}) {
+ isLive
+ programInfo {
+ coverUrl
+ id
+ streamInfo {
+ desc
+ playUrl
+ }
+ title
+ }
+ streamerInfo {
+ nickName
+ uid
+ userName
+ }
+ }
+}''' % username,
+ })['data']['getLiveInfo']
+ # isLive == 0 is reported as an expected "offline" error.
+ if live_info.get('isLive') == 0:
+ raise ExtractorError('%s is offline' % username, expected=True)
+ program_info = live_info['programInfo']
+ program_id = program_info['id']
+ title = self._live_title(program_info['title'])
+
+ formats = []
+ for stream_info in (program_info.get('streamInfo') or []):
+ play_url = stream_info.get('playUrl')
+ if not play_url:
+ continue
+ format_id = stream_info.get('desc')
+ formats.append({
+ 'format_id': format_id,
+ # The height is parsed from desc minus its last character
+ # (presumably values like '720p' — TODO confirm).
+ 'height': int_or_none(format_id[:-1]) if format_id else None,
+ 'url': play_url,
+ })
+ self._sort_formats(formats)
+
+ info = {
+ 'id': program_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': program_info.get('coverUrl'),
+ 'is_live': True,
+ }
+ # Uploader fields come from the shared streamerInfo parser.
+ info.update(self._extract_streamer_info(live_info))
+ return info
+
+
+class TrovoVodIE(TrovoBaseIE):
+ # Extractor for recorded content under /clip/<id> and /video/<id>.
+ _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
+ 'info_dict': {
+ 'id': 'ltv-100095501_100095501_1609596043',
+ 'ext': 'mp4',
+ 'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
+ 'uploader': 'Exsl',
+ 'timestamp': 1609640305,
+ 'upload_date': '20210103',
+ 'uploader_id': '100095501',
+ 'duration': 43977,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'comments': 'mincount:8',
+ 'categories': ['Grand Theft Auto V'],
+ },
+ }, {
+ 'url': 'https://trovo.live/clip/lc-5285890810184026005',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ vid = self._match_id(url)
+ # Two GraphQL queries are posted together as one JSON list: the VOD
+ # details first, the comment list second. The responses are read back
+ # by position (resp[0] and resp[1]).
+ resp = self._download_json(
+ 'https://gql.trovo.live/', vid, data=json.dumps([{
+ 'query': '''{
+ batchGetVodDetailInfo(params: {vids: ["%s"]}) {
+ VodDetailInfos
+ }
+}''' % vid,
+ }, {
+ 'query': '''{
+ getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
+ commentList {
+ author {
+ nickName
+ uid
+ }
+ commentID
+ content
+ createdAt
+ parentID
+ }
+ }
+}''' % vid,
+ }]).encode(), headers={
+ 'Content-Type': 'application/json',
+ })
+ # The VOD details are mandatory; a missing entry raises here.
+ vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
+ vod_info = vod_detail_info['vodInfo']
+ title = vod_info['title']
+
+ language = vod_info.get('languageName')
+ formats = []
+ for play_info in (vod_info.get('playInfos') or []):
+ play_url = play_info.get('playUrl')
+ if not play_url:
+ continue
+ format_id = play_info.get('desc')
+ formats.append({
+ 'ext': 'mp4',
+ 'filesize': int_or_none(play_info.get('fileSize')),
+ 'format_id': format_id,
+ # The height is parsed from desc minus its last character
+ # (presumably values like '720p' — TODO confirm).
+ 'height': int_or_none(format_id[:-1]) if format_id else None,
+ 'language': language,
+ 'protocol': 'm3u8_native',
+ 'tbr': int_or_none(play_info.get('bitrate')),
+ 'url': play_url,
+ })
+ self._sort_formats(formats)
+
+ category = vod_info.get('categoryName')
+ # Reads the '<name>Num' counter of the VOD info through int_or_none.
+ get_count = lambda x: int_or_none(vod_info.get(x + 'Num'))
+
+ # Comments are optional: a missing or malformed second response yields
+ # an empty list instead of an error.
+ comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or []
+ comments = []
+ for comment in comment_list:
+ # Comments without text are dropped.
+ content = comment.get('content')
+ if not content:
+ continue
+ author = comment.get('author') or {}
+ parent = comment.get('parentID')
+ comments.append({
+ 'author': author.get('nickName'),
+ 'author_id': str_or_none(author.get('uid')),
+ 'id': str_or_none(comment.get('commentID')),
+ 'text': content,
+ 'timestamp': int_or_none(comment.get('createdAt')),
+ # A parentID of 0 is mapped to 'root'; other values are stringified.
+ 'parent': 'root' if parent == 0 else str_or_none(parent),
+ })
+
+ info = {
+ 'id': vid,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': vod_info.get('coverUrl'),
+ 'timestamp': int_or_none(vod_info.get('publishTs')),
+ 'duration': int_or_none(vod_info.get('duration')),
+ 'view_count': get_count('watch'),
+ 'like_count': get_count('like'),
+ 'comment_count': get_count('comment'),
+ 'comments': comments,
+ 'categories': [category] if category else None,
+ }
+ # Uploader fields come from the shared streamerInfo parser.
+ info.update(self._extract_streamer_info(vod_detail_info))
+ return info
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- ExtractorError,
unified_strdate,
HEADRequest,
int_or_none,
},
]
- _FORMATS = (
- (200, 416, 234),
- (400, 480, 270),
- (600, 640, 360),
- (1200, 640, 360),
- (1800, 960, 540),
- (2500, 1280, 720),
- )
-
def _real_extract(self, url):
video_id = self._match_id(url)
video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
return red_url
return None
- def remove_bitrate_limit(manifest_url):
- return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url)
-
formats = []
- try:
- alt_urls = lambda manifest_url: [re.sub(r'(?:wdv|ssm)?\.ism/', repl + '.ism/', manifest_url) for repl in ('', 'ssm')]
- manifest_urls = self._download_json(
- 'http://www.wat.tv/get/webhtml/' + video_id, video_id)
- m3u8_url = manifest_urls.get('hls')
- if m3u8_url:
- m3u8_url = remove_bitrate_limit(m3u8_url)
- for m3u8_alt_url in alt_urls(m3u8_url):
- formats.extend(self._extract_m3u8_formats(
- m3u8_alt_url, video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False))
- formats.extend(self._extract_f4m_formats(
- m3u8_alt_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
- video_id, f4m_id='hds', fatal=False))
- mpd_url = manifest_urls.get('mpd')
- if mpd_url:
- mpd_url = remove_bitrate_limit(mpd_url)
- for mpd_alt_url in alt_urls(mpd_url):
- formats.extend(self._extract_mpd_formats(
- mpd_alt_url, video_id, mpd_id='dash', fatal=False))
- self._sort_formats(formats)
- except ExtractorError:
- abr = 64
- for vbr, width, height in self._FORMATS:
- tbr = vbr + abr
- format_id = 'http-%s' % tbr
- fmt_url = 'http://dnl.adv.tf1.fr/2/USP-0x0/%s/%s/%s/ssm/%s-%s-64k.mp4' % (video_id[-4:-2], video_id[-2:], video_id, video_id, vbr)
- if self._is_valid_url(fmt_url, video_id, format_id):
- formats.append({
- 'format_id': format_id,
- 'url': fmt_url,
- 'vbr': vbr,
- 'abr': abr,
- 'width': width,
- 'height': height,
- })
+ manifest_urls = self._download_json(
+ 'http://www.wat.tv/get/webhtml/' + video_id, video_id)
+ m3u8_url = manifest_urls.get('hls')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+ mpd_url = manifest_urls.get('mpd')
+ if mpd_url:
+ formats.extend(self._extract_mpd_formats(
+ mpd_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
+ video_id, mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4')
upload_date = unified_strdate(date_diffusion) if date_diffusion else None
'only_matching': True,
}]
- def _real_extract(self, url):
- url, country, display_id = re.match(self._VALID_URL, url).groups()
- if not country:
- country = 'us'
- else:
- country = country.split('-')[0]
- api_base = 'https://%s.yahoo.com/_td/api/resource/' % country
-
- for i, uuid in enumerate(['url=' + url, 'ymedia-alias=' + display_id]):
- content = self._download_json(
- api_base + 'content;getDetailView=true;uuids=["%s"]' % uuid,
- display_id, 'Downloading content JSON metadata', fatal=i == 1)
- if content:
- item = content['items'][0]
- break
-
- if item.get('type') != 'video':
- entries = []
-
- cover = item.get('cover') or {}
- if cover.get('type') == 'yvideo':
- cover_url = cover.get('url')
- if cover_url:
- entries.append(self.url_result(
- cover_url, 'Yahoo', cover.get('uuid')))
-
- for e in item.get('body', []):
- if e.get('type') == 'videoIframe':
- iframe_url = e.get('url')
- if not iframe_url:
- continue
- entries.append(self.url_result(iframe_url))
-
- return self.playlist_result(
- entries, item.get('uuid'),
- item.get('title'), item.get('summary'))
-
- video_id = item['uuid']
+ def _extract_yahoo_video(self, video_id, country):
video = self._download_json(
- api_base + 'VideoService.videos;view=full;video_ids=["%s"]' % video_id,
+ 'https://%s.yahoo.com/_td/api/resource/VideoService.videos;view=full;video_ids=["%s"]' % (country, video_id),
video_id, 'Downloading video JSON metadata')[0]
title = video['title']
'id': video_id,
'title': self._live_title(title) if is_live else title,
'formats': formats,
- 'display_id': display_id,
'thumbnails': thumbnails,
'description': clean_html(video.get('description')),
'timestamp': parse_iso8601(video.get('publish_time')),
'episode_number': int_or_none(series_info.get('episode_number')),
}
+ def _real_extract(self, url):
+ # Resolve an article URL through the caas content endpoint and return
+ # either a single video (via _extract_yahoo_video) or a playlist of the
+ # videos referenced by a non-video article.
+ url, country, display_id = re.match(self._VALID_URL, url).groups()
+ # Default to 'us'; otherwise keep only the part of the matched country
+ # group that precedes the first '-'.
+ if not country:
+ country = 'us'
+ else:
+ country = country.split('-')[0]
+
+ item = self._download_json(
+ 'https://%s.yahoo.com/caas/content/article' % country, display_id,
+ 'Downloading content JSON metadata', query={
+ 'url': url
+ })['items'][0]['data']['partnerData']
+
+ if item.get('type') != 'video':
+ entries = []
+
+ # A cover of type 'yvideo' with a URL is delegated to the 'Yahoo' extractor.
+ cover = item.get('cover') or {}
+ if cover.get('type') == 'yvideo':
+ cover_url = cover.get('url')
+ if cover_url:
+ entries.append(self.url_result(
+ cover_url, 'Yahoo', cover.get('uuid')))
+
+ # Body entries of type 'videoIframe' with a URL are added as generic URL results.
+ for e in (item.get('body') or []):
+ if e.get('type') == 'videoIframe':
+ iframe_url = e.get('url')
+ if not iframe_url:
+ continue
+ entries.append(self.url_result(iframe_url))
+
+ return self.playlist_result(
+ entries, item.get('uuid'),
+ item.get('title'), item.get('summary'))
+
+ # Video items: extract by uuid and attach the display id from the URL.
+ info = self._extract_yahoo_video(item['uuid'], country)
+ info['display_id'] = display_id
+ return info
+
class YahooSearchIE(SearchInfoExtractor):
IE_DESC = 'Yahoo screen search'
'-o', '--output',
dest='outtmpl', metavar='TEMPLATE',
help='Output filename template, see "OUTPUT TEMPLATE" for details')
+ filesystem.add_option(
+ '--output-na-placeholder',
+ dest='outtmpl_na_placeholder', metavar='PLACEHOLDER', default='NA',
+ help=('Placeholder value for unavailable meta fields in output filename template (default is "%default")'))
filesystem.add_option(
'--autonumber-size',
dest='autonumber_size', metavar='NUMBER', type=int,
postproc.add_option(
'-x', '--extract-audio',
action='store_true', dest='extractaudio', default=False,
- help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
+ help='Convert video files to audio-only files (requires ffmpeg/avconv and ffprobe/avprobe)')
postproc.add_option(
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')