if 'display_id' not in info_dict and 'id' in info_dict:
info_dict['display_id'] = info_dict['id']
- if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
- # Working around out-of-range timestamp values (e.g. negative ones on Windows,
- # see http://bugs.python.org/issue1646728)
- try:
- upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
- info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
- except (ValueError, OverflowError, OSError):
- pass
+ for ts_key, date_key in (
+ ('timestamp', 'upload_date'),
+ ('release_timestamp', 'release_date'),
+ ):
+ if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
+ # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+ # see http://bugs.python.org/issue1646728)
+ try:
+ upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+ info_dict[date_key] = upload_date.strftime('%Y%m%d')
+ except (ValueError, OverflowError, OSError):
+ pass
# Auto generate title fields corresponding to the *_number fields when missing
# in order to always have clean titles. This is very common for TV series.
ember_data = self._parse_json(self._search_regex(
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
webpage, 'ember data'), episode_id)
+ ember_data = ember_data.get(episode_id) or ember_data
episode = ember_data['data']['attributes']
description = episode.get('description') or {}
'uploader': 'Ben Prunty',
'timestamp': 1396508491,
'upload_date': '20140403',
+ 'release_timestamp': 1396483200,
'release_date': '20140403',
'duration': 260.877,
'track': 'Lanius (Battle)',
'uploader': 'Mastodon',
'timestamp': 1322005399,
'upload_date': '20111122',
+ 'release_timestamp': 1076112000,
'release_date': '20040207',
'duration': 120.79,
'track': 'Hail to Fire',
'thumbnail': thumbnail,
'uploader': artist,
'timestamp': timestamp,
- 'release_date': unified_strdate(tralbum.get('album_release_date')),
+ 'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
'duration': duration,
'track': track,
'track_number': track_number,
anime_id = mobj.group('anime_id')
page_id = mobj.group('page')
webpage = self._download_webpage(url, video_id)
- headers = {
- 'Referer': url,
- 'Accept': '*/*'
- }
- headers.update(self.geo_verification_headers())
if 'anime/' not in url:
cid = self._search_regex(
if 'no_bangumi_tip' not in smuggled_data:
self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run yt-dlp with %s' % (
video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
+ headers = {
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'Referer': url
+ }
+ headers.update(self.geo_verification_headers())
- headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
js = self._download_json(
'http://bangumi.bilibili.com/web_api/get_source', video_id,
data=urlencode_postdata({'episode_id': video_id}),
self._report_error(js)
cid = js['result']['cid']
+ headers = {
+ 'Accept': 'application/json',
+ 'Referer': url
+ }
+ headers.update(self.geo_verification_headers())
+
entries = []
RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
class CBSIE(CBSBaseIE):
- _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs\.com|paramountplus\.com)/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
+ _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs|paramountplus)\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
'only_matching': True,
}, {
- 'url': 'https://www.paramountplus.com/shows/star-trek-discovery/video/l5ANMH9wM7kxwV1qr4u1xn88XOhYMlZX/star-trek-discovery-the-vulcan-hello/',
+ 'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
'only_matching': True,
}]
uploader: Full name of the video uploader.
license: License name the video is licensed under.
creator: The creator of the video.
+ release_timestamp: UNIX timestamp of the moment the video was released.
release_date: The date (YYYYMMDD) when the video was released.
- timestamp: UNIX timestamp of the moment the video became available.
+ timestamp:         UNIX timestamp of the moment the video was uploaded.
upload_date: Video upload date (YYYYMMDD).
If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader.
def _real_extract(self, url):
video_id = self._match_id(url)
formats = self._extract_m3u8_formats(
- self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id)
+ self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id, 'mp4')
for f in formats:
wh = self._BITRATE_MAP.get(f.get('tbr'))
if wh:
from .common import InfoExtractor
from ..compat import (
+ compat_parse_qs,
compat_str,
compat_urllib_parse_unquote,
+ compat_urllib_parse_urlparse,
)
from ..utils import (
determine_ext,
'description': stream_value.get('description'),
'license': stream_value.get('license'),
'timestamp': int_or_none(stream.get('timestamp')),
+ 'release_timestamp': int_or_none(stream_value.get('release_time')),
'tags': stream_value.get('tags'),
'duration': int_or_none(media.get('duration')),
'channel': try_get(signing_channel, lambda x: x['value']['title']),
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
'timestamp': 1595694354,
'upload_date': '20200725',
+ 'release_timestamp': 1595340697,
+ 'release_date': '20200721',
'width': 1280,
'height': 720,
}
'description': 'md5:661ac4f1db09f31728931d7b88807a61',
'timestamp': 1591312601,
'upload_date': '20200604',
+ 'release_timestamp': 1591312421,
+ 'release_date': '20200604',
'tags': list,
'duration': 2570,
'channel': 'The LBRY Foundation',
}]
_PAGE_SIZE = 50
- def _fetch_page(self, claim_id, url, page):
+ def _fetch_page(self, claim_id, url, params, page):
page += 1
+ page_params = {
+ 'channel_ids': [claim_id],
+ 'claim_type': 'stream',
+ 'no_totals': True,
+ 'page': page,
+ 'page_size': self._PAGE_SIZE,
+ }
+ page_params.update(params)
result = self._call_api_proxy(
- 'claim_search', claim_id, {
- 'channel_ids': [claim_id],
- 'claim_type': 'stream',
- 'no_totals': True,
- 'page': page,
- 'page_size': self._PAGE_SIZE,
- 'stream_types': self._SUPPORTED_STREAM_TYPES,
- }, 'page %d' % page)
+ 'claim_search', claim_id, page_params, 'page %d' % page)
for item in (result.get('items') or []):
stream_claim_name = item.get('name')
stream_claim_id = item.get('claim_id')
result = self._resolve_url(
'lbry://' + display_id, display_id, 'channel')
claim_id = result['claim_id']
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ content = qs.get('content', [None])[0]
+ params = {
+ 'fee_amount': qs.get('fee_amount', ['>=0'])[0],
+ 'order_by': {
+ 'new': ['release_time'],
+ 'top': ['effective_amount'],
+ 'trending': ['trending_group', 'trending_mixed'],
+ }[qs.get('order', ['new'])[0]],
+ 'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
+ }
+ duration = qs.get('duration', [None])[0]
+ if duration:
+ params['duration'] = {
+ 'long': '>=1200',
+ 'short': '<=240',
+ }[duration]
+ language = qs.get('language', ['all'])[0]
+ if language != 'all':
+ languages = [language]
+ if language == 'en':
+ languages.append('none')
+ params['any_languages'] = languages
entries = OnDemandPagedList(
- functools.partial(self._fetch_page, claim_id, url),
+ functools.partial(self._fetch_page, claim_id, url, params),
self._PAGE_SIZE)
result_value = result.get('value') or {}
return self.playlist_result(
else:
age_limit = None
+ webpage_url = 'https://%s/videos/watch/%s' % (host, video_id)
+
return {
'id': video_id,
'title': title,
'description': description,
- 'thumbnail': urljoin(url, video.get('thumbnailPath')),
+ 'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')),
'timestamp': unified_timestamp(video.get('publishedAt')),
'uploader': account_data('displayName', compat_str),
'uploader_id': str_or_none(account_data('id', int)),
'tags': try_get(video, lambda x: x['tags'], list),
'categories': categories,
'formats': formats,
- 'subtitles': subtitles
+ 'subtitles': subtitles,
+ 'webpage_url': webpage_url,
}
title = (data.get('title') or data.get('grid_title') or video_id).strip()
+ urls = []
formats = []
duration = None
if extract_formats:
if not isinstance(format_dict, dict):
continue
format_url = url_or_none(format_dict.get('url'))
- if not format_url:
+ if not format_url or format_url in urls:
continue
+ urls.append(format_url)
duration = float_or_none(format_dict.get('duration'), scale=1000)
ext = determine_ext(format_url)
if 'hls' in format_id.lower() or ext == 'm3u8':
'params': {
'skip_download': True,
},
+ 'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
}, {
# subtitles
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
webpage = dl_webpage('pc')
error_msg = self._html_search_regex(
- r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+ (r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+ r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
webpage, 'error message', default=None, group='error')
if error_msg:
error_msg = re.sub(r'\s+', ' ', error_msg)
upload_date = None
formats = []
+
+ def add_format(format_url, height=None):
+ tbr = None
+ mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
+ if mobj:
+ if not height:
+ height = int(mobj.group('height'))
+ tbr = int(mobj.group('tbr'))
+ formats.append({
+ 'url': format_url,
+ 'format_id': '%dp' % height if height else None,
+ 'height': height,
+ 'tbr': tbr,
+ })
+
for video_url, height in video_urls:
if not upload_date:
upload_date = self._search_regex(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
- tbr = None
- mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
- if mobj:
- if not height:
- height = int(mobj.group('height'))
- tbr = int(mobj.group('tbr'))
- formats.append({
- 'url': video_url,
- 'format_id': '%dp' % height if height else None,
- 'height': height,
- 'tbr': tbr,
- })
+ if '/video/get_media' in video_url:
+ medias = self._download_json(video_url, video_id, fatal=False)
+ if isinstance(medias, list):
+ for media in medias:
+ if not isinstance(media, dict):
+ continue
+ video_url = url_or_none(media.get('videoUrl'))
+ if not video_url:
+ continue
+ height = int_or_none(media.get('quality'))
+ add_format(video_url, height)
+ continue
+ add_format(video_url)
self._sort_formats(formats)
video_uploader = self._html_search_regex(
from __future__ import unicode_literals
import base64
+import io
import re
-import time
+import sys
from .common import InfoExtractor
from ..compat import (
determine_ext,
ExtractorError,
float_or_none,
+ qualities,
remove_end,
remove_start,
- sanitized_Request,
std_headers,
)
-
-def _decrypt_url(png):
- encrypted_data = compat_b64decode(png)
- text_index = encrypted_data.find(b'tEXt')
- text_chunk = encrypted_data[text_index - 4:]
- length = compat_struct_unpack('!I', text_chunk[:4])[0]
- # Use bytearray to get integers when iterating in both python 2.x and 3.x
- data = bytearray(text_chunk[8:8 + length])
- data = [chr(b) for b in data if b != 0]
- hash_index = data.index('#')
- alphabet_data = data[:hash_index]
- url_data = data[hash_index + 1:]
- if url_data[0] == 'H' and url_data[3] == '%':
- # remove useless HQ%% at the start
- url_data = url_data[4:]
-
- alphabet = []
- e = 0
- d = 0
- for l in alphabet_data:
- if d == 0:
- alphabet.append(l)
- d = e = (e + 1) % 4
- else:
- d -= 1
- url = ''
- f = 0
- e = 3
- b = 1
- for letter in url_data:
- if f == 0:
- l = int(letter) * 10
- f = 1
- else:
- if e == 0:
- l += int(letter)
- url += alphabet[l]
- e = (b + 3) % 4
- f = 0
- b += 1
- else:
- e -= 1
-
- return url
+_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x))
class RTVEALaCartaIE(InfoExtractor):
'ext': 'mp4',
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
'duration': 5024.566,
+ 'series': 'Balonmano',
},
+ 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}, {
'note': 'Live stream',
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
'info_dict': {
'id': '1694255',
- 'ext': 'flv',
- 'title': 'TODO',
+ 'ext': 'mp4',
+ 'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': 'live stream',
},
- 'skip': 'The f4m manifest can\'t be used yet',
}, {
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
- 'md5': 'e55e162379ad587e9640eda4f7353c0f',
+ 'md5': 'd850f3c8731ea53952ebab489cf81cbf',
'info_dict': {
'id': '4236788',
'ext': 'mp4',
- 'title': 'Servir y proteger - Capítulo 104 ',
+ 'title': 'Servir y proteger - Capítulo 104',
'duration': 3222.0,
},
- 'params': {
- 'skip_download': True, # requires ffmpeg
- },
+ 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}, {
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
'only_matching': True,
def _real_initialize(self):
user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
- manager_info = self._download_json(
+ self._manager = self._download_json(
'http://www.rtve.es/odin/loki/' + user_agent_b64,
- None, 'Fetching manager info')
- self._manager = manager_info['manager']
+ None, 'Fetching manager info')['manager']
+
+ @staticmethod
+ def _decrypt_url(png):
+ encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
+ while True:
+ length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
+ chunk_type = encrypted_data.read(4)
+ if chunk_type == b'IEND':
+ break
+ data = encrypted_data.read(length)
+ if chunk_type == b'tEXt':
+ alphabet_data, text = data.split(b'\0')
+ quality, url_data = text.split(b'%%')
+ alphabet = []
+ e = 0
+ d = 0
+ for l in _bytes_to_chr(alphabet_data):
+ if d == 0:
+ alphabet.append(l)
+ d = e = (e + 1) % 4
+ else:
+ d -= 1
+ url = ''
+ f = 0
+ e = 3
+ b = 1
+ for letter in _bytes_to_chr(url_data):
+ if f == 0:
+ l = int(letter) * 10
+ f = 1
+ else:
+ if e == 0:
+ l += int(letter)
+ url += alphabet[l]
+ e = (b + 3) % 4
+ f = 0
+ b += 1
+ else:
+ e -= 1
+
+ yield quality.decode(), url
+ encrypted_data.read(4) # CRC
+
+ def _extract_png_formats(self, video_id):
+ png = self._download_webpage(
+ 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id),
+ video_id, 'Downloading url information', query={'q': 'v2'})
+ q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
+ formats = []
+ for quality, video_url in self._decrypt_url(png):
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, 'dash', fatal=False))
+ else:
+ formats.append({
+ 'format_id': quality,
+ 'quality': q(quality),
+ 'url': video_url,
+ })
+ self._sort_formats(formats)
+ return formats
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
info = self._download_json(
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
video_id)['page']['items'][0]
if info['state'] == 'DESPU':
raise ExtractorError('The video is no longer available', expected=True)
- title = info['title']
- png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
- png_request = sanitized_Request(png_url)
- png_request.add_header('Referer', url)
- png = self._download_webpage(png_request, video_id, 'Downloading url information')
- video_url = _decrypt_url(png)
- ext = determine_ext(video_url)
-
- formats = []
- if not video_url.endswith('.f4m') and ext != 'm3u8':
- if '?' not in video_url:
- video_url = video_url.replace('resources/', 'auth/resources/')
- video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
-
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, ext='mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- video_url, video_id, f4m_id='hds', fatal=False))
- else:
- formats.append({
- 'url': video_url,
- })
- self._sort_formats(formats)
+ title = info['title'].strip()
+ formats = self._extract_png_formats(video_id)
subtitles = None
- if info.get('sbtFile') is not None:
- subtitles = self.extract_subtitles(video_id, info['sbtFile'])
+ sbt_file = info.get('sbtFile')
+ if sbt_file:
+ subtitles = self.extract_subtitles(video_id, sbt_file)
+
+ is_live = info.get('live') is True
return {
'id': video_id,
- 'title': title,
+ 'title': self._live_title(title) if is_live else title,
'formats': formats,
'thumbnail': info.get('image'),
- 'page_url': url,
'subtitles': subtitles,
- 'duration': float_or_none(info.get('duration'), scale=1000),
+ 'duration': float_or_none(info.get('duration'), 1000),
+ 'is_live': is_live,
+ 'series': info.get('programTitle'),
}
def _get_subtitles(self, video_id, sub_file):
for s in subs)
-class RTVEInfantilIE(InfoExtractor):
+class RTVEInfantilIE(RTVEALaCartaIE):
IE_NAME = 'rtve.es:infantil'
IE_DESC = 'RTVE infantil'
- _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'
+ _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
_TESTS = [{
'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
- 'md5': '915319587b33720b8e0357caaa6617e6',
+ 'md5': '5747454717aedf9f9fdf212d1bcfc48d',
'info_dict': {
'id': '3040283',
'ext': 'mp4',
'title': 'Maneras de vivir',
- 'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
+ 'thumbnail': r're:https?://.+/1426182947956\.JPG',
'duration': 357.958,
},
+ 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}]
- def _real_extract(self, url):
- video_id = self._match_id(url)
- info = self._download_json(
- 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
- video_id)['page']['items'][0]
-
- webpage = self._download_webpage(url, video_id)
- vidplayer_id = self._search_regex(
- r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
- png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
- png = self._download_webpage(png_url, video_id, 'Downloading url information')
- video_url = _decrypt_url(png)
-
- return {
- 'id': video_id,
- 'ext': 'mp4',
- 'title': info['title'],
- 'url': video_url,
- 'thumbnail': info.get('image'),
- 'duration': float_or_none(info.get('duration'), scale=1000),
- }
-
-
-class RTVELiveIE(InfoExtractor):
+class RTVELiveIE(RTVEALaCartaIE):
IE_NAME = 'rtve.es:live'
IE_DESC = 'RTVE.es live streams'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
'info_dict': {
'id': 'la-1',
'ext': 'mp4',
- 'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
+ 'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
},
'params': {
'skip_download': 'live stream',
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- start_time = time.gmtime()
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
title = remove_start(title, 'Estoy viendo ')
- title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
vidplayer_id = self._search_regex(
(r'playerId=player([0-9]+)',
r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
r'data-id=["\'](\d+)'),
webpage, 'internal video ID')
- png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
- png = self._download_webpage(png_url, video_id, 'Downloading url information')
- m3u8_url = _decrypt_url(png)
- formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
- self._sort_formats(formats)
return {
'id': video_id,
- 'title': title,
- 'formats': formats,
+ 'title': self._live_title(title),
+ 'formats': self._extract_png_formats(vidplayer_id),
'is_live': True,
}
_NETRC_MACHINE = 'shahid'
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
_TESTS = [{
- 'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AC%D9%84%D8%B3-%D8%A7%D9%84%D8%B4%D8%A8%D8%A7%D8%A8-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-275286',
+ 'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924',
'info_dict': {
- 'id': '275286',
+ 'id': '816924',
'ext': 'mp4',
- 'title': 'مجلس الشباب الموسم 1 كليب 1',
- 'timestamp': 1506988800,
- 'upload_date': '20171003',
+ 'title': 'متحف الدحيح الموسم 1 كليب 1',
+ 'timestamp': 1602806400,
+ 'upload_date': '20201016',
+ 'description': 'برومو',
+ 'duration': 22,
+ 'categories': ['كوميديا'],
},
'params': {
# m3u8 download
page_type = 'episode'
playout = self._call_api(
- 'playout/url/' + video_id, video_id)['playout']
+ 'playout/new/url/' + video_id, video_id)['playout']
if not self._downloader.params.get('allow_unplayable_formats') and playout.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
- formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
+ formats = self._extract_m3u8_formats(re.sub(
+ # https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
+ r'aws\.manifestfilter=[\w:;,-]+&?',
+ '', playout['url']), video_id, 'mp4')
self._sort_formats(formats)
# video = self._call_api(
class SouthParkIE(MTVServicesInfoExtractor):
IE_NAME = 'southpark.cc.com'
- _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
- _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
_TESTS = [{
'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured',
}, {
'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1',
'only_matching': True,
+ }, {
+ 'url': 'https://www.southparkstudios.com/episodes/h4o269/south-park-stunning-and-brave-season-19-ep-1',
+ 'only_matching': True,
}]
+ def _get_feed_query(self, uri):
+ return {
+ 'accountOverride': 'intl.mtvi.com',
+ 'arcEp': 'shared.southpark.global',
+ 'ep': '90877963',
+ 'imageEp': 'shared.southpark.global',
+ 'mgid': uri,
+ }
+
class SouthParkEsIE(SouthParkIE):
IE_NAME = 'southpark.cc.com:español'
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
from ..utils import (
+ clean_html,
+ float_or_none,
+ int_or_none,
parse_iso8601,
- sanitized_Request,
+ strip_or_none,
+ try_get,
)
class SportDeutschlandIE(InfoExtractor):
- _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
+ _VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
_TESTS = [{
'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
'info_dict': {
- 'id': 're-live-deutsche-meisterschaften-2020-halbfinals',
+ 'id': '5318cac0275701382770543d7edaf0a0',
'ext': 'mp4',
- 'title': 're:Re-live: Deutsche Meisterschaften 2020.*Halbfinals',
- 'categories': ['Badminton-Deutschland'],
- 'view_count': int,
- 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
- 'timestamp': int,
- 'upload_date': '20200201',
- 'description': 're:.*', # meaningless description for THIS video
+ 'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1',
+ 'duration': 16106.36,
},
+ 'params': {
+ 'noplaylist': True,
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
+ 'info_dict': {
+ 'id': 'c6e2fdd01f63013854c47054d2ab776f',
+ 'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals',
+ 'description': 'md5:5263ff4c31c04bb780c9f91130b48530',
+ 'duration': 31397,
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- sport_id = mobj.group('sport')
-
- api_url = 'https://proxy.vidibusdynamic.net/ssl/backend.sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
- sport_id, video_id)
- req = sanitized_Request(api_url, headers={
- 'Accept': 'application/vnd.vidibus.v2.html+json',
- 'Referer': url,
- })
- data = self._download_json(req, video_id)
-
+ display_id = self._match_id(url)
+ data = self._download_json(
+ 'https://backend.sportdeutschland.tv/api/permalinks/' + display_id,
+ display_id, query={'access_token': 'true'})
asset = data['asset']
- categories = [data['section']['title']]
-
- formats = []
- smil_url = asset['video']
- if '.smil' in smil_url:
- m3u8_url = smil_url.replace('.smil', '.m3u8')
- formats.extend(
- self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'))
+ title = (asset.get('title') or asset['label']).strip()
+ asset_id = asset.get('id') or asset.get('uuid')
+ info = {
+ 'id': asset_id,
+ 'title': title,
+ 'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'),
+ 'duration': int_or_none(asset.get('seconds')),
+ }
+ videos = asset.get('videos') or []
+ if len(videos) > 1:
+ playlist_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('playlistId', [None])[0]
+ if playlist_id:
+ if self._downloader.params.get('noplaylist'):
+ videos = [videos[int(playlist_id)]]
+ self.to_screen('Downloading just a single video because of --no-playlist')
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % asset_id)
- smil_doc = self._download_xml(
- smil_url, video_id, note='Downloading SMIL metadata')
- base_url_el = smil_doc.find('./head/meta')
- if base_url_el:
- base_url = base_url_el.attrib['base']
- formats.extend([{
- 'format_id': 'rmtp',
- 'url': base_url if base_url_el else n.attrib['src'],
- 'play_path': n.attrib['src'],
- 'ext': 'flv',
- 'preference': -100,
- 'format_note': 'Seems to fail at example stream',
- } for n in smil_doc.findall('./body/video')])
+ def entries():
+ for i, video in enumerate(videos, 1):
+ video_id = video.get('uuid')
+ video_url = video.get('url')
+ if not (video_id and video_url):
+ continue
+ formats = self._extract_m3u8_formats(
+ video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False)
+ if not formats:
+ continue
+ yield {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title + ' - ' + (video.get('label') or 'Teil %d' % i),
+ 'duration': float_or_none(video.get('duration')),
+ }
+ info.update({
+ '_type': 'multi_video',
+ 'entries': entries(),
+ })
else:
- formats.append({'url': smil_url})
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': asset['title'],
- 'thumbnail': asset.get('image'),
- 'description': asset.get('teaser'),
- 'duration': asset.get('duration'),
- 'categories': categories,
- 'view_count': asset.get('views'),
- 'rtmp_live': asset.get('live'),
- 'timestamp': parse_iso8601(asset.get('date')),
- }
+ formats = self._extract_m3u8_formats(
+ videos[0]['url'].replace('.smil', '.m3u8'), asset_id, 'mp4')
+ section_title = strip_or_none(try_get(data, lambda x: x['section']['title']))
+ info.update({
+ 'formats': formats,
+ 'display_id': asset.get('permalink'),
+ 'thumbnail': try_get(asset, lambda x: x['images'][0]),
+ 'categories': [section_title] if section_title else None,
+ 'view_count': int_or_none(asset.get('views')),
+ 'is_live': asset.get('is_live') is True,
+ 'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')),
+ })
+ return info
int_or_none,
remove_start,
smuggle_url,
+ strip_or_none,
try_get,
)
}, {
'url': 'https://tver.jp/episode/79622438',
'only_matching': True,
+ }, {
+ # subtitle = ' '
+ 'url': 'https://tver.jp/corner/f0068870',
+ 'only_matching': True,
}]
_TOKEN = None
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
}
if service == 'cx':
+ title = main['title']
+ subtitle = strip_or_none(main.get('subtitle'))
+ if subtitle:
+ title += ' - ' + subtitle
info.update({
- 'title': main.get('subtitle') or main['title'],
+ 'title': title,
'url': 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (p_id[:4], p_id),
'ie_key': 'FujiTVFODPlus7',
})
from ..utils import (
ExtractorError,
int_or_none,
+ try_get,
+ unified_timestamp,
)
setup = self._parse_json(self._search_regex(
r'setup\s*=\s*({.+});', webpage, 'setup'), video_id)
- video_data = setup.get('video') or {}
+ player_setup = setup.get('player_setup') or setup
+ video_data = player_setup.get('video') or {}
+ formatted_metadata = video_data.get('formatted_metadata') or {}
info = {
'id': video_id,
- 'title': video_data.get('title_short'),
+ 'title': player_setup.get('title') or video_data.get('title_short'),
'description': video_data.get('description_long') or video_data.get('description_short'),
- 'thumbnail': video_data.get('brightcove_thumbnail')
+ 'thumbnail': formatted_metadata.get('thumbnail') or video_data.get('brightcove_thumbnail'),
+ 'timestamp': unified_timestamp(formatted_metadata.get('video_publish_date')),
}
- asset = setup.get('asset') or setup.get('params') or {}
+ asset = try_get(setup, lambda x: x['embed_assets']['chorus'], dict) or {}
formats = []
hls_url = asset.get('hls_url')
if formats:
self._sort_formats(formats)
info['formats'] = formats
+ info['duration'] = int_or_none(asset.get('duration'))
return info
for provider_video_type in ('ooyala', 'youtube', 'brightcove'):
}, {
# Volume embed, Youtube
'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
- 'md5': '4c8f4a0937752b437c3ebc0ed24802b5',
+ 'md5': 'fd19aa0cf3a0eea515d4fd5c8c0e9d68',
'info_dict': {
'id': 'Gy8Md3Eky38',
'ext': 'mp4',
'uploader_id': 'TheVerge',
'upload_date': '20141021',
'uploader': 'The Verge',
+ 'timestamp': 1413907200,
},
'add_ie': ['Youtube'],
'skip': 'similar to the previous test',
# Volume embed, Youtube
'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
'info_dict': {
- 'id': 'YCjDnX-Xzhg',
+ 'id': '22986359b',
'ext': 'mp4',
'title': "Mississippi's laws are so bad that its anti-LGBTQ law isn't needed to allow discrimination",
'description': 'md5:fc1317922057de31cd74bce91eb1c66c',
- 'uploader_id': 'voxdotcom',
'upload_date': '20150915',
- 'uploader': 'Vox',
+ 'timestamp': 1442332800,
+ 'duration': 285,
},
'add_ie': ['Youtube'],
'skip': 'similar to the previous test',
'ext': 'mp4',
'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella',
'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.',
+ 'timestamp': 1402938000,
+ 'upload_date': '20140616',
+ 'duration': 4114,
},
'add_ie': ['VoxMediaVolume'],
}]