except ImportError: # Python 2
import Cookie as compat_cookies
# Compat shim: on Python 2, SimpleCookie.load() expects a native (byte) str,
# so wrap it to coerce unicode input first; on Python 3 the stdlib class is
# used unchanged.
if sys.version_info[0] == 2:
    class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
        def load(self, rawdata):
            # coerce unicode cookie data to a native str before delegating
            if isinstance(rawdata, compat_str):
                rawdata = str(rawdata)
            return super(compat_cookies_SimpleCookie, self).load(rawdata)
else:
    compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
+
try:
import html.entities as compat_html_entities
except ImportError: # Python 2
'compat_cookiejar',
'compat_cookiejar_Cookie',
'compat_cookies',
+ 'compat_cookies_SimpleCookie',
'compat_ctypes_WINFUNCTYPE',
'compat_etree_Element',
'compat_etree_fromstring',
from __future__ import unicode_literals
-from .cbs import CBSBaseIE
+import re
+# from .cbs import CBSBaseIE
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ try_get,
+)
class CBSSportsEmbedIE(InfoExtractor):
    """Extractor for cbssports.com / embed.247sports.com player embed URLs.

    The embed URL encodes either a video UUID (``ids%3D...``) or a numeric
    pcid (``pcid%3D...``); either one can be used to look the video up via
    the cbssports.com content API.
    """
    IE_NAME = 'cbssports:embed'
    _VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
        (?:
            ids%3D(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})|
            pcid%3D(?P<pcid>\d+)
        )'''

    _TESTS = [{
        'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod',
        'only_matching': True,
    }, {
        'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        uuid, pcid = re.match(self._VALID_URL, url).groups()
        # The API accepts either identifier; prefer the UUID when present.
        query = {'id': uuid} if uuid else {'pcid': pcid}
        video = self._download_json(
            'https://www.cbssports.com/api/content/video/',
            uuid or pcid, query=query)[0]
        video_id = video['id']
        title = video['title']
        metadata = video.get('metaData') or {}

        # The first file entry is an HLS master playlist.
        formats = self._extract_m3u8_formats(
            metadata['files'][0]['url'], video_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False)
        self._sort_formats(formats)

        image = video.get('image')
        thumbnails = None
        if image:
            image_path = image.get('path')
            if image_path:
                thumbnails = [{
                    'url': image_path,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                    'filesize': int_or_none(image.get('size')),
                }]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': video.get('description'),
            'timestamp': int_or_none(try_get(video, lambda x: x['dateCreated']['epoch'])),
            'duration': int_or_none(metadata.get('duration')),
        }
+
+
class CBSSportsBaseIE(InfoExtractor):
    """Shared extraction for pages that embed the CBS Sports player: find the
    player iframe in the page and delegate to CBSSportsEmbedIE."""

    def _real_extract(self, url):
        page_id = self._match_id(url)
        page = self._download_webpage(url, page_id)
        embed_url = self._search_regex(
            r'<iframe[^>]+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"',
            page, 'embed url')
        return self.url_result(embed_url, ie=CBSSportsEmbedIE.ie_key())
+
+
# Canonical cbssports.com video pages; the actual extraction is done by
# CBSSportsEmbedIE via the iframe located in CBSSportsBaseIE._real_extract.
class CBSSportsIE(CBSSportsBaseIE):
    IE_NAME = 'cbssports'
    _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.cbssports.com/college-football/video/cover-3-stanford-spring-gleaning/',
        'info_dict': {
            'id': 'b56c03a6-231a-4bbe-9c55-af3c8a8e9636',
            'ext': 'mp4',
            'title': 'Cover 3: Stanford Spring Gleaning',
            'description': 'The Cover 3 crew break down everything you need to know about the Stanford Cardinal this spring.',
            'timestamp': 1617218398,
            'upload_date': '20210331',
            'duration': 502,
        },
    }]
+
+
# 247sports.com video pages; same iframe-based delegation as CBSSportsIE
# (the site shares the CBS Sports embedded player).
class TwentyFourSevenSportsIE(CBSSportsBaseIE):
    IE_NAME = '247sports'
    _VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://247sports.com/Video/2021-QB-Jake-Garcia-senior-highlights-through-five-games-10084854/',
        'info_dict': {
            'id': '4f1265cb-c3b5-44a8-bb1d-1914119a0ccc',
            'ext': 'mp4',
            'title': '2021 QB Jake Garcia senior highlights through five games',
            'description': 'md5:8cb67ebed48e2e6adac1701e0ff6e45b',
            'timestamp': 1607114223,
            'upload_date': '20201204',
            'duration': 208,
        },
    }]
from ..compat import (
compat_cookiejar_Cookie,
- compat_cookies,
+ compat_cookies_SimpleCookie,
compat_etree_Element,
compat_etree_fromstring,
compat_getpass,
def extract_video_object(e):
assert e['@type'] == 'VideoObject'
+ author = e.get('author')
info.update({
'url': url_or_none(e.get('contentUrl')),
'title': unescapeHTML(e.get('name')),
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
- 'uploader': str_or_none(e.get('author')),
+ # author can be an instance of 'Organization' or 'Person' types.
+ # both types can have 'name' property(inherited from 'Thing' type). [1]
+ # however some websites are using 'Text' type instead.
+ # 1. https://schema.org/VideoObject
+ 'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
'filesize': float_or_none(e.get('contentSize')),
'tbr': int_or_none(e.get('bitrate')),
'width': int_or_none(e.get('width')),
self._downloader.cookiejar.set_cookie(cookie)
def _get_cookies(self, url):
- """ Return a compat_cookies.SimpleCookie with the cookies for the url """
+ """ Return a compat_cookies_SimpleCookie with the cookies for the url """
req = sanitized_Request(url)
self._downloader.cookiejar.add_cookie_header(req)
- cookie = req.get_header('Cookie')
- if cookie and sys.version_info[0] == 2:
- cookie = str(cookie)
- return compat_cookies.SimpleCookie(cookie)
+ return compat_cookies_SimpleCookie(req.get_header('Cookie'))
def _apply_first_set_cookie_header(self, url_handle, cookie):
"""
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, error), expected=True)
- def _call_api(self, path, video_id):
+ def _call_api(self, path, video_id, query=None):
headers = {}
if self._auth_token:
headers['X-Auth-Token'] = self._auth_token
result = self._download_json(
- self._API_BASE_URL + path, video_id, headers=headers)
+ self._API_BASE_URL + path, video_id, headers=headers, query=query)
self._handle_errors(result)
return result['data']
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
_TEST = {
'url': 'https://app.curiositystream.com/video/2',
- 'md5': '262bb2f257ff301115f1973540de8983',
'info_dict': {
'id': '2',
'ext': 'mp4',
'title': 'How Did You Develop The Internet?',
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
- }
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ # m3u8 download
+ 'skip_download': True,
+ },
}
def _real_extract(self, url):
video_id = self._match_id(url)
- media = self._call_api('media/' + video_id, video_id)
- title = media['title']
formats = []
- for encoding in media.get('encodings', []):
- m3u8_url = encoding.get('master_playlist_url')
- if m3u8_url:
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- encoding_url = encoding.get('url')
- file_url = encoding.get('file_url')
- if not encoding_url and not file_url:
- continue
- f = {
- 'width': int_or_none(encoding.get('width')),
- 'height': int_or_none(encoding.get('height')),
- 'vbr': int_or_none(encoding.get('video_bitrate')),
- 'abr': int_or_none(encoding.get('audio_bitrate')),
- 'filesize': int_or_none(encoding.get('size_in_bytes')),
- 'vcodec': encoding.get('video_codec'),
- 'acodec': encoding.get('audio_codec'),
- 'container': encoding.get('container_type'),
- }
- for f_url in (encoding_url, file_url):
- if not f_url:
+ for encoding_format in ('m3u8', 'mpd'):
+ media = self._call_api('media/' + video_id, video_id, query={
+ 'encodingsNew': 'true',
+ 'encodingsFormat': encoding_format,
+ })
+ for encoding in media.get('encodings', []):
+ playlist_url = encoding.get('master_playlist_url')
+ if encoding_format == 'm3u8':
+ # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
+ formats.extend(self._extract_m3u8_formats(
+ playlist_url, video_id, 'mp4',
+ m3u8_id='hls', fatal=False))
+ elif encoding_format == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ playlist_url, video_id, mpd_id='dash', fatal=False))
+ encoding_url = encoding.get('url')
+ file_url = encoding.get('file_url')
+ if not encoding_url and not file_url:
continue
- fmt = f.copy()
- rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
- if rtmp:
- fmt.update({
- 'url': rtmp.group('url'),
- 'play_path': rtmp.group('playpath'),
- 'app': rtmp.group('app'),
- 'ext': 'flv',
- 'format_id': 'rtmp',
- })
- else:
- fmt.update({
- 'url': f_url,
- 'format_id': 'http',
- })
- formats.append(fmt)
+ f = {
+ 'width': int_or_none(encoding.get('width')),
+ 'height': int_or_none(encoding.get('height')),
+ 'vbr': int_or_none(encoding.get('video_bitrate')),
+ 'abr': int_or_none(encoding.get('audio_bitrate')),
+ 'filesize': int_or_none(encoding.get('size_in_bytes')),
+ 'vcodec': encoding.get('video_codec'),
+ 'acodec': encoding.get('audio_codec'),
+ 'container': encoding.get('container_type'),
+ }
+ for f_url in (encoding_url, file_url):
+ if not f_url:
+ continue
+ fmt = f.copy()
+ rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
+ if rtmp:
+ fmt.update({
+ 'url': rtmp.group('url'),
+ 'play_path': rtmp.group('playpath'),
+ 'app': rtmp.group('app'),
+ 'ext': 'flv',
+ 'format_id': 'rtmp',
+ })
+ else:
+ fmt.update({
+ 'url': f_url,
+ 'format_id': 'http',
+ })
+ formats.append(fmt)
self._sort_formats(formats)
+ title = media['title']
+
subtitles = {}
for closed_caption in media.get('closed_captions', []):
sub_url = closed_caption.get('file')
'title': 'Curious Minds: The Internet',
'description': 'How is the internet shaping our lives in the 21st Century?',
},
- 'playlist_mincount': 17,
+ 'playlist_mincount': 16,
}, {
'url': 'https://curiositystream.com/series/2',
'only_matching': True,
CBSNewsIE,
CBSNewsLiveVideoIE,
)
-from .cbssports import CBSSportsIE
+from .cbssports import (
+ CBSSportsEmbedIE,
+ CBSSportsIE,
+ TwentyFourSevenSportsIE,
+)
from .ccc import (
CCCIE,
CCCPlaylistIE,
LimelightChannelIE,
LimelightChannelListIE,
)
-from .line import LineTVIE
+from .line import (
+ LineTVIE,
+ LineLiveIE,
+ LineLiveChannelIE,
+)
from .linkedin import (
LinkedInLearningIE,
LinkedInLearningCourseIE,
MangomoloLiveIE,
)
from .manyvids import ManyVidsIE
+from .maoritv import MaoriTVIE
from .markiza import (
MarkizaIE,
MarkizaPageIE,
'id': '196219',
'display_id': 'stories-from-emona-i',
'ext': 'flac',
- 'title': 'Maya Filipič - Stories from Emona I',
- 'artist': 'Maya Filipič',
+ # 'title': 'Maya Filipič - Stories from Emona I',
+ 'title': 'Stories from Emona I',
+ # 'artist': 'Maya Filipič',
'track': 'Stories from Emona I',
'duration': 210,
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1217438117,
'upload_date': '20080730',
+ 'license': 'by-nc-nd',
+ 'view_count': int,
+ 'like_count': int,
+ 'average_rating': int,
+ 'tags': ['piano', 'peaceful', 'newage', 'strings', 'upbeat'],
}
}, {
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
'only_matching': True,
}]
+ def _call_api(self, resource, resource_id):
+ path = '/api/%ss' % resource
+ rand = compat_str(random.random())
+ return self._download_json(
+ 'https://www.jamendo.com' + path, resource_id, query={
+ 'id[]': resource_id,
+ }, headers={
+ 'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
+ })[0]
+
def _real_extract(self, url):
track_id, display_id = self._VALID_URL_RE.match(url).groups()
- webpage = self._download_webpage(
- 'https://www.jamendo.com/track/' + track_id, track_id)
- models = self._parse_json(self._html_search_regex(
- r"data-bundled-models='([^']+)",
- webpage, 'bundled models'), track_id)
- track = models['track']['models'][0]
+ # webpage = self._download_webpage(
+ # 'https://www.jamendo.com/track/' + track_id, track_id)
+ # models = self._parse_json(self._html_search_regex(
+ # r"data-bundled-models='([^']+)",
+ # webpage, 'bundled models'), track_id)
+ # track = models['track']['models'][0]
+ track = self._call_api('track', track_id)
title = track_name = track['name']
- get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
- artist = get_model('artist')
- artist_name = artist.get('name')
- if artist_name:
- title = '%s - %s' % (artist_name, title)
- album = get_model('album')
+ # get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
+ # artist = get_model('artist')
+ # artist_name = artist.get('name')
+ # if artist_name:
+ # title = '%s - %s' % (artist_name, title)
+ # album = get_model('album')
formats = [{
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
urls = []
thumbnails = []
- for _, covers in track.get('cover', {}).items():
+ for covers in (track.get('cover') or {}).values():
for cover_id, cover_url in covers.items():
if not cover_url or cover_url in urls:
continue
})
tags = []
- for tag in track.get('tags', []):
+ for tag in (track.get('tags') or []):
tag_name = tag.get('name')
if not tag_name:
continue
tags.append(tag_name)
stats = track.get('stats') or {}
+ license = track.get('licenseCC') or []
return {
'id': track_id,
'title': title,
'description': track.get('description'),
'duration': int_or_none(track.get('duration')),
- 'artist': artist_name,
+ # 'artist': artist_name,
'track': track_name,
- 'album': album.get('name'),
+ # 'album': album.get('name'),
'formats': formats,
- 'license': '-'.join(track.get('licenseCC', [])) or None,
+ 'license': '-'.join(license) if license else None,
'timestamp': int_or_none(track.get('dateCreated')),
'view_count': int_or_none(stats.get('listenedAll')),
'like_count': int_or_none(stats.get('favorited')),
}
-class JamendoAlbumIE(InfoExtractor):
+class JamendoAlbumIE(JamendoIE):
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
- _TEST = {
+ _TESTS = [{
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
'info_dict': {
'id': '121486',
'params': {
'playlistend': 2
}
- }
-
- def _call_api(self, resource, resource_id):
- path = '/api/%ss' % resource
- rand = compat_str(random.random())
- return self._download_json(
- 'https://www.jamendo.com' + path, resource_id, query={
- 'id[]': resource_id,
- }, headers={
- 'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
- })[0]
+ }]
def _real_extract(self, url):
album_id = self._match_id(url)
album_name = album.get('name')
entries = []
- for track in album.get('tracks', []):
+ for track in (album.get('tracks') or []):
track_id = track.get('id')
if not track_id:
continue
import re
from .common import InfoExtractor
-from ..utils import js_to_json
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ str_or_none,
+)
class LineTVIE(InfoExtractor):
for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
'view_count': video_info.get('meta', {}).get('count'),
}
+
+
class LineLiveBaseIE(InfoExtractor):
    # All LINE LIVE endpoints hang off this channel API root.
    _API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'

    def _parse_broadcast_item(self, item):
        """Build the info-dict fields shared by a single broadcast page and a
        channel listing entry from one broadcast JSON item."""
        broadcast_id = compat_str(item['id'])
        title = item['title']
        is_live = item.get('isBroadcastingNow')

        # keep only thumbnail entries that actually carry a URL
        thumbnails = [{
            'id': thumbnail_id,
            'url': thumbnail_url,
        } for thumbnail_id, thumbnail_url in (item.get('thumbnailURLs') or {}).items()
            if thumbnail_url]

        channel_info = item.get('channel') or {}
        channel_id = str_or_none(channel_info.get('id'))
        channel_url = None
        if channel_id:
            channel_url = 'https://live.line.me/channels/' + channel_id

        return {
            'id': broadcast_id,
            'title': self._live_title(title) if is_live else title,
            'thumbnails': thumbnails,
            'timestamp': int_or_none(item.get('createdAt')),
            'channel': channel_info.get('name'),
            'channel_id': channel_id,
            'channel_url': channel_url,
            'duration': int_or_none(item.get('archiveDuration')),
            'view_count': int_or_none(item.get('viewerCount')),
            'comment_count': int_or_none(item.get('chatCount')),
            'is_live': is_live,
        }
+
+
class LineLiveIE(LineLiveBaseIE):
    """Single LINE LIVE broadcast, either currently live or archived."""
    _VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://live.line.me/channels/4867368/broadcast/16331360',
        'md5': 'bc931f26bf1d4f971e3b0982b3fab4a3',
        'info_dict': {
            'id': '16331360',
            'title': '振りコピ講座😙😙😙',
            'ext': 'mp4',
            'timestamp': 1617095132,
            'upload_date': '20210330',
            'channel': '白川ゆめか',
            'channel_id': '4867368',
            'view_count': int,
            'comment_count': int,
            'is_live': False,
        }
    }, {
        # archiveStatus == 'DELETED'
        'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        channel_id, broadcast_id = re.match(self._VALID_URL, url).groups()
        broadcast = self._download_json(
            self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
            broadcast_id)
        item = broadcast['item']
        info = self._parse_broadcast_item(item)
        # live streams use the generic m3u8 protocol; archives download natively
        protocol = 'm3u8' if info['is_live'] else 'm3u8_native'
        formats = []
        for quality, stream_url in (broadcast.get(('live' if info['is_live'] else 'archived') + 'HLSURLs') or {}).items():
            if not stream_url:
                continue
            if quality == 'abr':
                # adaptive master playlist: expand into per-variant formats
                formats.extend(self._extract_m3u8_formats(
                    stream_url, broadcast_id, 'mp4', protocol,
                    m3u8_id='hls', fatal=False))
                continue
            f = {
                'ext': 'mp4',
                'format_id': 'hls-' + quality,
                'protocol': protocol,
                'url': stream_url,
            }
            if not quality.isdigit():
                # non-numeric variant keys are treated as audio-only renditions
                f['vcodec'] = 'none'
            formats.append(f)
        if not formats:
            archive_status = item.get('archiveStatus')
            if archive_status != 'ARCHIVED':
                # guard against a missing archiveStatus so we raise a clear,
                # expected error instead of AttributeError on None.lower()
                raise ExtractorError(
                    'this video has been ' + (archive_status or 'deleted').lower(),
                    expected=True)
        self._sort_formats(formats)
        info['formats'] = formats
        return info
+
+
class LineLiveChannelIE(LineLiveBaseIE):
    """Playlist of all archived broadcasts of a LINE LIVE channel."""
    _VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
    _TEST = {
        'url': 'https://live.line.me/channels/5893542',
        'info_dict': {
            'id': '5893542',
            'title': 'いくらちゃん',
            'description': 'md5:c3a4af801f43b2fac0b02294976580be',
        },
        'playlist_mincount': 29
    }

    def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
        """Yield url-type entries for every archived broadcast, following the
        API's lastId-based pagination."""
        while True:
            last_id = None  # id of the last usable broadcast on this page
            for row in (archived_broadcasts.get('rows') or []):
                share_url = str_or_none(row.get('shareURL'))
                if not share_url:
                    continue
                info = self._parse_broadcast_item(row)
                last_id = info['id']
                info.update({
                    '_type': 'url',
                    'url': share_url,
                    'ie_key': LineLiveIE.ie_key(),
                })
                yield info
            # Stop when the API reports no next page, or when the current page
            # produced no usable rows — otherwise we would paginate with an
            # undefined (first page) or stale (later pages) lastId and either
            # crash or re-request the same page forever.
            if not archived_broadcasts.get('hasNextPage') or last_id is None:
                return
            archived_broadcasts = self._download_json(
                self._API_BASE_URL + channel_id + '/archived_broadcasts',
                channel_id, query={
                    'lastId': last_id,
                })

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
        return self.playlist_result(
            self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
            channel_id, channel.get('title'), channel.get('information'))
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
class MaoriTVIE(InfoExtractor):
    """Māori Television show pages; the videos themselves are served from
    Brightcove, so extraction is delegated to BrightcoveNew."""
    _VALID_URL = r'https?://(?:www\.)?maoritelevision\.com/shows/(?:[^/]+/)+(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.maoritelevision.com/shows/korero-mai/S01E054/korero-mai-series-1-episode-54',
        'md5': '5ade8ef53851b6a132c051b1cd858899',
        'info_dict': {
            'id': '4774724855001',
            'ext': 'mp4',
            'title': 'Kōrero Mai, Series 1 Episode 54',
            'upload_date': '20160226',
            'timestamp': 1456455018,
            'description': 'md5:59bde32fd066d637a1a55794c56d8dcb',
            'uploader_id': '1614493167001',
        },
    }
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=%s'

    def _real_extract(self, url):
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        brightcove_id = self._search_regex(
            r'data-main-video-id=["\'](\d+)', page, 'brightcove id')
        brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id
        return self.url_result(brightcove_url, 'BrightcoveNew', brightcove_id)
@staticmethod
def _extract_child_with_type(parent, t):
- return next(c for c in parent['children'] if c.get('type') == t)
+ for c in parent['children']:
+ if c.get('type') == t:
+ return c
def _extract_mgid(self, webpage):
try:
data = self._parse_json(self._search_regex(
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
main_container = self._extract_child_with_type(data, 'MainContainer')
- video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
+ ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
+ video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
mgid = video_player['props']['media']['video']['config']['uri']
return mgid
formats = []
def add_format(format_url, height=None):
+ ext = determine_ext(format_url)
+ if ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ return
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ return
tbr = None
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
if mobj:
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
if upload_date:
upload_date = upload_date.replace('/', '')
- ext = determine_ext(video_url)
- if ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- video_url, video_id, mpd_id='dash', fatal=False))
- continue
- elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- continue
if '/video/get_media' in video_url:
medias = self._download_json(video_url, video_id, fatal=False)
if isinstance(medias, list):
# request basic data
basic_data_params = {
'vid': video_id,
- 'ccode': '0590',
+ 'ccode': '0532',
'client_ip': '192.168.1.1',
'utid': cna,
'client_ts': time.time() / 1000,
'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
'only_matching': True,
},
+ {
+ # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
+ 'url': 'cBvYw8_A0vQ',
+ 'info_dict': {
+ 'id': 'cBvYw8_A0vQ',
+ 'ext': 'mp4',
+ 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
+ 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
+ 'upload_date': '20201120',
+ 'uploader': 'Walk around Japan',
+ 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
]
def __init__(self, *args, **kwargs):
def get_text(x):
if not x:
return
- return x.get('simpleText') or ''.join([r['text'] for r in x['runs']])
+ text = x.get('simpleText')
+ if text and isinstance(text, compat_str):
+ return text
+ runs = x.get('runs')
+ if not isinstance(runs, list):
+ return
+ return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
search_meta = (
lambda x: self._html_search_meta(x, webpage, default=None)) \