(r'player\.load[^;]+src:\s*["\']([^"\']+)',
r'id-video=([^@]+@[^"]+)',
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
- r'data-id="([^"]+)"'),
+ r'data-id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
webpage, 'video id')
return self._make_url_result(video_id)
)
from ..utils import (
ExtractorError,
+ float_or_none,
get_element_by_attribute,
int_or_none,
lowercase_escape,
'title': 'Video by naomipq',
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 0,
'timestamp': 1371748545,
'upload_date': '20130620',
'uploader_id': 'naomipq',
'ext': 'mp4',
'title': 'Video by britneyspears',
'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 0,
'timestamp': 1453760977,
'upload_date': '20160125',
'uploader_id': 'britneyspears',
'title': 'Post by instagram',
'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
},
+ }, {
+ # IGTV
+ 'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
+ 'info_dict': {
+ 'id': 'BkfuX9UB-eK',
+ 'ext': 'mp4',
+ 'title': 'Fingerboarding Tricks with @cass.fb',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 53.83,
+ 'timestamp': 1530032919,
+ 'upload_date': '20180626',
+ 'uploader_id': 'instagram',
+ 'uploader': 'Instagram',
+ 'like_count': int,
+ 'comment_count': int,
+ 'comments': list,
+ 'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
+ }
}, {
'url': 'https://instagram.com/p/-Cmh1cukG2/',
'only_matching': True,
description = try_get(
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
compat_str) or media.get('caption')
+ title = media.get('title')
thumbnail = media.get('display_src') or media.get('display_url')
+ duration = float_or_none(media.get('video_duration'))
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
uploader = media.get('owner', {}).get('full_name')
uploader_id = media.get('owner', {}).get('username')
continue
entries.append({
'id': node.get('shortcode') or node['id'],
- 'title': 'Video %d' % edge_num,
+ 'title': node.get('title') or 'Video %d' % edge_num,
'url': node_video_url,
'thumbnail': node.get('display_url'),
+ 'duration': float_or_none(node.get('video_duration')),
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
'view_count': int_or_none(node.get('video_view_count')),
'id': video_id,
'formats': formats,
'ext': 'mp4',
- 'title': 'Video by %s' % uploader_id,
+ 'title': title or 'Video by %s' % uploader_id,
'description': description,
+ 'duration': duration,
'thumbnail': thumbnail,
'timestamp': timestamp,
'uploader_id': uploader_id,
# coding: utf-8
from __future__ import unicode_literals
-import re
-import time
-
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
ExtractorError,
js_to_json,
- try_get,
- update_url_query,
- urlencode_postdata,
)
class PicartoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
+ _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
_TEST = {
'url': 'https://picarto.tv/Setz',
'info_dict': {
return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- channel_id = mobj.group('id')
-
- metadata = self._download_json(
- 'https://api.picarto.tv/v1/channel/name/' + channel_id,
- channel_id)
-
- if metadata.get('online') is False:
+ channel_id = self._match_id(url)
+
+ data = self._download_json(
+ 'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
+ 'query': '''{
+ channel(name: "%s") {
+ adult
+ id
+ online
+ stream_name
+ title
+ }
+ getLoadBalancerUrl(channel_name: "%s") {
+ url
+ }
+}''' % (channel_id, channel_id),
+ })['data']
+ metadata = data['channel']
+
+ if metadata.get('online') == 0:
raise ExtractorError('Stream is offline', expected=True)
+ title = metadata['title']
cdn_data = self._download_json(
- 'https://picarto.tv/process/channel', channel_id,
- data=urlencode_postdata({'loadbalancinginfo': channel_id}),
- note='Downloading load balancing info')
-
- token = mobj.group('token') or 'public'
- params = {
- 'con': int(time.time() * 1000),
- 'token': token,
- }
+ data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
+ channel_id, 'Downloading load balancing info')
- prefered_edge = cdn_data.get('preferedEdge')
formats = []
-
- for edge in cdn_data['edges']:
- edge_ep = edge.get('ep')
- if not edge_ep or not isinstance(edge_ep, compat_str):
+ for source in (cdn_data.get('source') or []):
+ source_url = source.get('url')
+ if not source_url:
continue
- edge_id = edge.get('id')
- for tech in cdn_data['techs']:
- tech_label = tech.get('label')
- tech_type = tech.get('type')
- preference = 0
- if edge_id == prefered_edge:
- preference += 1
- format_id = []
- if edge_id:
- format_id.append(edge_id)
- if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
- format_id.append('hls')
- formats.extend(self._extract_m3u8_formats(
- update_url_query(
- 'https://%s/hls/%s/index.m3u8'
- % (edge_ep, channel_id), params),
- channel_id, 'mp4', quality=preference,
- m3u8_id='-'.join(format_id), fatal=False))
- continue
- elif tech_type == 'video/mp4' or tech_label == 'MP4':
- format_id.append('mp4')
- formats.append({
- 'url': update_url_query(
- 'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
- params),
- 'format_id': '-'.join(format_id),
- 'quality': preference,
- })
- else:
- # rtmp format does not seem to work
- continue
+ source_type = source.get('type')
+ if source_type == 'html5/application/vnd.apple.mpegurl':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
+ elif source_type == 'html5/video/mp4':
+ formats.append({
+ 'url': source_url,
+ })
self._sort_formats(formats)
mature = metadata.get('adult')
return {
'id': channel_id,
- 'title': self._live_title(metadata.get('title') or channel_id),
+ 'title': self._live_title(title.strip()),
'is_live': True,
- 'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
'channel': channel_id,
+ 'channel_id': metadata.get('id'),
'channel_url': 'https://picarto.tv/%s' % channel_id,
'age_limit': age_limit,
'formats': formats,
class SBSIE(InfoExtractor):
IE_DESC = 'sbs.com.au'
- _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=|/watch/)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'
_TESTS = [{
# Original URL is handled by the generic IE which finds the iframe:
}, {
'url': 'https://www.sbs.com.au/news/embeds/video/1840778819866',
'only_matching': True,
+ }, {
+ 'url': 'https://www.sbs.com.au/ondemand/watch/1698704451971',
+ 'only_matching': True,
}]
def _real_extract(self, url):
merge_dicts,
OnDemandPagedList,
parse_filesize,
+ parse_iso8601,
RegexNotFoundError,
sanitized_Request,
smuggle_url,
expected=True)
raise ExtractorError('Unable to log in')
- def _verify_video_password(self, url, video_id, webpage):
+ def _get_video_password(self):
password = self._downloader.params.get('videopassword')
if password is None:
- raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
- token, vuid = self._extract_xsrft_and_vuid(webpage)
- data = urlencode_postdata({
- 'password': password,
- 'token': token,
- })
+ raise ExtractorError(
+ 'This video is protected by a password, use the --video-password option',
+ expected=True)
+ return password
+
+ def _verify_video_password(self, url, video_id, password, token, vuid):
if url.startswith('http://'):
# vimeo only supports https now, but the user can give an http url
url = url.replace('http://', 'https://')
- password_request = sanitized_Request(url + '/password', data)
- password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- password_request.add_header('Referer', url)
self._set_vimeo_cookie('vuid', vuid)
return self._download_webpage(
- password_request, video_id,
- 'Verifying the password', 'Wrong password')
+ url + '/password', video_id, 'Verifying the password',
+ 'Wrong password', data=urlencode_postdata({
+ 'password': password,
+ 'token': token,
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': url,
+ })
def _extract_xsrft_and_vuid(self, webpage):
xsrft = self._search_regex(
)?
(?:videos?/)?
(?P<id>[0-9]+)
- (?:/[\da-f]+)?
+ (?:/(?P<unlisted_hash>[\da-f]{10}))?
/?(?:[?&].*)?(?:[#].*)?$
'''
IE_NAME = 'vimeo'
'id': '54469442',
'ext': 'mp4',
'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
- 'uploader': 'The BLN & Business of Software',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/theblnbusinessofsoftware',
- 'uploader_id': 'theblnbusinessofsoftware',
+ 'uploader': 'Business of Software',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/businessofsoftware',
+ 'uploader_id': 'businessofsoftware',
'duration': 3610,
'description': None,
},
'skip_download': True,
},
'expected_warnings': ['Unable to download JSON metadata'],
+ 'skip': 'this page is no longer available.',
},
{
'url': 'http://player.vimeo.com/video/68375962',
return urls[0] if urls else None
def _verify_player_video_password(self, url, video_id, headers):
- password = self._downloader.params.get('videopassword')
- if password is None:
- raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
+ password = self._get_video_password()
data = urlencode_postdata({
'password': base64.b64encode(password.encode()),
})
if 'Referer' not in headers:
headers['Referer'] = url
- channel_id = self._search_regex(
- r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
-
# Extract ID from URL
- video_id = self._match_id(url)
+ video_id, unlisted_hash = re.match(self._VALID_URL, url).groups()
+ if unlisted_hash:
+ token = self._download_json(
+ 'https://vimeo.com/_rv/jwt', video_id, headers={
+ 'X-Requested-With': 'XMLHttpRequest'
+ })['token']
+ video = self._download_json(
+ 'https://api.vimeo.com/videos/%s:%s' % (video_id, unlisted_hash),
+ video_id, headers={
+ 'Authorization': 'jwt ' + token,
+ }, query={
+ 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
+ })
+ info = self._parse_config(self._download_json(
+ video['config_url'], video_id), video_id)
+ self._vimeo_sort_formats(info['formats'])
+ get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
+ info.update({
+ 'description': video.get('description'),
+ 'license': video.get('license'),
+ 'release_timestamp': get_timestamp('release'),
+ 'timestamp': get_timestamp('created'),
+ 'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])),
+ })
+ connections = try_get(
+ video, lambda x: x['metadata']['connections'], dict) or {}
+ for k in ('comment', 'like'):
+ info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total']))
+ return info
+
orig_url = url
is_pro = 'vimeopro.com/' in url
is_player = '://player.vimeo.com/video/' in url
if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
if '_video_password_verified' in data:
raise ExtractorError('video password verification failed!')
- self._verify_video_password(redirect_url, video_id, webpage)
+ video_password = self._get_video_password()
+ token, vuid = self._extract_xsrft_and_vuid(webpage)
+ self._verify_video_password(
+ redirect_url, video_id, video_password, token, vuid)
return self._real_extract(
smuggle_url(redirect_url, {'_video_password_verified': 'verified'}))
else:
r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
webpage, 'license', default=None, group='license')
+ channel_id = self._search_regex(
+ r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
info_dict = {
def _real_extract(self, url):
page_url, video_id = re.match(self._VALID_URL, url).groups()
- clip_data = self._download_json(
- page_url.replace('/review/', '/review/data/'),
- video_id)['clipData']
- config_url = clip_data['configUrl']
+ data = self._download_json(
+ page_url.replace('/review/', '/review/data/'), video_id)
+ if data.get('isLocked') is True:
+ video_password = self._get_video_password()
+ viewer = self._download_json(
+ 'https://vimeo.com/_rv/viewer', video_id)
+ webpage = self._verify_video_password(
+ 'https://vimeo.com/' + video_id, video_id,
+ video_password, viewer['xsrft'], viewer['vuid'])
+ clip_page_config = self._parse_json(self._search_regex(
+ r'window\.vimeo\.clip_page_config\s*=\s*({.+?});',
+ webpage, 'clip page config'), video_id)
+ config_url = clip_page_config['player']['config_url']
+ clip_data = clip_page_config.get('clip') or {}
+ else:
+ clip_data = data['clipData']
+ config_url = clip_data['configUrl']
config = self._download_json(config_url, video_id)
info_dict = self._parse_config(config, video_id)
source_format = self._extract_original_format(
raise ExtractorError('Unable to log in', expected=True)
def _call_api(self, path_template, video_id, fields=None, limit=None):
- query = {'appId': self._APP_ID, 'gcc': 'KR'}
+ query = {'appId': self._APP_ID, 'gcc': 'KR', 'platformType': 'PC'}
if fields:
query['fields'] = fields
if limit:
return True
- def _download_webpage_handle(self, *args, **kwargs):
- query = kwargs.get('query', {}).copy()
- kwargs['query'] = query
- return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
- *args, **compat_kwargs(kwargs))
+ def _initialize_consent(self):
+ cookies = self._get_cookies('https://www.youtube.com/')
+ if cookies.get('__Secure-3PSID'):
+ return
+ consent_id = None
+ consent = cookies.get('CONSENT')
+ if consent:
+ if 'YES' in consent.value:
+ return
+ consent_id = self._search_regex(
+ r'PENDING\+(\d+)', consent.value, 'consent', default=None)
+ if not consent_id:
+ consent_id = random.randint(100, 999)
+ self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
def _real_initialize(self):
+ self._initialize_consent()
if self._downloader is None:
return
if not self._login():
base_url = self.http_scheme() + '//www.youtube.com/'
webpage_url = base_url + 'watch?v=' + video_id
webpage = self._download_webpage(
- webpage_url + '&has_verified=1&bpctr=9999999999',
- video_id, fatal=False)
+ webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
player_response = None
if webpage:
info['channel'] = get_text(try_get(
vsir,
lambda x: x['owner']['videoOwnerRenderer']['title'],
- compat_str))
+ dict))
rows = try_get(
vsir,
lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
'richItemRenderer': (extract_entries, 'contents'), # for hashtag
'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
}
+ on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
continuation_items = try_get(
- response,
- lambda x: dict_get(x, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))[0]['appendContinuationItemsAction']['continuationItems'], list)
+ on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
video_items_renderer = None
for key, value in continuation_item.items():