DWIE,
DWArticleIE,
)
-from .eagleplatform import EaglePlatformIE
+from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE
from .ebaumsworld import EbaumsWorldIE
from .echomsk import EchoMskIE
from .egghead import (
SharedIE,
VivoIE,
)
+from .sharevideos import ShareVideosEmbedIE
from .shemaroome import ShemarooMeIE
from .showroomlive import ShowRoomLiveIE
from .simplecast import (
class AdobeTVVideoIE(AdobeTVBaseIE):
IE_NAME = 'adobetv:video'
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]']
_TEST = {
# From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
-import re
import urllib.parse
from .common import InfoExtractor
ExtractorError,
determine_ext,
scale_thumbnails_to_max_format_width,
- unescapeHTML,
)
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle')
- embed_urls = list(Ant1NewsGrEmbedIE._extract_urls(webpage))
+ embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage))
if not embed_urls:
raise ExtractorError('no videos found for %s' % video_id, expected=True)
return self.playlist_from_matches(
IE_DESC = 'ant1news.gr embedded videos'
_BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
_VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
_API_PATH = '/news/templates/data/jsonPlayer'
_TESTS = [{
},
}]
- @classmethod
- def _extract_urls(cls, webpage):
- _EMBED_URL_RE = rf'{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'
- _EMBED_RE = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_EMBED_URL_RE})(?P=_q1)'
- for mobj in re.finditer(_EMBED_RE, webpage):
- url = unescapeHTML(mobj.group('url'))
- if not cls.suitable(url):
- continue
- yield url
-
def _real_extract(self, url):
video_id = self._match_id(url)
'subtitles': subtitles,
}
-    @staticmethod
-    def _extract_urls(ie, webpage, video_id):
-        entries = []
-        for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage):
-            anvplayer_data = ie._parse_json(
-                mobj.group('anvp'), video_id, transform_source=unescapeHTML,
-                fatal=False)
-            if not anvplayer_data:
-                continue
-            video = anvplayer_data.get('video')
-            if not isinstance(video, compat_str) or not video.isdigit():
-                continue
-            access_key = anvplayer_data.get('accessKey')
-            if not access_key:
-                mcp = anvplayer_data.get('mcp')
-                if mcp:
-                    access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get(
-                        mcp.lower())
+    @classmethod
+    def _extract_from_webpage(cls, url, webpage):
+        """Yield a url_result for every Anvato player config matched by _ANVP_RE in webpage.
+
+        Matches whose payload is not valid JSON, is not a JSON object, has a
+        non-numeric video id or has no resolvable access key are skipped.
+        """
+        for mobj in re.finditer(cls._ANVP_RE, webpage):
+            try:
+                # Unescape the raw text first and parse afterwards, like the previous
+                # transform_source=unescapeHTML; unescapeHTML is a text transform
+                anvplayer_data = json.loads(unescapeHTML(mobj.group('anvp')))
+            except json.JSONDecodeError:
+                # Best effort, equivalent to the previous fatal=False
+                continue
+            if not isinstance(anvplayer_data, dict):
+                continue
+            video_id, access_key = anvplayer_data.get('video'), anvplayer_data.get('accessKey')
             if not access_key:
+                access_key = cls._MCP_TO_ACCESS_KEY_TABLE.get((anvplayer_data.get('mcp') or '').lower())
+            # Only str has isdigit(); a numeric JSON value must not crash the scan
+            if not isinstance(video_id, str) or not video_id.isdigit() or not access_key:
                 continue
-            entries.append(ie.url_result(
-                'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(),
-                video_id=video))
-        return entries
+            yield cls.url_result(f'anvato:{access_key}:{video_id}', AnvatoIE, video_id)
def _extract_anvato_videos(self, webpage, video_id):
anvplayer_data = self._parse_json(
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
class APAIE(InfoExtractor):
_VALID_URL = r'(?P<base_url>https?://[^/]+\.apa\.at)/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1']
_TESTS = [{
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1',
- webpage)]
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id, base_url = mobj.group('id', 'base_url')
class AparatIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
+ _EMBED_REGEX = [r'<iframe .*?src="(?P<url>http://www\.aparat\.com/video/[^"]+)"']
_TESTS = [{
'url': 'http://www.aparat.com/v/wP8On',
], 'video-api-cdn.%s.arcpublishing.com/api'),
]
- @staticmethod
- def _extract_urls(webpage):
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
entries = []
# https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
-import re
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)
)
'''
+ # See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1']
_TESTS = [{
'url': 'https://video.qbrick.com/play2/embed/player?accountId=1034090&mediaId=d8ab4607-00090107-aab86310',
'md5': '97f117754e5f3c020f5f26da4a44ebaf',
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- # See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1',
- webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
class ArteTVEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
+ _EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1']
_TESTS = [{
'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
'info_dict': {
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [url for _, url in re.findall(
- r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
- webpage)]
-
def _real_extract(self, url):
qs = parse_qs(url)
json_url = qs['json_url'][0]
class BandcampIE(InfoExtractor):
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
+ _EMBED_REGEX = [r'<meta property="og:url"[^>]*?content="(?P<url>.*?bandcamp\.com.*?)"']
_TESTS = [{
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
'md5': 'c557841d5e50261777a6585648adf439',
)
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
''' % _ID_REGEX
+ _EMBED_REGEX = [r'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)']
_LOGIN_URL = 'https://account.bbc.com/signin'
_NETRC_MACHINE = 'bbc'
class BitChuteIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
+ _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
'md5': '7e427d7ed7af5a75b5855705ec750e2b',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>%s)' % BitChuteIE._VALID_URL,
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
-import re
-
from ..utils import (
mimetype2ext,
parse_duration,
class BloggerIE(InfoExtractor):
IE_NAME = 'blogger.com'
_VALID_URL = r'https?://(?:www\.)?blogger\.com/video\.g\?token=(?P<id>.+)'
- _VALID_EMBED = r'''<iframe[^>]+src=["']((?:https?:)?//(?:www\.)?blogger\.com/video\.g\?token=[^"']+)["']'''
+ _EMBED_REGEX = [r'''<iframe[^>]+src=["'](?P<url>(?:https?:)?//(?:www\.)?blogger\.com/video\.g\?token=[^"']+)["']''']
_TESTS = [{
'url': 'https://www.blogger.com/video.g?token=AD6v5dzEe9hfcARr5Hlq1WTkYy6t-fXH3BBahVhGvVHe5szdEUBEloSEDSTA8-b111089KbfWuBvTN7fnbxMtymsHhXAXwVvyzHH4Qch2cfLQdGxKQrrEuFpC1amSl_9GuLWODjPgw',
'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac',
}
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(BloggerIE._VALID_EMBED, webpage)
-
def _real_extract(self, url):
token_id = self._match_id(url)
webpage = self._download_webpage(url, token_id)
continue
entries.append(self.url_result(video['url']))
- facebook_urls = FacebookIE._extract_urls(webpage)
+ facebook_urls = FacebookIE._extract_embed_urls(url, webpage)
entries.extend([
self.url_result(facebook_url)
for facebook_url in facebook_urls])
IE_DESC = 'Channel 9'
IE_NAME = 'channel9'
_VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b']
_TESTS = [{
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b',
- webpage)
-
def _extract_list(self, video_id, rss_url=None):
if not rss_url:
rss_url = self._RSS_URL % video_id
class CinchcastIE(InfoExtractor):
_VALID_URL = r'https?://player\.cinchcast\.com/.*?(?:assetId|show_id)=(?P<id>[0-9]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1']
+
_TESTS = [{
'url': 'http://player.cinchcast.com/?show_id=5258197&platformId=1&assetType=single',
'info_dict': {
import base64
-import re
from .common import InfoExtractor
)
(?P<id>%s)
''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
+ _EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1']
_TESTS = [{
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
'info_dict': {
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s(?:%s).*?)\1' % (CloudflareStreamIE._EMBED_RE, CloudflareStreamIE._ID_RE),
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net'
class StopExtraction(Exception):
pass
+    @classmethod
+    def _extract_url(cls, webpage):  # TODO: Remove
+        """Return the first embed URL found in webpage, or None. Only for compatibility with some older extractors"""
+        # No page URL is available to this legacy entry point, hence None
+        embed_urls = cls._extract_embed_urls(None, webpage) or []
+        for first_url in embed_urls:
+            return first_url
+        return None
+
class SearchInfoExtractor(InfoExtractor):
"""
)''' % '|'.join(_SITES.keys())
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
- EMBED_URL = r'(?:https?:)?//player(?:-backend)?\.(?:%s)\.com/(?:embed(?:js)?|(?:script|inline)/video)/.+?' % '|'.join(_SITES.keys())
+ _EMBED_REGEX = [r'''(?x)
+ <(?:iframe|script)[^>]+?src=(["\'])(?P<url>
+ (?:https?:)?//player(?:-backend)?\.(?:%s)\.com/(?:embed(?:js)?|(?:script|inline)/video)/.+?
+ )\1''' % '|'.join(_SITES.keys())]
_TESTS = [{
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
class CrooksAndLiarsIE(InfoExtractor):
_VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)'
+ _EMBED_REGEX = [r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1']
+
_TESTS = [{
'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi',
'info_dict': {
video_id = m.group('id')
video_type = 'program' if m.group('type') == 'prog' else 'clip'
else:
- senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
+ senate_isvp_url = SenateISVPIE._extract_url(webpage)
if senate_isvp_url:
title = self._og_search_title(webpage)
surl = smuggle_url(senate_isvp_url, {'force_title': title})
-import re
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
class DailyMailIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/(?:video/[^/]+/video-|embed/video/)(?P<id>[0-9]+)'
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)']
_TESTS = [{
'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
'md5': 'f6129624562251f628296c3a9ffde124',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)',
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
[/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
'''
IE_NAME = 'dailymotion'
+ _EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
_TESTS = [{
'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
'md5': '074b95bdee76b9e3654137aee9c79dfe',
}
xid'''
-    @staticmethod
-    def _extract_urls(webpage):
-        urls = []
-        # Look for embedded Dailymotion player
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        """Yield Dailymotion embed URLs found in webpage.
+
+        First yields whatever the parent implementation finds, then one
+        embed URL per DM.player(...) call. This is a generator, so it is always
+        truthy: callers that test for emptiness or index the result must
+        materialize it first (e.g. with tuple()).
+        """
         # https://developer.dailymotion.com/player#player-parameters
-        for mobj in re.finditer(
-                r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage):
-            urls.append(unescapeHTML(mobj.group('url')))
+        yield from super()._extract_embed_urls(url, webpage)
         for mobj in re.finditer(
                 r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
-            urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id'))
-        return urls
+            # Plain yield: "yield from" on a str would emit the URL one character at a time
+            yield 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url)
}]
_OBJECT_TYPE = 'collection'
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        """Yield a protocol-relative playlist URL for each playlist listed in an embedded jukebox widget"""
+        # Look for embedded Dailymotion playlist player (#3822)
+        jukebox_iframes = re.finditer(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1',
+            webpage)
+        for iframe in jukebox_iframes:
+            widget_url = unescapeHTML(iframe.group('url'))
+            for playlist_id in re.findall(r'list\[\]=/playlist/([^/]+)/', widget_url):
+                yield '//dailymotion.com/playlist/%s' % playlist_id
+
class DailymotionUserIE(DailymotionPlaylistBaseIE):
IE_NAME = 'dailymotion:user'
-import re
-
from .common import InfoExtractor
class DBTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1']
_TESTS = [{
'url': 'https://www.dagbladet.no/video/PynxJnNWChE/',
'md5': 'b8f850ba1860adbda668d367f9b77699',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [url for _, url in re.findall(
- r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1',
- webpage)]
-
def _real_extract(self, url):
display_id, video_id = self._match_valid_url(url).groups()
info = {
-import re
-
from .common import InfoExtractor
from ..utils import int_or_none
)
/id
)/(?P<id>[\d+a-z]+)'''
+ _EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)']
_TESTS = [{
# news
'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)',
- webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
class DrTuberIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)']
_TESTS = [{
'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
'md5': '93e680cf2536ad0dfb7e74d94a89facd',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)',
- webpage)
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
+import functools
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
+ smuggle_url,
unsmuggle_url,
url_or_none,
)
)
(?P<id>\d+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1']
_TESTS = [{
# http://lenta.ru/news/2015/03/06/navalny/
'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- # Regular iframe embedding
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1',
- webpage)
- if mobj is not None:
- return mobj.group('url')
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ add_referer = functools.partial(smuggle_url, data={'referrer': url})
+
+ res = tuple(super()._extract_embed_urls(url, webpage))
+ if res:
+ return map(add_referer, res)
+
PLAYER_JS_RE = r'''
<script[^>]+
src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs)
data-id=["\'](?P<id>\d+)
''' % PLAYER_JS_RE, webpage)
if mobj is not None:
- return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
+ return [add_referer('eagleplatform:%(host)s:%(id)s' % mobj.groupdict())]
# Generalization of "Javascript code usage", "Combined usage" and
# "Usage without attaching to DOM" embeddings (see
# http://dultonmedia.github.io/eplayer/)
</script>
''' % PLAYER_JS_RE, webpage)
if mobj is not None:
- return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
+ return [add_referer('eagleplatform:%(host)s:%(id)s' % mobj.groupdict())]
@staticmethod
def _handle_error(response):
'age_limit': age_limit,
'formats': formats,
}
+
+
+# Embed-only extractor: _VALID_URL is False and the single method below maps a
+# media.clipyou.ru player iframe to an "eagleplatform:<host>:<id>" URL
+class ClipYouEmbedIE(InfoExtractor):
+    _VALID_URL = False
+
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        """Yield at most one smuggled eagleplatform URL, built from the first ClipYou player iframe in webpage"""
+        player_iframe = re.search(
+            r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
+        if player_iframe is None:
+            return
+        # The embedding page is passed along as the referrer
+        yield smuggle_url(
+            'eagleplatform:%(host)s:%(id)s' % player_iframe.groupdict(), {'referrer': url})
+import re
+import urllib.parse
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
'only_matching': True,
}]
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        """Yield the target URLs of embedly cards (href) and embedly embeds (unquoted url= parameter of src)"""
+        # Bypass suitable check
+        # NOTE(review): `[^>]` in both patterns consumes exactly one character between
+        # the class attribute and href/src - confirm that this strictness is intended
+        card_re = r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)'
+        yield from (card.group('url') for card in re.finditer(card_re, webpage))
+
+        embed_re = r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)'
+        yield from (
+            urllib.parse.unquote(embed.group('url'))
+            for embed in re.finditer(embed_re, webpage))
+
def _real_extract(self, url):
return self.url_result(compat_urllib_parse_unquote(self._match_id(url)))
parse_iso8601,
str_or_none,
try_get,
- unescapeHTML,
url_or_none,
variadic,
)
IE_DESC = 'ert.gr webtv embedded videos'
_BASE_PLAYER_URL_RE = re.escape('//www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php')
_VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?f=(?P<id>[^#&]+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>(?:https?:)?{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
_TESTS = [{
'url': 'https://www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php?f=trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4&bgimg=/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
},
}]
- @classmethod
- def _extract_urls(cls, webpage):
- EMBED_URL_RE = rf'(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'
- EMBED_RE = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{EMBED_URL_RE})(?P=_q1)'
-
- for mobj in re.finditer(EMBED_RE, webpage):
- url = unescapeHTML(mobj.group('url'))
- if not cls.suitable(url):
- continue
- yield url
-
def _real_extract(self, url):
video_id = self._match_id(url)
formats, subs = self._extract_m3u8_formats_and_subtitles(
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
tv/(?:[^/]+/)*
(?P<id>[^/?#&]+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
_TESTS = [{
'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
'md5': 'deb2ca62e7b1dcd19fa18ba37523f66e',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url') for mobj in re.finditer(
- r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
- webpage)]
-
def _real_extract(self, url):
display_id = self._match_id(url)
)
(?P<id>[0-9]+)
'''
+ _EMBED_REGEX = [
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
+ # Facebook API embed https://developers.facebook.com/docs/plugins/embedded-video-player
+ r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
+ data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''',
+ ]
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
_NETRC_MACHINE = 'facebook'
'graphURI': '/api/graphql/'
}
- @staticmethod
- def _extract_urls(webpage):
- urls = []
- for mobj in re.finditer(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
- webpage):
- urls.append(mobj.group('url'))
- # Facebook API embed
- # see https://developers.facebook.com/docs/plugins/embedded-video-player
- for mobj in re.finditer(r'''(?x)<div[^>]+
- class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
- data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage):
- urls.append(mobj.group('url'))
- return urls
-
def _perform_login(self, username, password):
login_page_req = sanitized_Request(self._LOGIN_URL)
self._set_cookie('facebook.com', 'locale', 'en_US')
},
]
- @staticmethod
- def _extract_urls(webpage):
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
return [
f'https://video.foxnews.com/v/video-embed.html?video_id={mobj.group("video_id")}'
for mobj in re.finditer(
'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key())
return self.url_result(
- FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key())
+ FoxNewsIE._extract_embed_urls(url, webpage)[0], FoxNewsIE.ie_key())
(?P<id>[^@]+)(?:@(?P<catalog>.+))?
)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1']
_TESTS = [{
# without catalog
webpage = self._download_webpage(url, display_id)
- dailymotion_urls = DailymotionIE._extract_urls(webpage)
+ dailymotion_urls = DailymotionIE._extract_embed_urls(url, webpage)
if dailymotion_urls:
return self.playlist_result([
self.url_result(dailymotion_url, DailymotionIE.ie_key())
class GediDigitalIE(InfoExtractor):
- _VALID_URL = r'''(?x:(?P<url>(?:https?:)//video\.
+ _VALID_URL = r'''(?x:(?P<base_url>(?:https?:)//video\.
(?:
(?:
(?:espresso\.)?repubblica
|lasentinella
)\.gelocal
)\.it(?:/[^/]+){2,4}/(?P<id>\d+))(?:$|[?&].*))'''
+ _EMBED_REGEX = [rf'''(?x)
+ (?:
+ data-frame-src=|
+ <iframe[^\n]+src=
+ )
+ (["'])(?P<url>{_VALID_URL})\1''']
_TESTS = [{
'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
'md5': '84658d7fb9e55a6e57ecc77b73137494',
urls[i] = urljoin(base_url(e), url_basename(e))
return urls
- @staticmethod
- def _extract_urls(webpage):
- entries = [
- mobj.group('eurl')
- for mobj in re.finditer(r'''(?x)
- (?:
- data-frame-src=|
- <iframe[^\n]+src=
- )
- (["'])(?P<eurl>%s)\1''' % GediDigitalIE._VALID_URL, webpage)]
- return GediDigitalIE._sanitize_urls(entries)
-
- @staticmethod
- def _extract_url(webpage):
- urls = GediDigitalIE._extract_urls(webpage)
- return urls[0] if urls else None
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        """Return the embed URLs found by the parent implementation after passing them through _sanitize_urls"""
+        # _sanitize_urls assigns into its argument by index (urls[i] = ...),
+        # so it needs a mutable list; a tuple would raise TypeError on the first match
+        return cls._sanitize_urls(list(super()._extract_embed_urls(url, webpage)))
@staticmethod
def _clean_formats(formats):
formats[:] = clean_formats
def _real_extract(self, url):
- video_id = self._match_id(url)
- url = self._match_valid_url(url).group('url')
+ video_id, url = self._match_valid_url(url).group('id', 'base_url')
webpage = self._download_webpage(url, video_id)
title = self._html_search_meta(
['twitter:title', 'og:title'], webpage, fatal=True)
from . import gen_extractor_classes
from .common import InfoExtractor # isort: split
-from .ant1newsgr import Ant1NewsGrEmbedIE
-from .anvato import AnvatoIE
-from .apa import APAIE
-from .arcpublishing import ArcPublishingIE
-from .arkena import ArkenaIE
-from .arte import ArteTVEmbedIE
-from .bitchute import BitChuteIE
-from .blogger import BloggerIE
from .brightcove import BrightcoveLegacyIE, BrightcoveNewIE
-from .channel9 import Channel9IE
-from .cloudflarestream import CloudflareStreamIE
from .commonprotocols import RtmpIE
-from .condenast import CondeNastIE
-from .dailymail import DailyMailIE
-from .dailymotion import DailymotionIE
-from .dbtv import DBTVIE
-from .digiteka import DigitekaIE
-from .drtuber import DrTuberIE
-from .eagleplatform import EaglePlatformIE
-from .ertgr import ERTWebtvEmbedIE
-from .expressen import ExpressenIE
-from .facebook import FacebookIE
-from .foxnews import FoxNewsIE
-from .gedidigital import GediDigitalIE
-from .gfycat import GfycatIE
-from .glomex import GlomexEmbedIE
-from .googledrive import GoogleDriveIE
-from .indavideo import IndavideoEmbedIE
-from .instagram import InstagramIE
-from .joj import JojIE
-from .jwplatform import JWPlatformIE
-from .kaltura import KalturaIE
-from .kinja import KinjaEmbedIE
-from .limelight import LimelightBaseIE
-from .mainstreaming import MainStreamingIE
-from .medialaan import MedialaanIE
-from .mediaset import MediasetIE
-from .mediasite import MediasiteIE
-from .megaphone import MegaphoneIE
-from .megatvcom import MegaTVComEmbedIE
-from .mofosex import MofosexEmbedIE
-from .mtv import MTVServicesEmbeddedIE
-from .myvi import MyviIE
-from .nbc import NBCSportsVPlayerIE
-from .nexx import NexxEmbedIE, NexxIE
-from .odnoklassniki import OdnoklassnikiIE
-from .onionstudios import OnionStudiosIE
-from .ooyala import OoyalaIE
-from .panopto import PanoptoBaseIE
-from .peertube import PeerTubeIE
-from .piksel import PikselIE
-from .pladform import PladformIE
-from .pornhub import PornHubIE
-from .rcs import RCSEmbedsIE
-from .redtube import RedTubeIE
-from .rumble import RumbleEmbedIE
-from .rutube import RutubeIE
-from .rutv import RUTVIE
-from .ruutu import RuutuIE
-from .senategov import SenateISVPIE
-from .simplecast import SimplecastIE
-from .soundcloud import SoundcloudEmbedIE
-from .spankwire import SpankwireIE
-from .sportbox import SportBoxIE
-from .spotify import SpotifyBaseIE
-from .springboardplatform import SpringboardPlatformIE
-from .substack import SubstackIE
-from .svt import SVTIE
-from .teachable import TeachableIE
-from .ted import TedEmbedIE
-from .theplatform import ThePlatformIE
-from .threeqsdn import ThreeQSDNIE
-from .tiktok import TikTokIE
-from .tnaflix import TNAFlixNetworkEmbedIE
-from .tube8 import Tube8IE
-from .tunein import TuneInBaseIE
-from .tvc import TVCIE
-from .tvopengr import TVOpenGrEmbedIE
-from .tvp import TVPEmbedIE
-from .twentymin import TwentyMinutenIE
-from .udn import UDNEmbedIE
-from .ustream import UstreamIE
-from .vbox7 import Vbox7IE
-from .vice import ViceIE
-from .videa import VideaIE
-from .videomore import VideomoreIE
-from .videopress import VideoPressIE
-from .viewlift import ViewLiftEmbedIE
-from .vimeo import VHXEmbedIE, VimeoIE
-from .viqeo import ViqeoIE
-from .vk import VKIE
-from .vshare import VShareIE
-from .vzaar import VzaarIE
-from .washingtonpost import WashingtonPostIE
-from .webcaster import WebcasterFeedIE
-from .wimtv import WimTVIE
-from .wistia import WistiaIE
-from .xfileshare import XFileShareIE
-from .xhamster import XHamsterEmbedIE
-from .yapfiles import YapFilesIE
-from .youporn import YouPornIE
from .youtube import YoutubeIE
-from .zype import ZypeIE
from ..compat import compat_etree_fromstring
from ..utils import (
KNOWN_EXTENSIONS,
UnsupportedError,
determine_ext,
dict_get,
- float_or_none,
format_field,
int_or_none,
is_html,
'timestamp': 468923808,
'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
},
- 'add_ie': [JWPlatformIE.ie_key()],
+ 'add_ie': ['JWPlatform'],
},
{
# Video.js embed, multiple formats
'params': {
'skip_download': True,
},
- 'add_ie': [ArkenaIE.ie_key()],
+ 'add_ie': ['Arkena'],
},
{
'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
'params': {
'skip_download': True,
},
- 'add_ie': [Vbox7IE.ie_key()],
+ 'add_ie': ['Vbox7'],
},
{
# DBTV embeds
'params': {
'skip_download': True,
},
- 'add_ie': [TwentyMinutenIE.ie_key()],
+ 'add_ie': ['TwentyMinuten'],
},
{
# VideoPress embed
'params': {
'skip_download': True,
},
- 'add_ie': [VideoPressIE.ie_key()],
+ 'add_ie': ['VideoPress'],
},
{
# Rutube embed
'params': {
'skip_download': True,
},
- 'add_ie': [RutubeIE.ie_key()],
+ 'add_ie': ['Rutube'],
},
{
# glomex:embed
'ext': 'mp4',
'title': 'Integrated Senate Video Player',
},
- 'add_ie': [SenateISVPIE.ie_key()],
+ 'add_ie': ['SenateISVP'],
},
{
# Limelight embeds (1 channel embed + 4 media embeds)
'uploader': 'The Washington Post',
'upload_date': '20160211',
},
- 'add_ie': [WashingtonPostIE.ie_key()],
+ 'add_ie': ['WashingtonPost'],
},
{
# Mediaset embed
'params': {
'skip_download': True,
},
- 'add_ie': [MediasetIE.ie_key()],
+ 'add_ie': ['Mediaset'],
},
{
# JOJ.sk embeds
'title': 'Slovenskom sa prehnala vlna silných búrok',
},
'playlist_mincount': 5,
- 'add_ie': [JojIE.ie_key()],
+ 'add_ie': ['Joj'],
},
{
# AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
'params': {
'skip_download': True,
},
- 'add_ie': [SpringboardPlatformIE.ie_key()],
+ 'add_ie': ['SpringboardPlatform'],
},
{
'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
'ext': 'mp4',
'title': 'Котята',
},
- 'add_ie': [YapFilesIE.ie_key()],
+ 'add_ie': ['YapFiles'],
'params': {
'skip_download': True,
},
'ext': 'mp4',
'title': '31c9291ab41fac05471db4e73aa11717',
},
- 'add_ie': [CloudflareStreamIE.ie_key()],
+ 'add_ie': ['CloudflareStream'],
'params': {
'skip_download': True,
},
'uploader': 'StreetKitchen',
'uploader_id': '546363',
},
- 'add_ie': [IndavideoEmbedIE.ie_key()],
+ 'add_ie': ['IndavideoEmbed'],
'params': {
'skip_download': True,
},
# Panopto embeds
'url': 'https://www.monash.edu/learning-teaching/teachhq/learning-technologies/panopto/how-to/insert-a-quiz-into-a-panopto-video',
'info_dict': {
- 'title': 'Insert a quiz into a Panopto video',
- 'id': 'insert-a-quiz-into-a-panopto-video'
+ 'ext': 'mp4',
+ 'id': '0bd3f16c-824a-436a-8486-ac5900693aef',
+ 'title': 'Quizzes in Panopto',
},
- 'playlist_count': 1
},
{
# Ruutu embed
},
{
'url': 'https://www.skimag.com/video/ski-people-1980/',
+ 'md5': '022a7e31c70620ebec18deeab376ee03',
'info_dict': {
- 'id': 'ski-people-1980',
- 'title': 'Ski People (1980)',
- },
- 'playlist_count': 1,
- 'playlist': [{
- 'md5': '022a7e31c70620ebec18deeab376ee03',
- 'info_dict': {
- 'id': 'YTmgRiNU',
- 'ext': 'mp4',
- 'title': '1980 Ski People',
- 'timestamp': 1610407738,
- 'description': 'md5:cf9c3d101452c91e141f292b19fe4843',
- 'thumbnail': 'https://cdn.jwplayer.com/v2/media/YTmgRiNU/poster.jpg?width=720',
- 'duration': 5688.0,
- 'upload_date': '20210111',
- }
- }]
+ 'id': 'YTmgRiNU',
+ 'ext': 'mp4',
+ 'title': '1980 Ski People',
+ 'timestamp': 1610407738,
+ 'description': 'md5:cf9c3d101452c91e141f292b19fe4843',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/YTmgRiNU/poster.jpg?width=720',
+ 'duration': 5688.0,
+ 'upload_date': '20210111',
+ }
},
{
'note': 'Rumble embed',
r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
lambda x: unescapeHTML(x.group(0)), webpage)
- # TODO: Remove
- video_title, video_description, video_thumbnail, age_limit, video_uploader = \
- info_dict['title'], info_dict['description'], info_dict['thumbnail'], info_dict['age_limit'], domain_name
-
- # TODO: Move Embeds
- self._downloader.write_debug('Looking for single embeds')
-
- # Look for Brightcove Legacy Studio embeds
+ # TODO: Move to respective extractors
+ self._downloader.write_debug('Looking for Brightcove embeds')
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
if bc_urls:
entries = [{
return {
'_type': 'playlist',
- 'title': video_title,
+ 'title': info_dict['title'],
'id': video_id,
'entries': entries,
}
-
- # Look for Brightcove New Studio embeds
bc_urls = BrightcoveNewIE._extract_brightcove_urls(self, webpage)
if bc_urls:
return self.playlist_from_matches(
- bc_urls, video_id, video_title,
+ bc_urls, video_id, info_dict['title'],
getter=lambda x: smuggle_url(x, {'referrer': url}),
ie='BrightcoveNew')
- # Look for Nexx embeds
- nexx_urls = NexxIE._extract_urls(webpage)
- if nexx_urls:
- return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
-
- # Look for Nexx iFrame embeds
- nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
- if nexx_embed_urls:
- return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
-
- # Look for ThePlatform embeds
- tp_urls = ThePlatformIE._extract_urls(webpage)
- if tp_urls:
- return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
-
- arc_urls = ArcPublishingIE._extract_urls(webpage)
- if arc_urls:
- return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
-
- mychannels_urls = MedialaanIE._extract_urls(webpage)
- if mychannels_urls:
- return self.playlist_from_matches(
- mychannels_urls, video_id, video_title, ie=MedialaanIE.ie_key())
-
- # Look for embedded rtl.nl player
- matches = re.findall(
- r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
- webpage)
- if matches:
- return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
-
- vimeo_urls = VimeoIE._extract_urls(url, webpage)
- if vimeo_urls:
- return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
-
- vhx_url = VHXEmbedIE._extract_url(url, webpage)
- if vhx_url:
- return self.url_result(vhx_url, VHXEmbedIE.ie_key())
-
- # Invidious Instances
- # https://github.com/yt-dlp/yt-dlp/issues/195
- # https://github.com/iv-org/invidious/pull/1730
- youtube_url = self._search_regex(
- r'<link rel="alternate" href="(https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
- webpage, 'youtube link', default=None)
- if youtube_url:
- return self.url_result(youtube_url, YoutubeIE.ie_key())
-
- # Look for YouTube embeds
- youtube_urls = YoutubeIE._extract_urls(webpage)
- if youtube_urls:
- return self.playlist_from_matches(
- youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
-
- matches = DailymotionIE._extract_urls(webpage)
- if matches:
- return self.playlist_from_matches(matches, video_id, video_title)
-
- # Look for embedded Dailymotion playlist player (#3822)
- m = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
- if m:
- playlists = re.findall(
- r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
- if playlists:
- return self.playlist_from_matches(
- playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
-
- # Look for DailyMail embeds
- dailymail_urls = DailyMailIE._extract_urls(webpage)
- if dailymail_urls:
- return self.playlist_from_matches(
- dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
-
- # Look for Teachable embeds, must be before Wistia
- teachable_url = TeachableIE._extract_url(webpage, url)
- if teachable_url:
- return self.url_result(teachable_url)
-
- # Look for embedded Wistia player
- wistia_urls = WistiaIE._extract_urls(webpage)
- if wistia_urls:
- playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
- playlist['entries'] = list(playlist['entries'])
- for entry in playlist['entries']:
- entry.update({
- '_type': 'url_transparent',
- 'uploader': video_uploader,
- })
- return playlist
-
- # Look for SVT player
- svt_url = SVTIE._extract_url(webpage)
- if svt_url:
- return self.url_result(svt_url, 'SVT')
-
- # Look for Bandcamp pages with custom domain
- mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
- if mobj is not None:
- burl = unescapeHTML(mobj.group(1))
- # Don't set the extractor because it can be a track url or an album
- return self.url_result(burl)
-
- # Check for Substack custom domains
- substack_url = SubstackIE._extract_url(webpage, url)
- if substack_url:
- return self.url_result(substack_url, SubstackIE)
-
- # Look for embedded Vevo player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for embedded Viddler player
- mobj = re.search(
- r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for NYTimes player
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for Libsyn player
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for Ooyala videos
- mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage)
- or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)
- or re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage)
- or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)
- or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
- if mobj is not None:
- embed_token = self._search_regex(
- r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
- webpage, 'ooyala embed token', default=None)
- return OoyalaIE._build_url_result(smuggle_url(
- mobj.group('ec'), {
- 'domain': url,
- 'embed_token': embed_token,
- }))
-
- # Look for multiple Ooyala embeds on SBN network websites
- mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
- if mobj is not None:
- embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
- if embeds:
- return self.playlist_from_matches(
- embeds, video_id, video_title,
- getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
-
- # Look for Aparat videos
- mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group(1), 'Aparat')
-
- # Look for MPORA videos
- mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group(1), 'Mpora')
-
- # Look for embedded Facebook player
- facebook_urls = FacebookIE._extract_urls(webpage)
- if facebook_urls:
- return self.playlist_from_matches(facebook_urls, video_id, video_title)
-
- # Look for embedded VK player
- mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'VK')
-
- # Look for embedded Odnoklassniki player
- odnoklassniki_url = OdnoklassnikiIE._extract_url(webpage)
- if odnoklassniki_url:
- return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
-
- # Look for sibnet embedded player
- sibnet_urls = VKIE._extract_sibnet_urls(webpage)
- if sibnet_urls:
- return self.playlist_from_matches(sibnet_urls, video_id, video_title)
-
- # Look for embedded ivi player
- mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Ivi')
-
- # Look for embedded Huffington Post player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'HuffPost')
-
- # Look for embed.ly
- mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
- mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
- if mobj is not None:
- return self.url_result(urllib.parse.unquote(mobj.group('url')))
-
- # Look for funnyordie embed
- matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
- if matches:
- return self.playlist_from_matches(
- matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
-
- # Look for Simplecast embeds
- simplecast_urls = SimplecastIE._extract_urls(webpage)
- if simplecast_urls:
- return self.playlist_from_matches(
- simplecast_urls, video_id, video_title)
-
- # Look for BBC iPlayer embed
- matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
- if matches:
- return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
-
- # Look for embedded RUTV player
- rutv_url = RUTVIE._extract_url(webpage)
- if rutv_url:
- return self.url_result(rutv_url, 'RUTV')
-
- # Look for embedded TVC player
- tvc_url = TVCIE._extract_url(webpage)
- if tvc_url:
- return self.url_result(tvc_url, 'TVC')
-
- # Look for embedded SportBox player
- sportbox_urls = SportBoxIE._extract_urls(webpage)
- if sportbox_urls:
- return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
-
- # Look for embedded Spotify player
- spotify_urls = SpotifyBaseIE._extract_urls(webpage)
- if spotify_urls:
- return self.playlist_from_matches(spotify_urls, video_id, video_title)
-
- # Look for embedded XHamster player
- xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
- if xhamster_urls:
- return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
-
- # Look for embedded TNAFlixNetwork player
- tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
- if tnaflix_urls:
- return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
-
- # Look for embedded PornHub player
- pornhub_urls = PornHubIE._extract_urls(webpage)
- if pornhub_urls:
- return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
-
- # Look for embedded DrTuber player
- drtuber_urls = DrTuberIE._extract_urls(webpage)
- if drtuber_urls:
- return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
-
- # Look for embedded RedTube player
- redtube_urls = RedTubeIE._extract_urls(webpage)
- if redtube_urls:
- return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
-
- # Look for embedded Tube8 player
- tube8_urls = Tube8IE._extract_urls(webpage)
- if tube8_urls:
- return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
-
- # Look for embedded Mofosex player
- mofosex_urls = MofosexEmbedIE._extract_urls(webpage)
- if mofosex_urls:
- return self.playlist_from_matches(mofosex_urls, video_id, video_title, ie=MofosexEmbedIE.ie_key())
-
- # Look for embedded Spankwire player
- spankwire_urls = SpankwireIE._extract_urls(webpage)
- if spankwire_urls:
- return self.playlist_from_matches(spankwire_urls, video_id, video_title, ie=SpankwireIE.ie_key())
-
- # Look for embedded YouPorn player
- youporn_urls = YouPornIE._extract_urls(webpage)
- if youporn_urls:
- return self.playlist_from_matches(youporn_urls, video_id, video_title, ie=YouPornIE.ie_key())
-
- # Look for embedded Tvigle player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Tvigle')
-
- # Look for embedded TED player
- ted_urls = TedEmbedIE._extract_urls(webpage)
- if ted_urls:
- return self.playlist_from_matches(ted_urls, video_id, video_title, ie=TedEmbedIE.ie_key())
-
- # Look for embedded Ustream videos
- ustream_url = UstreamIE._extract_url(webpage)
- if ustream_url:
- return self.url_result(ustream_url, UstreamIE.ie_key())
-
- # Look for embedded arte.tv player
- arte_urls = ArteTVEmbedIE._extract_urls(webpage)
- if arte_urls:
- return self.playlist_from_matches(arte_urls, video_id, video_title)
-
- # Look for embedded francetv player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for embedded Myvi.ru player
- myvi_url = MyviIE._extract_url(webpage)
- if myvi_url:
- return self.url_result(myvi_url)
-
- # Look for embedded soundcloud player
- soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
- if soundcloud_urls:
- return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
-
- # Look for tunein player
- tunein_urls = TuneInBaseIE._extract_urls(webpage)
- if tunein_urls:
- return self.playlist_from_matches(tunein_urls, video_id, video_title)
-
- # Look for embedded mtvservices player
- mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
- if mtvservices_url:
- return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
-
- # Look for embedded yahoo player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Yahoo')
-
- # Look for embedded sbs.com.au player
- mobj = re.search(
- r'''(?x)
- (?:
- <meta\s+property="og:video"\s+content=|
- <iframe[^>]+?src=
- )
- (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'SBS')
-
- # Look for embedded Cinchcast player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Cinchcast')
-
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
- webpage)
- if not mobj:
- mobj = re.search(
- r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'MLB')
-
- mobj = re.search(
- r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
- webpage)
- if mobj is not None:
- return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
-
- mobj = re.search(
- r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Livestream')
-
- # Look for Zapiks embed
- mobj = re.search(
- r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Zapiks')
-
- # Look for Kaltura embeds
- kaltura_urls = KalturaIE._extract_urls(webpage)
- if kaltura_urls:
- return self.playlist_from_matches(
- kaltura_urls, video_id, video_title,
- getter=lambda x: smuggle_url(x, {'source_url': url}),
- ie=KalturaIE.ie_key())
-
- # Look for EaglePlatform embeds
- eagleplatform_url = EaglePlatformIE._extract_url(webpage)
- if eagleplatform_url:
- return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
-
- # Look for ClipYou (uses EaglePlatform) embeds
- mobj = re.search(
- r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
- if mobj is not None:
- return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
-
- # Look for Pladform embeds
- pladform_url = PladformIE._extract_url(webpage)
- if pladform_url:
- return self.url_result(pladform_url)
-
- # Look for Videomore embeds
- videomore_url = VideomoreIE._extract_url(webpage)
- if videomore_url:
- return self.url_result(videomore_url)
-
- # Look for Webcaster embeds
- webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
- if webcaster_url:
- return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
-
- # Look for Playwire embeds
- mobj = re.search(
- r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for Crooks and Liars embeds
- mobj = re.search(
- r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for NBC Sports VPlayer embeds
- nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
- if nbc_sports_url:
- return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
-
- # Look for NBC News embeds
- nbc_news_embed_url = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
- if nbc_news_embed_url:
- return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
-
- # Look for Google Drive embeds
- google_drive_url = GoogleDriveIE._extract_url(webpage)
- if google_drive_url:
- return self.url_result(google_drive_url, 'GoogleDrive')
-
- # Look for UDN embeds
- mobj = re.search(
- r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
- if mobj is not None:
- return self.url_result(
- urllib.parse.urljoin(url, mobj.group('url')), 'UDNEmbed')
-
- # Look for Senate ISVP iframe
- senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
- if senate_isvp_url:
- return self.url_result(senate_isvp_url, 'SenateISVP')
-
- # Look for Kinja embeds
- kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url)
- if kinja_embed_urls:
- return self.playlist_from_matches(
- kinja_embed_urls, video_id, video_title)
-
- # Look for OnionStudios embeds
- onionstudios_url = OnionStudiosIE._extract_url(webpage)
- if onionstudios_url:
- return self.url_result(onionstudios_url)
-
- # Look for Blogger embeds
- blogger_urls = BloggerIE._extract_urls(webpage)
- if blogger_urls:
- return self.playlist_from_matches(blogger_urls, video_id, video_title, ie=BloggerIE.ie_key())
-
- # Look for ViewLift embeds
- viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
- if viewlift_url:
- return self.url_result(viewlift_url)
-
- # Look for JWPlatform embeds
- jwplatform_urls = JWPlatformIE._extract_urls(webpage)
- if jwplatform_urls:
- return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
-
- # Look for Digiteka embeds
- digiteka_url = DigitekaIE._extract_url(webpage)
- if digiteka_url:
- return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
-
- # Look for Arkena embeds
- arkena_url = ArkenaIE._extract_url(webpage)
- if arkena_url:
- return self.url_result(arkena_url, ArkenaIE.ie_key())
-
- # Look for Piksel embeds
- piksel_url = PikselIE._extract_url(webpage)
- if piksel_url:
- return self.url_result(piksel_url, PikselIE.ie_key())
-
- # Look for Limelight embeds
- limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
- if limelight_urls:
- return self.playlist_result(
- limelight_urls, video_id, video_title, video_description)
-
- # Look for Anvato embeds
- anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
- if anvato_urls:
- return self.playlist_result(
- anvato_urls, video_id, video_title, video_description)
-
- # Look for AdobeTVVideo embeds
- mobj = re.search(
- r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
- webpage)
- if mobj is not None:
- return self.url_result(
- self._proto_relative_url(unescapeHTML(mobj.group(1))),
- 'AdobeTVVideo')
-
- # Look for Vine embeds
- mobj = re.search(
- r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
- webpage)
- if mobj is not None:
- return self.url_result(
- self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
-
- # Look for VODPlatform embeds
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(
- self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
-
- # Look for Mangomolo embeds
- mobj = re.search(
- r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//
- (?:
- admin\.mangomolo\.com/analytics/index\.php/customers/embed|
- player\.mangomolo\.com/v1
- )/
- (?:
- video\?.*?\bid=(?P<video_id>\d+)|
- (?:index|live)\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
- ).+?)\1''', webpage)
- if mobj is not None:
- info = {
- '_type': 'url_transparent',
- 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
- 'title': video_title,
- 'description': video_description,
- 'thumbnail': video_thumbnail,
- 'uploader': video_uploader,
- }
- video_id = mobj.group('video_id')
- if video_id:
- info.update({
- 'ie_key': 'MangomoloVideo',
- 'id': video_id,
- })
- else:
- info.update({
- 'ie_key': 'MangomoloLive',
- 'id': mobj.group('channel_id'),
- })
- return info
-
- # Look for Instagram embeds
- instagram_embed_url = InstagramIE._extract_embed_url(webpage)
- if instagram_embed_url is not None:
- return self.url_result(
- self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
-
- # Look for 3Q SDN embeds
- threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
- if threeqsdn_url:
- return {
- '_type': 'url_transparent',
- 'ie_key': ThreeQSDNIE.ie_key(),
- 'url': self._proto_relative_url(threeqsdn_url),
- 'title': video_title,
- 'description': video_description,
- 'thumbnail': video_thumbnail,
- 'uploader': video_uploader,
- }
-
- # Look for VBOX7 embeds
- vbox7_url = Vbox7IE._extract_url(webpage)
- if vbox7_url:
- return self.url_result(vbox7_url, Vbox7IE.ie_key())
-
- # Look for DBTV embeds
- dbtv_urls = DBTVIE._extract_urls(webpage)
- if dbtv_urls:
- return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
-
- # Look for Videa embeds
- videa_urls = VideaIE._extract_urls(webpage)
- if videa_urls:
- return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
-
- # Look for 20 minuten embeds
- twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
- if twentymin_urls:
- return self.playlist_from_matches(
- twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
-
- # Look for VideoPress embeds
- videopress_urls = VideoPressIE._extract_urls(webpage)
- if videopress_urls:
- return self.playlist_from_matches(
- videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
-
- # Look for Rutube embeds
- rutube_urls = RutubeIE._extract_urls(webpage)
- if rutube_urls:
- return self.playlist_from_matches(
- rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
-
- # Look for Glomex embeds
- glomex_urls = list(GlomexEmbedIE._extract_urls(webpage, url))
- if glomex_urls:
- return self.playlist_from_matches(
- glomex_urls, video_id, video_title, ie=GlomexEmbedIE.ie_key())
-
- # Look for megatv.com embeds
- megatvcom_urls = list(MegaTVComEmbedIE._extract_urls(webpage))
- if megatvcom_urls:
- return self.playlist_from_matches(
- megatvcom_urls, video_id, video_title, ie=MegaTVComEmbedIE.ie_key())
-
- # Look for ant1news.gr embeds
- ant1newsgr_urls = list(Ant1NewsGrEmbedIE._extract_urls(webpage))
- if ant1newsgr_urls:
- return self.playlist_from_matches(
- ant1newsgr_urls, video_id, video_title, ie=Ant1NewsGrEmbedIE.ie_key())
-
- # Look for WashingtonPost embeds
- wapo_urls = WashingtonPostIE._extract_urls(webpage)
- if wapo_urls:
- return self.playlist_from_matches(
- wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
-
- # Look for Mediaset embeds
- mediaset_urls = MediasetIE._extract_urls(self, webpage)
- if mediaset_urls:
- return self.playlist_from_matches(
- mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
-
- # Look for JOJ.sk embeds
- joj_urls = JojIE._extract_urls(webpage)
- if joj_urls:
- return self.playlist_from_matches(
- joj_urls, video_id, video_title, ie=JojIE.ie_key())
-
- # Look for megaphone.fm embeds
- mpfn_urls = MegaphoneIE._extract_urls(webpage)
- if mpfn_urls:
- return self.playlist_from_matches(
- mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
-
- # Look for vzaar embeds
- vzaar_urls = VzaarIE._extract_urls(webpage)
- if vzaar_urls:
- return self.playlist_from_matches(
- vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
-
- channel9_urls = Channel9IE._extract_urls(webpage)
- if channel9_urls:
- return self.playlist_from_matches(
- channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
-
- vshare_urls = VShareIE._extract_urls(webpage)
- if vshare_urls:
- return self.playlist_from_matches(
- vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
-
- # Look for Mediasite embeds
- mediasite_urls = MediasiteIE._extract_urls(webpage)
- if mediasite_urls:
- entries = [
- self.url_result(smuggle_url(
- urllib.parse.urljoin(url, mediasite_url),
- {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
- for mediasite_url in mediasite_urls]
- return self.playlist_result(entries, video_id, video_title)
-
- springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
- if springboardplatform_urls:
- return self.playlist_from_matches(
- springboardplatform_urls, video_id, video_title,
- ie=SpringboardPlatformIE.ie_key())
-
- yapfiles_urls = YapFilesIE._extract_urls(webpage)
- if yapfiles_urls:
- return self.playlist_from_matches(
- yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
-
- vice_urls = ViceIE._extract_urls(webpage)
- if vice_urls:
- return self.playlist_from_matches(
- vice_urls, video_id, video_title, ie=ViceIE.ie_key())
-
- xfileshare_urls = XFileShareIE._extract_urls(webpage)
- if xfileshare_urls:
- return self.playlist_from_matches(
- xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
-
- cloudflarestream_urls = CloudflareStreamIE._extract_urls(webpage)
- if cloudflarestream_urls:
- return self.playlist_from_matches(
- cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
-
- peertube_urls = PeerTubeIE._extract_urls(webpage, url)
- if peertube_urls:
- return self.playlist_from_matches(
- peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
-
- indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
- if indavideo_urls:
- return self.playlist_from_matches(
- indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
-
- apa_urls = APAIE._extract_urls(webpage)
- if apa_urls:
- return self.playlist_from_matches(
- apa_urls, video_id, video_title, ie=APAIE.ie_key())
-
- foxnews_urls = FoxNewsIE._extract_urls(webpage)
- if foxnews_urls:
- return self.playlist_from_matches(
- foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
-
- sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer(
- r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
- webpage)]
- if sharevideos_urls:
- return self.playlist_from_matches(
- sharevideos_urls, video_id, video_title)
-
- viqeo_urls = ViqeoIE._extract_urls(webpage)
- if viqeo_urls:
- return self.playlist_from_matches(
- viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
-
- expressen_urls = ExpressenIE._extract_urls(webpage)
- if expressen_urls:
- return self.playlist_from_matches(
- expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
-
- zype_urls = ZypeIE._extract_urls(webpage)
- if zype_urls:
- return self.playlist_from_matches(
- zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
-
- gedi_urls = GediDigitalIE._extract_urls(webpage)
- if gedi_urls:
- return self.playlist_from_matches(
- gedi_urls, video_id, video_title, ie=GediDigitalIE.ie_key())
-
- # Look for RCS media group embeds
- rcs_urls = RCSEmbedsIE._extract_urls(webpage)
- if rcs_urls:
- return self.playlist_from_matches(
- rcs_urls, video_id, video_title, ie=RCSEmbedsIE.ie_key())
-
- wimtv_urls = WimTVIE._extract_urls(webpage)
- if wimtv_urls:
- return self.playlist_from_matches(
- wimtv_urls, video_id, video_title, ie=WimTVIE.ie_key())
-
- bitchute_urls = BitChuteIE._extract_urls(webpage)
- if bitchute_urls:
- return self.playlist_from_matches(
- bitchute_urls, video_id, video_title, ie=BitChuteIE.ie_key())
-
- rumble_urls = RumbleEmbedIE._extract_urls(webpage)
- if len(rumble_urls) == 1:
- return self.url_result(rumble_urls[0], RumbleEmbedIE.ie_key())
- if rumble_urls:
- return self.playlist_from_matches(
- rumble_urls, video_id, video_title, ie=RumbleEmbedIE.ie_key())
-
- # Look for (tvopen|ethnos).gr embeds
- tvopengr_urls = list(TVOpenGrEmbedIE._extract_urls(webpage))
- if tvopengr_urls:
- return self.playlist_from_matches(tvopengr_urls, video_id, video_title, ie=TVOpenGrEmbedIE.ie_key())
-
- # Look for ert.gr webtv embeds
- ertwebtv_urls = list(ERTWebtvEmbedIE._extract_urls(webpage))
- if len(ertwebtv_urls) == 1:
- return self.url_result(self._proto_relative_url(ertwebtv_urls[0]), video_title=video_title, url_transparent=True)
- elif ertwebtv_urls:
- return self.playlist_from_matches(ertwebtv_urls, video_id, video_title, ie=ERTWebtvEmbedIE.ie_key())
-
- tvp_urls = TVPEmbedIE._extract_urls(webpage)
- if tvp_urls:
- return self.playlist_from_matches(tvp_urls, video_id, video_title, ie=TVPEmbedIE.ie_key())
-
- # Look for MainStreaming embeds
- mainstreaming_urls = MainStreamingIE._extract_urls(webpage)
- if mainstreaming_urls:
- return self.playlist_from_matches(mainstreaming_urls, video_id, video_title, ie=MainStreamingIE.ie_key())
-
- # Look for Gfycat Embeds
- gfycat_urls = GfycatIE._extract_urls(webpage)
- if gfycat_urls:
- return self.playlist_from_matches(gfycat_urls, video_id, video_title, ie=GfycatIE.ie_key())
-
- panopto_urls = PanoptoBaseIE._extract_urls(webpage)
- if panopto_urls:
- return self.playlist_from_matches(panopto_urls, video_id, video_title)
-
- # Look for Ruutu embeds
- ruutu_urls = RuutuIE._extract_urls(webpage)
- if ruutu_urls:
- return self.playlist_from_matches(ruutu_urls, video_id, video_title)
-
- # Look for Tiktok embeds
- tiktok_urls = TikTokIE._extract_urls(webpage)
- if tiktok_urls:
- return self.playlist_from_matches(tiktok_urls, video_id, video_title)
- # TODO: END: Move Embeds
-
self._downloader.write_debug('Looking for embeds')
embeds = []
for ie in gen_extractor_classes():
return {
**info_dict,
'_type': 'url',
- 'ie_key': JWPlatformIE.ie_key(),
+ 'ie_key': 'JWPlatform',
'url': jwplayer_data['playlist'],
}
try:
entry_info_dict = {
'id': video_id,
- 'uploader': video_uploader,
- 'title': video_title,
- 'age_limit': age_limit,
+ 'uploader': domain_name,
+ 'title': info_dict['title'],
+ 'age_limit': info_dict['age_limit'],
'http_headers': headers,
}
-import re
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
class GfycatIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?i:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\."\']+)'
+ _EMBED_REGEX = [rf'<(?:iframe|source)[^>]+\bsrc=["\'](?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
'info_dict': {
'only_matching': True
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<(?:iframe|source)[^>]+\bsrc=["\'](?P<url>%s)' % GfycatIE._VALID_URL,
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
return cls._smuggle_origin_url(f'https:{cls._BASE_PLAYER_URL}?{query_string}', origin_url)
@classmethod
- def _extract_urls(cls, webpage, origin_url):
+ def _extract_embed_urls(cls, url, webpage):
# https://docs.glomex.com/publisher/video-player-integration/javascript-api/
quot_re = r'["\']'
(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=q)).)+
)(?P=q)'''
for mobj in re.finditer(regex, webpage):
- url = unescapeHTML(mobj.group('url'))
- if cls.suitable(url):
- yield cls._smuggle_origin_url(url, origin_url)
+ embed_url = unescapeHTML(mobj.group('url'))
+ if cls.suitable(embed_url):
+ yield cls._smuggle_origin_url(embed_url, url)
regex = fr'''(?x)
<glomex-player [^>]+?>|
for mobj in re.finditer(regex, webpage):
attrs = extract_attributes(mobj.group(0))
if attrs.get('data-integration-id') and attrs.get('data-playlist-id'):
- yield cls.build_player_url(attrs['data-playlist-id'], attrs['data-integration-id'], origin_url)
+ yield cls.build_player_url(attrs['data-playlist-id'], attrs['data-integration-id'], url)
# naive parsing of inline scripts for hard-coded integration parameters
regex = fr'''(?x)
continue
playlist_id = re.search(regex % 'playlistId', script)
if playlist_id:
- yield cls.build_player_url(playlist_id, integration_id, origin_url)
+ yield cls.build_player_url(playlist_id, integration_id, url)
def _real_extract(self, url):
url, origin_url = self._unsmuggle_origin_url(url)
_caption_formats_ext = []
_captions_xml = None
- @staticmethod
- def _extract_url(webpage):
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
mobj = re.search(
r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28,})',
webpage)
if mobj:
- return 'https://drive.google.com/file/d/%s' % mobj.group('id')
+ yield 'https://drive.google.com/file/d/%s' % mobj.group('id')
def _download_subtitles_xml(self, video_id, subtitles_id, hl):
if self._captions_xml:
if kaltura_id:
return _make_kaltura_result('kaltura:2238431:%s' % kaltura_id)
- yt_urls = YoutubeIE._extract_urls(webpage)
+ yt_urls = YoutubeIE._extract_embed_urls(url, webpage)
if yt_urls:
return self.playlist_from_matches(
yt_urls, video_id, title, ie=YoutubeIE.ie_key())
HPLEmbedPlayer/\?segmentId=
)
(?P<id>[0-9a-f]+)'''
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1']
_TEST = {
'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
-import re
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
class IndavideoEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
+ # Some example URLs covered by generic extractor:
+ # http://indavideo.hu/video/Vicces_cica_1
+ # http://index.indavideo.hu/video/2015_0728_beregszasz
+ # http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
+ # http://erotika.indavideo.hu/video/Amator_tini_punci
+ # http://film.indavideo.hu/video/f_hrom_nagymamm_volt
+ # http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)']
_TESTS = [{
'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
'only_matching': True,
}]
- # Some example URLs covered by generic extractor:
- # http://indavideo.hu/video/Vicces_cica_1
- # http://index.indavideo.hu/video/2015_0728_beregszasz
- # http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
- # http://erotika.indavideo.hu/video/Amator_tini_punci
- # http://film.indavideo.hu/video/f_hrom_nagymamm_volt
- # http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
-
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)',
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
class InstagramIE(InstagramBaseIE):
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1']
_TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
'only_matching': True,
}]
- @staticmethod
- def _extract_embed_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1',
- webpage)
- if mobj:
- return mobj.group('url')
-
- blockquote_el = get_element_by_attribute(
- 'class', 'instagram-media', webpage)
- if blockquote_el is None:
- return
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ res = tuple(super()._extract_embed_urls(url, webpage))
+ if res:
+ return res
- mobj = re.search(
- r'<a[^>]+href=([\'"])(?P<link>[^\'"]+)\1', blockquote_el)
+ mobj = re.search(r'<a[^>]+href=([\'"])(?P<link>[^\'"]+)\1',
+ get_element_by_attribute('class', 'instagram-media', webpage) or '')
if mobj:
- return mobj.group('link')
+ return [mobj.group('link')]
def _real_extract(self, url):
video_id, url = self._match_valid_url(url).group('id', 'url')
IE_DESC = 'ivi.ru'
IE_NAME = 'ivi'
_VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
+ _EMBED_REGEX = [r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1']
_GEO_BYPASS = False
_GEO_COUNTRIES = ['RU']
_LIGHT_KEY = b'\xf1\x02\x32\xb7\xbc\x5c\x7a\xe8\xf7\x96\xc1\x33\x2b\x27\xa1\x8c'
-import re
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
)
(?P<id>[^/?#^]+)
'''
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1']
_TESTS = [{
'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
'info_dict': {
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- urls = JWPlatformIE._extract_urls(webpage)
- return urls[0] if urls else None
-
- @staticmethod
- def _extract_urls(webpage):
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')):
# <input value=URL> is used by hyland.com
# if we find <iframe>, dont look for <input>
}
]
- @staticmethod
- def _extract_url(webpage):
- urls = KalturaIE._extract_urls(webpage)
- return urls[0] if urls else None
-
- @staticmethod
- def _extract_urls(webpage):
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
finditer = (
list(re.finditer(
for k, v in embed_info.items():
if v:
embed_info[k] = v.strip()
- url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
+ embed_url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
escaped_pid = re.escape(embed_info['partner_id'])
service_mobj = re.search(
r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
webpage)
if service_mobj:
- url = smuggle_url(url, {'service_url': service_mobj.group('id')})
- urls.append(url)
+ embed_url = smuggle_url(embed_url, {'service_url': service_mobj.group('id')})
+ urls.append(embed_url)
return urls
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
-import re
-
from .common import InfoExtractor
from ..compat import (
compat_str,
parse_iso8601,
strip_or_none,
try_get,
- unescapeHTML,
- urljoin,
)
vine|
youtube-(?:list|video)
)-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
+ _EMBED_REGEX = [rf'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//{_DOMAIN_REGEX})?{_COMMON_REGEX}(?:(?!\1).)+)\1']
_TESTS = [{
'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
'only_matching': True,
'youtube-video': ('youtube.com/embed/', 'Youtube'),
}
- @staticmethod
- def _extract_urls(webpage, url):
- return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer(
- r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX),
- webpage)]
-
def _real_extract(self, url):
video_type, video_id = self._match_valid_url(url).groups()
class LibsynIE(InfoExtractor):
_VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1']
_TESTS = [{
'url': 'http://html5-player.libsyn.com/embed/episode/id/6385796/',
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
@classmethod
- def _extract_urls(cls, webpage, source_url):
+ def _extract_embed_urls(cls, url, webpage):
lm = {
'Media': 'media',
'Channel': 'channel',
}
- def smuggle(url):
- return smuggle_url(url, {'source_url': source_url})
+ def smuggle(embed_url):
+ # Keep this parameter distinct from the enclosing method's ``url`` (the
+ # embedding page). Shadowing it would smuggle the embed URL as its own
+ # 'source_url' instead of the page it was found on.
+ return smuggle_url(embed_url, {'source_url': url})
entries = []
for kind, video_id in re.findall(
class LivestreamIE(InfoExtractor):
IE_NAME = 'livestream'
_VALID_URL = r'https?://(?:new\.)?livestream\.com/(?:accounts/(?P<account_id>\d+)|(?P<account_name>[^/]+))/(?:events/(?P<event_id>\d+)|(?P<event_name>[^/]+))(?:/videos/(?P<id>\d+))?'
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"']
+
_TESTS = [{
'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
'md5': '53274c76ba7754fb0e8d072716f2292b',
class MainStreamingIE(InfoExtractor):
_VALID_URL = r'https?://(?:webtools-?)?(?P<host>[A-Za-z0-9-]*\.msvdn.net)/(?:embed|amp_embed|content)/(?P<id>\w+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+?src=["\']?(?P<url>{_VALID_URL})["\']?']
IE_DESC = 'MainStreaming Player'
_TESTS = [
}
]
- @staticmethod
- def _extract_urls(webpage):
- mobj = re.findall(
- r'<iframe[^>]+?src=["\']?(?P<url>%s)["\']?' % MainStreamingIE._VALID_URL, webpage)
- if mobj:
- return [group[0] for group in mobj]
-
def _playlist_entries(self, host, playlist_content):
for entry in playlist_content:
content_id = entry.get('contentID')
compat_b64decode,
compat_urllib_parse_unquote,
)
-from ..utils import int_or_none
+from ..utils import classproperty, int_or_none
class MangomoloBaseIE(InfoExtractor):
- _BASE_REGEX = r'https?://(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)'
+ _BASE_REGEX = r'(?:https?:)?//(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)'
+ _SLUG = None
+
+ @classproperty
+ def _VALID_URL(cls):
+ return f'{cls._BASE_REGEX}{cls._SLUG}'
+
+ @classproperty
+ def _EMBED_REGEX(cls):
+ return [rf'<iframe[^>]+src=(["\'])(?P<url>{cls._VALID_URL}.+?)\1']
+
+ def _extract_from_webpage(self, url, webpage):
+ for res in super()._extract_from_webpage(url, webpage):
+ yield {
+ **res,
+ '_type': 'url_transparent',
+ 'id': self._search_regex(self._SLUG, res['url'], 'id', group='id'),
+ 'uploader': self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader'),
+ }
def _get_real_id(self, page_id):
return page_id
class MangomoloVideoIE(MangomoloBaseIE):
_TYPE = 'video'
IE_NAME = 'mangomolo:' + _TYPE
- _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'video\?.*?\bid=(?P<id>\d+)'
+ _SLUG = r'video\?.*?\bid=(?P<id>\d+)'
+
_IS_LIVE = False
class MangomoloLiveIE(MangomoloBaseIE):
_TYPE = 'live'
IE_NAME = 'mangomolo:' + _TYPE
- _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'(live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
+ _SLUG = r'(?:live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
_IS_LIVE = True
def _get_real_id(self, page_id):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
entries = []
for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
mychannels_id = extract_attributes(element).get('data-mychannels-id')
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(ie, webpage):
+ def _extract_from_webpage(self, url, webpage):
def _qs(url):
return parse_qs(url)
video_id = embed_qs.get('id', [None])[0]
if not video_id:
continue
- urlh = ie._request_webpage(
- embed_url, video_id, note='Following embed URL redirect')
+ urlh = self._request_webpage(embed_url, video_id, note='Following embed URL redirect')
embed_url = urlh.geturl()
program_guid = _program_guid(_qs(embed_url))
if program_guid:
str_or_none,
try_call,
try_get,
- unescapeHTML,
+ smuggle_url,
unsmuggle_url,
url_or_none,
urljoin,
class MediasiteIE(InfoExtractor):
_VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/[^/#?]+/Presentation)/(?P<id>%s)(?P<query>\?[^#]+|)' % _ID_RE
+ _EMBED_REGEX = [r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE]
_TESTS = [
{
'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
5: 'video3',
}
- @staticmethod
- def _extract_urls(webpage):
- return [
- unescapeHTML(mobj.group('url'))
- for mobj in re.finditer(
- r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE,
- webpage)]
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ for embed_url in super()._extract_embed_urls(url, webpage):
+ yield smuggle_url(embed_url, {'UrlReferrer': url})
def __extract_slides(self, *, stream_id, snum, Stream, duration, images):
slide_base_url = Stream['SlideBaseUrl']
-import re
-
from .common import InfoExtractor
from ..utils import js_to_json
IE_NAME = 'megaphone.fm'
IE_DESC = 'megaphone.fm embedded players'
_VALID_URL = r'https://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)'
+ _EMBED_REGEX = [rf'<iframe[^>]*?\ssrc=["\'](?P<url>{_VALID_URL})']
_TEST = {
'url': 'https://player.megaphone.fm/GLT9749789991?"',
'md5': '4816a0de523eb3e972dc0dda2c191f96',
'duration': episode_data['duration'],
'formats': formats,
}
-
- @classmethod
- def _extract_urls(cls, webpage):
- return [m[0] for m in re.findall(
- r'<iframe[^>]*?\ssrc=["\'](%s)' % cls._VALID_URL, webpage)]
IE_NAME = 'megatvcom:embed'
IE_DESC = 'megatv.com embedded videos'
_VALID_URL = r'(?:https?:)?//(?:www\.)?megatv\.com/embed/?\?p=(?P<id>\d+)'
- _EMBED_RE = re.compile(rf'''<iframe[^>]+?src=(?P<_q1>["'])(?P<url>{_VALID_URL})(?P=_q1)''')
+ _EMBED_REGEX = [rf'''<iframe[^>]+?src=(?P<_q1>["'])(?P<url>{_VALID_URL})(?P=_q1)''']
_TESTS = [{
'url': 'https://www.megatv.com/embed/?p=2020520979',
},
}]
- @classmethod
- def _extract_urls(cls, webpage):
- for mobj in cls._EMBED_RE.finditer(webpage):
- yield unescapeHTML(mobj.group('url'))
-
def _match_canonical_url(self, webpage):
LINK_RE = r'''(?x)
<link(?:
(?P<id>\d+)
)
'''
+ _EMBED_REGEX = [
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
+ r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
+ ]
_TESTS = [
{
'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
-import re
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
class MofosexEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=\d+)']
_TESTS = [{
'url': 'https://www.mofosex.com/embed/?videoid=318131&referrer=KM',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=\d+)',
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
IE_NAME = 'mtvservices:embedded'
_VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P<mgid>.+?)(\?|/|$)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1']
_TEST = {
# From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/
},
}
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1', webpage)
- if mobj:
- return mobj.group('url')
-
def _get_feed_url(self, uri, url=None):
video_id = self._id_from_uri(uri)
config = self._download_json(
-import re
-
from .common import InfoExtractor
from .vimple import SprutoBaseIE
)
(?P<id>[\da-zA-Z_-]+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//myvi\.(?:ru/player|tv)/(?:embed/html|flash)/[^"]+)\1']
_TESTS = [{
'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
'md5': '571bbdfba9f9ed229dc6d34cc0f335bf',
'only_matching': True,
}]
- @classmethod
- def _extract_url(cls, webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//myvi\.(?:ru/player|tv)/(?:embed/html|flash)/[^"]+)\1', webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
video_id = self._match_id(url)
class NBCSportsVPlayerIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
_VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
+ _EMBED_REGEX = [r'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>%s[^\"]+)' % _VALID_URL_BASE]
_TESTS = [{
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- video_urls = re.search(
- r'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>%s[^\"]+)' % NBCSportsVPlayerIE._VALID_URL_BASE, webpage)
- if video_urls:
- return video_urls.group('url')
-
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
class NBCNewsIE(ThePlatformIE):
_VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1']
_TESTS = [
{
webpage)
return mobj.group('id') if mobj else None
- @staticmethod
- def _extract_urls(webpage):
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
# Reference:
# 1. https://nx-s.akamaized.net/files/201510/44.pdf
return entries
- @staticmethod
- def _extract_url(webpage):
- return NexxIE._extract_urls(webpage)[0]
-
def _handle_error(self, response):
if traverse_obj(response, ('metadata', 'notice'), expected_type=str):
self.report_warning('%s said: %s' % (self.IE_NAME, response['metadata']['notice']))
class NexxEmbedIE(InfoExtractor):
_VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:video/)?(?P<id>[^/?#&]+)'
+ # Reference. https://nx-s.akamaized.net/files/201510/44.pdf
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1']
_TESTS = [{
'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1',
'md5': '16746bfc28c42049492385c989b26c4a',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- # Reference:
- # 1. https://nx-s.akamaized.net/files/201510/44.pdf
-
- # iFrame Embed Integration
- return [mobj.group('url') for mobj in re.finditer(
- r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1',
- webpage)]
-
def _real_extract(self, url):
embed_id = self._match_id(url)
class NYTimesIE(NYTimesBaseIE):
_VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>']
_TESTS = [{
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
-import re
-
from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
)
(?P<id>[\d-]+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1']
_TESTS = [{
'note': 'Coub embedded',
'url': 'http://ok.ru/video/1484130554189',
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
try:
return self._extract_desktop(url)
-import re
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import js_to_json
class OnionStudiosIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)'
+ _EMBED_REGEX = [r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1']
_TESTS = [{
'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1', webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
video_id = self._match_id(url)
determine_ext,
float_or_none,
int_or_none,
+ smuggle_url,
try_get,
unsmuggle_url,
)
}
]
+ def _extract_from_webpage(self, url, webpage):
+ mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage)
+ or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+ or re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+ or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)
+ or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
+ if mobj is not None:
+ embed_token = self._search_regex(
+ r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
+ webpage, 'ooyala embed token', default=None)
+ yield self._build_url_result(smuggle_url(
+ mobj.group('ec'), {
+ 'domain': url,
+ 'embed_token': embed_token,
+ }))
+ return
+
+ # Look for multiple Ooyala embeds on SBN network websites
+ mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
+ if mobj is not None:
+ for v in self._parse_json(mobj.group(1), self._generic_id(url), fatal=False) or []:
+ yield self._build_url_result(smuggle_url(v['provider_video_id'], {'domain': url}))
+
@staticmethod
def _url_for_embed_code(embed_code):
return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
-import re
import calendar
import json
import functools
def _parse_fragment(url):
return {k: json.loads(v[0]) for k, v in compat_urlparse.parse_qs(compat_urllib_parse_urlparse(url).fragment).items()}
- @staticmethod
- def _extract_urls(webpage):
- return [m.group('url') for m in re.finditer(
- r'<iframe[^>]+src=["\'](?P<url>%s/Pages/(Viewer|Embed|Sessions/List)\.aspx[^"\']+)' % PanoptoIE.BASE_URL_RE,
- webpage)]
-
class PanoptoIE(PanoptoBaseIE):
_VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)id=(?P<id>[a-f0-9-]+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>{PanoptoBaseIE.BASE_URL_RE}/Pages/(Viewer|Embed|Sessions/List)\.aspx[^"\']+)']
_TESTS = [
{
'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
)
(?P<id>%s)
''' % (_INSTANCES_RE, _UUID_RE)
+ # Must be an f-string so the class-level _INSTANCES_RE/_UUID_RE are interpolated;
+ # `cls` does not exist in the class body, so reference the names directly.
+ _EMBED_REGEX = [rf'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//{_INSTANCES_RE}/videos/embed/{_UUID_RE})''']
_TESTS = [{
'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
'md5': '8563064d245a4be5705bddb22bb00a28',
'>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
return 'peertube:%s:%s' % mobj.group('host', 'id')
- @staticmethod
- def _extract_urls(webpage, source_url):
- entries = re.findall(
- r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
- % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
- if not entries:
- peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
- if peertube_url:
- entries = [peertube_url]
- return entries
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ embeds = tuple(super()._extract_embed_urls(url, webpage))
+ if embeds:
+ return embeds
+
+ peertube_url = cls._extract_peertube_url(webpage, url)
+ if peertube_url:
+ return [peertube_url]
def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
return self._download_json(
-import re
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
IE_DESC = 'Periscope'
IE_NAME = 'periscope'
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1']
# Alive example URLs can be found here https://www.periscope.tv/
_TESTS = [{
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1', webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
token = self._match_id(url)
)\.jp|
vidego\.baltimorecity\.gov
)/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)']
_TESTS = [
{
'url': 'http://player.piksel.com/v/ums2867l',
}
]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)',
- webpage)
- if mobj:
- return mobj.group('url')
-
def _call_api(self, app_token, resource, display_id, query, fatal=True):
response = (self._download_json(
'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token),
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
)
(?P<id>\d+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1']
_TESTS = [{
'url': 'http://out.pladform.ru/player?pl=18079&type=html5&videoid=100231282',
'info_dict': {
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
video_id = self._match_id(url)
class PlaywireIE(InfoExtractor):
_VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1']
+
_TESTS = [{
'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json',
'md5': 'e6398701e3595888125729eaa2329ed9',
)
(?P<id>[\da-z]+)
''' % PornHubBaseIE._PORNHUB_HOST_RE
+ _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)']
_TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'md5': 'a6391306d050e4547f62b3f485dd9ba9',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
- webpage)
-
def _extract_count(self, pattern, webpage, name):
return str_to_int(self._search_regex(pattern, webpage, '%s count' % name, default=None))
(?:gazzanet\.)?gazzetta
)\.it)
/video-embed/(?P<id>[^/=&\?]+?)(?:$|\?)'''
+ _EMBED_REGEX = [r'''(?x)
+ (?:
+ data-frame-src=|
+ <iframe[^\n]+src=
+ )
+ (["'])
+ (?P<url>(?:https?:)?//video\.
+ (?:
+ rcs|
+ (?:corriere\w+\.)?corriere|
+ (?:gazzanet\.)?gazzetta
+ )
+ \.it/video-embed/.+?)
+ \1''']
_TESTS = [{
'url': 'https://video.rcs.it/video-embed/iodonna-0001585037',
'md5': '623ecc8ffe7299b2d0c1046d8331a9df',
urls[i] = urljoin(base_url(e), url_basename(e))
return urls
- @staticmethod
- def _extract_urls(webpage):
- entries = [
- mobj.group('url')
- for mobj in re.finditer(r'''(?x)
- (?:
- data-frame-src=|
- <iframe[^\n]+src=
- )
- (["'])
- (?P<url>(?:https?:)?//video\.
- (?:
- rcs|
- (?:corriere\w+\.)?corriere|
- (?:gazzanet\.)?gazzetta
- )
- \.it/video-embed/.+?)
- \1''', webpage)]
- return RCSEmbedsIE._sanitize_urls(entries)
-
- @staticmethod
- def _extract_url(webpage):
- urls = RCSEmbedsIE._extract_urls(webpage)
- return urls[0] if urls else None
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ return cls._sanitize_urls(tuple(super()._extract_embed_urls(url, webpage)))
class RCSIE(RCSBaseIE):
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
class RedTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)']
_TESTS = [{
'url': 'https://www.redtube.com/38864951',
'md5': '4fba70cbca3aefd25767ab4b523c9878',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
class RtlNlIE(InfoExtractor):
IE_NAME = 'rtl.nl'
IE_DESC = 'rtl.nl and rtlxl.nl'
+ _EMBED_REGEX = [r'<iframe[^>]+?\bsrc=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)(?P=q1)']
_VALID_URL = r'''(?x)
https?://(?:(?:www|static)\.)?
(?:
class RumbleEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
+ _EMBED_REGEX = [fr'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'https://rumble.com/embed/v5pv5f',
'md5': '36a18a049856720189f30977ccbb2c34',
}]
@classmethod
- def _extract_urls(cls, webpage):
- embeds = tuple(re.finditer(
- fr'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>{cls._VALID_URL})', webpage))
+ def _extract_embed_urls(cls, url, webpage):
+ embeds = tuple(super()._extract_embed_urls(url, webpage))
if embeds:
- return [mobj.group('url') for mobj in embeds]
+ return embeds
return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
r'<script>\s*Rumble\(\s*"play"\s*,\s*{\s*[\'"]video[\'"]\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
-import re
import itertools
from .common import InfoExtractor
IE_NAME = 'rutube'
IE_DESC = 'Rutube videos'
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1']
_TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
def suitable(cls, url):
return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url)
- @staticmethod
- def _extract_urls(webpage):
- return [mobj.group('url') for mobj in re.finditer(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1',
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._download_and_extract_info(video_id)
)
(?P<id>\d+)
'''
+ # Embed patterns: player <iframe> tags and og:video <meta> tags pointing at the flash container.
+ # The attribute is named _EMBED_REGEX like on the other extractors, and each closing-quote
+ # backreference sits outside the url group so the quote is not captured as part of the URL.
+ _EMBED_REGEX = [
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1',
+ r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?)\2',
+ ]
_TESTS = [
{
},
]
- @classmethod
- def _extract_url(cls, webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
- if mobj:
- return mobj.group('url')
-
- mobj = re.search(
- r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
- webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
_API_BASE = 'https://gatling.nelonenmedia.fi'
@classmethod
- def _extract_urls(cls, webpage):
+ def _extract_embed_urls(cls, url, webpage):
# nelonen.fi
settings = try_call(
lambda: json.loads(re.search(
.*?\bplay=|/watch/
)|news/(?:embeds/)?video/
)(?P<id>[0-9]+)'''
+ # Matches og:video <meta> tags and <iframe> embeds that point at sbs.com.au/ondemand/video pages.
+ # No literal character may follow the (?x) flag on the first line: it would become part of the pattern.
+ _EMBED_REGEX = [r'''(?x)
+ (?:
+ <meta\s+property="og:video"\s+content=|
+ <iframe[^>]+?src=
+ )
+ (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''']
_TESTS = [{
# Original URL is handled by the generic IE which finds the iframe:
class SenateISVPIE(InfoExtractor):
_IE_NAME = 'senate.gov:isvp'
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
+ _EMBED_REGEX = [r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]"]
_TESTS = [{
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
'only_matching': True,
}]
- @staticmethod
- def _search_iframe_url(webpage):
- mobj = re.search(
- r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]",
- webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
_URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s'
@classmethod
- def _extract_url(cls, webpage):
+ def _extract_embed_urls(cls, url, webpage):
+ # Yields at most one URL: only the first responsiveembed.php <script> tag is searched for,
+ # and its SC query value is substituted into _URL_TEMPLATE.
mobj = re.search(r'''(?x)<script[^>]+src=([\'"])
(?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
.*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
\1>''', webpage)
if mobj:
sc = mobj.group('SC')
- return cls._URL_TEMPLATE % sc
+ yield cls._URL_TEMPLATE % sc
def _real_extract(self, url):
playlist_id = self._match_id(url)
-import re
-
from .common import InfoExtractor
from ..compat import (
compat_str,
class SeznamZpravyIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?seznamzpravy\.cz/iframe/player\?.*\bsrc='
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?seznamzpravy\.cz/iframe/player\?.*?)\1']
_TESTS = [{
'url': 'https://www.seznamzpravy.cz/iframe/player?duration=241&serviceSlug=zpravy&src=https%3A%2F%2Fv39-a.sdn.szn.cz%2Fv_39%2Fvmd%2F5999c902ea707c67d8e267a9%3Ffl%3Dmdk%2C432f65a0%7C&itemType=video&autoPlay=false&title=Sv%C4%9Bt%20bez%20obalu%3A%20%C4%8Ce%C5%A1t%C3%AD%20voj%C3%A1ci%20na%20mis%C3%ADch%20(kr%C3%A1tk%C3%A1%20verze)&series=Sv%C4%9Bt%20bez%20obalu&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_F_I%2FR5puJ.jpeg%3Ffl%3Dcro%2C0%2C0%2C1920%2C1080%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=1920&height=1080&cutFrom=0&cutTo=0&splVersion=VOD&contentId=170889&contextId=35990&showAdvert=true&collocation=&autoplayPossible=true&embed=&isVideoTooShortForPreroll=false&isVideoTooLongForPostroll=true&videoCommentOpKey=&videoCommentId=&version=4.0.76&dotService=zpravy&gemiusPrismIdentifier=bVc1ZIb_Qax4W2v5xOPGpMeCP31kFfrTzj0SqPTLh_b.Z7&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5§ionPrefixPreroll=%2Fzpravy',
'info_dict': {
},
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url') for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?seznamzpravy\.cz/iframe/player\?.*?)\1',
- webpage)]
-
def _extract_sdn_formats(self, sdn_url, video_id):
sdn_data = self._download_json(sdn_url, video_id)
return self.playlist_result([
self.url_result(entry_url, ie=SeznamZpravyIE.ie_key())
- for entry_url in SeznamZpravyIE._extract_urls(webpage)],
+ for entry_url in SeznamZpravyIE._extract_embed_urls(url, webpage)],
article_id, title, description)
--- /dev/null
+from .common import InfoExtractor
+
+
+class ShareVideosEmbedIE(InfoExtractor):
+ _VALID_URL = False
+ _EMBED_REGEX = [r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1']
-import re
-
from .common import InfoExtractor
from ..utils import (
clean_podcast_url,
class SimplecastIE(SimplecastBaseIE):
IE_NAME = 'simplecast'
_VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX
+ _EMBED_REGEX = [rf'''(?x)<iframe[^>]+src=["\']
+ (?P<url>https?://(?:
+ embed\.simplecast\.com/[0-9a-f]{8}|
+ player\.simplecast\.com/{SimplecastBaseIE._UUID_REGEX}
+ ))''']
_COMMON_TEST_INFO = {
'display_id': 'errant-signal-chris-franklin-new-wave-video-essays',
'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'''(?x)<iframe[^>]+src=["\']
- (
- https?://(?:embed\.simplecast\.com/[0-9a-f]{8}|
- player\.simplecast\.com/%s
- ))''' % SimplecastBaseIE._UUID_REGEX, webpage)
-
def _real_extract(self, url):
episode_id = self._match_id(url)
episode = self._call_api('episodes/%s', episode_id)
class SoundcloudEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1']
_TEST = {
# from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/
'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey',
'only_matching': True,
}
- @staticmethod
- def _extract_urls(webpage):
- return [m.group('url') for m in re.finditer(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
- webpage)]
-
def _real_extract(self, url):
query = parse_qs(url)
api_url = query['url'][0]
)
(?P<id>\d+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?spankwire\.com/EmbedPlayer\.aspx/?\?.*?\bArticleId=\d+)']
_TESTS = [{
# download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?spankwire\.com/EmbedPlayer\.aspx/?\?.*?\bArticleId=\d+)',
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
class SportBoxIE(InfoExtractor):
_VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"']
_TESTS = [{
'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
'info_dict': {
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"',
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
}
_VALID_URL_TEMPL = r'https?://open\.spotify\.com/(?:embed-podcast/|embed/|)%s/(?P<id>[^/?&#]+)'
+ # <iframe> embeds of open.spotify.com/embed/...; every dot in the host name is escaped so it only matches a literal dot.
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://open\.spotify\.com/embed/[^"]+)"']
def _real_initialize(self):
self._ACCESS_TOKEN = self._download_json(
'series': series,
}
- @classmethod
- def _extract_urls(cls, webpage):
- return re.findall(
- r'<iframe[^>]+src="(https?://open\.spotify.com/embed/[^"]+)"',
- webpage)
-
class SpotifyIE(SpotifyBaseIE):
IE_NAME = 'spotify'
xml_feeds_advanced/index/(?P<index_2>\d+)/rss3/(?P<id_2>\d+)
)
'''
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1']
_TESTS = [{
'url': 'http://cms.springboardplatform.com/previews/159/video/981017/0/0/1',
'md5': '5c3cb7b5c55740d482561099e920f192',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1',
- webpage)]
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id') or mobj.group('id_2')
-import re
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
class StreamableIE(InfoExtractor):
_VALID_URL = r'https?://streamable\.com/(?:[es]/)?(?P<id>\w+)'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(?P<q1>[\'"])(?P<url>(?:https?:)?//streamable\.com/.+?)(?P=q1)']
_TESTS = [
{
'url': 'https://streamable.com/dnd1',
}
]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=(?P<q1>[\'"])(?P<src>(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)',
- webpage)
- if mobj:
- return mobj.group('src')
-
def _real_extract(self, url):
video_id = self._match_id(url)
}]
@classmethod
- def _extract_url(cls, webpage, url):
+ def _extract_embed_urls(cls, url, webpage):
+ # Only pages that load a script from substackcdn.com are considered; others yield nothing.
if not re.search(r'<script[^>]+src=["\']https://substackcdn.com/[^"\']+\.js', webpage):
return
+ # Take the "subdomain" value from the first matching {...} object in the page.
mobj = re.search(r'{[^}]*["\']subdomain["\']\s*:\s*["\'](?P<subdomain>[^"]+)', webpage)
if mobj:
parsed = urllib.parse.urlparse(url)
- return parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl()
+ # Same URL as the page, with the host replaced by <subdomain>.substack.com.
+ yield parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl()
+ # NOTE(review): presumably signals the caller to stop trying further embed extractors - confirm against StopExtraction.
+ raise cls.StopExtraction()
def _extract_video_formats(self, video_id, username):
formats, subtitles = [], {}
class SVTIE(SVTBaseIE):
_VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
+ _EMBED_REGEX = [r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % _VALID_URL]
_TEST = {
'url': 'http://www.svt.se/wd?widgetId=23991§ionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
},
}
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
widget_id = mobj.group('widget_id')
r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com',
webpage)
- @staticmethod
- def _extract_url(webpage, source_url):
- if not TeachableIE._is_teachable(webpage):
- return
- if re.match(r'https?://[^/]+/(?:courses|p)', source_url):
- return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ # Nothing is yielded unless _is_teachable() recognises the page.
+ if cls._is_teachable(webpage):
+ # Only page URLs whose path starts with "courses" or "p" are re-emitted, prefixed with _URL_PREFIX.
+ if re.match(r'https?://[^/]+/(?:courses|p)', url):
+ yield f'{cls._URL_PREFIX}{url}'
+ # NOTE(review): presumably stops further embed detection for this page - confirm against StopExtraction.
+ raise cls.StopExtraction()
def _real_extract(self, url):
mobj = self._match_valid_url(url)
webpage = self._download_webpage(url, video_id)
- wistia_urls = WistiaIE._extract_urls(webpage)
+ wistia_urls = WistiaIE._extract_embed_urls(url, webpage)
if not wistia_urls:
if any(re.search(p, webpage) for p in (
r'class=["\']lecture-contents-locked',
class TedEmbedIE(InfoExtractor):
_VALID_URL = r'https?://embed(?:-ssl)?\.ted\.com/'
+ _EMBED_REGEX = [rf'<iframe[^>]+?src=(["\'])(?P<url>{_VALID_URL}.+?)\1']
_TESTS = [{
'url': 'https://embed.ted.com/talks/janet_stovall_how_to_get_serious_about_diversity_and_inclusion_in_the_workplace',
},
}]
- @classmethod
- def _extract_urls(cls, webpage):
- return [mobj.group('url') for mobj in re.finditer(
- fr'<iframe[^>]+?src=(["\'])(?P<url>{cls._VALID_URL}.+?)\1', webpage)]
-
def _real_extract(self, url):
return self.url_result(re.sub(r'://embed(-ssl)?', '://www', url), TedTalkIE.ie_key())
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
(?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|theplatform:)(?P<id>[^/\?&]+)'''
+ _EMBED_REGEX = [
+ r'''(?x)
+ <meta\s+
+ property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
+ content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2''',
+ r'(?s)<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//player\.theplatform\.com/p/.+?)\1'
+ ]
_TESTS = [{
# from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
}]
@classmethod
- def _extract_urls(cls, webpage):
- m = re.search(
- r'''(?x)
- <meta\s+
- property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
- content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2
- ''', webpage)
- if m:
- return [m.group('url')]
-
+ def _extract_embed_urls(cls, url, webpage):
+ # Yield every URL found by the inherited implementation with all whitespace characters removed.
# Are whitespaces ignored in URLs?
# https://github.com/ytdl-org/youtube-dl/issues/12044
- matches = re.findall(
- r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
- if matches:
- return [re.sub(r'\s', '', list(zip(*matches))[1][0])]
+ for embed_url in super()._extract_embed_urls(url, webpage):
+ yield re.sub(r'\s', '', embed_url)
@staticmethod
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
-import re
-
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
IE_NAME = '3qsdn'
IE_DESC = '3Q SDN'
_VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _EMBED_REGEX = [r'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>%s.*?)\1' % _VALID_URL]
_TESTS = [{
# https://player.3qsdn.com/demo.html
'url': 'https://playout.3qsdn.com/7201c779-6b3c-11e7-a40e-002590c750be',
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>%s.*?)\1' % ThreeQSDNIE._VALID_URL, webpage)
- if mobj:
- return mobj.group('url')
+ def _extract_from_webpage(self, url, webpage):
+ # Re-emit each inherited result with '_type' forced to 'url_transparent' and an 'uploader' added.
+ for res in super()._extract_from_webpage(url, webpage):
+ yield {
+ **res,
+ '_type': 'url_transparent',
+ # Uploader is the host part of the embedding page URL (text before the first '/' after the optional scheme).
+ 'uploader': self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader'),
+ }
def _real_extract(self, url):
video_id = self._match_id(url)
import itertools
import json
import random
-import re
import string
import time
class TikTokIE(TikTokBaseIE):
_VALID_URL = r'https?://www\.tiktok\.com/(?:embed|@(?P<user_id>[\w\.-]+)/video)/(?P<id>\d+)'
+ _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
'only_matching': True
}]
- @classmethod
- def _extract_urls(cls, webpage):
- return [mobj.group('url') for mobj in re.finditer(
- rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{cls._VALID_URL})', webpage)]
-
def _extract_aweme_app(self, aweme_id):
try:
aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id,
-import re
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
_VALID_URL = r'https?://player\.(?:tna|emp)flix\.com/video/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.(?:tna|emp)flix\.com/video/\d+)\1']
_TITLE_REGEX = r'<title>([^<]+)</title>'
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [url for _, url in re.findall(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.(?:tna|emp)flix\.com/video/\d+)\1',
- webpage)]
-
class TNAEMPFlixBaseIE(TNAFlixNetworkBaseIE):
_DESCRIPTION_REGEX = r'(?s)>Description:</[^>]+>(.+?)<'
class Tube8IE(KeezMoviesIE):
_VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)']
_TESTS = [{
'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
'md5': '65e20c48e6abff62ed0c3965fff13a39',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)',
- webpage)
-
def _real_extract(self, url):
webpage, info = self._extract_info(url)
class TuneInBaseIE(InfoExtractor):
_API_BASE_URL = 'http://tunein.com/tuner/tune/'
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+src=["\'](?P<url>(?:https?://)?tunein\.com/embed/player/[pst]\d+)',
- webpage)
-
def _real_extract(self, url):
content_id = self._match_id(url)
class TuneInStationIE(TuneInBaseIE):
IE_NAME = 'tunein:station'
_VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-s|station/.*?StationId=|embed/player/s)(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?://)?tunein\.com/embed/player/[pst]\d+)']
_API_URL_QUERY = '?tuneType=Station&stationId=%s'
@classmethod
-import re
-
from .common import InfoExtractor
from ..utils import (
clean_html,
class TVCIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)'
+ # <iframe> embeds of tvc.ru/video/iframe/id/...; accepts http:, https: and protocol-relative src values,
+ # in line with _VALID_URL which accepts both schemes.
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?tvc\.ru/video/iframe/id/[^"]+)\1']
_TEST = {
'url': 'http://www.tvc.ru/video/iframe/id/74622/isPlay/false/id_stat/channel/?acc_video_id=/channel/brand/id/17/show/episodes/episode_id/39702',
'md5': 'bbc5ff531d1e90e856f60fc4b3afd708',
},
}
- @classmethod
- def _extract_url(cls, webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:http:)?//(?:www\.)?tvc\.ru/video/iframe/id/[^"]+)\1', webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
video_id = self._match_id(url)
IE_NAME = 'tvigle'
IE_DESC = 'Интернет-телевидение Tvigle.ru'
_VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1']
_GEO_BYPASS = False
_GEO_COUNTRIES = ['RU']
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
get_elements_text_and_html_by_attribute,
scale_thumbnails_to_max_format_width,
- unescapeHTML,
)
IE_NAME = 'tvopengr:embed'
IE_DESC = 'tvopen.gr embedded videos'
_VALID_URL = r'(?:https?:)?//(?:www\.|cdn\.|)(?:tvopen|ethnos).gr/embed/(?P<id>\d+)'
- _EMBED_RE = re.compile(rf'''<iframe[^>]+?src=(?P<_q1>["'])(?P<url>{_VALID_URL})(?P=_q1)''')
+ _EMBED_REGEX = [rf'''<iframe[^>]+?src=(?P<_q1>["'])(?P<url>{_VALID_URL})(?P=_q1)''']
_TESTS = [{
'url': 'https://cdn.ethnos.gr/embed/100963',
},
}]
- @classmethod
- def _extract_urls(cls, webpage):
- for mobj in cls._EMBED_RE.finditer(webpage):
- yield unescapeHTML(mobj.group('url'))
-
def _real_extract(self, url):
video_id = self._match_id(url)
return self._return_canonical_url(url, video_id)
=)
(?P<id>\d+)
'''
+ _EMBED_REGEX = [rf'(?x)<iframe[^>]+?src=(["\'])(?P<url>{_VALID_URL[4:]})']
_TESTS = [{
'url': 'tvp:194536',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage, **kw):
- return [m.group('embed') for m in re.finditer(
- r'(?x)<iframe[^>]+?src=(["\'])(?P<embed>%s)' % TVPEmbedIE._VALID_URL[4:],
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
-import re
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
)
(?P<id>\d+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1']
_TESTS = [{
'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
'md5': 'e7264320db31eed8c38364150c12496e',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [m.group('url') for m in re.finditer(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1',
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
IE_DESC = '聯合影音'
_PROTOCOL_RELATIVE_VALID_URL = r'//video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
_VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % _PROTOCOL_RELATIVE_VALID_URL]
_TESTS = [{
'url': 'http://video.udn.com/embed/news/300040',
'info_dict': {
class UstreamIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
IE_NAME = 'ustream'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1']
_TESTS = [{
'url': 'http://www.ustream.tv/recorded/20274954',
'md5': '088f151799e8f572f84eb62f17d73e5c',
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage)
- if mobj is not None:
- return mobj.group('url')
-
def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None):
def num_to_hex(n):
return hex(n)[2:]
-import re
-
from .common import InfoExtractor
from ..utils import ExtractorError
)
(?P<id>[\da-fA-F]+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)']
_GEO_COUNTRIES = ['BG']
_TESTS = [{
'url': 'http://vbox7.com/play:0946fff23c',
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)',
- webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
video_id = self._match_id(url)
https?://tv\.vevo\.com/watch/artist/(?:[^/]+)/|
vevo:)
(?P<id>[^&?#]+)'''
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1']
_TESTS = [{
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
import hashlib
import json
import random
-import re
import time
from .adobepass import AdobePassIE
class ViceIE(ViceBaseIE, AdobePassIE):
IE_NAME = 'vice'
_VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]{24})'
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})']
_TESTS = [{
'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
'info_dict': {
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})',
- webpage)
-
- @staticmethod
- def _extract_url(webpage):
- urls = ViceIE._extract_urls(webpage)
- return urls[0] if urls else None
-
def _real_extract(self, url):
locale, video_id = self._match_valid_url(url).groups()
class ViddlerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)(?:.+?\bsecret=(\d+))?'
+ _EMBED_REGEX = [r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1']
+
_TESTS = [{
'url': 'http://www.viddler.com/v/43903784',
'md5': '9eee21161d2c7f5b39690c3e325fab2f',
import random
-import re
import string
import struct
)
(?P<id>[^?#&]+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1']
_TESTS = [{
'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
}]
_STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
- @staticmethod
- def _extract_urls(webpage):
- return [url for _, url in re.findall(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
- webpage)]
-
@staticmethod
def rc4(cipher_text, key):
res = b''
-import re
-
from .common import InfoExtractor
from ..compat import (
compat_str,
(?P<id>\d+)
(?:[/?#&]|\.(?:xml|json)|$)
'''
+ _EMBED_REGEX = [r'''(?x)
+ (?:
+ <iframe[^>]+src=([\'"])|
+ <object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=
+ )(?P<url>https?://videomore\.ru/[^?#"']+/\d+(?:\.xml)?)
+ ''']
_TESTS = [{
'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617',
'md5': '44455a346edc0d509ac5b5a5b531dc35',
}]
_GEO_BYPASS = False
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1',
- webpage)
- if not mobj:
- mobj = re.search(
- r'<iframe[^>]+src=([\'"])(?P<url>https?://videomore\.ru/embed/\d+)',
- webpage)
-
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('sid') or mobj.group('id')
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
_ID_REGEX = r'[\da-zA-Z]{8}'
_PATH_REGEX = r'video(?:\.word)?press\.com/embed/'
_VALID_URL = r'https?://%s(?P<id>%s)' % (_PATH_REGEX, _ID_REGEX)
+ _EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>(?:https?://)?{_PATH_REGEX}{_ID_REGEX})']
_TESTS = [{
'url': 'https://videopress.com/embed/kUJmAcSf',
'md5': '706956a6c875873d51010921310e4bc6',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+src=["\']((?:https?://)?%s%s)' % (VideoPressIE._PATH_REGEX, VideoPressIE._ID_REGEX),
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
import json
-import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
class ViewLiftEmbedIE(ViewLiftBaseIE):
IE_NAME = 'viewlift:embed'
_VALID_URL = r'https?://(?:(?:www|embed)\.)?(?P<domain>%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX]
_TESTS = [{
'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
'md5': '2924e9215c6eff7a55ed35b72276bd93',
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX,
- webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
domain, film_id = self._match_valid_url(url).groups()
site = domain.split('.')[-2]
unsmuggle_url,
urlencode_postdata,
urljoin,
- unescapeHTML,
urlhandle_detect_ext,
)
/?(?:[?&].*)?(?:[#].*)?$
'''
IE_NAME = 'vimeo'
+ _EMBED_REGEX = [
+ # iframe
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
+ # Embedded (swf embed) Vimeo player
+ r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
+ # Non-standard embedded Vimeo player
+ r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1',
+ ]
_TESTS = [
{
'url': 'http://vimeo.com/56015672#at=0',
# vimeo embed with check-password page protected by Referer header
]
- @staticmethod
- def _extract_urls(url, webpage):
- urls = []
- # Look for embedded (iframe) Vimeo player
- for mobj in re.finditer(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
- webpage):
- urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
- PLAIN_EMBED_RE = (
- # Look for embedded (swf embed) Vimeo player
- r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
- # Look more for non-standard embedded Vimeo player
- r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1',
- )
- for embed_re in PLAIN_EMBED_RE:
- for mobj in re.finditer(embed_re, webpage):
- urls.append(mobj.group('url'))
- return urls
-
- @staticmethod
- def _extract_url(url, webpage):
- urls = VimeoIE._extract_urls(url, webpage)
- return urls[0] if urls else None
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ # Pass every inherited embed URL, together with the embedding page's URL, through _smuggle_referrer.
+ for embed_url in super()._extract_embed_urls(url, webpage):
+ yield cls._smuggle_referrer(embed_url, url)
def _verify_player_video_password(self, url, video_id, headers):
password = self._get_video_password()
class VHXEmbedIE(VimeoBaseInfoExtractor):
IE_NAME = 'vhx:embed'
_VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://embed\.vhx\.tv/videos/\d+[^"]*)"']
- @staticmethod
- def _extract_url(url, webpage):
- mobj = re.search(
- r'<iframe[^>]+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage)
- return VimeoIE._smuggle_referrer(unescapeHTML(mobj.group(1)), url) if mobj else None
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ # Pass every inherited embed URL, together with the embedding page's URL, through _smuggle_referrer.
+ for embed_url in super()._extract_embed_urls(url, webpage):
+ yield cls._smuggle_referrer(embed_url, url)
def _real_extract(self, url):
video_id = self._match_id(url)
class VineIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vine\.co/(?:v|oembed)/(?P<id>\w+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))']
_TESTS = [{
'url': 'https://vine.co/v/b9KOOWX7HUx',
'md5': '2f36fed6235b16da96ce9b4dc890940d',
-import re
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
)
(?P<id>[\da-f]+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1']
_TESTS = [{
'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
'md5': 'a169dd1a6426b350dca4296226f21e76',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1',
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
class VKIE(VKBaseIE):
IE_NAME = 'vk'
IE_DESC = 'VK'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1']
_VALID_URL = r'''(?x)
https?://
(?:
(?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
)
'''
_TESTS = [
{
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
'only_matching': True,
}]
+ # Sibnet iframes are matched by this dedicated helper rather than by a second
+ # `_EMBED_REGEX` assignment: rebinding that class attribute would discard the
+ # vk.com `video_ext.php` pattern declared at the top of the class.
@staticmethod
def _extract_sibnet_urls(webpage):
# https://help.sibnet.ru/?sibnet_video_embed
return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1',
webpage)]

def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('videoid')
m_rutube.group(1).replace('\\', ''))
return self.url_result(rutube_url)
- dailymotion_urls = DailymotionIE._extract_urls(info_page)
- if dailymotion_urls:
- return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())
+ # `_extract_embed_urls` is consumed as an iterable elsewhere (wrapped in list() or
+ # looped over), so take the first match with next() instead of truth-testing
+ # and indexing the returned object.
+ dailymotion_url = next(iter(DailymotionIE._extract_embed_urls(url, info_page)), None)
+ if dailymotion_url:
+ return self.url_result(dailymotion_url, DailymotionIE.ie_key())
if odnoklassniki_url:
return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
sibnet_urls = self._extract_sibnet_urls(info_page)
if sibnet_urls:
return self.url_result(sibnet_urls[0])
class VODPlatformIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/(?P<id>[^/?#]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1']
_TESTS = [{
# from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar
'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw',
class VoxMediaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked|funnyordie)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src="(?P<url>https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"']
_TESTS = [{
# Volume embed, Youtube
'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of',
-import re
-
from .common import InfoExtractor
from ..utils import ExtractorError, decode_packed_codes
class VShareIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)']
_TESTS = [{
'url': 'https://vshare.io/d/0f64ce6',
'md5': '17b39f55b5497ae8b59f5fbce8e35886',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
- webpage)
-
def _extract_packed(self, webpage):
packed = self._search_regex(
r'(eval\(function.+)', webpage, 'packed code')
-import re
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
class VzaarIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)']
_TESTS = [{
# HTTP and HLS
'url': 'https://vzaar.com/videos/1152805',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)',
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
class WashingtonPostIE(InfoExtractor):
IE_NAME = 'washingtonpost'
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/(?:video|posttv)/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
- _EMBED_URL = r'https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})']
_TESTS = [{
'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
'only_matching': True,
}]
- @classmethod
- def _extract_urls(cls, webpage):
- return re.findall(
- r'<iframe[^>]+\bsrc=["\'](%s)' % cls._EMBED_URL, webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
class WebcasterFeedIE(InfoExtractor):
_VALID_URL = r'https?://bl\.webcaster\.pro/feed/start/free_(?P<id>[^/]+)'
+ _EMBED_REGEX = [r'<(?:object|a[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_.*?)(?:[?&]|\1)']
_TEST = {
'url': 'http://bl.webcaster.pro/feed/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104',
'only_matching': True,
}
- @staticmethod
- def _extract_url(ie, webpage):
- mobj = re.search(
- r'<(?:object|a[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_.*?)(?:[?&]|\1)',
- webpage)
- if mobj:
- return mobj.group('url')
+ def _extract_from_webpage(self, url, webpage):
+ """Yield embedded Webcaster feeds found in `webpage`.
+
+ First yields whatever the base implementation finds, then falls back to
+ the feed URL carried in the `config=` parameter of the og:video URL.
+ """
+ yield from super()._extract_from_webpage(url, webpage)
+
for secure in (True, False):
- video_url = ie._og_search_video_url(
- webpage, secure=secure, default=None)
+ video_url = self._og_search_video_url(webpage, secure=secure, default=None)
if video_url:
mobj = re.search(
r'config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_[^?&=]+)',
video_url)
if mobj:
- return mobj.group('url')
+ yield self.url_result(mobj.group('url'), self)
+ # Stop at the first hit, as the replaced `return` did; otherwise the
+ # secure and non-secure passes can both emit the same feed.
+ return
def _real_extract(self, url):
video_id = self._match_id(url)
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
)
(?P<type>vod|live|cast)[=/]
(?P<id>%s).*?)''' % _UUID_RE
+ _EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>{_VALID_URL})']
_TESTS = [{
# vod stream
'url': 'https://platform.wim.tv/embed/?vod=db29fb32-bade-47b6-a3a6-cb69fe80267a',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe[^>]+src=["\'](?P<url>%s)' % WimTVIE._VALID_URL,
- webpage)]
-
def _real_initialize(self):
if not self._player:
self._get_player_data()
ExtractorError,
float_or_none,
int_or_none,
+ try_call,
try_get,
- unescapeHTML,
)
class WistiaIE(WistiaBaseIE):
_VALID_URL = r'(?:wistia:|%s(?:iframe|medias)/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)
-
+ _EMBED_REGEX = [r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})']
_TESTS = [{
# with hls video
'url': 'wistia:807fafadvk',
}]
# https://wistia.com/support/embed-and-share/video-on-your-website
- @staticmethod
- def _extract_url(webpage):
- urls = WistiaIE._extract_urls(webpage)
- return urls[0] if urls else None
-
- @staticmethod
- def _extract_urls(webpage):
- urls = []
- for match in re.finditer(
- r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
- urls.append(unescapeHTML(match.group('url')))
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ urls = list(super()._extract_embed_urls(url, webpage))
+
for match in re.finditer(
r'''(?sx)
<div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
urls.append('wistia:%s' % match.group('id'))
return urls
+ @classmethod
+ def _extract_from_webpage(cls, url, webpage):
+ """Yield the base implementation's results as `url_transparent` entries.
+
+ Nothing is yielded when the page contains Teachable embeds. Each yielded
+ entry gets an `uploader` derived from the host part of `url`.
+ """
+ from .teachable import TeachableIE
+
+ # Pages carrying Teachable embeds are skipped here entirely
+ teachable_embeds = list(TeachableIE._extract_embed_urls(url, webpage))
+ if teachable_embeds:
+ return
+
+ for result in super()._extract_from_webpage(url, webpage):
+ transparent = dict(result)
+ transparent['_type'] = 'url_transparent'
+ # Host of the embedding page; the lookup is wrapped in try_call
+ transparent['uploader'] = try_call(lambda: re.match(r'(?:https?://)?([^/]+)/', url).group(1))
+ yield transparent
+
def _real_extract(self, url):
video_id = self._match_id(url)
embed_config = self._download_embed_config('media', video_id, url)
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
_VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
% '|'.join(site for site in list(zip(*_SITES))[0]))
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' % '|'.join(site for site in list(zip(*_SITES))[0])]
_FILE_NOT_FOUND_REGEXES = (
r'>(?:404 - )?File Not Found<',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
- % '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]),
- webpage)]
-
def _real_extract(self, url):
host, video_id = self._match_valid_url(url).groups()
class XHamsterEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1']
_TEST = {
'url': 'http://xhamster.com/xembed.php?video=3328539',
'info_dict': {
}
}
- @staticmethod
- def _extract_urls(webpage):
- return [url for _, url in re.findall(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1',
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
_VALID_URL = r'(?P<url>https?://(?:(?P<country>[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P<id>[^?&#]*-[0-9]+(?:-[a-z]+)?)\.html)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1']
+
_TESTS = [{
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
'info_dict': {
if items.get('markup'):
entries.extend(
- self.url_result(yt_url) for yt_url in YoutubeIE._extract_urls(items['markup']))
+ self.url_result(yt_url) for yt_url in YoutubeIE._extract_embed_urls(url, items['markup']))
return self.playlist_result(
entries, item.get('uuid'),
-import re
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
qualities,
- unescapeHTML,
url_or_none,
)
class YapFilesIE(InfoExtractor):
_YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)'
_VALID_URL = r'https?:%s' % _YAPFILES_URL
+ _EMBED_REGEX = [rf'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?{_YAPFILES_URL}.*?)\1']
_TESTS = [{
# with hd
'url': 'http://www.yapfiles.ru/get_player/?v=vMDE1NjcyNDUt0413',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
- % YapFilesIE._YAPFILES_URL, webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
class YouPornIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)']
_TESTS = [{
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
'md5': '3744d24c50438cf5b6f6d59feb5055c2',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)',
- webpage)
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
(?:\#|$)""" % {
'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}
+ # Patterns for embedded YouTube players. The embedSWF alternative must consume the
+ # opening parenthesis of the call (`embedSWF("<url>"`); spelling it `\(?:` would
+ # instead demand a literal colon and never match a real call.
+ _EMBED_REGEX = [r'''(?x)
+ (?:
+ <iframe[^>]+?src=|
+ data-video-url=|
+ <embed[^>]+?src=|
+ embedSWF\(\s*|
+ <object[^>]+data=|
+ new\s+SWFObject\(
+ )
+ (["\'])
+ (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+ (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
+ \1''']
_PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
url, video_id, f'Marking {label}watched',
'Unable to mark watched', fatal=False)
- @staticmethod
- def _extract_urls(webpage):
- # Embedded YouTube player
- entries = [
- unescapeHTML(mobj.group('url'))
- for mobj in re.finditer(r'''(?x)
- (?:
- <iframe[^>]+?src=|
- data-video-url=|
- <embed[^>]+?src=|
- embedSWF\(?:\s*|
- <object[^>]+data=|
- new\s+SWFObject\(
- )
- (["\'])
- (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
- (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
- \1''', webpage)]
+ @classmethod
+ def _extract_from_webpage(cls, url, webpage):
+ """Yield url results for YouTube videos referenced by `webpage`.
+
+ If the page has a `<link rel="alternate">` whose href is a
+ www.youtube.com watch URL, only that result is yielded and
+ `cls.StopExtraction` is raised. Otherwise yields the base
+ implementation's results, then lazyYT ids, then ids from the
+ Wordpress "YouTube Video Importer" plugin markup.
+ """
+ # Invidious Instances
+ # https://github.com/yt-dlp/yt-dlp/issues/195
+ # https://github.com/iv-org/invidious/pull/1730
+ mobj = re.search(
+ r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
+ webpage)
+ if mobj:
+ yield cls.url_result(mobj.group('url'), cls)
+ # Raised after the single result is yielded, so none of the checks below run
+ # for such pages (presumably the caller treats this as "stop looking" - confirm)
+ raise cls.StopExtraction()
+
+ yield from super()._extract_from_webpage(url, webpage)
# lazyYT YouTube embed
- entries.extend(list(map(
- unescapeHTML,
- re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
+ for id_ in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
+ yield cls.url_result(unescapeHTML(id_), cls, id_)
# Wordpress "YouTube Video Importer" plugin
- matches = re.findall(r'''(?x)<div[^>]+
- class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
- data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
- entries.extend(m[-1] for m in matches)
-
- return entries
-
- @staticmethod
- def _extract_url(webpage):
- urls = YoutubeIE._extract_urls(webpage)
- return urls[0] if urls else None
+ for m in re.findall(r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+ data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
+ yield cls.url_result(m[-1], cls, m[-1])
@classmethod
def extract_id(cls, url):
class ZapiksIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"']
_TESTS = [
{
'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
_ID_RE = r'[\da-fA-F]+'
_COMMON_RE = r'//player\.zype\.com/embed/%s\.(?:js|json|html)\?.*?(?:access_token|(?:ap[ip]|player)_key)='
_VALID_URL = r'https?:%s[^&]+' % (_COMMON_RE % ('(?P<id>%s)' % _ID_RE))
+ _EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?{_COMMON_RE % _ID_RE}.+?)\1']
_TEST = {
'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
'md5': 'eaee31d474c76a955bdaba02a505c595',
},
}
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.+?)\1' % (ZypeIE._COMMON_RE % ZypeIE._ID_RE),
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)