mimetype2ext,
orderedSet,
parse_duration,
+ parse_resolution,
sanitized_Request,
smuggle_url,
unescapeHTML,
from .myvi import MyviIE
from .condenast import CondeNastIE
from .udn import UDNEmbedIE
-from .senateisvp import SenateISVPIE
+from .senategov import SenateISVPIE
from .svt import SVTIE
from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE
from .arte import ArteTVEmbedIE
from .videopress import VideoPressIE
from .rutube import RutubeIE
+from .glomex import GlomexEmbedIE
+from .megatvcom import MegaTVComEmbedIE
from .limelight import LimelightBaseIE
from .anvato import AnvatoIE
from .washingtonpost import WashingtonPostIE
from .vshare import VShareIE
from .mediasite import MediasiteIE
from .springboardplatform import SpringboardPlatformIE
+from .ted import TedEmbedIE
from .yapfiles import YapFilesIE
from .vice import ViceIE
from .xfileshare import XFileShareIE
from .medialaan import MedialaanIE
from .simplecast import SimplecastIE
from .wimtv import WimTVIE
+from .tvopengr import TVOpenGrEmbedIE
+from .ertgr import ERTWebtvEmbedIE
from .tvp import TVPEmbedIE
from .blogger import BloggerIE
+from .mainstreaming import MainStreamingIE
+from .gfycat import GfycatIE
class GenericIE(InfoExtractor):
'duration': 45.115,
},
},
- # 5min embed
- {
- 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
- 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
- 'info_dict': {
- 'id': '518726732',
- 'ext': 'mp4',
- 'title': 'Facebook Creates "On This Day" | Crunch Report',
- 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
- 'timestamp': 1427237531,
- 'uploader': 'Crunch Report',
- 'upload_date': '20150324',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
# Crooks and Liars embed
{
'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
},
'add_ie': [RutubeIE.ie_key()],
},
+ {
+ # glomex:embed
+ 'url': 'https://www.skai.gr/news/world/iatrikos-syllogos-tourkias-to-turkovac-aplo-dialyma-erntogan-eiste-apateones-kai-pseytes',
+ 'info_dict': {
+ 'id': 'v-ch2nkhcirwc9-sf',
+ 'ext': 'mp4',
+ 'title': 'md5:786e1e24e06c55993cee965ef853a0c1',
+ 'description': 'md5:8b517a61d577efe7e36fde72fd535995',
+ 'timestamp': 1641885019,
+ 'upload_date': '20220111',
+ 'duration': 460000,
+ 'thumbnail': 'https://i3thumbs.glomex.com/dC1idjJwdndiMjRzeGwvMjAyMi8wMS8xMS8wNy8xMF8zNV82MWRkMmQ2YmU5ZTgyLmpwZw==/profile:player-960x540',
+ },
+ },
+ {
+ # megatvcom:embed
+ 'url': 'https://www.in.gr/2021/12/18/greece/apokalypsi-mega-poios-parelave-tin-ereyna-tsiodra-ek-merous-tis-kyvernisis-o-prothypourgos-telika-gnorize/',
+ 'info_dict': {
+ 'id': 'apokalypsi-mega-poios-parelave-tin-ereyna-tsiodra-ek-merous-tis-kyvernisis-o-prothypourgos-telika-gnorize',
+ 'title': 'md5:5e569cf996ec111057c2764ec272848f',
+ },
+ 'playlist': [{
+ 'md5': '1afa26064ff00ccb91617957dbc73dc1',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': '564916',
+ 'display_id': 'md5:6cdf22d3a2e7bacb274b7295089a1770',
+ 'title': 'md5:33b9dd39584685b62873043670eb52a6',
+ 'description': 'md5:c1db7310f390518ac36dd69d947ef1a1',
+ 'timestamp': 1639753145,
+ 'upload_date': '20211217',
+ 'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/12/prezerakos-1024x597.jpg',
+ },
+ }, {
+ 'md5': '4a1c220695f1ef865a8b7966a53e2474',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': '564905',
+ 'display_id': 'md5:ead15695e485e649aed2b81ebd699b88',
+ 'title': 'md5:2b71fd54249a3ca34609fe39ae31c47b',
+ 'description': 'md5:c42e12f638d0a97d6de4508e2c4df982',
+ 'timestamp': 1639753047,
+ 'upload_date': '20211217',
+ 'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/12/tsiodras-mitsotakis-1024x545.jpg',
+ },
+ }]
+ },
+ {
+ 'url': 'https://www.ertnews.gr/video/manolis-goyalles-o-anthropos-piso-apo-ti-diadiktyaki-vasilopita/',
+ 'info_dict': {
+ 'id': '2022/tv/news-themata-ianouarios/20220114-apotis6-gouales-pita.mp4',
+ 'ext': 'mp4',
+ 'title': 'md5:df64f5b61c06d0e9556c0cdd5cf14464',
+ 'thumbnail': 'https://www.ert.gr/themata/photos/2021/20220114-apotis6-gouales-pita.jpg',
+ },
+ },
{
# ThePlatform embedded with whitespaces in URLs
'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
'skip_download': True,
},
},
+ {
+ # tvopengr:embed
+ 'url': 'https://www.ethnos.gr/World/article/190604/hparosiaxekinoynoisynomiliessthgeneyhmethskiatoypolemoypanoapothnoykrania',
+ 'md5': 'eb0c3995d0a6f18f6538c8e057865d7d',
+ 'info_dict': {
+ 'id': '101119',
+ 'ext': 'mp4',
+ 'display_id': 'oikarpoitondiapragmateyseonhparosias',
+ 'title': 'md5:b979f4d640c568617d6547035528a149',
+ 'description': 'md5:e54fc1977c7159b01cc11cd7d9d85550',
+ 'timestamp': 1641772800,
+ 'upload_date': '20220110',
+ 'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/70bc39fa-895b-4918-a364-c39d2135fc6d.jpg',
+
+ }
+ },
{
# blogger embed
'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html',
'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
}
},
+ {
+ # KVS Player (for sites that serve kt_player.js via non-https urls)
+ 'url': 'http://www.camhub.world/embed/389508',
+ 'md5': 'fbe89af4cfb59c8fd9f34a202bb03e32',
+ 'info_dict': {
+ 'id': '389508',
+ 'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source',
+ 'ext': 'mp4',
+ 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер',
+ 'thumbnail': 'http://www.camhub.world/contents/videos_screenshots/389000/389508/preview.mp4.jpg',
+ }
+ },
{
# Reddit-hosted video that will redirect and be processed by RedditIE
# Redirects to https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
'timestamp': 1636788683.0,
'upload_date': '20211113'
}
+ },
+ {
+ # MainStreaming player
+ 'url': 'https://www.lactv.it/2021/10/03/lac-news24-la-settimana-03-10-2021/',
+ 'info_dict': {
+ 'id': 'EUlZfGWkGpOd',
+ 'title': 'La Settimana ',
+ 'description': '03 Ottobre ore 02:00',
+ 'ext': 'mp4',
+ 'live_status': 'not_live',
+ 'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
+ 'duration': 1512
+ }
+ },
+ {
+ # Multiple gfycat iframe embeds
+ 'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=613422',
+ 'info_dict': {
+ 'title': '재이, 윤, 세은 황금 드레스를 입고 빛난다',
+ 'id': 'board'
+ },
+ 'playlist_count': 8,
+ },
+ {
+ # Multiple gfycat gifs (direct links)
+ 'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=612199',
+ 'info_dict': {
+ 'title': '옳게 된 크롭 니트 스테이씨 아이사',
+ 'id': 'board'
+ },
+ 'playlist_count': 6
+ },
+ {
+ # Multiple gfycat embeds, with uppercase "IFR" in urls
+ 'url': 'https://kkzz.kr/?vid=2295',
+ 'info_dict': {
+ 'title': '지방시 앰버서더 에스파 카리나 움짤',
+ 'id': '?vid=2295'
+ },
+ 'playlist_count': 9
}
- #
]
def report_following_redirect(self, new_url):
subtitles = {}
if format_id.endswith('mpegurl'):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
+ elif format_id.endswith('mpd') or format_id.endswith('dash+xml'):
+ formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id)
elif format_id == 'f4m':
formats = self._extract_f4m_formats(url, video_id)
else:
return self.url_result(mobj.group('url'), 'Tvigle')
# Look for embedded TED player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'TED')
+ ted_urls = TedEmbedIE._extract_urls(webpage)
+ if ted_urls:
+ return self.playlist_from_matches(ted_urls, video_id, video_title, ie=TedEmbedIE.ie_key())
# Look for embedded Ustream videos
ustream_url = UstreamIE._extract_url(webpage)
if mobj is not None:
return self.url_result(mobj.group('url'))
- # Look for 5min embeds
- mobj = re.search(
- r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
- if mobj is not None:
- return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
-
# Look for Crooks and Liars embeds
mobj = re.search(
r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
return self.playlist_from_matches(
rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
+ # Look for Glomex embeds
+ glomex_urls = list(GlomexEmbedIE._extract_urls(webpage, url))
+ if glomex_urls:
+ return self.playlist_from_matches(
+ glomex_urls, video_id, video_title, ie=GlomexEmbedIE.ie_key())
+
+ # Look for megatv.com embeds
+ megatvcom_urls = list(MegaTVComEmbedIE._extract_urls(webpage))
+ if megatvcom_urls:
+ return self.playlist_from_matches(
+ megatvcom_urls, video_id, video_title, ie=MegaTVComEmbedIE.ie_key())
+
# Look for WashingtonPost embeds
wapo_urls = WashingtonPostIE._extract_urls(webpage)
if wapo_urls:
return self.playlist_from_matches(
rumble_urls, video_id, video_title, ie=RumbleEmbedIE.ie_key())
+ # Look for (tvopen|ethnos).gr embeds
+ tvopengr_urls = list(TVOpenGrEmbedIE._extract_urls(webpage))
+ if tvopengr_urls:
+ return self.playlist_from_matches(tvopengr_urls, video_id, video_title, ie=TVOpenGrEmbedIE.ie_key())
+
+ # Look for ert.gr webtv embeds
+ ertwebtv_urls = list(ERTWebtvEmbedIE._extract_urls(webpage))
+ if len(ertwebtv_urls) == 1:
+ return self.url_result(self._proto_relative_url(ertwebtv_urls[0]), video_title=video_title, url_transparent=True)
+ elif ertwebtv_urls:
+ return self.playlist_from_matches(ertwebtv_urls, video_id, video_title, ie=ERTWebtvEmbedIE.ie_key())
+
tvp_urls = TVPEmbedIE._extract_urls(webpage)
if tvp_urls:
return self.playlist_from_matches(tvp_urls, video_id, video_title, ie=TVPEmbedIE.ie_key())
+ # Look for MainStreaming embeds
+ mainstreaming_urls = MainStreamingIE._extract_urls(webpage)
+ if mainstreaming_urls:
+ return self.playlist_from_matches(mainstreaming_urls, video_id, video_title, ie=MainStreamingIE.ie_key())
+
+ # Look for Gfycat Embeds
+ gfycat_urls = GfycatIE._extract_urls(webpage)
+ if gfycat_urls:
+ return self.playlist_from_matches(gfycat_urls, video_id, video_title, ie=GfycatIE.ie_key())
+
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(webpage, video_id, default={})
- if json_ld.get('url'):
+ if json_ld.get('url') not in (url, None):
self.report_detected('JSON LD')
+ if determine_ext(json_ld['url']) == 'm3u8':
+ json_ld['formats'], json_ld['subtitles'] = self._extract_m3u8_formats_and_subtitles(
+ json_ld['url'], video_id, 'mp4')
+ json_ld.pop('url')
+ self._sort_formats(json_ld['formats'])
+ else:
+ json_ld['_type'] = 'url_transparent'
+ json_ld['url'] = smuggle_url(json_ld['url'], {'force_videoid': video_id, 'to_generic': True})
return merge_dicts(json_ld, info_dict)
def check_video(vurl):
self.report_detected('JW Player embed')
if not found:
# Look for generic KVS player
- found = re.search(r'<script [^>]*?src="https://.+?/kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)".*?>', webpage)
+ found = re.search(r'<script [^>]*?src="https?://.+?/kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)".*?>', webpage)
if found:
self.report_detected('KWS Player')
if found.group('maj_ver') not in ['4', '5']:
protocol, _, _ = url.partition('/')
thumbnail = protocol + thumbnail
+ url_keys = list(filter(re.compile(r'video_url|video_alt_url\d*').fullmatch, flashvars.keys()))
formats = []
- for key in ('video_url', 'video_alt_url', 'video_alt_url2'):
- if key in flashvars and '/get_file/' in flashvars[key]:
- next_format = {
- 'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']),
- 'format_id': flashvars.get(key + '_text', key),
- 'ext': 'mp4',
- }
- height = re.search(r'%s_(\d+)p\.mp4(?:/[?].*)?$' % flashvars['video_id'], flashvars[key])
- if height:
- next_format['height'] = int(height.group(1))
- else:
- next_format['quality'] = 1
- formats.append(next_format)
+ for key in url_keys:
+ if '/get_file/' not in flashvars[key]:
+ continue
+ format_id = flashvars.get(f'{key}_text', key)
+ formats.append({
+ 'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']),
+ 'format_id': format_id,
+ 'ext': 'mp4',
+ **(parse_resolution(format_id) or parse_resolution(flashvars[key]))
+ })
+ if not formats[-1].get('height'):
+ formats[-1]['quality'] = 1
+
self._sort_formats(formats)
return {