jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/generic.py
[tvopengr] Add extractors (#2297)
[yt-dlp.git] / yt_dlp / extractor / generic.py
Commit Line Data
dcdb292f 1# coding: utf-8
cfe50f04 2
79649588
PH
3from __future__ import unicode_literals
4
9b122384
PH
5import os
6import re
6c91a5a7 7import sys
9b122384
PH
8
9from .common import InfoExtractor
fc9713a1 10from .youtube import YoutubeIE
8c25f81b 11from ..compat import (
f7854627 12 compat_etree_fromstring,
4e363703 13 compat_str,
1ddb9456 14 compat_urllib_parse_unquote,
a5caba1e 15 compat_urlparse,
f7300c5c 16 compat_xml_parse_error,
8c25f81b
PH
17)
18from ..utils import (
b759a0d4 19 determine_ext,
9b122384 20 ExtractorError,
c8e9a235 21 float_or_none,
aa94a6d3 22 HEADRequest,
c76eb41b 23 int_or_none,
61ca9a80 24 is_html,
a4a554a7 25 js_to_json,
63d990d2 26 KNOWN_EXTENSIONS,
6cc62232 27 merge_dicts,
63d990d2 28 mimetype2ext,
ed2d6a19 29 orderedSet,
c76eb41b 30 parse_duration,
11c86170 31 parse_resolution,
5c2266df 32 sanitized_Request,
9d4660ca
PH
33 smuggle_url,
34 unescapeHTML,
29f7c58a 35 unified_timestamp,
4d54ef20 36 unsmuggle_url,
416c7fcb 37 UnsupportedError,
29f7c58a 38 url_or_none,
39 xpath_attr,
76c73715 40 xpath_text,
c76eb41b 41 xpath_with_ns,
9b122384 42)
b7a8c1bc 43from .commonprotocols import RtmpIE
ed126900 44from .brightcove import (
4fcaa4f4 45 BrightcoveLegacyIE,
5c17f0a6 46 BrightcoveNewIE,
ed126900 47)
3f59b015
S
48from .nexx import (
49 NexxIE,
50 NexxEmbedIE,
51)
a2edf2e7 52from .nbc import NBCSportsVPlayerIE
c0d0b01f 53from .ooyala import OoyalaIE
93d020dd 54from .rutv import RUTVIE
954c1d05 55from .tvc import TVCIE
476cf548 56from .sportbox import SportBoxIE
6dd94d3a 57from .myvi import MyviIE
1419fafd 58from .condenast import CondeNastIE
418c5cc3 59from .udn import UDNEmbedIE
909b0d66 60from .senategov import SenateISVPIE
bab19a8e 61from .svt import SVTIE
65d161c4 62from .pornhub import PornHubIE
2bb5b6d0 63from .xhamster import XHamsterEmbedIE
2c9ca782 64from .tnaflix import TNAFlixNetworkEmbedIE
37e7a71c 65from .drtuber import DrTuberIE
e28ed498 66from .redtube import RedTubeIE
06993715 67from .tube8 import Tube8IE
4e7b5bba 68from .mofosex import MofosexEmbedIE
8fae1a04 69from .spankwire import SpankwireIE
52c4c515 70from .youporn import YouPornIE
29f7c58a 71from .vimeo import (
72 VimeoIE,
73 VHXEmbedIE,
74)
3c4fbfec 75from .dailymotion import DailymotionIE
71a1db89 76from .dailymail import DailyMailIE
1ac1c4c2 77from .onionstudios import OnionStudiosIE
67167920 78from .viewlift import ViewLiftEmbedIE
46fde8a1 79from .mtv import MTVServicesEmbeddedIE
45dad7ba 80from .pladform import PladformIE
ff18735c 81from .videomore import VideomoreIE
83f1481b 82from .webcaster import WebcasterFeedIE
5b251628 83from .googledrive import GoogleDriveIE
7cb09524 84from .jwplatform import JWPlatformIE
aecfcd4e 85from .digiteka import DigitekaIE
1979969f 86from .arkena import ArkenaIE
5a51775a 87from .instagram import InstagramIE
5d39176f 88from .threeqsdn import ThreeQSDNIE
4d8819d2 89from .theplatform import ThePlatformIE
c287f2bc 90from .kaltura import KalturaIE
06a96da1 91from .eagleplatform import EaglePlatformIE
fd6ca382 92from .facebook import FacebookIE
548c3957 93from .soundcloud import SoundcloudEmbedIE
027e2312 94from .tunein import TuneInBaseIE
2a1321a2 95from .vbox7 import Vbox7IE
b0c8f2e9 96from .dbtv import DBTVIE
b1c35797 97from .piksel import PikselIE
e186a9ec 98from .videa import VideaIE
b687c85e 99from .twentymin import TwentyMinutenIE
d77ac737 100from .ustream import UstreamIE
8bdd16b4 101from .arte import ArteTVEmbedIE
6ef3e65a 102from .videopress import VideoPressIE
eb3079b6 103from .rutube import RutubeIE
71738b14 104from .glomex import GlomexEmbedIE
32b95bb6 105from .megatvcom import MegaTVComEmbedIE
e5d39886 106from .limelight import LimelightBaseIE
7986c3ab 107from .anvato import AnvatoIE
55719459 108from .washingtonpost import WashingtonPostIE
58bb4402 109from .wistia import WistiaIE
5d29af3d 110from .mediaset import MediasetIE
73cf76a9 111from .joj import JojIE
24e966e8 112from .megaphone import MegaphoneIE
41918eaa 113from .vzaar import VzaarIE
26bae2d9 114from .channel9 import Channel9IE
0987f2dd 115from .vshare import VShareIE
2ca7ed41 116from .mediasite import MediasiteIE
7d540621 117from .springboardplatform import SpringboardPlatformIE
4259402c 118from .ted import TedEmbedIE
4c780fbd 119from .yapfiles import YapFilesIE
86c8cfc5 120from .vice import ViceIE
178ee883 121from .xfileshare import XFileShareIE
660a230b 122from .cloudflarestream import CloudflareStreamIE
6bd499e8 123from .peertube import PeerTubeIE
5ee7ae5c 124from .teachable import TeachableIE
aee36ca8 125from .indavideo import IndavideoEmbedIE
cfd7f2a6 126from .apa import APAIE
f51f526b 127from .foxnews import FoxNewsIE
9d1b2138 128from .viqeo import ViqeoIE
57c68ec4 129from .expressen import ExpressenIE
83852e57 130from .zype import ZypeIE
416c3ca7 131from .odnoklassniki import OdnoklassnikiIE
b73612a2 132from .vk import VKIE
55adb63e 133from .kinja import KinjaEmbedIE
feee67ae 134from .gedidigital import GediDigitalIE
a85e131b 135from .rcs import RCSEmbedsIE
097f1663 136from .bitchute import BitChuteIE
62852977 137from .rumble import RumbleEmbedIE
29f7c58a 138from .arcpublishing import ArcPublishingIE
2181983a 139from .medialaan import MedialaanIE
bc2ca1bb 140from .simplecast import SimplecastIE
e4edeb62 141from .wimtv import WimTVIE
1a20d295 142from .tvopengr import TVOpenGrEmbedIE
56bb56f3 143from .tvp import TVPEmbedIE
764f5de2 144from .blogger import BloggerIE
9c634ef8 145from .mainstreaming import MainStreamingIE
9f517bb1 146from .gfycat import GfycatIE
9b122384 147
0838239e 148
9b122384 149class GenericIE(InfoExtractor):
79649588 150 IE_DESC = 'Generic downloader that works on some sites'
9b122384 151 _VALID_URL = r'.*'
79649588 152 IE_NAME = 'generic'
cfe50f04 153 _TESTS = [
c5fa81fe
S
154 # Direct link to a video
155 {
156 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
157 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
158 'info_dict': {
159 'id': 'trailer',
160 'ext': 'mp4',
161 'title': 'trailer',
162 'upload_date': '20100513',
163 }
164 },
c5138a7c 165 # Direct link to media delivered compressed (until Accept-Encoding is *)
c5fa81fe
S
166 {
167 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
168 'md5': '128c42e68b13950268b648275386fc74',
169 'info_dict': {
170 'id': 'FictionJunction-Parallel_Hearts',
171 'ext': 'flac',
172 'title': 'FictionJunction-Parallel_Hearts',
173 'upload_date': '20140522',
174 },
175 'expected_warnings': [
176 'URL could be a direct video link, returning it as such.'
39efc6e3
YCH
177 ],
178 'skip': 'URL invalid',
c5fa81fe
S
179 },
180 # Direct download with broken HEAD
181 {
182 'url': 'http://ai-radio.org:8000/radio.opus',
183 'info_dict': {
184 'id': 'radio',
185 'ext': 'opus',
186 'title': 'radio',
187 },
188 'params': {
189 'skip_download': True, # infinite live stream
190 },
191 'expected_warnings': [
ef0e4e7b
YCH
192 r'501.*Not Implemented',
193 r'400.*Bad Request',
c5fa81fe
S
194 ],
195 },
196 # Direct link with incorrect MIME type
197 {
198 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
199 'md5': '4ccbebe5f36706d85221f204d7eb5913',
200 'info_dict': {
201 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
202 'id': '5_Lennart_Poettering_-_Systemd',
203 'ext': 'webm',
204 'title': '5_Lennart_Poettering_-_Systemd',
205 'upload_date': '20141120',
206 },
207 'expected_warnings': [
208 'URL could be a direct video link, returning it as such.'
209 ]
210 },
211 # RSS feed
212 {
213 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
214 'info_dict': {
215 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
216 'title': 'Zero Punctuation',
217 'description': 're:.*groundbreaking video review series.*'
218 },
219 'playlist_mincount': 11,
220 },
221 # RSS feed with enclosure
222 {
223 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
224 'info_dict': {
29f7c58a 225 'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
226 'title': 'MSNBC Rachel Maddow (video)',
227 'description': 're:.*her unique approach to storytelling.*',
228 },
229 'playlist': [{
230 'info_dict': {
231 'ext': 'mov',
232 'id': 'pdv_maddow_netcast_mov-12-03-2020-223726',
233 'title': 'MSNBC Rachel Maddow (video) - 12-03-2020-223726',
234 'description': 're:.*her unique approach to storytelling.*',
235 'upload_date': '20201204',
236 },
237 }],
238 },
239 # RSS feed with item with description and thumbnails
240 {
241 'url': 'https://anchor.fm/s/dd00e14/podcast/rss',
242 'info_dict': {
243 'id': 'https://anchor.fm/s/dd00e14/podcast/rss',
244 'title': 're:.*100% Hydrogen.*',
245 'description': 're:.*In this episode.*',
246 },
247 'playlist': [{
248 'info_dict': {
249 'ext': 'm4a',
250 'id': 'c1c879525ce2cb640b344507e682c36d',
251 'title': 're:Hydrogen!',
252 'description': 're:.*In this episode we are going.*',
253 'timestamp': 1567977776,
254 'upload_date': '20190908',
255 'duration': 459,
256 'thumbnail': r're:^https?://.*\.jpg$',
257 'episode_number': 1,
258 'season_number': 1,
259 'age_limit': 0,
260 },
261 }],
262 'params': {
263 'skip_download': True,
264 },
c5fa81fe 265 },
01aec848
BG
266 # RSS feed with enclosures and unsupported link URLs
267 {
268 'url': 'http://www.hellointernet.fm/podcast?format=rss',
269 'info_dict': {
270 'id': 'http://www.hellointernet.fm/podcast?format=rss',
271 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
272 'title': 'Hello Internet',
273 },
274 'playlist_mincount': 100,
275 },
8765222d
S
276 # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
277 {
278 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
279 'info_dict': {
280 'id': 'smil',
281 'ext': 'mp4',
282 'title': 'Automatics, robotics and biocybernetics',
283 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
e327b736 284 'upload_date': '20130627',
8765222d
S
285 'formats': 'mincount:16',
286 'subtitles': 'mincount:1',
287 },
288 'params': {
289 'force_generic_extractor': True,
290 'skip_download': True,
291 },
292 },
293 # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
294 {
295 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
296 'info_dict': {
297 'id': 'hds',
298 'ext': 'flv',
299 'title': 'hds',
300 'formats': 'mincount:1',
301 },
302 'params': {
303 'skip_download': True,
304 },
305 },
306 # SMIL from https://www.restudy.dk/video/play/id/1637
307 {
308 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
309 'info_dict': {
310 'id': 'video_1637',
311 'ext': 'flv',
312 'title': 'video_1637',
313 'formats': 'mincount:3',
314 },
315 'params': {
316 'skip_download': True,
317 },
318 },
319 # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
320 {
321 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
322 'info_dict': {
323 'id': 'smil-service',
324 'ext': 'flv',
325 'title': 'smil-service',
326 'formats': 'mincount:1',
327 },
328 'params': {
329 'skip_download': True,
330 },
331 },
332 # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
333 {
334 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
335 'info_dict': {
336 'id': '4719370',
337 'ext': 'mp4',
338 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
339 'formats': 'mincount:3',
340 },
341 'params': {
342 'skip_download': True,
343 },
344 },
1de5cd3b
S
345 # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
346 {
347 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
348 'info_dict': {
349 'id': 'mZlp2ctYIUEB',
350 'ext': 'mp4',
351 'title': 'Tikibad ontruimd wegens brand',
352 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
ec85ded8 353 'thumbnail': r're:^https?://.*\.jpg$',
1de5cd3b
S
354 'duration': 33,
355 },
356 'params': {
357 'skip_download': True,
358 },
359 },
9d939cec
S
360 # MPD from http://dash-mse-test.appspot.com/media.html
361 {
362 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
363 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
364 'info_dict': {
365 'id': 'car-20120827-manifest',
366 'ext': 'mp4',
367 'title': 'car-20120827-manifest',
368 'formats': 'mincount:9',
0738187f 369 'upload_date': '20130904',
9d939cec 370 },
9d939cec 371 },
20938f76
S
372 # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
373 {
374 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
375 'info_dict': {
376 'id': 'content',
377 'ext': 'mp4',
378 'title': 'content',
379 'formats': 'mincount:8',
380 },
381 'params': {
382 # m3u8 downloads
383 'skip_download': True,
39efc6e3
YCH
384 },
385 'skip': 'video gone',
20938f76 386 },
edd9b71c
S
387 # m3u8 served with Content-Type: text/plain
388 {
389 'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
390 'info_dict': {
391 'id': 'index',
392 'ext': 'mp4',
393 'title': 'index',
394 'upload_date': '20140720',
395 'formats': 'mincount:11',
396 },
397 'params': {
398 # m3u8 downloads
399 'skip_download': True,
39efc6e3
YCH
400 },
401 'skip': 'video gone',
edd9b71c 402 },
c5fa81fe
S
403 # google redirect
404 {
405 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
406 'info_dict': {
407 'id': 'cmQHVoWB5FY',
408 'ext': 'mp4',
409 'upload_date': '20130224',
410 'uploader_id': 'TheVerge',
ec85ded8 411 'description': r're:^Chris Ziegler takes a look at the\.*',
c5fa81fe
S
412 'uploader': 'The Verge',
413 'title': 'First Firefox OS phones side-by-side',
414 },
415 'params': {
416 'skip_download': False,
417 }
418 },
6c91a5a7
S
419 {
420 # redirect in Refresh HTTP header
421 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
422 'info_dict': {
423 'id': 'pO8h3EaFRdo',
424 'ext': 'mp4',
425 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
426 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
427 'upload_date': '20150917',
428 'uploader_id': 'brtvofficial',
429 'uploader': 'Boiler Room',
430 },
431 'params': {
432 'skip_download': False,
433 },
434 },
cfe50f04 435 {
79649588 436 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
d360a146 437 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
79649588 438 'info_dict': {
d360a146
S
439 'id': '13601338388002',
440 'ext': 'mp4',
79649588
PH
441 'uploader': 'www.hodiho.fr',
442 'title': 'R\u00e9gis plante sa Jeep',
cfe50f04
JMF
443 }
444 },
c19f7764
JMF
445 # bandcamp page with custom domain
446 {
79649588
PH
447 'add_ie': ['Bandcamp'],
448 'url': 'http://bronyrock.com/track/the-pony-mash',
79649588 449 'info_dict': {
fd50bf62
S
450 'id': '3235767654',
451 'ext': 'mp3',
79649588
PH
452 'title': 'The Pony Mash',
453 'uploader': 'M_Pallante',
c19f7764 454 },
79649588 455 'skip': 'There is a limit of 200 free downloads / month for the test song',
c19f7764 456 },
eeb165e6 457 {
53a664ed
S
458 # embedded brightcove video
459 # it also tests brightcove videos that need to set the 'Referer'
460 # in the http requests
3b7d9aa4 461 'add_ie': ['BrightcoveLegacy'],
79649588
PH
462 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
463 'info_dict': {
464 'id': '2765128793001',
465 'ext': 'mp4',
466 'title': 'Le cours de bourse : l’analyse technique',
467 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
468 'uploader': 'BFM BUSINESS',
eeb165e6 469 },
79649588
PH
470 'params': {
471 'skip_download': True,
eeb165e6
JMF
472 },
473 },
53a664ed
S
474 {
475 # embedded with itemprop embedURL and video id spelled as `idVideo`
476 'add_id': ['BrightcoveLegacy'],
477 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
478 'info_dict': {
479 'id': '5255628253001',
480 'ext': 'mp4',
481 'title': 'md5:37c519b1128915607601e75a87995fc0',
482 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
483 'uploader': 'BFM BUSINESS',
484 'uploader_id': '876450612001',
485 'timestamp': 1482255315,
486 'upload_date': '20161220',
487 },
488 'params': {
489 'skip_download': True,
490 },
491 },
17ab4d3b 492 {
067aa17e 493 # https://github.com/ytdl-org/youtube-dl/issues/2253
17ab4d3b 494 'url': 'http://bcove.me/i6nfkrc3',
17ab4d3b
PH
495 'md5': '0ba9446db037002366bab3b3eb30c88c',
496 'info_dict': {
fd50bf62
S
497 'id': '3101154703001',
498 'ext': 'mp4',
17ab4d3b
PH
499 'title': 'Still no power',
500 'uploader': 'thestar.com',
501 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
502 },
3b7d9aa4 503 'add_ie': ['BrightcoveLegacy'],
39efc6e3 504 'skip': 'video gone',
17ab4d3b 505 },
0479c625
S
506 {
507 'url': 'http://www.championat.com/video/football/v/87/87499.html',
508 'md5': 'fb973ecf6e4a78a67453647444222983',
509 'info_dict': {
510 'id': '3414141473001',
511 'ext': 'mp4',
512 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
513 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
514 'uploader': 'Championat',
515 },
516 },
bdf97017 517 {
067aa17e 518 # https://github.com/ytdl-org/youtube-dl/issues/3541
3b7d9aa4 519 'add_ie': ['BrightcoveLegacy'],
bdf97017
NJ
520 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
521 'info_dict': {
522 'id': '3866516442001',
37aab278 523 'ext': 'mp4',
bdf97017
NJ
524 'title': 'Leer mij vrouwen kennen: Aflevering 1',
525 'description': 'Leer mij vrouwen kennen: Aflevering 1',
526 'uploader': 'SBS Broadcasting',
527 },
37aab278 528 'skip': 'Restricted to Netherlands',
bdf97017 529 'params': {
37aab278 530 'skip_download': True, # m3u8 download
bdf97017
NJ
531 },
532 },
06d0ad9a
YCH
533 {
534 # Brightcove video in <iframe>
535 'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
536 'md5': '36d74ef5e37c8b4a2ce92880d208b968',
537 'info_dict': {
538 'id': '5360463607001',
539 'ext': 'mp4',
540 'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活',
541 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
542 'uploader': 'United Nations',
543 'uploader_id': '1362235914001',
544 'timestamp': 1489593889,
545 'upload_date': '20170315',
546 },
547 'add_ie': ['BrightcoveLegacy'],
548 },
16e2c8f7
YCH
549 {
550 # Brightcove with alternative playerID key
551 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
552 'info_dict': {
553 'id': 'nmeth.2062_SV1',
554 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
555 },
556 'playlist': [{
557 'info_dict': {
558 'id': '2228375078001',
559 'ext': 'mp4',
560 'title': 'nmeth.2062-sv1',
561 'description': 'nmeth.2062-sv1',
562 'timestamp': 1363357591,
563 'upload_date': '20130315',
564 'uploader': 'Nature Publishing Group',
565 'uploader_id': '1964492299001',
566 },
567 }],
568 },
40158f55
JH
569 {
570 # Brightcove with UUID in videoPlayer
571 'url': 'http://www8.hp.com/cn/zh/home.html',
572 'info_dict': {
573 'id': '5255815316001',
574 'ext': 'mp4',
575 'title': 'Sprocket Video - China',
576 'description': 'Sprocket Video - China',
577 'uploader': 'HP-Video Gallery',
578 'timestamp': 1482263210,
579 'upload_date': '20161220',
580 'uploader_id': '1107601872001',
581 },
582 'params': {
583 'skip_download': True, # m3u8 download
584 },
585 'skip': 'video rotates...weekly?',
586 },
587 {
588 # Brightcove:new type [2].
589 'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
590 'md5': '2b35148fcf48da41c9fb4591650784f3',
591 'info_dict': {
592 'id': '5348741021001',
593 'ext': 'mp4',
594 'upload_date': '20170306',
595 'uploader_id': '4191638492001',
596 'timestamp': 1488769918,
597 'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
598
599 },
600 },
601 {
602 # Alternative brightcove <video> attributes
603 'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
604 'info_dict': {
605 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
606 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
607 },
608 'playlist': [{
609 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
610 'info_dict': {
611 'id': '5311302538001',
612 'ext': 'mp4',
613 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
614 'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
615 'timestamp': 1486321708,
616 'upload_date': '20170205',
617 'uploader_id': '800000640001',
618 },
619 'only_matching': True,
620 }],
621 },
b68a812e
S
622 {
623 # Brightcove with UUID in videoPlayer
624 'url': 'http://www8.hp.com/cn/zh/home.html',
625 'info_dict': {
626 'id': '5255815316001',
627 'ext': 'mp4',
628 'title': 'Sprocket Video - China',
629 'description': 'Sprocket Video - China',
630 'uploader': 'HP-Video Gallery',
631 'timestamp': 1482263210,
632 'upload_date': '20161220',
633 'uploader_id': '1107601872001',
634 },
635 'params': {
636 'skip_download': True, # m3u8 download
637 },
638 },
c0d0b01f
JMF
639 # ooyala video
640 {
79649588 641 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
87830900 642 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
79649588
PH
643 'info_dict': {
644 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
645 'ext': 'mp4',
3486df38 646 'title': '2cc213299525360.mov', # that's what we get
53e06b25 647 'duration': 238.231,
c0d0b01f 648 },
87830900 649 'add_ie': ['Ooyala'],
c0d0b01f 650 },
bf94d763
S
651 {
652 # ooyala video embedded with http://player.ooyala.com/iframe.js
653 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
654 'info_dict': {
655 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
656 'ext': 'mp4',
657 'title': '"Steve Jobs: Man in the Machine" trailer',
658 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
53e06b25 659 'duration': 135.427,
bf94d763
S
660 },
661 'params': {
662 'skip_download': True,
663 },
39efc6e3 664 'skip': 'movie expired',
bf94d763 665 },
198d4cb4
GR
666 # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
667 {
668 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
669 'info_dict': {
670 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
671 'ext': 'mp4',
672 'title': 'Steampunk Fest Comes to Honesdale',
673 'duration': 43.276,
674 },
675 'params': {
676 'skip_download': True,
677 }
678 },
1b86cc41 679 # embed.ly video
680 {
681 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
682 'info_dict': {
683 'id': '9ODmcdjQcHQ',
684 'ext': 'mp4',
0a5bce56
PH
685 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
686 'upload_date': '20140225',
687 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
688 'uploader': 'Tested',
689 'uploader_id': 'testedcom',
1b86cc41 690 },
691 # No need to test YoutubeIE here
692 'params': {
693 'skip_download': True,
694 },
695 },
60cc4dc4
PH
696 # funnyordie embed
697 {
698 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
60cc4dc4
PH
699 'info_dict': {
700 'id': '18e820ec3f',
701 'ext': 'mp4',
702 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
703 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
93d020dd 704 },
39efc6e3
YCH
705 # HEAD requests lead to endless 301, while GET is OK
706 'expected_warnings': ['301'],
60cc4dc4 707 },
93d020dd
S
708 # RUTV embed
709 {
710 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
711 'info_dict': {
712 'id': '776940',
713 'ext': 'mp4',
714 'title': 'Охотское море стало целиком российским',
715 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
716 },
717 'params': {
718 # m3u8 download
719 'skip_download': True,
720 },
aab74fa1 721 },
f37bdbe5
S
722 # TVC embed
723 {
724 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
725 'info_dict': {
726 'id': '55304',
727 'ext': 'mp4',
728 'title': 'Дошкольное воспитание',
729 },
730 },
b827a601
S
731 # SportBox embed
732 {
733 'url': 'http://www.vestifinance.ru/articles/25753',
734 'info_dict': {
735 'id': '25753',
05d1e7aa 736 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
b827a601
S
737 },
738 'playlist': [{
739 'info_dict': {
740 'id': '370908',
741 'title': 'Госзаказ. День 3',
742 'ext': 'mp4',
743 }
744 }, {
745 'info_dict': {
746 'id': '370905',
747 'title': 'Госзаказ. День 2',
748 'ext': 'mp4',
749 }
750 }, {
751 'info_dict': {
752 'id': '370902',
753 'title': 'Госзаказ. День 1',
754 'ext': 'mp4',
755 }
756 }],
757 'params': {
758 # m3u8 download
759 'skip_download': True,
760 },
761 },
bf20b9c5
S
762 # Myvi.ru embed
763 {
764 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
765 'info_dict': {
766 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
767 'ext': 'mp4',
768 'title': 'Ужастики, русский трейлер (2015)',
ec85ded8 769 'thumbnail': r're:^https?://.*\.jpg$',
bf20b9c5
S
770 'duration': 153,
771 }
772 },
c76799c5
S
773 # XHamster embed
774 {
775 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
776 'info_dict': {
777 'id': 'showthread',
778 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
779 },
780 'playlist_mincount': 7,
39efc6e3
YCH
781 # This forum does not allow <iframe> syntaxes anymore
782 # Now HTML tags are displayed as-is
783 'skip': 'No videos on this page',
c76799c5 784 },
aab74fa1
PH
785 # Embedded TED video
786 {
787 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
a8eb5a8e 788 'md5': '65fdff94098e4a607385a60c5177c638',
aab74fa1 789 'info_dict': {
a8eb5a8e 790 'id': '1969',
aab74fa1 791 'ext': 'mp4',
a8eb5a8e
PH
792 'title': 'Hidden miracles of the natural world',
793 'uploader': 'Louie Schwartzberg',
794 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
aab74fa1 795 }
60cc4dc4 796 },
d95e35d6
S
797 # nowvideo embed hidden behind percent encoding
798 {
799 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
800 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
801 'info_dict': {
802 'id': '06e53103ca9aa',
803 'ext': 'flv',
804 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
805 'description': 'No description',
806 },
0f2a2ba1 807 },
893f8832
PH
808 # arte embed
809 {
810 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
811 'md5': '7653032cbb25bf6c80d80f217055fa43',
812 'info_dict': {
813 'id': '048195-004_PLUS7-F',
814 'ext': 'flv',
815 'title': 'X:enius',
816 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
817 'upload_date': '20140320',
818 },
819 'params': {
820 'skip_download': 'Requires rtmpdump'
39efc6e3
YCH
821 },
822 'skip': 'video gone',
893f8832 823 },
cbd55ade
S
824 # francetv embed
825 {
826 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
827 'info_dict': {
828 'id': 'EV_30231',
829 'ext': 'mp4',
830 'title': 'Alcaline, le concert avec Calogero',
831 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
832 'upload_date': '20150226',
833 'timestamp': 1424989860,
834 'duration': 5400,
835 },
836 'params': {
837 # m3u8 downloads
838 'skip_download': True,
839 },
840 'expected_warnings': [
841 'Forbidden'
842 ]
843 },
fa35cdad
PH
844 # Condé Nast embed
845 {
846 'url': 'http://www.wired.com/2014/04/honda-asimo/',
847 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
848 'info_dict': {
849 'id': '53501be369702d3275860000',
850 'ext': 'mp4',
851 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
852 }
ebd3c7b3
PH
853 },
854 # Dailymotion embed
855 {
856 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
857 'md5': '441aeeb82eb72c422c7f14ec533999cd',
858 'info_dict': {
859 'id': 'k2mm4bCdJ6CQ2i7c8o2',
860 'ext': 'mp4',
861 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
0738187f 862 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
ebd3c7b3 863 'uploader': 'Spi0n',
0738187f
YCH
864 'uploader_id': 'xgditw',
865 'upload_date': '20140425',
866 'timestamp': 1398441542,
ebd3c7b3
PH
867 },
868 'add_ie': ['Dailymotion'],
2b88feed 869 },
71a1db89
S
870 # DailyMail embed
871 {
872 'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
873 'info_dict': {
874 'id': '1495629',
875 'ext': 'mp4',
876 'title': 'Care worker punches elderly dementia patient in head 11 times',
877 'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
878 },
879 'add_ie': ['DailyMail'],
880 'params': {
881 'skip_download': True,
882 },
883 },
2b88feed
PH
884 # YouTube embed
885 {
886 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
887 'info_dict': {
888 'id': 'FXRb4ykk4S0',
889 'ext': 'mp4',
890 'title': 'The NBL Auction 2014',
891 'uploader': 'BADMINTON England',
892 'uploader_id': 'BADMINTONEvents',
893 'upload_date': '20140603',
894 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
895 },
896 'add_ie': ['Youtube'],
897 'params': {
898 'skip_download': True,
899 }
900 },
a0566bbf 901 # MTVServices embed
c5cd249e 902 {
1fa309da
YCH
903 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
904 'md5': 'ca1aef97695ef2c1d6973256a57e5252',
c5cd249e 905 'info_dict': {
1fa309da 906 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
c5cd249e 907 'ext': 'mp4',
1fa309da
YCH
908 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
909 'description': 'Two valets share their love for movie star Liam Neesons.',
05d1e7aa
YCH
910 'timestamp': 1349922600,
911 'upload_date': '20121011',
c5cd249e
JMF
912 },
913 },
61013473 914 # YouTube embed via <data-embed-url="">
915 {
916 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
61013473 917 'info_dict': {
a8eb5a8e 918 'id': '4vAffPZIT44',
61013473 919 'ext': 'mp4',
a8eb5a8e 920 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
ed2d6a19
PH
921 'uploader': 'Gameloft',
922 'uploader_id': 'gameloft',
a8eb5a8e
PH
923 'upload_date': '20140828',
924 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
ed2d6a19
PH
925 },
926 'params': {
927 'skip_download': True,
61013473 928 }
c8e9a235 929 },
61568e50
JH
930 # YouTube <object> embed
931 {
932 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
933 'md5': '516718101ec834f74318df76259fb3cc',
934 'info_dict': {
935 'id': 'msN87y-iEx0',
936 'ext': 'webm',
937 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
938 'upload_date': '20080526',
939 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
940 'uploader': 'Christopher Sykes',
941 'uploader_id': 'ChristopherJSykes',
942 },
943 'add_ie': ['Youtube'],
944 },
c8e9a235
PH
945 # Camtasia studio
946 {
947 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
948 'playlist': [{
949 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
950 'info_dict': {
951 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
952 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
953 'ext': 'flv',
954 'duration': 2235.90,
955 }
956 }, {
957 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
958 'info_dict': {
959 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
960 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
961 'ext': 'flv',
962 'duration': 2235.93,
963 }
964 }],
965 'info_dict': {
966 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
967 }
4d805e06
PH
968 },
969 # Flowplayer
970 {
971 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
972 'md5': '9d65602bf31c6e20014319c7d07fba27',
973 'info_dict': {
974 'id': '5123ea6d5e5a7',
975 'ext': 'mp4',
976 'age_limit': 18,
977 'uploader': 'www.handjobhub.com',
d6d9186f 978 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
4d805e06 979 }
0990305d 980 },
22a6f150 981 # Multiple brightcove videos
067aa17e 982 # https://github.com/ytdl-org/youtube-dl/issues/2283
22a6f150
PH
983 {
984 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
985 'info_dict': {
986 'id': 'always-never',
987 'title': 'Always / Never - The New Yorker',
988 },
989 'playlist_count': 3,
990 'params': {
991 'extract_flat': False,
992 'skip_download': True,
993 }
1a94ff68
S
994 },
995 # MLB embed
996 {
997 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
998 'md5': '96f09a37e44da40dd083e12d9a683327',
999 'info_dict': {
1000 'id': '33322633',
1001 'ext': 'mp4',
1002 'title': 'Ump changes call to ball',
1003 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
1004 'duration': 48,
1005 'timestamp': 1401537900,
1006 'upload_date': '20140531',
ec85ded8 1007 'thumbnail': r're:^https?://.*\.jpg$',
1a94ff68
S
1008 },
1009 },
746c67d7
NJ
1010 # Wistia embed
1011 {
6c114b12
S
1012 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
1013 'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
746c67d7 1014 'info_dict': {
6c114b12 1015 'id': '6e2wtrbdaf',
746c67d7 1016 'ext': 'mov',
6c114b12
S
1017 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
1018 'description': 'a Paywall Videos video from Remilon',
1019 'duration': 644.072,
1020 'uploader': 'study.com',
1021 'timestamp': 1459678540,
1022 'upload_date': '20160403',
1023 'filesize': 24687186,
746c67d7
NJ
1024 },
1025 },
52cffcb1 1026 {
1027 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
1028 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
1029 'info_dict': {
1030 'id': 'uxjb0lwrcz',
1031 'ext': 'mp4',
6c114b12 1032 'title': 'Conversation about Hexagonal Rails Part 1',
0738187f 1033 'description': 'a Martin Fowler video from ThoughtWorks',
52cffcb1 1034 'duration': 1715.0,
85d7b765 1035 'uploader': 'thoughtworks.wistia.com',
0738187f 1036 'timestamp': 1401832161,
6c114b12 1037 'upload_date': '20140603',
70b7e3fb 1038 },
52cffcb1 1039 },
7ded6545
S
1040 # Wistia standard embed (async)
1041 {
1042 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
1043 'info_dict': {
1044 'id': '807fafadvk',
1045 'ext': 'mp4',
1046 'title': 'Drip Brennan Dunn Workshop',
1047 'description': 'a JV Webinars video from getdrip-1',
1048 'duration': 4986.95,
7ded6545 1049 'timestamp': 1463607249,
6c114b12 1050 'upload_date': '20160518',
7ded6545
S
1051 },
1052 'params': {
1053 'skip_download': True,
1054 }
1055 },
ac645ac7
PH
1056 # Soundcloud embed
1057 {
1058 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
1059 'info_dict': {
1060 'id': '174391317',
1061 'ext': 'mp3',
1062 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
1063 'uploader': 'Sophos Security',
1064 'title': 'Chet Chat 171 - Oct 29, 2014',
1065 'upload_date': '20141029',
1066 }
af63fed7 1067 },
db19df6c
S
1068 # Soundcloud multiple embeds
1069 {
1070 'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
1071 'info_dict': {
1072 'id': '52809',
1073 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
1074 },
1075 'playlist_mincount': 7,
1076 },
027e2312
S
1077 # TuneIn station embed
1078 {
1079 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
1080 'info_dict': {
1081 'id': '204146',
1082 'ext': 'mp3',
1083 'title': 'CNRV',
1084 'location': 'Paris, France',
1085 'is_live': True,
1086 },
1087 'params': {
1088 # Live stream
1089 'skip_download': True,
1090 },
1091 },
af63fed7
PH
1092 # Livestream embed
1093 {
1094 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
1095 'info_dict': {
1096 'id': '67864563',
1097 'ext': 'flv',
1098 'upload_date': '20141112',
1099 'title': 'Rosetta #CometLanding webcast HL 10',
1100 }
1101 },
78d3b3e2
YCH
1102 # Another Livestream embed, without 'new.' in URL
1103 {
1104 'url': 'https://www.freespeech.org/',
1105 'info_dict': {
1106 'id': '123537347',
1107 'ext': 'mp4',
1108 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
1109 },
1110 'params': {
1111 # Live stream
1112 'skip_download': True,
1113 },
1114 },
65f3a228
PH
1115 # LazyYT
1116 {
e8e4cc5a 1117 'url': 'https://skiplagged.com/',
65f3a228 1118 'info_dict': {
e8e4cc5a
JH
1119 'id': 'skiplagged',
1120 'title': 'Skiplagged: The smart way to find cheap flights',
65f3a228 1121 },
e8e4cc5a
JH
1122 'playlist_mincount': 1,
1123 'add_ie': ['Youtube'],
4e262a88 1124 },
42bdd9d0
PH
1125 # Cinchcast embed
1126 {
1127 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
1128 'info_dict': {
1129 'id': '7141703',
1130 'ext': 'mp3',
1131 'upload_date': '20141126',
1132 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
1133 }
1134 },
501f13fb
PH
1135 # Cinerama player
1136 {
1137 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
1138 'info_dict': {
1139 'id': '730m_DandD_1901_512k',
1140 'ext': 'mp4',
1141 'uploader': 'www.abc.net.au',
1142 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
1143 }
796df3c6
S
1144 },
1145 # embedded viddler video
1146 {
1147 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
1148 'info_dict': {
1149 'id': '4d03aad9',
1150 'ext': 'mp4',
1151 'uploader': 'deadspin',
1152 'title': 'WALL-TO-GORTAT',
1153 'timestamp': 1422285291,
1154 'upload_date': '20150126',
1155 },
1156 'add_ie': ['Viddler'],
a0f71985 1157 },
2051acde
S
1158 # Libsyn embed
1159 {
1160 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
1161 'info_dict': {
1162 'id': '3377616',
1163 'ext': 'mp3',
1164 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
1165 'description': 'md5:601cb790edd05908957dae8aaa866465',
1166 'upload_date': '20150220',
1167 },
326fa4e6 1168 'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
2051acde 1169 },
a0f71985
PH
1170 # jwplayer YouTube
1171 {
1172 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
1173 'info_dict': {
1174 'id': 'Mrj4DVp2zeA',
1175 'ext': 'mp4',
f37e3f99 1176 'upload_date': '20150212',
a0f71985 1177 'uploader': 'The National Archives UK',
2637fadc 1178 'description': 'md5:8078af856dca76edc42910b61273dbbf',
a0f71985
PH
1179 'uploader_id': 'NationalArchives08',
1180 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
1181 },
59b8ab58 1182 },
5620f840
S
1183 # jwplayer rtmp
1184 {
6899b1d9 1185 'url': 'http://www.suffolk.edu/sjc/live.php',
5620f840 1186 'info_dict': {
6899b1d9 1187 'id': 'live',
5620f840
S
1188 'ext': 'flv',
1189 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
1190 'uploader': 'www.suffolk.edu',
1191 },
1192 'params': {
1193 'skip_download': True,
2637fadc 1194 },
6899b1d9 1195 'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
5620f840 1196 },
5e7bbac3 1197 # jwplayer with only the json URL
1198 {
1199 'url': 'https://www.hollywoodreporter.com/news/general-news/dunkirk-team-reveals-what-christopher-nolan-said-oscar-win-meet-your-oscar-winner-1092454',
1200 'info_dict': {
1201 'id': 'TljWkvWH',
1202 'ext': 'mp4',
1203 'upload_date': '20180306',
1204 'title': 'md5:91eb1862f6526415214f62c00b453936',
1205 'description': 'md5:73048ae50ae953da10549d1d2fe9b3aa',
1206 'timestamp': 1520367225,
1207 },
1208 'params': {
1209 'skip_download': True,
1210 },
1211 },
a4a554a7
YCH
1212 # Complex jwplayer
1213 {
1214 'url': 'http://www.indiedb.com/games/king-machine/videos',
1215 'info_dict': {
1216 'id': 'videos',
1217 'ext': 'mp4',
1218 'title': 'king machine trailer 1',
2637fadc 1219 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
a4a554a7
YCH
1220 'thumbnail': r're:^https?://.*\.jpg$',
1221 },
1222 },
03486dbb
RU
1223 {
1224 # JWPlayer config passed as variable
1225 'url': 'http://www.txxx.com/videos/3326530/ariele/',
1226 'info_dict': {
1227 'id': '3326530_hq',
1228 'ext': 'mp4',
1229 'title': 'ARIELE | Tube Cup',
1230 'uploader': 'www.txxx.com',
1231 'age_limit': 18,
1232 },
1233 'params': {
1234 'skip_download': True,
1235 }
1236 },
939be9ad
JH
1237 {
1238 # JWPlatform iframe
2fac2e91 1239 'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
939be9ad 1240 'info_dict': {
2fac2e91 1241 'id': 'AG26UQXM',
939be9ad 1242 'ext': 'mp4',
2fac2e91
AG
1243 'upload_date': '20160719',
1244 'timestamp': 468923808,
1245 'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
939be9ad 1246 },
805f5bf7 1247 'add_ie': [JWPlatformIE.ie_key()],
939be9ad 1248 },
63d990d2 1249 {
c5b7014a 1250 # Video.js embed, multiple formats
63d990d2
S
1251 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
1252 'info_dict': {
1253 'id': 'yygqldloqIk',
1254 'ext': 'mp4',
1255 'title': 'SolidWorks. Урок 6 Настройка чертежа',
1256 'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
1257 'upload_date': '20130314',
1258 'uploader': 'PROстое3D',
1259 'uploader_id': 'PROstoe3D',
1260 },
1261 'params': {
1262 'skip_download': True,
1263 },
1264 },
c5b7014a
S
1265 {
1266 # Video.js embed, single format
1267 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
1268 'info_dict': {
1269 'id': 'watch',
1270 'ext': 'mp4',
1271 'title': 'Step 1 - Good Foundation',
1272 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
1273 },
1274 'params': {
1275 'skip_download': True,
1276 },
1277 },
59b8ab58
PH
1278 # rtl.nl embed
1279 {
1280 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
1281 'playlist_mincount': 5,
1282 'info_dict': {
1283 'id': 'aanslagen-kopenhagen',
2637fadc 1284 'title': 'Aanslagen Kopenhagen',
59b8ab58 1285 }
255fca5e
S
1286 },
1287 # Zapiks embed
1288 {
1289 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
1290 'info_dict': {
1291 'id': '118046',
1292 'ext': 'mp4',
1293 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
1294 }
1295 },
66e568de
S
1296 # Kaltura embed (different embed code)
1297 {
1298 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
1299 'info_dict': {
1300 'id': '1_a52wc67y',
1301 'ext': 'flv',
1302 'upload_date': '20150127',
1303 'uploader_id': 'PremierMedia',
1304 'timestamp': int,
1305 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
1306 },
1307 },
87703231
YCH
1308 # Kaltura embed with single quotes
1309 {
1310 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
1311 'info_dict': {
1312 'id': '0_izeg5utt',
1313 'ext': 'mp4',
1314 'title': '35871',
1315 'timestamp': 1355743100,
1316 'upload_date': '20121217',
e30991f9 1317 'uploader_id': 'cplapp@learn360.com',
87703231
YCH
1318 },
1319 'add_ie': ['Kaltura'],
1320 },
427cd050
S
1321 {
1322 # Kaltura embedded via quoted entry_id
1323 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
1324 'info_dict': {
1325 'id': '0_utuok90b',
1326 'ext': 'mp4',
1327 'title': '06_matthew_brender_raj_dutt',
1328 'timestamp': 1466638791,
1329 'upload_date': '20160622',
1330 },
1331 'add_ie': ['Kaltura'],
1332 'expected_warnings': [
1333 'Could not send HEAD request'
1334 ],
1335 'params': {
1336 'skip_download': True,
1337 }
1338 },
8ab7e6c4
YCH
1339 {
1340 # Kaltura embedded, some fileExt broken (#11480)
1341 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
1342 'info_dict': {
1343 'id': '1_sgtvehim',
1344 'ext': 'mp4',
1345 'title': 'Our "Standard Models" of particle physics and cosmology',
1346 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
1347 'timestamp': 1321158993,
1348 'upload_date': '20111113',
1349 'uploader_id': 'kps1',
1350 },
1351 'add_ie': ['Kaltura'],
1352 },
a01825a5
JH
1353 {
1354 # Kaltura iframe embed
1355 'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
1356 'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
1357 'info_dict': {
1358 'id': '0_f2cfbpwy',
1359 'ext': 'mp4',
1360 'title': 'I. M. Pei: A Centennial Celebration',
1361 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
1362 'upload_date': '20170403',
1363 'uploader_id': 'batchUser',
1364 'timestamp': 1491232186,
1365 },
1366 'add_ie': ['Kaltura'],
1367 },
c21692fa
S
1368 {
1369 # Kaltura iframe embed, more sophisticated
1370 'url': 'http://www.cns.nyu.edu/~eero/math-tools/Videos/lecture-05sep2017.html',
1371 'info_dict': {
1372 'id': '1_9gzouybz',
1373 'ext': 'mp4',
1374 'title': 'lecture-05sep2017',
1375 'description': 'md5:40f347d91fd4ba047e511c5321064b49',
1376 'upload_date': '20170913',
1377 'uploader_id': 'eps2',
1378 'timestamp': 1505340777,
1379 },
1380 'params': {
1381 'skip_download': True,
1382 },
1383 'add_ie': ['Kaltura'],
1384 },
e30991f9
S
1385 {
1386 # meta twitter:player
1387 'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
1388 'info_dict': {
1389 'id': '0_01b42zps',
1390 'ext': 'mp4',
1391 'title': 'Main Twerk (Video)',
1392 'upload_date': '20171208',
1393 'uploader_id': 'sebastian.salinas@thechive.com',
1394 'timestamp': 1512713057,
1395 },
1396 'params': {
1397 'skip_download': True,
1398 },
1399 'add_ie': ['Kaltura'],
1400 },
250b042c
S
1401 # referrer protected EaglePlatform embed
1402 {
1403 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
1404 'info_dict': {
1405 'id': '582306',
1406 'ext': 'mp4',
1407 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
1408 'thumbnail': r're:^https?://.*\.jpg$',
1409 'duration': 3382,
1410 'view_count': int,
1411 },
1412 'params': {
1413 'skip_download': True,
1414 },
135c9c42 1415 },
665e9452 1416 # ClipYou (EaglePlatform) embed (custom URL)
d47ae7f6
S
1417 {
1418 'url': 'http://muz-tv.ru/play/7129/',
4645432d 1419 # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
d47ae7f6
S
1420 'info_dict': {
1421 'id': '12820',
1422 'ext': 'mp4',
1423 'title': "'O Sole Mio",
ec85ded8 1424 'thumbnail': r're:^https?://.*\.jpg$',
d47ae7f6
S
1425 'duration': 216,
1426 'view_count': int,
1427 },
250b042c
S
1428 'params': {
1429 'skip_download': True,
1430 },
2637fadc 1431 'skip': 'This video is unavailable.',
d47ae7f6 1432 },
f8388757
S
1433 # Pladform embed
1434 {
1435 'url': 'http://muz-tv.ru/kinozal/view/7400/',
1436 'info_dict': {
1437 'id': '100183293',
1438 'ext': 'mp4',
62259846 1439 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
f8388757 1440 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
ec85ded8 1441 'thumbnail': r're:^https?://.*\.jpg$',
f8388757
S
1442 'duration': 694,
1443 'age_limit': 0,
1444 },
2637fadc 1445 'skip': 'HTTP Error 404: Not Found',
f8388757 1446 },
c798f15b
S
1447 # Playwire embed
1448 {
1449 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
1450 'info_dict': {
1451 'id': '3519514',
1452 'ext': 'mp4',
1453 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
ec85ded8 1454 'thumbnail': r're:^https?://.*\.png$',
c798f15b
S
1455 'duration': 45.115,
1456 },
1457 },
ad320e9b
NJ
1458 # 5min embed
1459 {
1460 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
1461 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
1462 'info_dict': {
1463 'id': '518726732',
1464 'ext': 'mp4',
1465 'title': 'Facebook Creates "On This Day" | Crunch Report',
2637fadc
RA
1466 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
1467 'timestamp': 1427237531,
1468 'uploader': 'Crunch Report',
1469 'upload_date': '20150324',
1470 },
1471 'params': {
1472 # m3u8 download
1473 'skip_download': True,
ad320e9b
NJ
1474 },
1475 },
a4257017
S
1476 # Crooks and Liars embed
1477 {
1478 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
1479 'info_dict': {
1480 'id': '8RUoRhRi',
1481 'ext': 'mp4',
1482 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1483 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1484 'timestamp': 1428207000,
1485 'upload_date': '20150405',
1486 'uploader': 'Heather',
1487 },
1488 },
1489 # Crooks and Liars external embed
1490 {
1491 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1492 'info_dict': {
1493 'id': 'MTE3MjUtMzQ2MzA',
1494 'ext': 'mp4',
1495 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1496 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1497 'timestamp': 1265032391,
1498 'upload_date': '20100201',
1499 'uploader': 'Heather',
1500 },
1501 },
facecb84 1502 # NBC Sports vplayer embed
a2edf2e7 1503 {
facecb84 1504 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
a2edf2e7 1505 'info_dict': {
facecb84
S
1506 'id': 'ln7x1qSThw4k',
1507 'ext': 'flv',
1508 'title': "PFT Live: New leader in the 'new-look' defense",
1509 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
0738187f
YCH
1510 'uploader': 'NBCU-SPORTS',
1511 'upload_date': '20140107',
1512 'timestamp': 1389118457,
a2edf2e7 1513 },
2637fadc 1514 'skip': 'Invalid Page URL',
418c5cc3 1515 },
de3eb07e
YCH
1516 # NBC News embed
1517 {
1518 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
1519 'md5': '1aa589c675898ae6d37a17913cf68d66',
1520 'info_dict': {
2637fadc 1521 'id': 'x_dtl_oa_LettermanliftPR_160608',
de3eb07e 1522 'ext': 'mp4',
2637fadc 1523 'title': 'David Letterman: A Preview',
de3eb07e 1524 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
2637fadc
RA
1525 'upload_date': '20160609',
1526 'timestamp': 1465431544,
1527 'uploader': 'NBCU-NEWS',
de3eb07e
YCH
1528 },
1529 },
418c5cc3
YCH
1530 # UDN embed
1531 {
811586eb 1532 'url': 'https://video.udn.com/news/300346',
01c58f84 1533 'md5': 'fd2060e988c326991037b9aff9df21a6',
418c5cc3 1534 'info_dict': {
01c58f84 1535 'id': '300346',
418c5cc3 1536 'ext': 'mp4',
01c58f84 1537 'title': '中一中男師變性 全校師生力挺',
ec85ded8 1538 'thumbnail': r're:^https?://.*\.jpg$',
811586eb
YCH
1539 },
1540 'params': {
1541 # m3u8 download
1542 'skip_download': True,
1543 },
2637fadc 1544 'expected_warnings': ['Failed to parse JSON Expecting value'],
edfcf7ab 1545 },
b26733ba
YCH
1546 # Brightcove URL in single quotes
1547 {
1548 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1549 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1550 'info_dict': {
1551 'id': '4255764656001',
1552 'ext': 'mp4',
1553 'title': 'SN Presents: Russell Martin, World Citizen',
1554 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1555 'uploader': 'Rogers Sportsnet',
0738187f
YCH
1556 'uploader_id': '1704050871',
1557 'upload_date': '20150525',
1558 'timestamp': 1432570283,
b26733ba 1559 },
756f574e 1560 },
55adb63e 1561 # Kinja embed
8084be78
S
1562 {
1563 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1564 'info_dict': {
55adb63e 1565 'id': '106351',
8084be78
S
1566 'ext': 'mp4',
1567 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
55adb63e 1568 'description': 'Migrated from OnionStudios',
ec85ded8 1569 'thumbnail': r're:^https?://.*\.jpe?g$',
55adb63e
RA
1570 'uploader': 'clickhole',
1571 'upload_date': '20150527',
1572 'timestamp': 1432744860,
8084be78
S
1573 }
1574 },
b8c1cc1a
S
1575 # SnagFilms embed
1576 {
1577 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1578 'info_dict': {
1579 'id': '74849a00-85a9-11e1-9660-123139220831',
1580 'ext': 'mp4',
1581 'title': '#whilewewatch',
1582 }
1583 },
a5158f38
YCH
1584 # AdobeTVVideo embed
1585 {
1586 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1587 'md5': '43662b577c018ad707a63766462b1e87',
1588 'info_dict': {
1589 'id': '2456',
1590 'ext': 'mp4',
1591 'title': 'New experience with Acrobat DC',
1592 'description': 'New experience with Acrobat DC',
1593 'duration': 248.667,
1594 },
1f812580 1595 },
ed126900 1596 # BrightcoveInPageEmbed embed
1597 {
1598 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1599 'info_dict': {
1600 'id': '4238694884001',
1601 'ext': 'flv',
1602 'title': 'Tabletop: Dread, Last Thoughts',
1603 'description': 'Tabletop: Dread, Last Thoughts',
1604 'duration': 51690,
1605 },
750b9ff0 1606 },
d10fe835
YCH
1607 # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1608 # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1609 {
1610 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1611 'info_dict': {
1612 'id': '4785848093001',
1613 'ext': 'mp4',
1614 'title': 'The Cardinal Pell Interview',
1615 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1616 'uploader': 'GlobeCast Australia - GlobeStream',
0738187f
YCH
1617 'uploader_id': '2733773828001',
1618 'upload_date': '20160304',
1619 'timestamp': 1457083087,
d10fe835
YCH
1620 },
1621 'params': {
1622 # m3u8 downloads
1623 'skip_download': True,
1624 },
1625 },
9edf47df
S
1626 {
1627 # Brightcove embed with whitespace around attribute names
1628 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
1629 'info_dict': {
1630 'id': '3167554373001',
1631 'ext': 'mp4',
1632 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
1633 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
1634 'uploader_id': '1079349493',
1635 'upload_date': '20140207',
1636 'timestamp': 1391810548,
1637 },
1638 'params': {
1639 'skip_download': True,
1640 },
1641 },
134c207e
YCH
1642 # Another form of arte.tv embed
1643 {
1644 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1645 'md5': '850bfe45417ddf221288c88a0cffe2e2',
1646 'info_dict': {
1647 'id': '030273-562_PLUS7-F',
1648 'ext': 'mp4',
1649 'title': 'ARTE Reportage - Nulle part, en France',
1650 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1651 'upload_date': '20160409',
1652 },
1653 },
4a120778
YCH
1654 # Duplicated embedded video URLs
1655 {
1656 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1657 'info_dict': {
1658 'id': '149298443_480_16c25b74_2',
1659 'ext': 'mp4',
1660 'title': 'vs. Blue Orange Spring Game',
1661 'uploader': 'www.hudl.com',
1662 },
1663 },
371ddb14
S
1664 # twitter:player:stream embed
1665 {
1666 'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
1667 'info_dict': {
1668 'id': 'master',
1669 'ext': 'mp4',
1670 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
1671 'uploader': 'www.rtl.be',
1672 },
1673 'params': {
1674 # m3u8 downloads
1675 'skip_download': True,
1676 },
1677 },
32917907
RA
1678 # twitter:player embed
1679 {
1680 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
1681 'md5': 'a3e0df96369831de324f0778e126653c',
1682 'info_dict': {
1683 'id': '4909620399001',
1684 'ext': 'mp4',
1685 'title': 'What Do Black Holes Sound Like?',
1686 'description': 'what do black holes sound like',
1687 'upload_date': '20160524',
1688 'uploader_id': '29913724001',
1689 'timestamp': 1464107587,
1690 'uploader': 'TheAtlantic',
1691 },
1692 'add_ie': ['BrightcoveLegacy'],
fd6ca382
YCH
1693 },
1694 # Facebook <iframe> embed
1695 {
1696 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
dbf0157a 1697 'md5': 'fbcde74f534176ecb015849146dd3aee',
fd6ca382
YCH
1698 'info_dict': {
1699 'id': '599637780109885',
1700 'ext': 'mp4',
1701 'title': 'Facebook video #599637780109885',
1702 },
1703 },
fd1c5fba
S
1704 # Facebook <iframe> embed, plugin video
1705 {
1706 'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
1707 'info_dict': {
1708 'id': '1754168231264132',
1709 'ext': 'mp4',
1710 'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
1711 'uploader': 'Tariq Ramadan (official)',
1712 'timestamp': 1496758379,
1713 'upload_date': '20170606',
1714 },
1715 'params': {
1716 'skip_download': True,
1717 },
1718 },
fd6ca382
YCH
1719 # Facebook API embed
1720 {
1721 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
dbf0157a 1722 'md5': 'a47372ee61b39a7b90287094d447d94e',
fd6ca382
YCH
1723 'info_dict': {
1724 'id': '10153467542406923',
1725 'ext': 'mp4',
1726 'title': 'Facebook video #10153467542406923',
1727 },
7deef1ba
YCH
1728 },
1729 # Wordpress "YouTube Video Importer" plugin
1730 {
1731 'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
dbf0157a 1732 'md5': 'd16797741b560b485194eddda8121b48',
7deef1ba
YCH
1733 'info_dict': {
1734 'id': 'HNTXWDXV9Is',
1735 'ext': 'mp4',
1736 'title': 'Blue Devils Drumline Stanford lot 2016',
1737 'upload_date': '20160627',
1738 'uploader_id': 'GENOCIDE8GENERAL10',
1739 'uploader': 'cylus cyrus',
1740 },
1741 },
81953d1a
RA
1742 {
1743 # video stored on custom kaltura server
1744 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
1745 'md5': '537617d06e64dfed891fa1593c4b30cc',
1746 'info_dict': {
1747 'id': '0_1iotm5bh',
1748 'ext': 'mp4',
1749 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
1750 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
1751 'uploader_id': 'videos.expansion@el-mundo.net',
1752 'upload_date': '20150429',
1753 'timestamp': 1430303472,
1754 },
1755 'add_ie': ['Kaltura'],
1756 },
562de77f
S
1757 {
1758 # multiple kaltura embeds, nsfw
1759 'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html',
1760 'info_dict': {
1761 'id': 'kamila-avec-video-jaime-sadomie',
1762 'title': "Kamila avec vídeo “J'aime sadomie”",
1763 },
1764 'playlist_count': 8,
1765 },
c03adf90
YCH
1766 {
1767 # Non-standard Vimeo embed
1768 'url': 'https://openclassrooms.com/courses/understanding-the-web',
1769 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
1770 'info_dict': {
1771 'id': '148867247',
1772 'ext': 'mp4',
1773 'title': 'Understanding the web - Teaser',
1774 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
1775 'upload_date': '20151214',
1776 'uploader': 'OpenClassrooms',
1777 'uploader_id': 'openclassrooms',
1778 },
1779 'add_ie': ['Vimeo'],
1780 },
a5ff05df
S
1781 {
1782 # generic vimeo embed that requires original URL passed as Referer
1783 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
1784 'only_matching': True,
1785 },
1979969f
S
1786 {
1787 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
1788 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
1789 'info_dict': {
1790 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
1791 'ext': 'mp4',
1792 'title': 'Big Buck Bunny',
1793 'description': 'Royalty free test video',
1794 'timestamp': 1432816365,
1795 'upload_date': '20150528',
1796 'is_live': False,
1797 },
1798 'params': {
1799 'skip_download': True,
1800 },
1801 'add_ie': [ArkenaIE.ie_key()],
1802 },
2a1321a2
S
1803 {
1804 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
1805 'info_dict': {
1806 'id': '1c7141f46c',
1807 'ext': 'mp4',
1808 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
1809 },
1810 'params': {
1811 'skip_download': True,
1812 },
1813 'add_ie': [Vbox7IE.ie_key()],
1814 },
b0c8f2e9
DR
1815 {
1816 # DBTV embeds
1817 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
fd3ec986
S
1818 'info_dict': {
1819 'id': '43254897',
1820 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
1821 },
b0c8f2e9
DR
1822 'playlist_mincount': 3,
1823 },
e186a9ec
S
1824 {
1825 # Videa embeds
1826 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
1827 'info_dict': {
1828 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
1829 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
1830 },
1831 'playlist_mincount': 2,
1832 },
b687c85e
S
1833 {
1834 # 20 minuten embed
1835 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
1836 'info_dict': {
1837 'id': '523629',
1838 'ext': 'mp4',
1839 'title': 'So kommen Sie bei Eis und Schnee sicher an',
1840 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
1841 },
1842 'params': {
1843 'skip_download': True,
1844 },
1845 'add_ie': [TwentyMinutenIE.ie_key()],
6ef3e65a
S
1846 },
1847 {
1848 # VideoPress embed
1849 'url': 'https://en.support.wordpress.com/videopress/',
1850 'info_dict': {
1851 'id': 'OcobLTqC',
1852 'ext': 'm4v',
1853 'title': 'IMG_5786',
1854 'timestamp': 1435711927,
1855 'upload_date': '20150701',
1856 },
1857 'params': {
1858 'skip_download': True,
1859 },
1860 'add_ie': [VideoPressIE.ie_key()],
fef51645 1861 },
eb3079b6
S
1862 {
1863 # Rutube embed
1864 'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
1865 'info_dict': {
1866 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
1867 'ext': 'flv',
1868 'title': 'Магаззино: Казань 2',
1869 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
1870 'uploader': 'Магаззино',
1871 'upload_date': '20170228',
1872 'uploader_id': '996642',
1873 },
1874 'params': {
1875 'skip_download': True,
1876 },
1877 'add_ie': [RutubeIE.ie_key()],
1878 },
71738b14 1879 {
fdf80059 1880 # glomex:embed
71738b14
ZM
1881 'url': 'https://www.skai.gr/news/world/iatrikos-syllogos-tourkias-to-turkovac-aplo-dialyma-erntogan-eiste-apateones-kai-pseytes',
1882 'info_dict': {
1883 'id': 'v-ch2nkhcirwc9-sf',
1884 'ext': 'mp4',
1885 'title': 'md5:786e1e24e06c55993cee965ef853a0c1',
1886 'description': 'md5:8b517a61d577efe7e36fde72fd535995',
1887 'timestamp': 1641885019,
1888 'upload_date': '20220111',
1889 'duration': 460000,
b143e83e 1890 'thumbnail': 'https://i3thumbs.glomex.com/dC1idjJwdndiMjRzeGwvMjAyMi8wMS8xMS8wNy8xMF8zNV82MWRkMmQ2YmU5ZTgyLmpwZw==/profile:player-960x540',
71738b14
ZM
1891 },
1892 },
b143e83e
ZM
1893 {
1894 # megatvcom:embed
1895 'url': 'https://www.in.gr/2021/12/18/greece/apokalypsi-mega-poios-parelave-tin-ereyna-tsiodra-ek-merous-tis-kyvernisis-o-prothypourgos-telika-gnorize/',
1896 'info_dict': {
1897 'id': 'apokalypsi-mega-poios-parelave-tin-ereyna-tsiodra-ek-merous-tis-kyvernisis-o-prothypourgos-telika-gnorize',
1898 'title': 'md5:5e569cf996ec111057c2764ec272848f',
1899 },
1900 'playlist': [{
1901 'md5': '1afa26064ff00ccb91617957dbc73dc1',
1902 'info_dict': {
1903 'ext': 'mp4',
1904 'id': '564916',
1905 'display_id': 'md5:6cdf22d3a2e7bacb274b7295089a1770',
1906 'title': 'md5:33b9dd39584685b62873043670eb52a6',
1907 'description': 'md5:c1db7310f390518ac36dd69d947ef1a1',
1908 'timestamp': 1639753145,
1909 'upload_date': '20211217',
1910 'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/12/prezerakos-1024x597.jpg',
1911 },
1912 }, {
1913 'md5': '4a1c220695f1ef865a8b7966a53e2474',
1914 'info_dict': {
1915 'ext': 'mp4',
1916 'id': '564905',
1917 'display_id': 'md5:ead15695e485e649aed2b81ebd699b88',
1918 'title': 'md5:2b71fd54249a3ca34609fe39ae31c47b',
1919 'description': 'md5:c42e12f638d0a97d6de4508e2c4df982',
1920 'timestamp': 1639753047,
1921 'upload_date': '20211217',
1922 'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/12/tsiodras-mitsotakis-1024x545.jpg',
1923 },
1924 }]
1925 },
fef51645
YCH
1926 {
1927 # ThePlatform embedded with whitespaces in URLs
1928 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
1929 'only_matching': True,
1930 },
97952bdb
JH
1931 {
1932 # Senate ISVP iframe https
1933 'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
1934 'md5': 'fb8c70b0b515e5037981a2492099aab8',
1935 'info_dict': {
1936 'id': 'govtaff020316',
1937 'ext': 'mp4',
1938 'title': 'Integrated Senate Video Player',
1939 },
1940 'add_ie': [SenateISVPIE.ie_key()],
1941 },
ab87c260
S
1942 {
1943 # Limelight embeds (1 channel embed + 4 media embeds)
1944 'url': 'http://www.sedona.com/FacilitatorTraining2017',
1945 'info_dict': {
1946 'id': 'FacilitatorTraining2017',
1947 'title': 'Facilitator Training 2017',
1948 },
1949 'playlist_mincount': 5,
1950 },
eb02940c
S
1951 {
1952 # Limelight embed (LimelightPlayerUtil.embed)
1953 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
1954 'info_dict': {
1955 'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
1956 'ext': 'mp4',
1957 'title': '07448641',
1958 'timestamp': 1499890639,
1959 'upload_date': '20170712',
1960 },
1961 'params': {
1962 'skip_download': True,
1963 },
1964 'add_ie': ['LimelightMedia'],
1965 },
7986c3ab
S
1966 {
1967 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
1968 'info_dict': {
1969 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
1970 'title': 'Standoff with Walnut Creek murder suspect ends',
1971 'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
1972 },
1973 'playlist_mincount': 4,
1974 },
55719459
JH
1975 {
1976 # WashingtonPost embed
1977 'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
1978 'info_dict': {
1979 'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
1980 'ext': 'mp4',
1981 'title': "No one has seen the drama series based on Trump's life \u2014 until now",
1982 'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
1983 'timestamp': 1455216756,
1984 'uploader': 'The Washington Post',
1985 'upload_date': '20160211',
1986 },
1987 'add_ie': [WashingtonPostIE.ie_key()],
1988 },
2b8e6a68
S
1989 {
1990 # Mediaset embed
1991 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
1992 'info_dict': {
1993 'id': '720642',
1994 'ext': 'mp4',
1995 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
1996 },
1997 'params': {
1998 'skip_download': True,
1999 },
2000 'add_ie': [MediasetIE.ie_key()],
2001 },
73cf76a9
S
2002 {
2003 # JOJ.sk embeds
2004 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
2005 'info_dict': {
2006 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
2007 'title': 'Slovenskom sa prehnala vlna silných búrok',
2008 },
2009 'playlist_mincount': 5,
2010 'add_ie': [JojIE.ie_key()],
2011 },
4328ddf8
S
2012 {
2013 # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
2014 'url': 'https://tvrain.ru/amp/418921/',
2015 'md5': 'cc00413936695987e8de148b67d14f1d',
2016 'info_dict': {
2017 'id': '418921',
2018 'ext': 'mp4',
2019 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
2020 },
2021 },
41918eaa 2022 {
2023 # vzaar embed
1663bd6e
S
2024 'url': 'http://help.vzaar.com/article/165-embedding-video',
2025 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
41918eaa 2026 'info_dict': {
1663bd6e 2027 'id': '8707641',
41918eaa 2028 'ext': 'mp4',
1663bd6e 2029 'title': 'Building A Business Online: Principal Chairs Q & A',
41918eaa 2030 },
2031 },
9ce1ac40 2032 {
2033 # multiple HTML5 videos on one page
2034 'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
2035 'info_dict': {
2036 'id': 'keyscenarios',
2037 'title': 'Rescue Kit 14 Free Edition - Getting started',
2038 },
2039 'playlist_count': 4,
0987f2dd
T
2040 },
2041 {
2042 # vshare embed
7a5c1cfe 2043 'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
0987f2dd
T
2044 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
2045 'info_dict': {
2046 'id': '0f64ce6',
2047 'title': 'vl14062007715967',
2048 'ext': 'mp4',
2049 }
2ca7ed41
S
2050 },
2051 {
2052 'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
2053 'md5': 'aecd089f55b1cb5a59032cb049d3a356',
2054 'info_dict': {
2055 'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
2056 'ext': 'mp4',
2057 'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
2058 'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
2059 'timestamp': 1474354800,
2060 'upload_date': '20160920',
2061 }
7d540621
S
2062 },
2063 {
2064 'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
2065 'info_dict': {
2066 'id': '1731611',
2067 'ext': 'mp4',
2068 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
2069 'description': 'md5:eb5f23826a027ba95277d105f248b825',
2070 'timestamp': 1516100691,
2071 'upload_date': '20180116',
2072 },
2073 'params': {
2074 'skip_download': True,
2075 },
2076 'add_ie': [SpringboardPlatformIE.ie_key()],
ea696249 2077 },
4c780fbd
S
2078 {
2079 'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
2080 'info_dict': {
2081 'id': 'vMDE4NzI1Mjgt690b',
2082 'ext': 'mp4',
2083 'title': 'Котята',
2084 },
2085 'add_ie': [YapFilesIE.ie_key()],
2086 'params': {
2087 'skip_download': True,
2088 },
1fc37ca3 2089 },
660a230b
S
2090 {
2091 # CloudflareStream embed
2092 'url': 'https://www.cloudflare.com/products/cloudflare-stream/',
2093 'info_dict': {
2094 'id': '31c9291ab41fac05471db4e73aa11717',
2095 'ext': 'mp4',
2096 'title': '31c9291ab41fac05471db4e73aa11717',
2097 },
2098 'add_ie': [CloudflareStreamIE.ie_key()],
2099 'params': {
2100 'skip_download': True,
2101 },
2102 },
6bd499e8
S
2103 {
2104 # PeerTube embed
2105 'url': 'https://joinpeertube.org/fr/home/',
2106 'info_dict': {
2107 'id': 'home',
2108 'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
2109 },
2110 'playlist_count': 2,
2111 },
aee36ca8
S
2112 {
2113 # Indavideo embed
2114 'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
2115 'info_dict': {
2116 'id': '1693903',
2117 'ext': 'mp4',
2118 'title': 'Így kell otthon hamburgert sütni',
2119 'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
2120 'timestamp': 1426330212,
2121 'upload_date': '20150314',
2122 'uploader': 'StreetKitchen',
2123 'uploader_id': '546363',
2124 },
2125 'add_ie': [IndavideoEmbedIE.ie_key()],
2126 'params': {
2127 'skip_download': True,
2128 },
2129 },
cfd7f2a6
S
2130 {
2131 # APA embed via JWPlatform embed
2132 'url': 'http://www.vol.at/blue-man-group/5593454',
2133 'info_dict': {
2134 'id': 'jjv85FdZ',
2135 'ext': 'mp4',
2136 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
2137 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2138 'thumbnail': r're:^https?://.*\.jpg$',
2139 'duration': 254,
2140 'timestamp': 1519211149,
2141 'upload_date': '20180221',
2142 },
2143 'params': {
2144 'skip_download': True,
2145 },
2146 },
1fc37ca3
SO
2147 {
2148 'url': 'http://share-videos.se/auto/video/83645793?uid=13',
2149 'md5': 'b68d276de422ab07ee1d49388103f457',
2150 'info_dict': {
2151 'id': '83645793',
2152 'title': 'Lock up and get excited',
1fc37ca3 2153 'ext': 'mp4'
d3431dcb
S
2154 },
2155 'skip': 'TODO: fix nested playlists processing in tests',
2156 },
9d1b2138
S
2157 {
2158 # Viqeo embeds
2159 'url': 'https://viqeo.tv/',
2160 'info_dict': {
2161 'id': 'viqeo',
2162 'title': 'All-new video platform',
2163 },
2164 'playlist_count': 6,
2165 },
d78657fd
BM
2166 {
2167 # Squarespace video embed, 2019-08-28
2168 'url': 'http://ootboxford.com',
2169 'info_dict': {
2170 'id': 'Tc7b_JGdZfw',
2171 'title': 'Out of the Blue, at Childish Things 10',
7cb51b5d
S
2172 'ext': 'mp4',
2173 'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
2174 'uploader_id': 'helendouglashouse',
2175 'uploader': 'Helen & Douglas House',
2176 'upload_date': '20140328',
d78657fd
BM
2177 },
2178 'params': {
2179 'skip_download': True,
2180 },
2181 },
29f7c58a 2182 # {
2183 # # Zype embed
2184 # 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
2185 # 'info_dict': {
2186 # 'id': '5b400b834b32992a310622b9',
2187 # 'ext': 'mp4',
2188 # 'title': 'Smoky Barbecue Favorites',
2189 # 'thumbnail': r're:^https?://.*\.jpe?g',
2190 # 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
2191 # 'upload_date': '20170909',
2192 # 'timestamp': 1504915200,
2193 # },
2194 # 'add_ie': [ZypeIE.ie_key()],
2195 # 'params': {
2196 # 'skip_download': True,
2197 # },
2198 # },
e0b6e988
S
2199 {
2200 # videojs embed
2201 'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
2202 'info_dict': {
2203 'id': 'shell',
2204 'ext': 'mp4',
2205 'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
2206 'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
2207 'thumbnail': r're:^https?://.*\.jpg$',
2208 },
2209 'params': {
2210 'skip_download': True,
2211 },
2212 'expected_warnings': ['Failed to download MPD manifest'],
2213 },
5e3da0d4
RA
2214 {
2215 # DailyMotion embed with DM.player
2216 'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804',
2217 'info_dict': {
2218 'id': 'k6aKkGHd9FJs4mtJN39',
2219 'ext': 'mp4',
2220 'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final',
2221 'description': 'This video is private.',
2222 'uploader_id': 'x1jf30l',
2223 'uploader': 'beIN SPORTS USA',
2224 'upload_date': '20190528',
2225 'timestamp': 1559062971,
2226 },
2227 'params': {
2228 'skip_download': True,
2229 },
2230 },
1a20d295
ZM
2231 {
2232 # tvopengr:embed
2233 'url': 'https://www.ethnos.gr/World/article/190604/hparosiaxekinoynoisynomiliessthgeneyhmethskiatoypolemoypanoapothnoykrania',
2234 'md5': 'eb0c3995d0a6f18f6538c8e057865d7d',
2235 'info_dict': {
2236 'id': '101119',
2237 'ext': 'mp4',
2238 'display_id': 'oikarpoitondiapragmateyseonhparosias',
2239 'title': 'md5:b979f4d640c568617d6547035528a149',
2240 'description': 'md5:e54fc1977c7159b01cc11cd7d9d85550',
2241 'timestamp': 1641772800,
2242 'upload_date': '20220110',
2243 'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/70bc39fa-895b-4918-a364-c39d2135fc6d.jpg',
2244
2245 }
2246 },
764f5de2
PW
2247 {
2248 # blogger embed
2249 'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html',
2250 'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac',
2251 'info_dict': {
2252 'id': 'BLOGGER-video-3c740e3a49197e16-796',
2253 'ext': 'mp4',
2254 'title': 'Blogger',
2255 'thumbnail': r're:^https?://.*',
2256 },
2257 },
6e6b70d6
S
2258 # {
2259 # # TODO: find another test
2260 # # http://schema.org/VideoObject
2261 # 'url': 'https://flipagram.com/f/nyvTSJMKId',
2262 # 'md5': '888dcf08b7ea671381f00fab74692755',
2263 # 'info_dict': {
2264 # 'id': 'nyvTSJMKId',
2265 # 'ext': 'mp4',
2266 # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
2267 # 'description': '#love for cats.',
2268 # 'timestamp': 1461244995,
2269 # 'upload_date': '20160421',
2270 # },
2271 # 'params': {
2272 # 'force_generic_extractor': True,
2273 # },
29f7c58a 2274 # },
2275 {
2276 # VHX Embed
2277 'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy',
2278 'info_dict': {
2279 'id': '858208',
2280 'ext': 'mp4',
2281 'title': 'Untitled',
2282 'uploader_id': 'user80538407',
2283 'uploader': 'OTT Videos',
2284 },
2285 },
2286 {
2287 # ArcPublishing PoWa video player
2288 'url': 'https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/',
2289 'md5': 'b03b2fac8680e1e5a7cc81a5c27e71b3',
2290 'info_dict': {
2291 'id': '8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
2292 'ext': 'mp4',
2293 'title': 'Senate candidates wave to voters on Anchorage streets',
2294 'description': 'md5:91f51a6511f090617353dc720318b20e',
2295 'timestamp': 1604378735,
2296 'upload_date': '20201103',
2297 'duration': 1581,
2298 },
2299 },
2181983a 2300 {
2301 # MyChannels SDK embed
2302 # https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
2303 'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
2304 'md5': '90c0699c37006ef18e198c032d81739c',
2305 'info_dict': {
2306 'id': '194165',
2307 'ext': 'mp4',
2308 'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
2309 'timestamp': 1611740340,
2310 'upload_date': '20210127',
2311 'duration': 159,
2312 },
2313 },
bc2ca1bb 2314 {
2315 # Simplecast player embed
2316 'url': 'https://www.bio.org/podcast',
2317 'info_dict': {
2318 'id': 'podcast',
2319 'title': 'I AM BIO Podcast | BIO',
2320 },
2321 'playlist_mincount': 52,
2322 },
e4edeb62 2323 {
b73612a2 2324 # Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
2325 'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
2326 'only_matching': True,
2327 }, {
e4edeb62 2328 # WimTv embed player
2329 'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/',
2330 'info_dict': {
2331 'id': 'wearefmi-pt-2-2021',
2332 'title': '#WEAREFMI – PT.2 – 2021 – MsMotorTV',
2333 },
2334 'playlist_count': 1,
a318f59d 2335 }, {
2336 # KVS Player
2337 'url': 'https://www.kvs-demo.com/videos/105/kelis-4th-of-july/',
2338 'info_dict': {
2339 'id': '105',
2340 'display_id': 'kelis-4th-of-july',
2341 'ext': 'mp4',
2342 'title': 'Kelis - 4th Of July',
2343 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
2344 },
2345 'params': {
2346 'skip_download': True,
2347 },
2348 }, {
2349 # KVS Player
2350 'url': 'https://www.kvs-demo.com/embed/105/',
2351 'info_dict': {
2352 'id': '105',
2353 'display_id': 'kelis-4th-of-july',
2354 'ext': 'mp4',
2355 'title': 'Kelis - 4th Of July / Embed Player',
2356 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
2357 },
2358 'params': {
2359 'skip_download': True,
2360 },
2361 }, {
2362 # KVS Player
2363 'url': 'https://thisvid.com/videos/french-boy-pantsed/',
2364 'md5': '3397979512c682f6b85b3b04989df224',
2365 'info_dict': {
2366 'id': '2400174',
2367 'display_id': 'french-boy-pantsed',
2368 'ext': 'mp4',
2369 'title': 'French Boy Pantsed - ThisVid.com',
2370 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
2371 }
2372 }, {
2373 # KVS Player
2374 'url': 'https://thisvid.com/embed/2400174/',
2375 'md5': '3397979512c682f6b85b3b04989df224',
2376 'info_dict': {
2377 'id': '2400174',
2378 'display_id': 'french-boy-pantsed',
2379 'ext': 'mp4',
2380 'title': 'French Boy Pantsed - ThisVid.com',
2381 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
2382 }
2383 }, {
2384 # KVS Player
2385 'url': 'https://youix.com/video/leningrad-zoj/',
2386 'md5': '94f96ba95706dc3880812b27b7d8a2b8',
2387 'info_dict': {
2388 'id': '18485',
2389 'display_id': 'leningrad-zoj',
2390 'ext': 'mp4',
2391 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com',
2392 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
2393 }
2394 }, {
2395 # KVS Player
2396 'url': 'https://youix.com/embed/18485',
2397 'md5': '94f96ba95706dc3880812b27b7d8a2b8',
2398 'info_dict': {
2399 'id': '18485',
2400 'display_id': 'leningrad-zoj',
2401 'ext': 'mp4',
2402 'title': 'Ленинград - ЗОЖ',
2403 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
2404 }
2405 }, {
2406 # KVS Player
2407 'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/',
2408 'md5': '94166bdb26b4cb1fb9214319a629fc51',
2409 'info_dict': {
2410 'id': '21217',
2411 'display_id': '40-nochey-40-nights-2016',
2412 'ext': 'mp4',
2413 'title': '40 ночей (2016) - BogMedia.org',
2414 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
2415 }
e4edeb62 2416 },
9980d3d2 2417 {
2418 # KVS Player (for sites that serve kt_player.js via non-https urls)
2419 'url': 'http://www.camhub.world/embed/389508',
2420 'md5': 'fbe89af4cfb59c8fd9f34a202bb03e32',
2421 'info_dict': {
2422 'id': '389508',
2423 'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source',
2424 'ext': 'mp4',
2425 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер',
2426 'thumbnail': 'http://www.camhub.world/contents/videos_screenshots/389000/389508/preview.mp4.jpg',
2427 }
2428 },
e16fefd8
JL
2429 {
2430 # Reddit-hosted video that will redirect and be processed by RedditIE
2431 # Redirects to https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
2432 'url': 'https://v.redd.it/zv89llsvexdz',
2433 'md5': '87f5f02f6c1582654146f830f21f8662',
2434 'info_dict': {
2435 'id': 'zv89llsvexdz',
2436 'ext': 'mp4',
2437 'timestamp': 1501941939.0,
2438 'title': 'That small heart attack.',
2439 'upload_date': '20170805',
2440 'uploader': 'Antw87'
2441 }
2442 },
2443 {
2444 # 1080p Reddit-hosted video that will redirect and be processed by RedditIE
2445 'url': 'https://v.redd.it/33hgok7dfbz71/',
2446 'md5': '7a1d587940242c9bb3bd6eb320b39258',
2447 'info_dict': {
2448 'id': '33hgok7dfbz71',
2449 'ext': 'mp4',
2450 'title': "The game Didn't want me to Knife that Guy I guess",
2451 'uploader': 'paraf1ve',
2452 'timestamp': 1636788683.0,
2453 'upload_date': '20211113'
2454 }
9f517bb1 2455 },
9c634ef8 2456 {
2457 # MainStreaming player
2458 'url': 'https://www.lactv.it/2021/10/03/lac-news24-la-settimana-03-10-2021/',
2459 'info_dict': {
2460 'id': 'EUlZfGWkGpOd',
2461 'title': 'La Settimana ',
2462 'description': '03 Ottobre ore 02:00',
2463 'ext': 'mp4',
2464 'live_status': 'not_live',
2465 'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
2466 'duration': 1512
2467 }
2468 },
9f517bb1 2469 {
2470 # Multiple gfycat iframe embeds
2471 'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=613422',
2472 'info_dict': {
2473 'title': '재이, 윤, 세은 황금 드레스를 입고 빛난다',
2474 'id': 'board'
2475 },
2476 'playlist_count': 8,
2477 },
2478 {
2479 # Multiple gfycat gifs (direct links)
2480 'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=612199',
2481 'info_dict': {
2482 'title': '옳게 된 크롭 니트 스테이씨 아이사',
2483 'id': 'board'
2484 },
2485 'playlist_count': 6
2486 },
2487 {
2488 # Multiple gfycat embeds, with uppercase "IFR" in urls
2489 'url': 'https://kkzz.kr/?vid=2295',
2490 'info_dict': {
2491 'title': '지방시 앰버서더 에스파 카리나 움짤',
2492 'id': '?vid=2295'
2493 },
2494 'playlist_count': 9
e16fefd8 2495 }
cfe50f04 2496 ]
9b122384 2497
9b122384
PH
def report_following_redirect(self, new_url):
    """Print a notice that extraction continues at the redirect target new_url."""
    message = '[redirect] Following redirect to %s' % new_url
    self._downloader.to_screen(message)
9b122384 2501
def report_detected(self, name):
    """Write a debug-level message saying which kind of resource (name) was identified."""
    debug_line = f'Identified a {name}'
    self._downloader.write_debug(debug_line)
2504
4fc946b5
PH
def _extract_rss(self, url, video_id, doc):
    """Build a playlist result from a parsed RSS document.

    doc is the parsed XML root of the feed. Each ``./channel/item`` that
    provides an enclosure ``url`` attribute (or, failing that, a ``link``
    element) becomes a ``url_transparent`` entry; items with neither are
    skipped. video_id is accepted but not used by this method.

    A feed or item without a ``title`` element yields a ``None`` title
    instead of raising AttributeError.

    Returns a dict with ``_type`` 'playlist' whose ``id`` is the feed url.
    """
    NS_MAP = {
        'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
    }

    entries = []
    for it in doc.findall('./channel/item'):
        # Prefer the first enclosure with a non-empty url attribute and
        # fall back to the item's <link> text.
        next_url = next(
            (e.attrib['url'] for e in it.findall('./enclosure')
             if e.attrib.get('url')),
            None)
        if not next_url:
            next_url = xpath_text(it, 'link', fatal=False)

        if not next_url:
            continue

        def itunes(key):
            # Text of the item's <itunes:key> child, or None when absent
            return xpath_text(
                it, xpath_with_ns('./itunes:%s' % key, NS_MAP),
                default=None)

        duration = itunes('duration')
        explicit = (itunes('explicit') or '').lower()
        if explicit in ('true', 'yes'):
            age_limit = 18
        elif explicit in ('false', 'no'):
            age_limit = 0
        else:
            age_limit = None

        entries.append({
            '_type': 'url_transparent',
            'url': next_url,
            # xpath_text tolerates a missing <title>; find(...).text would not
            'title': xpath_text(it, 'title', default=None),
            'description': xpath_text(it, 'description', default=None),
            'timestamp': unified_timestamp(
                xpath_text(it, 'pubDate', default=None)),
            # duration may be plain seconds or a clock-style string
            'duration': int_or_none(duration) or parse_duration(duration),
            'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
            'episode': itunes('title'),
            'episode_number': int_or_none(itunes('episode')),
            'season_number': int_or_none(itunes('season')),
            'age_limit': age_limit,
        })

    return {
        '_type': 'playlist',
        'id': url,
        'title': xpath_text(doc, './channel/title', default=None),
        'description': xpath_text(doc, './channel/description', default=None),
        'entries': entries,
    }
2565
c8e9a235
PH
def _extract_camtasia(self, url, video_id, webpage):
    """Extract a playlist from a page that embeds a Camtasia player.

    The page is searched for an ``fo.addVariable("csConfigFile", ...)``
    call; the referenced XML configuration is downloaded and every child
    of its ``./playlist/array/fileset`` node that has a ``uri`` becomes
    one playlist entry.

    Returns None if no camtasia video can be found (no configuration
    reference in the page, or no fileset node in the configuration).
    """
    camtasia_cfg = self._search_regex(
        r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
        webpage, 'camtasia configuration file', default=None)
    if camtasia_cfg is None:
        return None

    title = self._html_search_meta('DC.title', webpage, fatal=True)

    camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
    camtasia_cfg = self._download_xml(
        camtasia_url, video_id,
        note='Downloading camtasia configuration',
        errnote='Failed to download camtasia configuration')
    fileset_node = camtasia_cfg.find('./playlist/array/fileset')
    if fileset_node is None:
        return None

    entries = []
    # Iterate the element itself: Element.getchildren() was removed in
    # Python 3.9, and direct iteration yields the same child elements.
    for n in fileset_node:
        url_n = n.find('./uri')
        if url_n is None or not url_n.text:
            continue

        entries.append({
            # id is the file name of the uri without its extension
            'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
            'title': '%s - %s' % (title, n.tag),
            'url': compat_urlparse.urljoin(url, url_n.text),
            # a missing <duration> results in None rather than an error
            'duration': float_or_none(xpath_text(n, './duration', default=None)),
        })

    return {
        '_type': 'playlist',
        'entries': entries,
        'title': title,
    }
2602
a318f59d 2603 def _kvs_getrealurl(self, video_url, license_code):
2604 if not video_url.startswith('function/0/'):
2605 return video_url # not obfuscated
2606
2607 url_path, _, url_query = video_url.partition('?')
2608 urlparts = url_path.split('/')[2:]
2609 license = self._kvs_getlicensetoken(license_code)
2610 newmagic = urlparts[5][:32]
2611
2612 for o in range(len(newmagic) - 1, -1, -1):
2613 new = ''
2614 l = (o + sum([int(n) for n in license[o:]])) % 32
2615
2616 for i in range(0, len(newmagic)):
2617 if i == o:
2618 new += newmagic[l]
2619 elif i == l:
2620 new += newmagic[o]
2621 else:
2622 new += newmagic[i]
2623 newmagic = new
2624
2625 urlparts[5] = newmagic + urlparts[5][32:]
2626 return '/'.join(urlparts) + '?' + url_query
2627
2628 def _kvs_getlicensetoken(self, license):
2629 modlicense = license.replace('$', '').replace('0', '1')
2630 center = int(len(modlicense) / 2)
2631 fronthalf = int(modlicense[:center + 1])
2632 backhalf = int(modlicense[center:])
2633
2634 modlicense = str(4 * abs(fronthalf - backhalf))
2635 retval = ''
2636 for o in range(0, center + 1):
2637 for i in range(1, 5):
2638 retval += str((int(license[o + i]) + int(modlicense[o])) % 10)
2639 return retval
2640
9b122384 2641 def _real_extract(self, url):
ebd3c7b3 2642 if url.startswith('//'):
d226c560 2643 return self.url_result(self.http_scheme() + url)
ebd3c7b3 2644
a7130543
JMF
2645 parsed_url = compat_urlparse.urlparse(url)
2646 if not parsed_url.scheme:
a06916d9 2647 default_search = self.get_param('default_search')
04b4d394 2648 if default_search is None:
1f7ccb90 2649 default_search = 'fixup_error'
04b4d394 2650
1f7ccb90 2651 if default_search in ('auto', 'auto_warning', 'fixup_error'):
9c1da4a9 2652 if re.match(r'^[^\s/]+\.[^\s/]+/', url):
6a39ee13 2653 self.report_warning('The url doesn\'t specify the protocol, trying with http')
04b4d394 2654 return self.url_result('http://' + url)
1f7ccb90 2655 elif default_search != 'fixup_error':
9c1fc022 2656 if default_search == 'auto_warning':
0e67ab0d
PH
2657 if re.match(r'^(?:url|URL)$', url):
2658 raise ExtractorError(
7a5c1cfe 2659 'Invalid URL: %r . Call yt-dlp like this: yt-dlp -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
0e67ab0d
PH
2660 expected=True)
2661 else:
6a39ee13 2662 self.report_warning(
7571c02c 2663 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
04b4d394 2664 return self.url_result('ytsearch:' + url)
1f7ccb90
PH
2665
2666 if default_search in ('error', 'fixup_error'):
7571c02c 2667 raise ExtractorError(
b74e86f4 2668 '%r is not a valid URL. '
7a5c1cfe 2669 'Set --default-search "ytsearch" (or run yt-dlp "ytsearch:%s" ) to search YouTube'
b74e86f4 2670 % (url, url), expected=True)
04b4d394 2671 else:
f2f2c0c2
PH
2672 if ':' not in default_search:
2673 default_search += ':'
04b4d394 2674 return self.url_result(default_search + url)
4d54ef20
PH
2675
2676 url, smuggled_data = unsmuggle_url(url)
2677 force_videoid = None
d6e6a422 2678 is_intentional = smuggled_data and smuggled_data.get('to_generic')
4d54ef20
PH
2679 if smuggled_data and 'force_videoid' in smuggled_data:
2680 force_videoid = smuggled_data['force_videoid']
2681 video_id = force_videoid
2682 else:
9dcd6fd3 2683 video_id = self._generic_id(url)
3d83a1ae 2684
79649588 2685 self.to_screen('%s: Requesting header' % video_id)
c1d1facd 2686
ebab4520 2687 head_req = HEADRequest(url)
23be51d8 2688 head_response = self._request_webpage(
ebab4520
PH
2689 head_req, video_id,
2690 note=False, errnote='Could not send HEAD request to %s' % url,
2691 fatal=False)
42393ce2 2692
23be51d8 2693 if head_response is not False:
42393ce2 2694 # Check for redirect
7947a1f7 2695 new_url = head_response.geturl()
42393ce2
PH
2696 if url != new_url:
2697 self.report_following_redirect(new_url)
4d54ef20
PH
2698 if force_videoid:
2699 new_url = smuggle_url(
2700 new_url, {'force_videoid': force_videoid})
cecaaf3f 2701 return self.url_result(new_url)
42393ce2 2702
23be51d8
PH
2703 full_response = None
2704 if head_response is False:
5c2266df 2705 request = sanitized_Request(url)
58bde34a
S
2706 request.add_header('Accept-Encoding', '*')
2707 full_response = self._request_webpage(request, video_id)
23be51d8
PH
2708 head_response = full_response
2709
f930e0c7
S
2710 info_dict = {
2711 'id': video_id,
9dcd6fd3 2712 'title': self._generic_title(url),
29f7c58a 2713 'timestamp': unified_timestamp(head_response.headers.get('Last-Modified'))
f930e0c7
S
2714 }
2715
23be51d8 2716 # Check for direct link to a video
955737b2 2717 content_type = head_response.headers.get('Content-Type', '').lower()
263eff95 2718 m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
23be51d8 2719 if m:
aa9369a2 2720 self.report_detected('direct video link')
4e363703 2721 format_id = compat_str(m.group('format_id'))
c26326c1 2722 subtitles = {}
f930e0c7 2723 if format_id.endswith('mpegurl'):
c26326c1 2724 formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
cf1f13b8 2725 elif format_id.endswith('mpd') or format_id.endswith('dash+xml'):
2726 formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id)
f930e0c7
S
2727 elif format_id == 'f4m':
2728 formats = self._extract_f4m_formats(url, video_id)
eadc3ccd 2729 else:
2730 formats = [{
4e363703 2731 'format_id': format_id,
eadc3ccd 2732 'url': url,
2733 'vcodec': 'none' if m.group('type') == 'audio' else None
2734 }]
de6c51e8 2735 info_dict['direct'] = True
19dbaeec 2736 self._sort_formats(formats)
de6c51e8 2737 info_dict['formats'] = formats
c26326c1 2738 info_dict['subtitles'] = subtitles
f930e0c7 2739 return info_dict
42393ce2 2740
a06916d9 2741 if not self.get_param('test', False) and not is_intentional:
2742 force = self.get_param('force_generic_extractor', False)
6a39ee13 2743 self.report_warning(
2fece970 2744 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
d6e6a422 2745
4e262a88 2746 if not full_response:
5c2266df 2747 request = sanitized_Request(url)
58bde34a
S
2748 # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
2749 # making it impossible to download only chunk of the file (yet we need only 512kB to
7a5c1cfe 2750 # test whether it's HTML or not). According to yt-dlp default Accept-Encoding
58bde34a
S
2751 # that will always result in downloading the whole file that is not desirable.
2752 # Therefore for extraction pass we have to override Accept-Encoding to any in order
2753 # to accept raw bytes and being able to download only a chunk.
2754 # It may probably better to solve this by checking Content-Type for application/octet-stream
2755 # after HEAD request finishes, but not sure if we can rely on this.
2756 request.add_header('Accept-Encoding', '*')
2757 full_response = self._request_webpage(request, video_id)
4e262a88 2758
5940862d
S
2759 first_bytes = full_response.read(512)
2760
2761 # Is it an M3U playlist?
0d769bcb 2762 if first_bytes.startswith(b'#EXTM3U'):
aa9369a2 2763 self.report_detected('M3U playlist')
da1c94ee 2764 info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
19dbaeec 2765 self._sort_formats(info_dict['formats'])
5940862d
S
2766 return info_dict
2767
4e262a88
PH
2768 # Maybe it's a direct link to a video?
2769 # Be careful not to download the whole thing!
61ca9a80 2770 if not is_html(first_bytes):
6a39ee13 2771 self.report_warning(
4e262a88 2772 'URL could be a direct video link, returning it as such.')
f930e0c7 2773 info_dict.update({
4e262a88
PH
2774 'direct': True,
2775 'url': url,
f930e0c7
S
2776 })
2777 return info_dict
4e262a88
PH
2778
2779 webpage = self._webpage_read_content(
2780 full_response, url, video_id, prefix=first_bytes)
2781
2181983a 2782 if '<title>DPG Media Privacy Gate</title>' in webpage:
2783 webpage = self._download_webpage(url, video_id)
2784
9b122384 2785 self.report_extraction(video_id)
887c6acd 2786
1b840245 2787 # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
4fc946b5 2788 try:
61241abb 2789 try:
2790 doc = compat_etree_fromstring(webpage)
2791 except compat_xml_parse_error:
2792 doc = compat_etree_fromstring(webpage.encode('utf-8'))
4fc946b5 2793 if doc.tag == 'rss':
aa9369a2 2794 self.report_detected('RSS feed')
4fc946b5 2795 return self._extract_rss(url, video_id, doc)
cc99a77a 2796 elif doc.tag == 'SmoothStreamingMedia':
7a450a3b 2797 info_dict['formats'], info_dict['subtitles'] = self._parse_ism_formats_and_subtitles(doc, url)
aa9369a2 2798 self.report_detected('ISM manifest')
cc99a77a
S
2799 self._sort_formats(info_dict['formats'])
2800 return info_dict
e5e8d20a 2801 elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
19dbaeec 2802 smil = self._parse_smil(doc, url, video_id)
aa9369a2 2803 self.report_detected('SMIL file')
19dbaeec
S
2804 self._sort_formats(smil['formats'])
2805 return smil
729accb4 2806 elif doc.tag == '{http://xspf.org/ns/0/}playlist':
aa9369a2 2807 self.report_detected('XSPF playlist')
96b8b9ab 2808 return self.playlist_result(
47a5cb77
S
2809 self._parse_xspf(
2810 doc, video_id, xspf_url=url,
7947a1f7 2811 xspf_base_url=full_response.geturl()),
96b8b9ab 2812 video_id)
1b840245 2813 elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
7de27caf 2814 info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
d3f8b76b 2815 doc,
7947a1f7 2816 mpd_base_url=full_response.geturl().rpartition('/')[0],
86f4d14f 2817 mpd_url=url)
aa9369a2 2818 self.report_detected('DASH manifest')
19dbaeec 2819 self._sort_formats(info_dict['formats'])
f930e0c7
S
2820 return info_dict
2821 elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
2822 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
aa9369a2 2823 self.report_detected('F4M manifest')
19dbaeec 2824 self._sort_formats(info_dict['formats'])
f930e0c7 2825 return info_dict
f7300c5c 2826 except compat_xml_parse_error:
4fc946b5
PH
2827 pass
2828
c8e9a235
PH
2829 # Is it a Camtasia project?
2830 camtasia_res = self._extract_camtasia(url, video_id, webpage)
2831 if camtasia_res is not None:
aa9369a2 2832 self.report_detected('Camtasia video')
c8e9a235
PH
2833 return camtasia_res
2834
14390730 2835 # Sometimes embedded video player is hidden behind percent encoding
067aa17e 2836 # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
14390730 2837 # Unescaping the whole page allows to handle those cases in a generic way
29f7c58a 2838 # FIXME: unescaping the whole page may break URLs, commenting out for now.
2839 # There probably should be a second run of generic extractor on unescaped webpage.
2840 # webpage = compat_urllib_parse_unquote(webpage)
1f7659db 2841
7cb51b5d
S
2842 # Unescape squarespace embeds to be detected by generic extractor,
2843 # see https://github.com/ytdl-org/youtube-dl/issues/21294
2844 webpage = re.sub(
2845 r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
2846 lambda x: unescapeHTML(x.group(0)), webpage)
d78657fd 2847
887c6acd
PH
2848 # it's tempting to parse this further, but you would
2849 # have to take into account all the variations like
2850 # Video Title - Site Name
2851 # Site Name | Video Title
2852 # Video Title - Tagline | Site Name
2853 # and so on and so forth; it's just not practical
6f41b2bc
S
2854 video_title = self._og_search_title(
2855 webpage, default=None) or self._html_search_regex(
79649588
PH
2856 r'(?s)<title>(.*?)</title>', webpage, 'video title',
2857 default='video')
ef4fd848 2858
4d805e06
PH
2859 # Try to detect age limit automatically
2860 age_limit = self._rta_search(webpage)
2861 # And then there are the jokers who advertise that they use RTA,
2862 # but actually don't.
2863 AGE_LIMIT_MARKERS = [
197224b7 2864 r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
4d805e06
PH
2865 ]
2866 if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
2867 age_limit = 18
2868
ef4fd848
PH
2869 # video uploader is domain name
2870 video_uploader = self._search_regex(
79649588 2871 r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
887c6acd 2872
6f41b2bc
S
2873 video_description = self._og_search_description(webpage, default=None)
2874 video_thumbnail = self._og_search_thumbnail(webpage, default=None)
2875
b311b0ea
S
2876 info_dict.update({
2877 'title': video_title,
2878 'description': video_description,
2879 'thumbnail': video_thumbnail,
2880 'age_limit': age_limit,
2881 })
2882
aa9369a2 2883 self._downloader.write_debug('Looking for video embeds')
2884
1f4b722b 2885 # Look for Brightcove Legacy Studio embeds
4fcaa4f4 2886 bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
99877772 2887 if bc_urls:
99877772
PH
2888 entries = [{
2889 '_type': 'url',
2890 'url': smuggle_url(bc_url, {'Referer': url}),
3b7d9aa4 2891 'ie_key': 'BrightcoveLegacy'
99877772
PH
2892 } for bc_url in bc_urls]
2893
2894 return {
2895 '_type': 'playlist',
2896 'title': video_title,
2897 'id': video_id,
2898 'entries': entries,
2899 }
cfe50f04 2900
f6519f89 2901 # Look for Brightcove New Studio embeds
0254f93b 2902 bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
f6519f89 2903 if bc_urls:
5399ab3f
S
2904 return self.playlist_from_matches(
2905 bc_urls, video_id, video_title,
2906 getter=lambda x: smuggle_url(x, {'referrer': url}),
2907 ie='BrightcoveNew')
ed126900 2908
4e826cd9
S
2909 # Look for Nexx embeds
2910 nexx_urls = NexxIE._extract_urls(webpage)
2911 if nexx_urls:
2912 return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
2913
3f59b015
S
2914 # Look for Nexx iFrame embeds
2915 nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
2916 if nexx_embed_urls:
2917 return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
2918
4d8819d2
S
2919 # Look for ThePlatform embeds
2920 tp_urls = ThePlatformIE._extract_urls(webpage)
2921 if tp_urls:
46b18f23 2922 return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
4d8819d2 2923
29f7c58a 2924 arc_urls = ArcPublishingIE._extract_urls(webpage)
2925 if arc_urls:
2926 return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
2927
2181983a 2928 mychannels_urls = MedialaanIE._extract_urls(webpage)
2929 if mychannels_urls:
2930 return self.playlist_from_matches(
2931 mychannels_urls, video_id, video_title, ie=MedialaanIE.ie_key())
2932
59b8ab58
PH
2933 # Look for embedded rtl.nl player
2934 matches = re.findall(
2637fadc 2935 r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
59b8ab58
PH
2936 webpage)
2937 if matches:
46b18f23 2938 return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
59b8ab58 2939
09b9c45e
S
2940 vimeo_urls = VimeoIE._extract_urls(url, webpage)
2941 if vimeo_urls:
46b18f23 2942 return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
7115ca84 2943
29f7c58a 2944 vhx_url = VHXEmbedIE._extract_url(webpage)
2945 if vhx_url:
2946 return self.url_result(vhx_url, VHXEmbedIE.ie_key())
2947
df0c8151 2948 # Invidious Instances
2949 # https://github.com/yt-dlp/yt-dlp/issues/195
2950 # https://github.com/iv-org/invidious/pull/1730
2951 youtube_url = self._search_regex(
2952 r'<link rel="alternate" href="(https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
2953 webpage, 'youtube link', default=None)
2954 if youtube_url:
2955 return self.url_result(youtube_url, YoutubeIE.ie_key())
2956
66c9fa36
S
2957 # Look for YouTube embeds
2958 youtube_urls = YoutubeIE._extract_urls(webpage)
2959 if youtube_urls:
46b18f23 2960 return self.playlist_from_matches(
66c9fa36 2961 youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
7deef1ba 2962
ad213a1d 2963 matches = DailymotionIE._extract_urls(webpage)
355e4fd0 2964 if matches:
46b18f23 2965 return self.playlist_from_matches(matches, video_id, video_title)
355e4fd0 2966
8489578d
NJ
2967 # Look for embedded Dailymotion playlist player (#3822)
2968 m = re.search(
2969 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
2970 if m:
2971 playlists = re.findall(
2972 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
2973 if playlists:
46b18f23
JH
2974 return self.playlist_from_matches(
2975 playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
8489578d 2976
71a1db89
S
2977 # Look for DailyMail embeds
2978 dailymail_urls = DailyMailIE._extract_urls(webpage)
2979 if dailymail_urls:
2980 return self.playlist_from_matches(
2981 dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
2982
be7dacf9
S
2983 # Look for Teachable embeds, must be before Wistia
2984 teachable_url = TeachableIE._extract_url(webpage, url)
2985 if teachable_url:
2986 return self.url_result(teachable_url)
2987
ef4fd848 2988 # Look for embedded Wistia player
fda6d237
S
2989 wistia_urls = WistiaIE._extract_urls(webpage)
2990 if wistia_urls:
2991 playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
2992 for entry in playlist['entries']:
2993 entry.update({
2994 '_type': 'url_transparent',
2995 'uploader': video_uploader,
2996 })
2997 return playlist
5f6a1245 2998
bab19a8e
S
2999 # Look for SVT player
3000 svt_url = SVTIE._extract_url(webpage)
3001 if svt_url:
3002 return self.url_result(svt_url, 'SVT')
3003
c19f7764
JMF
3004 # Look for Bandcamp pages with custom domain
3005 mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
3006 if mobj is not None:
3007 burl = unescapeHTML(mobj.group(1))
09804265
JMF
3008 # Don't set the extractor because it can be a track url or an album
3009 return self.url_result(burl)
c19f7764 3010
f25571ff
PH
3011 # Look for embedded Vevo player
3012 mobj = re.search(
3013 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
3014 if mobj is not None:
3015 return self.url_result(mobj.group('url'))
796df3c6
S
3016
3017 # Look for embedded Viddler player
cb454b33
S
3018 mobj = re.search(
3019 r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
3020 webpage)
796df3c6
S
3021 if mobj is not None:
3022 return self.url_result(mobj.group('url'))
f25571ff 3023
3378d67a
S
3024 # Look for NYTimes player
3025 mobj = re.search(
3026 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
3027 webpage)
3028 if mobj is not None:
3029 return self.url_result(mobj.group('url'))
3030
cefdf970
S
3031 # Look for Libsyn player
3032 mobj = re.search(
3033 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
3034 if mobj is not None:
3035 return self.url_result(mobj.group('url'))
3036
c0d0b01f 3037 # Look for Ooyala videos
3089bc74
S
3038 mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage)
3039 or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)
3040 or re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage)
3041 or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)
3042 or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
c0d0b01f 3043 if mobj is not None:
9837cb75
RA
3044 embed_token = self._search_regex(
3045 r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
3046 webpage, 'ooyala embed token', default=None)
3047 return OoyalaIE._build_url_result(smuggle_url(
3048 mobj.group('ec'), {
3049 'domain': url,
3050 'embed_token': embed_token,
3051 }))
c0d0b01f 3052
f076b638 3053 # Look for multiple Ooyala embeds on SBN network websites
3054 mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
3055 if mobj is not None:
3056 embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
3057 if embeds:
46b18f23
JH
3058 return self.playlist_from_matches(
3059 embeds, video_id, video_title,
3060 getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
f076b638 3061
aa94a6d3 3062 # Look for Aparat videos
48099643 3063 mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
aa94a6d3
PH
3064 if mobj is not None:
3065 return self.url_result(mobj.group(1), 'Aparat')
3066
c93c2ab1 3067 # Look for MPORA videos
c3f51436 3068 mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
c93c2ab1
PH
3069 if mobj is not None:
3070 return self.url_result(mobj.group(1), 'Mpora')
5f59ee79 3071
9834872b 3072 # Look for embedded Facebook player
0646e34c
S
3073 facebook_urls = FacebookIE._extract_urls(webpage)
3074 if facebook_urls:
3075 return self.playlist_from_matches(facebook_urls, video_id, video_title)
9834872b 3076
ca97a56e
S
3077 # Look for embedded VK player
3078 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
3079 if mobj is not None:
3080 return self.url_result(mobj.group('url'), 'VK')
3081
33d4fdab 3082 # Look for embedded Odnoklassniki player
416c3ca7
RA
3083 odnoklassniki_url = OdnoklassnikiIE._extract_url(webpage)
3084 if odnoklassniki_url:
3085 return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
33d4fdab 3086
b73612a2 3087 # Look for sibnet embedded player
3088 sibnet_urls = VKIE._extract_sibnet_urls(webpage)
3089 if sibnet_urls:
3090 return self.playlist_from_matches(sibnet_urls, video_id, video_title)
3091
0364fa8b
S
3092 # Look for embedded ivi player
3093 mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
3094 if mobj is not None:
3095 return self.url_result(mobj.group('url'), 'Ivi')
3096
db1f3888
PH
3097 # Look for embedded Huffington Post player
3098 mobj = re.search(
c3f51436 3099 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
db1f3888
PH
3100 if mobj is not None:
3101 return self.url_result(mobj.group('url'), 'HuffPost')
3102
1b86cc41 3103 # Look for embed.ly
3104 mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
3105 if mobj is not None:
3106 return self.url_result(mobj.group('url'))
3107 mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
3108 if mobj is not None:
f7e6f7fa 3109 return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1b86cc41 3110
60cc4dc4
PH
3111 # Look for funnyordie embed
3112 matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
3113 if matches:
46b18f23
JH
3114 return self.playlist_from_matches(
3115 matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
60cc4dc4 3116
bc2ca1bb 3117 # Look for Simplecast embeds
3118 simplecast_urls = SimplecastIE._extract_urls(webpage)
3119 if simplecast_urls:
3120 return self.playlist_from_matches(
3121 simplecast_urls, video_id, video_title)
3122
db546cf8
S
3123 # Look for BBC iPlayer embed
3124 matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
3125 if matches:
46b18f23 3126 return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
db546cf8 3127
93d020dd
S
3128 # Look for embedded RUTV player
3129 rutv_url = RUTVIE._extract_url(webpage)
3130 if rutv_url:
3131 return self.url_result(rutv_url, 'RUTV')
3132
494f20cb 3133 # Look for embedded TVC player
b8599718
S
3134 tvc_url = TVCIE._extract_url(webpage)
3135 if tvc_url:
3136 return self.url_result(tvc_url, 'TVC')
494f20cb 3137
d40a3b5b 3138 # Look for embedded SportBox player
476cf548 3139 sportbox_urls = SportBoxIE._extract_urls(webpage)
d40a3b5b 3140 if sportbox_urls:
476cf548 3141 return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
d40a3b5b 3142
2bb5b6d0
S
3143 # Look for embedded XHamster player
3144 xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
3145 if xhamster_urls:
46b18f23 3146 return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
2bb5b6d0 3147
2c9ca782
S
3148 # Look for embedded TNAFlixNetwork player
3149 tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
3150 if tnaflix_urls:
46b18f23 3151 return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
2c9ca782 3152
b52c9ef1
S
3153 # Look for embedded PornHub player
3154 pornhub_urls = PornHubIE._extract_urls(webpage)
3155 if pornhub_urls:
46b18f23 3156 return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
b52c9ef1 3157
37e7a71c
S
3158 # Look for embedded DrTuber player
3159 drtuber_urls = DrTuberIE._extract_urls(webpage)
3160 if drtuber_urls:
46b18f23 3161 return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
37e7a71c 3162
e28ed498
S
3163 # Look for embedded RedTube player
3164 redtube_urls = RedTubeIE._extract_urls(webpage)
3165 if redtube_urls:
46b18f23 3166 return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
e28ed498 3167
06993715
S
3168 # Look for embedded Tube8 player
3169 tube8_urls = Tube8IE._extract_urls(webpage)
3170 if tube8_urls:
3171 return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
3172
4e7b5bba
S
3173 # Look for embedded Mofosex player
3174 mofosex_urls = MofosexEmbedIE._extract_urls(webpage)
3175 if mofosex_urls:
3176 return self.playlist_from_matches(mofosex_urls, video_id, video_title, ie=MofosexEmbedIE.ie_key())
3177
8fae1a04
S
3178 # Look for embedded Spankwire player
3179 spankwire_urls = SpankwireIE._extract_urls(webpage)
3180 if spankwire_urls:
3181 return self.playlist_from_matches(spankwire_urls, video_id, video_title, ie=SpankwireIE.ie_key())
3182
52c4c515
S
3183 # Look for embedded YouPorn player
3184 youporn_urls = YouPornIE._extract_urls(webpage)
3185 if youporn_urls:
3186 return self.playlist_from_matches(youporn_urls, video_id, video_title, ie=YouPornIE.ie_key())
3187
9872d311
S
3188 # Look for embedded Tvigle player
3189 mobj = re.search(
3190 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
3191 if mobj is not None:
3192 return self.url_result(mobj.group('url'), 'Tvigle')
3193
7e2ede98 3194 # Look for embedded TED player
4259402c 3195 ted_urls = TedEmbedIE._extract_urls(webpage)
3196 if ted_urls:
3197 return self.playlist_from_matches(ted_urls, video_id, video_title, ie=TedEmbedIE.ie_key())
7e2ede98 3198
5c386252 3199 # Look for embedded Ustream videos
d77ac737
YCH
3200 ustream_url = UstreamIE._extract_url(webpage)
3201 if ustream_url:
3202 return self.url_result(ustream_url, UstreamIE.ie_key())
5c386252 3203
893f8832 3204 # Look for embedded arte.tv player
8bdd16b4 3205 arte_urls = ArteTVEmbedIE._extract_urls(webpage)
3206 if arte_urls:
3207 return self.playlist_from_matches(arte_urls, video_id, video_title)
893f8832 3208
cbd55ade
S
3209 # Look for embedded francetv player
3210 mobj = re.search(
3211 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
3212 webpage)
3213 if mobj is not None:
3214 return self.url_result(mobj.group('url'))
3215
e6c2d9ad 3216 # Look for embedded Myvi.ru player
6dd94d3a 3217 myvi_url = MyviIE._extract_url(webpage)
e6c2d9ad
S
3218 if myvi_url:
3219 return self.url_result(myvi_url)
3220
dfb1b146 3221 # Look for embedded soundcloud player
548c3957 3222 soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
94aae015 3223 if soundcloud_urls:
548c3957 3224 return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
20991253 3225
027e2312
S
3226 # Look for tunein player
3227 tunein_urls = TuneInBaseIE._extract_urls(webpage)
3228 if tunein_urls:
46b18f23 3229 return self.playlist_from_matches(tunein_urls, video_id, video_title)
027e2312 3230
c5cd249e 3231 # Look for embedded mtvservices player
46fde8a1
S
3232 mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
3233 if mtvservices_url:
3234 return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
c5cd249e 3235
49807b4a
S
3236 # Look for embedded yahoo player
3237 mobj = re.search(
3238 r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
3239 webpage)
3240 if mobj is not None:
3241 return self.url_result(mobj.group('url'), 'Yahoo')
3242
2ef6fcb5
PH
3243 # Look for embedded sbs.com.au player
3244 mobj = re.search(
e98b8e79
PH
3245 r'''(?x)
3246 (?:
3247 <meta\s+property="og:video"\s+content=|
3248 <iframe[^>]+?src=
3249 )
3250 (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2ef6fcb5
PH
3251 webpage)
3252 if mobj is not None:
3253 return self.url_result(mobj.group('url'), 'SBS')
3254
42bdd9d0
PH
3255 # Look for embedded Cinchcast player
3256 mobj = re.search(
3257 r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
3258 webpage)
3259 if mobj is not None:
3260 return self.url_result(mobj.group('url'), 'Cinchcast')
3261
1a94ff68 3262 mobj = re.search(
5263cdfc 3263 r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1a94ff68 3264 webpage)
8001607e
YCH
3265 if not mobj:
3266 mobj = re.search(
1418a043 3267 r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
8001607e 3268 webpage)
1a94ff68
S
3269 if mobj is not None:
3270 return self.url_result(mobj.group('url'), 'MLB')
3271
1419fafd 3272 mobj = re.search(
dd467d33 3273 r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1419fafd
S
3274 webpage)
3275 if mobj is not None:
3276 return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
3277
af63fed7 3278 mobj = re.search(
78d3b3e2 3279 r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
af63fed7
PH
3280 webpage)
3281 if mobj is not None:
3282 return self.url_result(mobj.group('url'), 'Livestream')
3283
255fca5e
S
3284 # Look for Zapiks embed
3285 mobj = re.search(
3286 r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
3287 if mobj is not None:
3288 return self.url_result(mobj.group('url'), 'Zapiks')
3289
e3216b82 3290 # Look for Kaltura embeds
562de77f
S
3291 kaltura_urls = KalturaIE._extract_urls(webpage)
3292 if kaltura_urls:
3293 return self.playlist_from_matches(
3294 kaltura_urls, video_id, video_title,
3295 getter=lambda x: smuggle_url(x, {'source_url': url}),
3296 ie=KalturaIE.ie_key())
e3216b82 3297
665e9452 3298 # Look for EaglePlatform embeds
06a96da1
S
3299 eagleplatform_url = EaglePlatformIE._extract_url(webpage)
3300 if eagleplatform_url:
665e9452 3301 return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
135c9c42 3302
665e9452 3303 # Look for ClipYou (uses EaglePlatform) embeds
d47ae7f6
S
3304 mobj = re.search(
3305 r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
3306 if mobj is not None:
3307 return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
3308
f8388757 3309 # Look for Pladform embeds
45dad7ba
S
3310 pladform_url = PladformIE._extract_url(webpage)
3311 if pladform_url:
3312 return self.url_result(pladform_url)
f8388757 3313
ff18735c
S
3314 # Look for Videomore embeds
3315 videomore_url = VideomoreIE._extract_url(webpage)
3316 if videomore_url:
3317 return self.url_result(videomore_url)
3318
83f1481b
S
3319 # Look for Webcaster embeds
3320 webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
3321 if webcaster_url:
3322 return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
3323
2dcc114f
S
3324 # Look for Playwire embeds
3325 mobj = re.search(
3326 r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
3327 if mobj is not None:
3328 return self.url_result(mobj.group('url'))
3329
ad320e9b
NJ
3330 # Look for 5min embeds
3331 mobj = re.search(
3332 r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
3333 if mobj is not None:
3334 return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
3335
18153f1b
S
3336 # Look for Crooks and Liars embeds
3337 mobj = re.search(
3338 r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
3339 if mobj is not None:
3340 return self.url_result(mobj.group('url'))
3341
a2edf2e7
YCH
3342 # Look for NBC Sports VPlayer embeds
3343 nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
3344 if nbc_sports_url:
3345 return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
3346
de3eb07e
YCH
3347 # Look for NBC News embeds
3348 nbc_news_embed_url = re.search(
3349 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
3350 if nbc_news_embed_url:
3351 return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
3352
653789af 3353 # Look for Google Drive embeds
5b251628 3354 google_drive_url = GoogleDriveIE._extract_url(webpage)
653789af 3355 if google_drive_url:
3356 return self.url_result(google_drive_url, 'GoogleDrive')
3357
418c5cc3
YCH
3358 # Look for UDN embeds
3359 mobj = re.search(
2637fadc 3360 r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
418c5cc3
YCH
3361 if mobj is not None:
3362 return self.url_result(
0a160363 3363 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
418c5cc3 3364
2fe1b5bd
YCH
3365 # Look for Senate ISVP iframe
3366 senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
3367 if senate_isvp_url:
25c3a734 3368 return self.url_result(senate_isvp_url, 'SenateISVP')
2fe1b5bd 3369
55adb63e
RA
3370 # Look for Kinja embeds
3371 kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url)
3372 if kinja_embed_urls:
3373 return self.playlist_from_matches(
3374 kinja_embed_urls, video_id, video_title)
3375
1ac1c4c2
S
3376 # Look for OnionStudios embeds
3377 onionstudios_url = OnionStudiosIE._extract_url(webpage)
3378 if onionstudios_url:
3379 return self.url_result(onionstudios_url)
3380
764f5de2
PW
3381 # Look for Blogger embeds
3382 blogger_urls = BloggerIE._extract_urls(webpage)
3383 if blogger_urls:
3384 return self.playlist_from_matches(blogger_urls, video_id, video_title, ie=BloggerIE.ie_key())
3385
67167920 3386 # Look for ViewLift embeds
3387 viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
3388 if viewlift_url:
3389 return self.url_result(viewlift_url)
eedd20ef 3390
7cb09524 3391 # Look for JWPlatform embeds
b0ead0e0
S
3392 jwplatform_urls = JWPlatformIE._extract_urls(webpage)
3393 if jwplatform_urls:
3394 return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
7cb09524 3395
aecfcd4e
S
3396 # Look for Digiteka embeds
3397 digiteka_url = DigitekaIE._extract_url(webpage)
3398 if digiteka_url:
3399 return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
6aeba407 3400
1979969f
S
3401 # Look for Arkena embeds
3402 arkena_url = ArkenaIE._extract_url(webpage)
3403 if arkena_url:
3404 return self.url_result(arkena_url, ArkenaIE.ie_key())
3405
b1c35797
RA
3406 # Look for Piksel embeds
3407 piksel_url = PikselIE._extract_url(webpage)
3408 if piksel_url:
3409 return self.url_result(piksel_url, PikselIE.ie_key())
3410
1bf996fa 3411 # Look for Limelight embeds
e5d39886
S
3412 limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
3413 if limelight_urls:
3414 return self.playlist_result(
3415 limelight_urls, video_id, video_title, video_description)
3416
7986c3ab
S
3417 # Look for Anvato embeds
3418 anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
3419 if anvato_urls:
3420 return self.playlist_result(
3421 anvato_urls, video_id, video_title, video_description)
3422
a5158f38
YCH
3423 # Look for AdobeTVVideo embeds
3424 mobj = re.search(
3425 r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
3426 webpage)
3427 if mobj is not None:
3428 return self.url_result(
3429 self._proto_relative_url(unescapeHTML(mobj.group(1))),
3430 'AdobeTVVideo')
3431
088e1aac
YCH
3432 # Look for Vine embeds
3433 mobj = re.search(
3434 r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
3435 webpage)
3436 if mobj is not None:
3437 return self.url_result(
3438 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
3439
217d5ae0
RA
3440 # Look for VODPlatform embeds
3441 mobj = re.search(
bd2c211f 3442 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1',
217d5ae0
RA
3443 webpage)
3444 if mobj is not None:
3445 return self.url_result(
93b84045 3446 self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
217d5ae0 3447
7d273a38
RA
3448 # Look for Mangomolo embeds
3449 mobj = re.search(
755541a4
RA
3450 r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//
3451 (?:
3452 admin\.mangomolo\.com/analytics/index\.php/customers/embed|
3453 player\.mangomolo\.com/v1
3454 )/
7d273a38
RA
3455 (?:
3456 video\?.*?\bid=(?P<video_id>\d+)|
755541a4 3457 (?:index|live)\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
7d273a38
RA
3458 ).+?)\1''', webpage)
3459 if mobj is not None:
3460 info = {
3461 '_type': 'url_transparent',
3462 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
3463 'title': video_title,
3464 'description': video_description,
3465 'thumbnail': video_thumbnail,
3466 'uploader': video_uploader,
3467 }
3468 video_id = mobj.group('video_id')
3469 if video_id:
3470 info.update({
3471 'ie_key': 'MangomoloVideo',
3472 'id': video_id,
3473 })
3474 else:
3475 info.update({
3476 'ie_key': 'MangomoloLive',
3477 'id': mobj.group('channel_id'),
3478 })
3479 return info
3480
5a51775a
YCH
3481 # Look for Instagram embeds
3482 instagram_embed_url = InstagramIE._extract_embed_url(webpage)
3483 if instagram_embed_url is not None:
11e60fca
S
3484 return self.url_result(
3485 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
5a51775a 3486
5d39176f
S
3487 # Look for 3Q SDN embeds
3488 threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
3489 if threeqsdn_url:
6f41b2bc
S
3490 return {
3491 '_type': 'url_transparent',
3492 'ie_key': ThreeQSDNIE.ie_key(),
3493 'url': self._proto_relative_url(threeqsdn_url),
3494 'title': video_title,
3495 'description': video_description,
3496 'thumbnail': video_thumbnail,
3497 'uploader': video_uploader,
3498 }
5d39176f 3499
2a1321a2
S
3500 # Look for VBOX7 embeds
3501 vbox7_url = Vbox7IE._extract_url(webpage)
3502 if vbox7_url:
3503 return self.url_result(vbox7_url, Vbox7IE.ie_key())
3504
b0c8f2e9
DR
3505 # Look for DBTV embeds
3506 dbtv_urls = DBTVIE._extract_urls(webpage)
3507 if dbtv_urls:
46b18f23 3508 return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
b0c8f2e9 3509
e186a9ec
S
3510 # Look for Videa embeds
3511 videa_urls = VideaIE._extract_urls(webpage)
3512 if videa_urls:
46b18f23 3513 return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
e186a9ec 3514
b687c85e
S
3515 # Look for 20 minuten embeds
3516 twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
3517 if twentymin_urls:
46b18f23
JH
3518 return self.playlist_from_matches(
3519 twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
b687c85e 3520
6ef3e65a
S
3521 # Look for VideoPress embeds
3522 videopress_urls = VideoPressIE._extract_urls(webpage)
3523 if videopress_urls:
46b18f23
JH
3524 return self.playlist_from_matches(
3525 videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
6ef3e65a 3526
eb3079b6
S
3527 # Look for Rutube embeds
3528 rutube_urls = RutubeIE._extract_urls(webpage)
3529 if rutube_urls:
46b18f23 3530 return self.playlist_from_matches(
2583c0b5 3531 rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
6ef3e65a 3532
71738b14
ZM
3533 # Look for Glomex embeds
3534 glomex_urls = list(GlomexEmbedIE._extract_urls(webpage, url))
3535 if glomex_urls:
3536 return self.playlist_from_matches(
3537 glomex_urls, video_id, video_title, ie=GlomexEmbedIE.ie_key())
3538
32b95bb6
ZM
3539 # Look for megatv.com embeds
3540 megatvcom_urls = list(MegaTVComEmbedIE._extract_urls(webpage))
3541 if megatvcom_urls:
3542 return self.playlist_from_matches(
3543 megatvcom_urls, video_id, video_title, ie=MegaTVComEmbedIE.ie_key())
3544
55719459
JH
3545 # Look for WashingtonPost embeds
3546 wapo_urls = WashingtonPostIE._extract_urls(webpage)
3547 if wapo_urls:
3548 return self.playlist_from_matches(
3549 wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
3550
5d29af3d 3551 # Look for Mediaset embeds
8fd12a08 3552 mediaset_urls = MediasetIE._extract_urls(self, webpage)
5d29af3d
S
3553 if mediaset_urls:
3554 return self.playlist_from_matches(
3555 mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
3556
73cf76a9
S
3557 # Look for JOJ.sk embeds
3558 joj_urls = JojIE._extract_urls(webpage)
3559 if joj_urls:
3560 return self.playlist_from_matches(
3561 joj_urls, video_id, video_title, ie=JojIE.ie_key())
3562
24e966e8
PH
3563 # Look for megaphone.fm embeds
3564 mpfn_urls = MegaphoneIE._extract_urls(webpage)
3565 if mpfn_urls:
3566 return self.playlist_from_matches(
3567 mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
3568
1663bd6e
S
3569 # Look for vzaar embeds
3570 vzaar_urls = VzaarIE._extract_urls(webpage)
3571 if vzaar_urls:
3572 return self.playlist_from_matches(
3573 vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
3574
26bae2d9
S
3575 channel9_urls = Channel9IE._extract_urls(webpage)
3576 if channel9_urls:
3577 return self.playlist_from_matches(
3578 channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
3579
0987f2dd
T
3580 vshare_urls = VShareIE._extract_urls(webpage)
3581 if vshare_urls:
3582 return self.playlist_from_matches(
3583 vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
3584
8056c854 3585 # Look for Mediasite embeds
2ca7ed41
S
3586 mediasite_urls = MediasiteIE._extract_urls(webpage)
3587 if mediasite_urls:
3588 entries = [
3589 self.url_result(smuggle_url(
3590 compat_urlparse.urljoin(url, mediasite_url),
3591 {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
3592 for mediasite_url in mediasite_urls]
3593 return self.playlist_result(entries, video_id, video_title)
8056c854 3594
7d540621
S
3595 springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
3596 if springboardplatform_urls:
3597 return self.playlist_from_matches(
3598 springboardplatform_urls, video_id, video_title,
3599 ie=SpringboardPlatformIE.ie_key())
3600
4c780fbd
S
3601 yapfiles_urls = YapFilesIE._extract_urls(webpage)
3602 if yapfiles_urls:
3603 return self.playlist_from_matches(
3604 yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
3605
86c8cfc5
S
3606 vice_urls = ViceIE._extract_urls(webpage)
3607 if vice_urls:
3608 return self.playlist_from_matches(
3609 vice_urls, video_id, video_title, ie=ViceIE.ie_key())
3610
178ee883
S
3611 xfileshare_urls = XFileShareIE._extract_urls(webpage)
3612 if xfileshare_urls:
3613 return self.playlist_from_matches(
3614 xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
3615
660a230b
S
3616 cloudflarestream_urls = CloudflareStreamIE._extract_urls(webpage)
3617 if cloudflarestream_urls:
3618 return self.playlist_from_matches(
3619 cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
3620
8b4b400a 3621 peertube_urls = PeerTubeIE._extract_urls(webpage, url)
6bd499e8
S
3622 if peertube_urls:
3623 return self.playlist_from_matches(
3624 peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
3625
aee36ca8
S
3626 indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
3627 if indavideo_urls:
3628 return self.playlist_from_matches(
3629 indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
3630
cfd7f2a6
S
3631 apa_urls = APAIE._extract_urls(webpage)
3632 if apa_urls:
3633 return self.playlist_from_matches(
3634 apa_urls, video_id, video_title, ie=APAIE.ie_key())
3635
f51f526b
S
3636 foxnews_urls = FoxNewsIE._extract_urls(webpage)
3637 if foxnews_urls:
3638 return self.playlist_from_matches(
3639 foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
3640
2e4350ee 3641 sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer(
d3431dcb
S
3642 r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
3643 webpage)]
3644 if sharevideos_urls:
3645 return self.playlist_from_matches(
3646 sharevideos_urls, video_id, video_title)
3647
9d1b2138
S
3648 viqeo_urls = ViqeoIE._extract_urls(webpage)
3649 if viqeo_urls:
3650 return self.playlist_from_matches(
3651 viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
57c68ec4
S
3652
3653 expressen_urls = ExpressenIE._extract_urls(webpage)
3654 if expressen_urls:
3655 return self.playlist_from_matches(
3656 expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
9d1b2138 3657
83852e57
S
3658 zype_urls = ZypeIE._extract_urls(webpage)
3659 if zype_urls:
3660 return self.playlist_from_matches(
3661 zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
3662
feee67ae 3663 gedi_urls = GediDigitalIE._extract_urls(webpage)
902784a2 3664 if gedi_urls:
3665 return self.playlist_from_matches(
feee67ae 3666 gedi_urls, video_id, video_title, ie=GediDigitalIE.ie_key())
902784a2 3667
feee67ae 3668 # Look for RCS media group embeds
a85e131b 3669 rcs_urls = RCSEmbedsIE._extract_urls(webpage)
3670 if rcs_urls:
3671 return self.playlist_from_matches(
3672 rcs_urls, video_id, video_title, ie=RCSEmbedsIE.ie_key())
3673
e4edeb62 3674 wimtv_urls = WimTVIE._extract_urls(webpage)
3675 if wimtv_urls:
3676 return self.playlist_from_matches(
3677 wimtv_urls, video_id, video_title, ie=WimTVIE.ie_key())
3678
097f1663 3679 bitchute_urls = BitChuteIE._extract_urls(webpage)
3680 if bitchute_urls:
3681 return self.playlist_from_matches(
3682 bitchute_urls, video_id, video_title, ie=BitChuteIE.ie_key())
3683
62852977 3684 rumble_urls = RumbleEmbedIE._extract_urls(webpage)
3685 if len(rumble_urls) == 1:
3686 return self.url_result(rumble_urls[0], RumbleEmbedIE.ie_key())
3687 if rumble_urls:
3688 return self.playlist_from_matches(
3689 rumble_urls, video_id, video_title, ie=RumbleEmbedIE.ie_key())
3690
1a20d295
ZM
3691 # Look for (tvopen|ethnos).gr embeds
3692 tvopengr_urls = list(TVOpenGrEmbedIE._extract_urls(webpage))
3693 if tvopengr_urls:
3694 return self.playlist_from_matches(tvopengr_urls, video_id, video_title, ie=TVOpenGrEmbedIE.ie_key())
3695
56bb56f3
LL
3696 tvp_urls = TVPEmbedIE._extract_urls(webpage)
3697 if tvp_urls:
3698 return self.playlist_from_matches(tvp_urls, video_id, video_title, ie=TVPEmbedIE.ie_key())
3699
9c634ef8 3700 # Look for MainStreaming embeds
3701 mainstreaming_urls = MainStreamingIE._extract_urls(webpage)
3702 if mainstreaming_urls:
3703 return self.playlist_from_matches(mainstreaming_urls, video_id, video_title, ie=MainStreamingIE.ie_key())
3704
9f517bb1 3705 # Look for Gfycat Embeds
3706 gfycat_urls = GfycatIE._extract_urls(webpage)
3707 if gfycat_urls:
3708 return self.playlist_from_matches(gfycat_urls, video_id, video_title, ie=GfycatIE.ie_key())
9c634ef8 3709
bd264412
YCH
3710 # Look for HTML5 media
3711 entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
3712 if entries:
aa9369a2 3713 self.report_detected('HTML5 media')
9ce1ac40 3714 if len(entries) == 1:
3715 entries[0].update({
bd264412
YCH
3716 'id': video_id,
3717 'title': video_title,
3718 })
9ce1ac40 3719 else:
3720 for num, entry in enumerate(entries, start=1):
3721 entry.update({
3722 'id': '%s-%s' % (video_id, num),
3723 'title': '%s (%d)' % (video_title, num),
3724 })
3725 for entry in entries:
bd264412 3726 self._sort_formats(entry['formats'])
9ce1ac40 3727 return self.playlist_result(entries, video_id, video_title)
bd264412 3728
c73e330e
RU
3729 jwplayer_data = self._find_jwplayer_data(
3730 webpage, video_id, transform_source=js_to_json)
3731 if jwplayer_data:
5e7bbac3 3732 if isinstance(jwplayer_data.get('playlist'), str):
aa9369a2 3733 self.report_detected('JW Player playlist')
5e7bbac3 3734 return {
3735 **info_dict,
3736 '_type': 'url',
3737 'ie_key': JWPlatformIE.ie_key(),
3738 'url': jwplayer_data['playlist'],
3739 }
3d08f63d
MYM
3740 try:
3741 info = self._parse_jwplayer_data(
3742 jwplayer_data, video_id, require_title=False, base_url=url)
aa9369a2 3743 self.report_detected('JW Player data')
3d08f63d
MYM
3744 return merge_dicts(info, info_dict)
3745 except ExtractorError:
067aa17e 3746 # See https://github.com/ytdl-org/youtube-dl/pull/16735
3d08f63d 3747 pass
a4a554a7 3748
63d990d2
S
3749 # Video.js embed
3750 mobj = re.search(
c5b7014a 3751 r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
63d990d2
S
3752 webpage)
3753 if mobj is not None:
3754 sources = self._parse_json(
3755 mobj.group(1), video_id, transform_source=js_to_json,
3756 fatal=False) or []
c5b7014a
S
3757 if not isinstance(sources, list):
3758 sources = [sources]
63d990d2 3759 formats = []
da1c94ee 3760 subtitles = {}
63d990d2 3761 for source in sources:
e0b6e988
S
3762 src = source.get('src')
3763 if not src or not isinstance(src, compat_str):
63d990d2
S
3764 continue
3765 src = compat_urlparse.urljoin(url, src)
3766 src_type = source.get('type')
3767 if isinstance(src_type, compat_str):
3768 src_type = src_type.lower()
3769 ext = determine_ext(src).lower()
3770 if src_type == 'video/youtube':
3771 return self.url_result(src, YoutubeIE.ie_key())
3772 if src_type == 'application/dash+xml' or ext == 'mpd':
da1c94ee
F
3773 fmts, subs = self._extract_mpd_formats_and_subtitles(
3774 src, video_id, mpd_id='dash', fatal=False)
3775 formats.extend(fmts)
3776 self._merge_subtitles(subs, target=subtitles)
63d990d2 3777 elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
da1c94ee 3778 fmts, subs = self._extract_m3u8_formats_and_subtitles(
63d990d2 3779 src, video_id, 'mp4', entry_protocol='m3u8_native',
da1c94ee
F
3780 m3u8_id='hls', fatal=False)
3781 formats.extend(fmts)
3782 self._merge_subtitles(subs, target=subtitles)
63d990d2
S
3783 else:
3784 formats.append({
3785 'url': src,
3089bc74
S
3786 'ext': (mimetype2ext(src_type)
3787 or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
b73612a2 3788 'http_headers': {
3789 'Referer': full_response.geturl(),
3790 },
63d990d2 3791 })
da1c94ee 3792 if formats or subtitles:
aa9369a2 3793 self.report_detected('video.js embed')
63d990d2
S
3794 self._sort_formats(formats)
3795 info_dict['formats'] = formats
da1c94ee 3796 info_dict['subtitles'] = subtitles
63d990d2
S
3797 return info_dict
3798
ff17be3a 3799 # Looking for http://schema.org/VideoObject
fa0b816e 3800 json_ld = self._search_json_ld(webpage, video_id, default={})
ff17be3a 3801 if json_ld.get('url'):
aa9369a2 3802 self.report_detected('JSON LD')
e6ae51c1 3803 if determine_ext(json_ld.get('url')) == 'm3u8':
3804 json_ld['formats'], json_ld['subtitles'] = self._extract_m3u8_formats_and_subtitles(
3805 json_ld['url'], video_id, 'mp4')
3806 json_ld.pop('url')
ff17be3a
S
3807 return merge_dicts(json_ld, info_dict)
3808
def check_video(vurl):
    """Return True when ``vurl`` is worth treating as a media link.

    A URL is accepted outright if either the YouTube or the RTMP
    extractor reports it as suitable.  Any other URL is accepted only
    when its path component contains a dot and the extension derived
    from that path is not one of the known non-video types (flash,
    images, subtitle formats, scripts, XML).
    """
    # Extractor-recognised URLs need no extension check at all
    if YoutubeIE.suitable(vurl) or RtmpIE.suitable(vurl):
        return True

    url_path = compat_urlparse.urlparse(vurl).path
    # Without a dot in the path there is no file extension to judge by
    if '.' not in url_path:
        return False

    rejected_exts = ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
    return determine_ext(url_path) not in rejected_exts
ced659bb
S
3817
def filter_video(urls):
    """Return a list of the entries of ``urls`` accepted by ``check_video``, in order."""
    return [candidate for candidate in urls if check_video(candidate)]
3820
9b122384 3821 # Start with something easy: JW Player in SWFObject
ced659bb 3822 found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
aa9369a2 3823 if found:
3824 self.report_detected('JW Player in SFWObject')
3825 else:
d981cef6 3826 # Look for gorilla-vid style embedding
ced659bb 3827 found = filter_video(re.findall(r'''(?sx)
c0292e8a
PH
3828 (?:
3829 jw_plugins|
3830 JWPlayerOptions|
3831 jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
3832 )
a0f71985
PH
3833 .*?
3834 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
aa9369a2 3835 if found:
3836 self.report_detected('JW Player embed')
a318f59d 3837 if not found:
3838 # Look for generic KVS player
9980d3d2 3839 found = re.search(r'<script [^>]*?src="https?://.+?/kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)".*?>', webpage)
a318f59d 3840 if found:
aa9369a2 3841 self.report_detected('KWS Player')
a318f59d 3842 if found.group('maj_ver') not in ['4', '5']:
3843 self.report_warning('Untested major version (%s) in player engine--Download may fail.' % found.group('ver'))
3844 flashvars = re.search(r'(?ms)<script.*?>.*?var\s+flashvars\s*=\s*(\{.*?\});.*?</script>', webpage)
3845 flashvars = self._parse_json(flashvars.group(1), video_id, transform_source=js_to_json)
3846
3847 # extract the part after the last / as the display_id from the
3848 # canonical URL.
3849 display_id = self._search_regex(
3850 r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>'
3851 r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)',
3852 webpage, 'display_id', fatal=False
3853 )
3854 title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')
3855
3856 thumbnail = flashvars['preview_url']
3857 if thumbnail.startswith('//'):
3858 protocol, _, _ = url.partition('/')
3859 thumbnail = protocol + thumbnail
3860
11c86170 3861 url_keys = list(filter(re.compile(r'video_url|video_alt_url\d+').fullmatch, flashvars.keys()))
a318f59d 3862 formats = []
11c86170 3863 for key in url_keys:
3864 if '/get_file/' not in flashvars[key]:
3865 continue
3866 format_id = flashvars.get(f'{key}_text', key)
3867 formats.append({
3868 'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']),
3869 'format_id': format_id,
3870 'ext': 'mp4',
3871 **(parse_resolution(format_id) or parse_resolution(flashvars[key]))
3872 })
3873 if not formats[-1].get('height'):
3874 formats[-1]['quality'] = 1
3875
a318f59d 3876 self._sort_formats(formats)
3877
3878 return {
3879 'id': flashvars['video_id'],
3880 'display_id': display_id,
3881 'title': title,
3882 'thumbnail': thumbnail,
3883 'formats': formats,
3884 }
b30b8698 3885 if not found:
9b122384 3886 # Broaden the search a little bit
ced659bb 3887 found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
aa9369a2 3888 if found:
3889 self.report_detected('video file')
b30b8698
PH
3890 if not found:
3891 # Broaden the findall a little bit: JWPlayer JS loader
ced659bb 3892 found = filter_video(re.findall(
54a9328b 3893 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
aa9369a2 3894 if found:
3895 self.report_detected('JW Player JS loader')
4d805e06
PH
3896 if not found:
3897 # Flow player
ced659bb 3898 found = filter_video(re.findall(r'''(?xs)
4d805e06
PH
3899 flowplayer\("[^"]+",\s*
3900 \{[^}]+?\}\s*,
52585fd6 3901 \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
4d805e06 3902 ["']?url["']?\s*:\s*["']([^"']+)["']
ced659bb 3903 ''', webpage))
aa9369a2 3904 if found:
3905 self.report_detected('Flow Player')
501f13fb
PH
3906 if not found:
3907 # Cinerama player
3908 found = re.findall(
3909 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
aa9369a2 3910 if found:
3911 self.report_detected('Cinerama player')
b30b8698 3912 if not found:
9b122384 3913 # Try to find twitter cards info
371ddb14
S
3914 # twitter:player:stream should be checked before twitter:player since
3915 # it is expected to contain a raw stream (see
3916 # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
ced659bb
S
3917 found = filter_video(re.findall(
3918 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
aa9369a2 3919 if found:
3920 self.report_detected('Twitter card')
b30b8698 3921 if not found:
9b122384
PH
3922 # We look for Open Graph info:
3923 # We have to match any number of spaces between elements, since some sites try to align them (e.g. statigr.am)
b30b8698 3924 m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
9b122384
PH
3925 # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
3926 if m_video_type is not None:
b73612a2 3927 found = filter_video(re.findall(r'<meta.*?property="og:(?:video|audio)".*?content="(.*?)"', webpage))
aa9369a2 3928 if found:
3929 self.report_detected('Open Graph video info')
b30b8698 3930 if not found:
ed9a25dd 3931 REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
a5a45015 3932 found = re.search(
89ef304b 3933 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
ed9a25dd 3934 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
89ef304b 3935 webpage)
84f81016
S
3936 if not found:
3937 # Look also in Refresh HTTP header
3938 refresh_header = head_response.headers.get('Refresh')
3939 if refresh_header:
6c91a5a7
S
3940 # In python 2 response HTTP headers are bytestrings
3941 if sys.version_info < (3, 0) and isinstance(refresh_header, str):
3942 refresh_header = refresh_header.decode('iso-8859-1')
ed9a25dd 3943 found = re.search(REDIRECT_REGEX, refresh_header)
b30b8698 3944 if found:
b37317d8 3945 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
54b960f3
S
3946 if new_url != url:
3947 self.report_following_redirect(new_url)
3948 return {
3949 '_type': 'url',
3950 'url': new_url,
3951 }
3952 else:
3953 found = None
371ddb14
S
3954
3955 if not found:
3956 # twitter:player is a https URL to iframe player that may or may not
7a5c1cfe 3957 # be supported by yt-dlp thus this is checked the very last (see
371ddb14
S
3958 # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
3959 embed_url = self._html_search_meta('twitter:player', webpage, default=None)
02d01e15 3960 if embed_url and embed_url != url:
aa9369a2 3961 self.report_detected('twitter:player iframe')
371ddb14
S
3962 return self.url_result(embed_url)
3963
b30b8698 3964 if not found:
416c7fcb 3965 raise UnsupportedError(url)
9b122384 3966
b30b8698 3967 entries = []
4a120778 3968 for video_url in orderedSet(found):
949b6497 3969 video_url = unescapeHTML(video_url)
6cc37c69 3970 video_url = video_url.replace('\\/', '/')
b30b8698 3971 video_url = compat_urlparse.urljoin(url, video_url)
f7e6f7fa 3972 video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
9b122384 3973
b30b8698
PH
3974 # Sometimes, jwplayer extraction will result in a YouTube URL
3975 if YoutubeIE.suitable(video_url):
3976 entries.append(self.url_result(video_url, 'Youtube'))
3977 continue
9b122384 3978
b30b8698
PH
3979 # here's a fun little line of code for you:
3980 video_id = os.path.splitext(video_id)[0]
fc9713a1 3981
28602e74
YCH
3982 entry_info_dict = {
3983 'id': video_id,
3984 'uploader': video_uploader,
3985 'title': video_title,
3986 'age_limit': age_limit,
3987 }
3988
5620f840
S
3989 if RtmpIE.suitable(video_url):
3990 entry_info_dict.update({
3991 '_type': 'url_transparent',
3992 'ie_key': RtmpIE.ie_key(),
3993 'url': video_url,
3994 })
3995 entries.append(entry_info_dict)
3996 continue
3997
729accb4
S
3998 ext = determine_ext(video_url)
3999 if ext == 'smil':
da1c94ee 4000 entry_info_dict = {**self._extract_smil_info(video_url, video_id), **entry_info_dict}
729accb4
S
4001 elif ext == 'xspf':
4002 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
750b9ff0 4003 elif ext == 'm3u8':
da1c94ee 4004 entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4')
79a35085 4005 elif ext == 'mpd':
da1c94ee 4006 entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id)
3f2f4a94
S
4007 elif ext == 'f4m':
4008 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
4119a96c 4009 elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
26aae566
S
4010 # Just matching .ism/manifest is not enough to be reliably sure
4011 # whether it's actually an ISM manifest or some other streaming
4012 # manifest since there are various streaming URL formats
4013 # possible (see [1]) as well as some other shenanigans like
4014 # .smil/manifest URLs that actually serve an ISM (see [2]) and
4015 # so on.
4016 # Thus the most reasonable way to solve this is to delegate
4017 # to generic extractor in order to look into the contents of
4018 # the manifest itself.
4019 # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
4020 # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
4021 entry_info_dict = self.url_result(
4022 smuggle_url(video_url, {'to_generic': True}),
4023 GenericIE.ie_key())
d6fd958c 4024 else:
28602e74
YCH
4025 entry_info_dict['url'] = video_url
4026
19dbaeec
S
4027 if entry_info_dict.get('formats'):
4028 self._sort_formats(entry_info_dict['formats'])
4029
28602e74 4030 entries.append(entry_info_dict)
b30b8698
PH
4031
4032 if len(entries) == 1:
669f0e7c 4033 return entries[0]
b30b8698
PH
4034 else:
4035 for num, e in enumerate(entries, start=1):
13d8fbef
JMF
4036 # 'url' results don't have a title
4037 if e.get('title') is not None:
4038 e['title'] = '%s (%d)' % (e['title'], num)
b30b8698
PH
4039 return {
4040 '_type': 'playlist',
4041 'entries': entries,
4042 }