]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/generic.py
[MainStreaming] Add extractor (#2180)
[yt-dlp.git] / yt_dlp / extractor / generic.py
CommitLineData
dcdb292f 1# coding: utf-8
cfe50f04 2
79649588
PH
3from __future__ import unicode_literals
4
9b122384
PH
5import os
6import re
6c91a5a7 7import sys
9b122384
PH
8
9from .common import InfoExtractor
fc9713a1 10from .youtube import YoutubeIE
8c25f81b 11from ..compat import (
f7854627 12 compat_etree_fromstring,
4e363703 13 compat_str,
1ddb9456 14 compat_urllib_parse_unquote,
a5caba1e 15 compat_urlparse,
f7300c5c 16 compat_xml_parse_error,
8c25f81b
PH
17)
18from ..utils import (
b759a0d4 19 determine_ext,
9b122384 20 ExtractorError,
c8e9a235 21 float_or_none,
aa94a6d3 22 HEADRequest,
c76eb41b 23 int_or_none,
61ca9a80 24 is_html,
a4a554a7 25 js_to_json,
63d990d2 26 KNOWN_EXTENSIONS,
6cc62232 27 merge_dicts,
63d990d2 28 mimetype2ext,
ed2d6a19 29 orderedSet,
c76eb41b 30 parse_duration,
5c2266df 31 sanitized_Request,
9d4660ca
PH
32 smuggle_url,
33 unescapeHTML,
29f7c58a 34 unified_timestamp,
4d54ef20 35 unsmuggle_url,
416c7fcb 36 UnsupportedError,
29f7c58a 37 url_or_none,
38 xpath_attr,
76c73715 39 xpath_text,
c76eb41b 40 xpath_with_ns,
9b122384 41)
b7a8c1bc 42from .commonprotocols import RtmpIE
ed126900 43from .brightcove import (
4fcaa4f4 44 BrightcoveLegacyIE,
5c17f0a6 45 BrightcoveNewIE,
ed126900 46)
3f59b015
S
47from .nexx import (
48 NexxIE,
49 NexxEmbedIE,
50)
a2edf2e7 51from .nbc import NBCSportsVPlayerIE
c0d0b01f 52from .ooyala import OoyalaIE
93d020dd 53from .rutv import RUTVIE
954c1d05 54from .tvc import TVCIE
476cf548 55from .sportbox import SportBoxIE
6dd94d3a 56from .myvi import MyviIE
1419fafd 57from .condenast import CondeNastIE
418c5cc3 58from .udn import UDNEmbedIE
909b0d66 59from .senategov import SenateISVPIE
bab19a8e 60from .svt import SVTIE
65d161c4 61from .pornhub import PornHubIE
2bb5b6d0 62from .xhamster import XHamsterEmbedIE
2c9ca782 63from .tnaflix import TNAFlixNetworkEmbedIE
37e7a71c 64from .drtuber import DrTuberIE
e28ed498 65from .redtube import RedTubeIE
06993715 66from .tube8 import Tube8IE
4e7b5bba 67from .mofosex import MofosexEmbedIE
8fae1a04 68from .spankwire import SpankwireIE
52c4c515 69from .youporn import YouPornIE
29f7c58a 70from .vimeo import (
71 VimeoIE,
72 VHXEmbedIE,
73)
3c4fbfec 74from .dailymotion import DailymotionIE
71a1db89 75from .dailymail import DailyMailIE
1ac1c4c2 76from .onionstudios import OnionStudiosIE
67167920 77from .viewlift import ViewLiftEmbedIE
46fde8a1 78from .mtv import MTVServicesEmbeddedIE
45dad7ba 79from .pladform import PladformIE
ff18735c 80from .videomore import VideomoreIE
83f1481b 81from .webcaster import WebcasterFeedIE
5b251628 82from .googledrive import GoogleDriveIE
7cb09524 83from .jwplatform import JWPlatformIE
aecfcd4e 84from .digiteka import DigitekaIE
1979969f 85from .arkena import ArkenaIE
5a51775a 86from .instagram import InstagramIE
5d39176f 87from .threeqsdn import ThreeQSDNIE
4d8819d2 88from .theplatform import ThePlatformIE
c287f2bc 89from .kaltura import KalturaIE
06a96da1 90from .eagleplatform import EaglePlatformIE
fd6ca382 91from .facebook import FacebookIE
548c3957 92from .soundcloud import SoundcloudEmbedIE
027e2312 93from .tunein import TuneInBaseIE
2a1321a2 94from .vbox7 import Vbox7IE
b0c8f2e9 95from .dbtv import DBTVIE
b1c35797 96from .piksel import PikselIE
e186a9ec 97from .videa import VideaIE
b687c85e 98from .twentymin import TwentyMinutenIE
d77ac737 99from .ustream import UstreamIE
8bdd16b4 100from .arte import ArteTVEmbedIE
6ef3e65a 101from .videopress import VideoPressIE
eb3079b6 102from .rutube import RutubeIE
e5d39886 103from .limelight import LimelightBaseIE
7986c3ab 104from .anvato import AnvatoIE
55719459 105from .washingtonpost import WashingtonPostIE
58bb4402 106from .wistia import WistiaIE
5d29af3d 107from .mediaset import MediasetIE
73cf76a9 108from .joj import JojIE
24e966e8 109from .megaphone import MegaphoneIE
41918eaa 110from .vzaar import VzaarIE
26bae2d9 111from .channel9 import Channel9IE
0987f2dd 112from .vshare import VShareIE
2ca7ed41 113from .mediasite import MediasiteIE
7d540621 114from .springboardplatform import SpringboardPlatformIE
4c780fbd 115from .yapfiles import YapFilesIE
86c8cfc5 116from .vice import ViceIE
178ee883 117from .xfileshare import XFileShareIE
660a230b 118from .cloudflarestream import CloudflareStreamIE
6bd499e8 119from .peertube import PeerTubeIE
5ee7ae5c 120from .teachable import TeachableIE
aee36ca8 121from .indavideo import IndavideoEmbedIE
cfd7f2a6 122from .apa import APAIE
f51f526b 123from .foxnews import FoxNewsIE
9d1b2138 124from .viqeo import ViqeoIE
57c68ec4 125from .expressen import ExpressenIE
83852e57 126from .zype import ZypeIE
416c3ca7 127from .odnoklassniki import OdnoklassnikiIE
b73612a2 128from .vk import VKIE
55adb63e 129from .kinja import KinjaEmbedIE
feee67ae 130from .gedidigital import GediDigitalIE
a85e131b 131from .rcs import RCSEmbedsIE
097f1663 132from .bitchute import BitChuteIE
62852977 133from .rumble import RumbleEmbedIE
29f7c58a 134from .arcpublishing import ArcPublishingIE
2181983a 135from .medialaan import MedialaanIE
bc2ca1bb 136from .simplecast import SimplecastIE
e4edeb62 137from .wimtv import WimTVIE
56bb56f3 138from .tvp import TVPEmbedIE
764f5de2 139from .blogger import BloggerIE
9c634ef8 140from .mainstreaming import MainStreamingIE
9f517bb1 141from .gfycat import GfycatIE
9b122384 142
0838239e 143
9b122384 144class GenericIE(InfoExtractor):
79649588 145 IE_DESC = 'Generic downloader that works on some sites'
9b122384 146 _VALID_URL = r'.*'
79649588 147 IE_NAME = 'generic'
cfe50f04 148 _TESTS = [
c5fa81fe
S
149 # Direct link to a video
150 {
151 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
152 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
153 'info_dict': {
154 'id': 'trailer',
155 'ext': 'mp4',
156 'title': 'trailer',
157 'upload_date': '20100513',
158 }
159 },
c5138a7c 160 # Direct link to media delivered compressed (until Accept-Encoding is *)
c5fa81fe
S
161 {
162 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
163 'md5': '128c42e68b13950268b648275386fc74',
164 'info_dict': {
165 'id': 'FictionJunction-Parallel_Hearts',
166 'ext': 'flac',
167 'title': 'FictionJunction-Parallel_Hearts',
168 'upload_date': '20140522',
169 },
170 'expected_warnings': [
171 'URL could be a direct video link, returning it as such.'
39efc6e3
YCH
172 ],
173 'skip': 'URL invalid',
c5fa81fe
S
174 },
175 # Direct download with broken HEAD
176 {
177 'url': 'http://ai-radio.org:8000/radio.opus',
178 'info_dict': {
179 'id': 'radio',
180 'ext': 'opus',
181 'title': 'radio',
182 },
183 'params': {
184 'skip_download': True, # infinite live stream
185 },
186 'expected_warnings': [
ef0e4e7b
YCH
187 r'501.*Not Implemented',
188 r'400.*Bad Request',
c5fa81fe
S
189 ],
190 },
191 # Direct link with incorrect MIME type
192 {
193 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
194 'md5': '4ccbebe5f36706d85221f204d7eb5913',
195 'info_dict': {
196 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
197 'id': '5_Lennart_Poettering_-_Systemd',
198 'ext': 'webm',
199 'title': '5_Lennart_Poettering_-_Systemd',
200 'upload_date': '20141120',
201 },
202 'expected_warnings': [
203 'URL could be a direct video link, returning it as such.'
204 ]
205 },
206 # RSS feed
207 {
208 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
209 'info_dict': {
210 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
211 'title': 'Zero Punctuation',
212 'description': 're:.*groundbreaking video review series.*'
213 },
214 'playlist_mincount': 11,
215 },
216 # RSS feed with enclosure
217 {
218 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
219 'info_dict': {
29f7c58a 220 'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
221 'title': 'MSNBC Rachel Maddow (video)',
222 'description': 're:.*her unique approach to storytelling.*',
223 },
224 'playlist': [{
225 'info_dict': {
226 'ext': 'mov',
227 'id': 'pdv_maddow_netcast_mov-12-03-2020-223726',
228 'title': 'MSNBC Rachel Maddow (video) - 12-03-2020-223726',
229 'description': 're:.*her unique approach to storytelling.*',
230 'upload_date': '20201204',
231 },
232 }],
233 },
234 # RSS feed with item with description and thumbnails
235 {
236 'url': 'https://anchor.fm/s/dd00e14/podcast/rss',
237 'info_dict': {
238 'id': 'https://anchor.fm/s/dd00e14/podcast/rss',
239 'title': 're:.*100% Hydrogen.*',
240 'description': 're:.*In this episode.*',
241 },
242 'playlist': [{
243 'info_dict': {
244 'ext': 'm4a',
245 'id': 'c1c879525ce2cb640b344507e682c36d',
246 'title': 're:Hydrogen!',
247 'description': 're:.*In this episode we are going.*',
248 'timestamp': 1567977776,
249 'upload_date': '20190908',
250 'duration': 459,
251 'thumbnail': r're:^https?://.*\.jpg$',
252 'episode_number': 1,
253 'season_number': 1,
254 'age_limit': 0,
255 },
256 }],
257 'params': {
258 'skip_download': True,
259 },
c5fa81fe 260 },
01aec848
BG
261 # RSS feed with enclosures and unsupported link URLs
262 {
263 'url': 'http://www.hellointernet.fm/podcast?format=rss',
264 'info_dict': {
265 'id': 'http://www.hellointernet.fm/podcast?format=rss',
266 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
267 'title': 'Hello Internet',
268 },
269 'playlist_mincount': 100,
270 },
8765222d
S
271 # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
272 {
273 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
274 'info_dict': {
275 'id': 'smil',
276 'ext': 'mp4',
277 'title': 'Automatics, robotics and biocybernetics',
278 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
e327b736 279 'upload_date': '20130627',
8765222d
S
280 'formats': 'mincount:16',
281 'subtitles': 'mincount:1',
282 },
283 'params': {
284 'force_generic_extractor': True,
285 'skip_download': True,
286 },
287 },
288 # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
289 {
290 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
291 'info_dict': {
292 'id': 'hds',
293 'ext': 'flv',
294 'title': 'hds',
295 'formats': 'mincount:1',
296 },
297 'params': {
298 'skip_download': True,
299 },
300 },
301 # SMIL from https://www.restudy.dk/video/play/id/1637
302 {
303 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
304 'info_dict': {
305 'id': 'video_1637',
306 'ext': 'flv',
307 'title': 'video_1637',
308 'formats': 'mincount:3',
309 },
310 'params': {
311 'skip_download': True,
312 },
313 },
314 # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
315 {
316 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
317 'info_dict': {
318 'id': 'smil-service',
319 'ext': 'flv',
320 'title': 'smil-service',
321 'formats': 'mincount:1',
322 },
323 'params': {
324 'skip_download': True,
325 },
326 },
327 # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
328 {
329 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
330 'info_dict': {
331 'id': '4719370',
332 'ext': 'mp4',
333 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
334 'formats': 'mincount:3',
335 },
336 'params': {
337 'skip_download': True,
338 },
339 },
1de5cd3b
S
340 # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
341 {
342 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
343 'info_dict': {
344 'id': 'mZlp2ctYIUEB',
345 'ext': 'mp4',
346 'title': 'Tikibad ontruimd wegens brand',
347 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
ec85ded8 348 'thumbnail': r're:^https?://.*\.jpg$',
1de5cd3b
S
349 'duration': 33,
350 },
351 'params': {
352 'skip_download': True,
353 },
354 },
9d939cec
S
355 # MPD from http://dash-mse-test.appspot.com/media.html
356 {
357 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
358 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
359 'info_dict': {
360 'id': 'car-20120827-manifest',
361 'ext': 'mp4',
362 'title': 'car-20120827-manifest',
363 'formats': 'mincount:9',
0738187f 364 'upload_date': '20130904',
9d939cec 365 },
9d939cec 366 },
20938f76
S
367 # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
368 {
369 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
370 'info_dict': {
371 'id': 'content',
372 'ext': 'mp4',
373 'title': 'content',
374 'formats': 'mincount:8',
375 },
376 'params': {
377 # m3u8 downloads
378 'skip_download': True,
39efc6e3
YCH
379 },
380 'skip': 'video gone',
20938f76 381 },
edd9b71c
S
382 # m3u8 served with Content-Type: text/plain
383 {
384 'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
385 'info_dict': {
386 'id': 'index',
387 'ext': 'mp4',
388 'title': 'index',
389 'upload_date': '20140720',
390 'formats': 'mincount:11',
391 },
392 'params': {
393 # m3u8 downloads
394 'skip_download': True,
39efc6e3
YCH
395 },
396 'skip': 'video gone',
edd9b71c 397 },
c5fa81fe
S
398 # google redirect
399 {
400 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
401 'info_dict': {
402 'id': 'cmQHVoWB5FY',
403 'ext': 'mp4',
404 'upload_date': '20130224',
405 'uploader_id': 'TheVerge',
ec85ded8 406 'description': r're:^Chris Ziegler takes a look at the\.*',
c5fa81fe
S
407 'uploader': 'The Verge',
408 'title': 'First Firefox OS phones side-by-side',
409 },
410 'params': {
411 'skip_download': False,
412 }
413 },
6c91a5a7
S
414 {
415 # redirect in Refresh HTTP header
416 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
417 'info_dict': {
418 'id': 'pO8h3EaFRdo',
419 'ext': 'mp4',
420 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
421 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
422 'upload_date': '20150917',
423 'uploader_id': 'brtvofficial',
424 'uploader': 'Boiler Room',
425 },
426 'params': {
427 'skip_download': False,
428 },
429 },
cfe50f04 430 {
79649588 431 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
d360a146 432 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
79649588 433 'info_dict': {
d360a146
S
434 'id': '13601338388002',
435 'ext': 'mp4',
79649588
PH
436 'uploader': 'www.hodiho.fr',
437 'title': 'R\u00e9gis plante sa Jeep',
cfe50f04
JMF
438 }
439 },
c19f7764
JMF
440 # bandcamp page with custom domain
441 {
79649588
PH
442 'add_ie': ['Bandcamp'],
443 'url': 'http://bronyrock.com/track/the-pony-mash',
79649588 444 'info_dict': {
fd50bf62
S
445 'id': '3235767654',
446 'ext': 'mp3',
79649588
PH
447 'title': 'The Pony Mash',
448 'uploader': 'M_Pallante',
c19f7764 449 },
79649588 450 'skip': 'There is a limit of 200 free downloads / month for the test song',
c19f7764 451 },
eeb165e6 452 {
53a664ed
S
453 # embedded brightcove video
454 # it also tests brightcove videos that need to set the 'Referer'
455 # in the http requests
3b7d9aa4 456 'add_ie': ['BrightcoveLegacy'],
79649588
PH
457 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
458 'info_dict': {
459 'id': '2765128793001',
460 'ext': 'mp4',
461 'title': 'Le cours de bourse : l’analyse technique',
462 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
463 'uploader': 'BFM BUSINESS',
eeb165e6 464 },
79649588
PH
465 'params': {
466 'skip_download': True,
eeb165e6
JMF
467 },
468 },
53a664ed
S
469 {
470 # embedded with itemprop embedURL and video id spelled as `idVideo`
471 'add_id': ['BrightcoveLegacy'],
472 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
473 'info_dict': {
474 'id': '5255628253001',
475 'ext': 'mp4',
476 'title': 'md5:37c519b1128915607601e75a87995fc0',
477 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
478 'uploader': 'BFM BUSINESS',
479 'uploader_id': '876450612001',
480 'timestamp': 1482255315,
481 'upload_date': '20161220',
482 },
483 'params': {
484 'skip_download': True,
485 },
486 },
17ab4d3b 487 {
067aa17e 488 # https://github.com/ytdl-org/youtube-dl/issues/2253
17ab4d3b 489 'url': 'http://bcove.me/i6nfkrc3',
17ab4d3b
PH
490 'md5': '0ba9446db037002366bab3b3eb30c88c',
491 'info_dict': {
fd50bf62
S
492 'id': '3101154703001',
493 'ext': 'mp4',
17ab4d3b
PH
494 'title': 'Still no power',
495 'uploader': 'thestar.com',
496 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
497 },
3b7d9aa4 498 'add_ie': ['BrightcoveLegacy'],
39efc6e3 499 'skip': 'video gone',
17ab4d3b 500 },
0479c625
S
501 {
502 'url': 'http://www.championat.com/video/football/v/87/87499.html',
503 'md5': 'fb973ecf6e4a78a67453647444222983',
504 'info_dict': {
505 'id': '3414141473001',
506 'ext': 'mp4',
507 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
508 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
509 'uploader': 'Championat',
510 },
511 },
bdf97017 512 {
067aa17e 513 # https://github.com/ytdl-org/youtube-dl/issues/3541
3b7d9aa4 514 'add_ie': ['BrightcoveLegacy'],
bdf97017
NJ
515 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
516 'info_dict': {
517 'id': '3866516442001',
37aab278 518 'ext': 'mp4',
bdf97017
NJ
519 'title': 'Leer mij vrouwen kennen: Aflevering 1',
520 'description': 'Leer mij vrouwen kennen: Aflevering 1',
521 'uploader': 'SBS Broadcasting',
522 },
37aab278 523 'skip': 'Restricted to Netherlands',
bdf97017 524 'params': {
37aab278 525 'skip_download': True, # m3u8 download
bdf97017
NJ
526 },
527 },
06d0ad9a
YCH
528 {
529 # Brightcove video in <iframe>
530 'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
531 'md5': '36d74ef5e37c8b4a2ce92880d208b968',
532 'info_dict': {
533 'id': '5360463607001',
534 'ext': 'mp4',
535 'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活',
536 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
537 'uploader': 'United Nations',
538 'uploader_id': '1362235914001',
539 'timestamp': 1489593889,
540 'upload_date': '20170315',
541 },
542 'add_ie': ['BrightcoveLegacy'],
543 },
16e2c8f7
YCH
544 {
545 # Brightcove with alternative playerID key
546 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
547 'info_dict': {
548 'id': 'nmeth.2062_SV1',
549 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
550 },
551 'playlist': [{
552 'info_dict': {
553 'id': '2228375078001',
554 'ext': 'mp4',
555 'title': 'nmeth.2062-sv1',
556 'description': 'nmeth.2062-sv1',
557 'timestamp': 1363357591,
558 'upload_date': '20130315',
559 'uploader': 'Nature Publishing Group',
560 'uploader_id': '1964492299001',
561 },
562 }],
563 },
40158f55
JH
564 {
565 # Brightcove with UUID in videoPlayer
566 'url': 'http://www8.hp.com/cn/zh/home.html',
567 'info_dict': {
568 'id': '5255815316001',
569 'ext': 'mp4',
570 'title': 'Sprocket Video - China',
571 'description': 'Sprocket Video - China',
572 'uploader': 'HP-Video Gallery',
573 'timestamp': 1482263210,
574 'upload_date': '20161220',
575 'uploader_id': '1107601872001',
576 },
577 'params': {
578 'skip_download': True, # m3u8 download
579 },
580 'skip': 'video rotates...weekly?',
581 },
582 {
583 # Brightcove:new type [2].
584 'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
585 'md5': '2b35148fcf48da41c9fb4591650784f3',
586 'info_dict': {
587 'id': '5348741021001',
588 'ext': 'mp4',
589 'upload_date': '20170306',
590 'uploader_id': '4191638492001',
591 'timestamp': 1488769918,
592 'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
593
594 },
595 },
596 {
597 # Alternative brightcove <video> attributes
598 'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
599 'info_dict': {
600 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
601 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
602 },
603 'playlist': [{
604 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
605 'info_dict': {
606 'id': '5311302538001',
607 'ext': 'mp4',
608 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
609 'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
610 'timestamp': 1486321708,
611 'upload_date': '20170205',
612 'uploader_id': '800000640001',
613 },
614 'only_matching': True,
615 }],
616 },
b68a812e
S
617 {
618 # Brightcove with UUID in videoPlayer
619 'url': 'http://www8.hp.com/cn/zh/home.html',
620 'info_dict': {
621 'id': '5255815316001',
622 'ext': 'mp4',
623 'title': 'Sprocket Video - China',
624 'description': 'Sprocket Video - China',
625 'uploader': 'HP-Video Gallery',
626 'timestamp': 1482263210,
627 'upload_date': '20161220',
628 'uploader_id': '1107601872001',
629 },
630 'params': {
631 'skip_download': True, # m3u8 download
632 },
633 },
c0d0b01f
JMF
634 # ooyala video
635 {
79649588 636 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
87830900 637 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
79649588
PH
638 'info_dict': {
639 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
640 'ext': 'mp4',
3486df38 641 'title': '2cc213299525360.mov', # that's what we get
53e06b25 642 'duration': 238.231,
c0d0b01f 643 },
87830900 644 'add_ie': ['Ooyala'],
c0d0b01f 645 },
bf94d763
S
646 {
647 # ooyala video embedded with http://player.ooyala.com/iframe.js
648 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
649 'info_dict': {
650 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
651 'ext': 'mp4',
652 'title': '"Steve Jobs: Man in the Machine" trailer',
653 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
53e06b25 654 'duration': 135.427,
bf94d763
S
655 },
656 'params': {
657 'skip_download': True,
658 },
39efc6e3 659 'skip': 'movie expired',
bf94d763 660 },
198d4cb4
GR
661 # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
662 {
663 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
664 'info_dict': {
665 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
666 'ext': 'mp4',
667 'title': 'Steampunk Fest Comes to Honesdale',
668 'duration': 43.276,
669 },
670 'params': {
671 'skip_download': True,
672 }
673 },
1b86cc41 674 # embed.ly video
675 {
676 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
677 'info_dict': {
678 'id': '9ODmcdjQcHQ',
679 'ext': 'mp4',
0a5bce56
PH
680 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
681 'upload_date': '20140225',
682 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
683 'uploader': 'Tested',
684 'uploader_id': 'testedcom',
1b86cc41 685 },
686 # No need to test YoutubeIE here
687 'params': {
688 'skip_download': True,
689 },
690 },
60cc4dc4
PH
691 # funnyordie embed
692 {
693 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
60cc4dc4
PH
694 'info_dict': {
695 'id': '18e820ec3f',
696 'ext': 'mp4',
697 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
698 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
93d020dd 699 },
39efc6e3
YCH
700 # HEAD requests lead to endless 301, while GET is OK
701 'expected_warnings': ['301'],
60cc4dc4 702 },
93d020dd
S
703 # RUTV embed
704 {
705 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
706 'info_dict': {
707 'id': '776940',
708 'ext': 'mp4',
709 'title': 'Охотское море стало целиком российским',
710 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
711 },
712 'params': {
713 # m3u8 download
714 'skip_download': True,
715 },
aab74fa1 716 },
f37bdbe5
S
717 # TVC embed
718 {
719 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
720 'info_dict': {
721 'id': '55304',
722 'ext': 'mp4',
723 'title': 'Дошкольное воспитание',
724 },
725 },
b827a601
S
726 # SportBox embed
727 {
728 'url': 'http://www.vestifinance.ru/articles/25753',
729 'info_dict': {
730 'id': '25753',
05d1e7aa 731 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
b827a601
S
732 },
733 'playlist': [{
734 'info_dict': {
735 'id': '370908',
736 'title': 'Госзаказ. День 3',
737 'ext': 'mp4',
738 }
739 }, {
740 'info_dict': {
741 'id': '370905',
742 'title': 'Госзаказ. День 2',
743 'ext': 'mp4',
744 }
745 }, {
746 'info_dict': {
747 'id': '370902',
748 'title': 'Госзаказ. День 1',
749 'ext': 'mp4',
750 }
751 }],
752 'params': {
753 # m3u8 download
754 'skip_download': True,
755 },
756 },
bf20b9c5
S
757 # Myvi.ru embed
758 {
759 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
760 'info_dict': {
761 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
762 'ext': 'mp4',
763 'title': 'Ужастики, русский трейлер (2015)',
ec85ded8 764 'thumbnail': r're:^https?://.*\.jpg$',
bf20b9c5
S
765 'duration': 153,
766 }
767 },
c76799c5
S
768 # XHamster embed
769 {
770 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
771 'info_dict': {
772 'id': 'showthread',
773 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
774 },
775 'playlist_mincount': 7,
39efc6e3
YCH
776 # This forum does not allow <iframe> syntaxes anymore
777 # Now HTML tags are displayed as-is
778 'skip': 'No videos on this page',
c76799c5 779 },
aab74fa1
PH
780 # Embedded TED video
781 {
782 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
a8eb5a8e 783 'md5': '65fdff94098e4a607385a60c5177c638',
aab74fa1 784 'info_dict': {
a8eb5a8e 785 'id': '1969',
aab74fa1 786 'ext': 'mp4',
a8eb5a8e
PH
787 'title': 'Hidden miracles of the natural world',
788 'uploader': 'Louie Schwartzberg',
789 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
aab74fa1 790 }
60cc4dc4 791 },
d95e35d6
S
792 # nowvideo embed hidden behind percent encoding
793 {
794 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
795 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
796 'info_dict': {
797 'id': '06e53103ca9aa',
798 'ext': 'flv',
799 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
800 'description': 'No description',
801 },
0f2a2ba1 802 },
893f8832
PH
803 # arte embed
804 {
805 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
806 'md5': '7653032cbb25bf6c80d80f217055fa43',
807 'info_dict': {
808 'id': '048195-004_PLUS7-F',
809 'ext': 'flv',
810 'title': 'X:enius',
811 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
812 'upload_date': '20140320',
813 },
814 'params': {
815 'skip_download': 'Requires rtmpdump'
39efc6e3
YCH
816 },
817 'skip': 'video gone',
893f8832 818 },
cbd55ade
S
819 # francetv embed
820 {
821 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
822 'info_dict': {
823 'id': 'EV_30231',
824 'ext': 'mp4',
825 'title': 'Alcaline, le concert avec Calogero',
826 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
827 'upload_date': '20150226',
828 'timestamp': 1424989860,
829 'duration': 5400,
830 },
831 'params': {
832 # m3u8 downloads
833 'skip_download': True,
834 },
835 'expected_warnings': [
836 'Forbidden'
837 ]
838 },
fa35cdad
PH
839 # Condé Nast embed
840 {
841 'url': 'http://www.wired.com/2014/04/honda-asimo/',
842 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
843 'info_dict': {
844 'id': '53501be369702d3275860000',
845 'ext': 'mp4',
846 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
847 }
ebd3c7b3
PH
848 },
849 # Dailymotion embed
850 {
851 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
852 'md5': '441aeeb82eb72c422c7f14ec533999cd',
853 'info_dict': {
854 'id': 'k2mm4bCdJ6CQ2i7c8o2',
855 'ext': 'mp4',
856 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
0738187f 857 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
ebd3c7b3 858 'uploader': 'Spi0n',
0738187f
YCH
859 'uploader_id': 'xgditw',
860 'upload_date': '20140425',
861 'timestamp': 1398441542,
ebd3c7b3
PH
862 },
863 'add_ie': ['Dailymotion'],
2b88feed 864 },
71a1db89
S
865 # DailyMail embed
866 {
867 'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
868 'info_dict': {
869 'id': '1495629',
870 'ext': 'mp4',
871 'title': 'Care worker punches elderly dementia patient in head 11 times',
872 'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
873 },
874 'add_ie': ['DailyMail'],
875 'params': {
876 'skip_download': True,
877 },
878 },
2b88feed
PH
879 # YouTube embed
880 {
881 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
882 'info_dict': {
883 'id': 'FXRb4ykk4S0',
884 'ext': 'mp4',
885 'title': 'The NBL Auction 2014',
886 'uploader': 'BADMINTON England',
887 'uploader_id': 'BADMINTONEvents',
888 'upload_date': '20140603',
889 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
890 },
891 'add_ie': ['Youtube'],
892 'params': {
893 'skip_download': True,
894 }
895 },
a0566bbf 896 # MTVServices embed
c5cd249e 897 {
1fa309da
YCH
898 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
899 'md5': 'ca1aef97695ef2c1d6973256a57e5252',
c5cd249e 900 'info_dict': {
1fa309da 901 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
c5cd249e 902 'ext': 'mp4',
1fa309da
YCH
903 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
904 'description': 'Two valets share their love for movie star Liam Neesons.',
05d1e7aa
YCH
905 'timestamp': 1349922600,
906 'upload_date': '20121011',
c5cd249e
JMF
907 },
908 },
61013473 909 # YouTube embed via <data-embed-url="">
910 {
911 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
61013473 912 'info_dict': {
a8eb5a8e 913 'id': '4vAffPZIT44',
61013473 914 'ext': 'mp4',
a8eb5a8e 915 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
ed2d6a19
PH
916 'uploader': 'Gameloft',
917 'uploader_id': 'gameloft',
a8eb5a8e
PH
918 'upload_date': '20140828',
919 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
ed2d6a19
PH
920 },
921 'params': {
922 'skip_download': True,
61013473 923 }
c8e9a235 924 },
61568e50
JH
925 # YouTube <object> embed
926 {
927 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
928 'md5': '516718101ec834f74318df76259fb3cc',
929 'info_dict': {
930 'id': 'msN87y-iEx0',
931 'ext': 'webm',
932 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
933 'upload_date': '20080526',
934 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
935 'uploader': 'Christopher Sykes',
936 'uploader_id': 'ChristopherJSykes',
937 },
938 'add_ie': ['Youtube'],
939 },
c8e9a235
PH
940 # Camtasia studio
941 {
942 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
943 'playlist': [{
944 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
945 'info_dict': {
946 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
947 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
948 'ext': 'flv',
949 'duration': 2235.90,
950 }
951 }, {
952 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
953 'info_dict': {
954 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
955 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
956 'ext': 'flv',
957 'duration': 2235.93,
958 }
959 }],
960 'info_dict': {
961 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
962 }
4d805e06
PH
963 },
964 # Flowplayer
965 {
966 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
967 'md5': '9d65602bf31c6e20014319c7d07fba27',
968 'info_dict': {
969 'id': '5123ea6d5e5a7',
970 'ext': 'mp4',
971 'age_limit': 18,
972 'uploader': 'www.handjobhub.com',
d6d9186f 973 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
4d805e06 974 }
0990305d 975 },
22a6f150 976 # Multiple brightcove videos
067aa17e 977 # https://github.com/ytdl-org/youtube-dl/issues/2283
22a6f150
PH
978 {
979 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
980 'info_dict': {
981 'id': 'always-never',
982 'title': 'Always / Never - The New Yorker',
983 },
984 'playlist_count': 3,
985 'params': {
986 'extract_flat': False,
987 'skip_download': True,
988 }
1a94ff68
S
989 },
990 # MLB embed
991 {
992 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
993 'md5': '96f09a37e44da40dd083e12d9a683327',
994 'info_dict': {
995 'id': '33322633',
996 'ext': 'mp4',
997 'title': 'Ump changes call to ball',
998 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
999 'duration': 48,
1000 'timestamp': 1401537900,
1001 'upload_date': '20140531',
ec85ded8 1002 'thumbnail': r're:^https?://.*\.jpg$',
1a94ff68
S
1003 },
1004 },
746c67d7
NJ
1005 # Wistia embed
1006 {
6c114b12
S
1007 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
1008 'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
746c67d7 1009 'info_dict': {
6c114b12 1010 'id': '6e2wtrbdaf',
746c67d7 1011 'ext': 'mov',
6c114b12
S
1012 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
1013 'description': 'a Paywall Videos video from Remilon',
1014 'duration': 644.072,
1015 'uploader': 'study.com',
1016 'timestamp': 1459678540,
1017 'upload_date': '20160403',
1018 'filesize': 24687186,
746c67d7
NJ
1019 },
1020 },
52cffcb1 1021 {
1022 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
1023 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
1024 'info_dict': {
1025 'id': 'uxjb0lwrcz',
1026 'ext': 'mp4',
6c114b12 1027 'title': 'Conversation about Hexagonal Rails Part 1',
0738187f 1028 'description': 'a Martin Fowler video from ThoughtWorks',
52cffcb1 1029 'duration': 1715.0,
85d7b765 1030 'uploader': 'thoughtworks.wistia.com',
0738187f 1031 'timestamp': 1401832161,
6c114b12 1032 'upload_date': '20140603',
70b7e3fb 1033 },
52cffcb1 1034 },
7ded6545
S
1035 # Wistia standard embed (async)
1036 {
1037 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
1038 'info_dict': {
1039 'id': '807fafadvk',
1040 'ext': 'mp4',
1041 'title': 'Drip Brennan Dunn Workshop',
1042 'description': 'a JV Webinars video from getdrip-1',
1043 'duration': 4986.95,
7ded6545 1044 'timestamp': 1463607249,
6c114b12 1045 'upload_date': '20160518',
7ded6545
S
1046 },
1047 'params': {
1048 'skip_download': True,
1049 }
1050 },
ac645ac7
PH
1051 # Soundcloud embed
1052 {
1053 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
1054 'info_dict': {
1055 'id': '174391317',
1056 'ext': 'mp3',
1057 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
1058 'uploader': 'Sophos Security',
1059 'title': 'Chet Chat 171 - Oct 29, 2014',
1060 'upload_date': '20141029',
1061 }
af63fed7 1062 },
db19df6c
S
1063 # Soundcloud multiple embeds
1064 {
1065 'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
1066 'info_dict': {
1067 'id': '52809',
1068 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
1069 },
1070 'playlist_mincount': 7,
1071 },
027e2312
S
1072 # TuneIn station embed
1073 {
1074 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
1075 'info_dict': {
1076 'id': '204146',
1077 'ext': 'mp3',
1078 'title': 'CNRV',
1079 'location': 'Paris, France',
1080 'is_live': True,
1081 },
1082 'params': {
1083 # Live stream
1084 'skip_download': True,
1085 },
1086 },
af63fed7
PH
1087 # Livestream embed
1088 {
1089 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
1090 'info_dict': {
1091 'id': '67864563',
1092 'ext': 'flv',
1093 'upload_date': '20141112',
1094 'title': 'Rosetta #CometLanding webcast HL 10',
1095 }
1096 },
78d3b3e2
YCH
1097 # Another Livestream embed, without 'new.' in URL
1098 {
1099 'url': 'https://www.freespeech.org/',
1100 'info_dict': {
1101 'id': '123537347',
1102 'ext': 'mp4',
1103 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
1104 },
1105 'params': {
1106 # Live stream
1107 'skip_download': True,
1108 },
1109 },
65f3a228
PH
1110 # LazyYT
1111 {
e8e4cc5a 1112 'url': 'https://skiplagged.com/',
65f3a228 1113 'info_dict': {
e8e4cc5a
JH
1114 'id': 'skiplagged',
1115 'title': 'Skiplagged: The smart way to find cheap flights',
65f3a228 1116 },
e8e4cc5a
JH
1117 'playlist_mincount': 1,
1118 'add_ie': ['Youtube'],
4e262a88 1119 },
42bdd9d0
PH
1120 # Cinchcast embed
1121 {
1122 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
1123 'info_dict': {
1124 'id': '7141703',
1125 'ext': 'mp3',
1126 'upload_date': '20141126',
1127 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
1128 }
1129 },
501f13fb
PH
1130 # Cinerama player
1131 {
1132 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
1133 'info_dict': {
1134 'id': '730m_DandD_1901_512k',
1135 'ext': 'mp4',
1136 'uploader': 'www.abc.net.au',
1137 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
1138 }
796df3c6
S
1139 },
1140 # embedded viddler video
1141 {
1142 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
1143 'info_dict': {
1144 'id': '4d03aad9',
1145 'ext': 'mp4',
1146 'uploader': 'deadspin',
1147 'title': 'WALL-TO-GORTAT',
1148 'timestamp': 1422285291,
1149 'upload_date': '20150126',
1150 },
1151 'add_ie': ['Viddler'],
a0f71985 1152 },
2051acde
S
1153 # Libsyn embed
1154 {
1155 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
1156 'info_dict': {
1157 'id': '3377616',
1158 'ext': 'mp3',
1159 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
1160 'description': 'md5:601cb790edd05908957dae8aaa866465',
1161 'upload_date': '20150220',
1162 },
326fa4e6 1163 'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
2051acde 1164 },
a0f71985
PH
1165 # jwplayer YouTube
1166 {
1167 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
1168 'info_dict': {
1169 'id': 'Mrj4DVp2zeA',
1170 'ext': 'mp4',
f37e3f99 1171 'upload_date': '20150212',
a0f71985 1172 'uploader': 'The National Archives UK',
2637fadc 1173 'description': 'md5:8078af856dca76edc42910b61273dbbf',
a0f71985
PH
1174 'uploader_id': 'NationalArchives08',
1175 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
1176 },
59b8ab58 1177 },
5620f840
S
1178 # jwplayer rtmp
1179 {
6899b1d9 1180 'url': 'http://www.suffolk.edu/sjc/live.php',
5620f840 1181 'info_dict': {
6899b1d9 1182 'id': 'live',
5620f840
S
1183 'ext': 'flv',
1184 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
1185 'uploader': 'www.suffolk.edu',
1186 },
1187 'params': {
1188 'skip_download': True,
2637fadc 1189 },
6899b1d9 1190 'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
5620f840 1191 },
5e7bbac3 1192 # jwplayer with only the json URL
1193 {
1194 'url': 'https://www.hollywoodreporter.com/news/general-news/dunkirk-team-reveals-what-christopher-nolan-said-oscar-win-meet-your-oscar-winner-1092454',
1195 'info_dict': {
1196 'id': 'TljWkvWH',
1197 'ext': 'mp4',
1198 'upload_date': '20180306',
1199 'title': 'md5:91eb1862f6526415214f62c00b453936',
1200 'description': 'md5:73048ae50ae953da10549d1d2fe9b3aa',
1201 'timestamp': 1520367225,
1202 },
1203 'params': {
1204 'skip_download': True,
1205 },
1206 },
a4a554a7
YCH
1207 # Complex jwplayer
1208 {
1209 'url': 'http://www.indiedb.com/games/king-machine/videos',
1210 'info_dict': {
1211 'id': 'videos',
1212 'ext': 'mp4',
1213 'title': 'king machine trailer 1',
2637fadc 1214 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
a4a554a7
YCH
1215 'thumbnail': r're:^https?://.*\.jpg$',
1216 },
1217 },
03486dbb
RU
1218 {
1219 # JWPlayer config passed as variable
1220 'url': 'http://www.txxx.com/videos/3326530/ariele/',
1221 'info_dict': {
1222 'id': '3326530_hq',
1223 'ext': 'mp4',
1224 'title': 'ARIELE | Tube Cup',
1225 'uploader': 'www.txxx.com',
1226 'age_limit': 18,
1227 },
1228 'params': {
1229 'skip_download': True,
1230 }
1231 },
939be9ad
JH
1232 {
1233 # JWPlatform iframe
2fac2e91 1234 'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
939be9ad 1235 'info_dict': {
2fac2e91 1236 'id': 'AG26UQXM',
939be9ad 1237 'ext': 'mp4',
2fac2e91
AG
1238 'upload_date': '20160719',
1239 'timestamp': 468923808,
1240 'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
939be9ad 1241 },
805f5bf7 1242 'add_ie': [JWPlatformIE.ie_key()],
939be9ad 1243 },
63d990d2 1244 {
c5b7014a 1245 # Video.js embed, multiple formats
63d990d2
S
1246 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
1247 'info_dict': {
1248 'id': 'yygqldloqIk',
1249 'ext': 'mp4',
1250 'title': 'SolidWorks. Урок 6 Настройка чертежа',
1251 'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
1252 'upload_date': '20130314',
1253 'uploader': 'PROстое3D',
1254 'uploader_id': 'PROstoe3D',
1255 },
1256 'params': {
1257 'skip_download': True,
1258 },
1259 },
c5b7014a
S
1260 {
1261 # Video.js embed, single format
1262 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
1263 'info_dict': {
1264 'id': 'watch',
1265 'ext': 'mp4',
1266 'title': 'Step 1 - Good Foundation',
1267 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
1268 },
1269 'params': {
1270 'skip_download': True,
1271 },
1272 },
59b8ab58
PH
1273 # rtl.nl embed
1274 {
1275 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
1276 'playlist_mincount': 5,
1277 'info_dict': {
1278 'id': 'aanslagen-kopenhagen',
2637fadc 1279 'title': 'Aanslagen Kopenhagen',
59b8ab58 1280 }
255fca5e
S
1281 },
1282 # Zapiks embed
1283 {
1284 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
1285 'info_dict': {
1286 'id': '118046',
1287 'ext': 'mp4',
1288 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
1289 }
1290 },
66e568de
S
1291 # Kaltura embed (different embed code)
1292 {
1293 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
1294 'info_dict': {
1295 'id': '1_a52wc67y',
1296 'ext': 'flv',
1297 'upload_date': '20150127',
1298 'uploader_id': 'PremierMedia',
1299 'timestamp': int,
1300 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
1301 },
1302 },
87703231
YCH
1303 # Kaltura embed with single quotes
1304 {
1305 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
1306 'info_dict': {
1307 'id': '0_izeg5utt',
1308 'ext': 'mp4',
1309 'title': '35871',
1310 'timestamp': 1355743100,
1311 'upload_date': '20121217',
e30991f9 1312 'uploader_id': 'cplapp@learn360.com',
87703231
YCH
1313 },
1314 'add_ie': ['Kaltura'],
1315 },
427cd050
S
1316 {
1317 # Kaltura embedded via quoted entry_id
1318 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
1319 'info_dict': {
1320 'id': '0_utuok90b',
1321 'ext': 'mp4',
1322 'title': '06_matthew_brender_raj_dutt',
1323 'timestamp': 1466638791,
1324 'upload_date': '20160622',
1325 },
1326 'add_ie': ['Kaltura'],
1327 'expected_warnings': [
1328 'Could not send HEAD request'
1329 ],
1330 'params': {
1331 'skip_download': True,
1332 }
1333 },
8ab7e6c4
YCH
1334 {
1335 # Kaltura embedded, some fileExt broken (#11480)
1336 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
1337 'info_dict': {
1338 'id': '1_sgtvehim',
1339 'ext': 'mp4',
1340 'title': 'Our "Standard Models" of particle physics and cosmology',
1341 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
1342 'timestamp': 1321158993,
1343 'upload_date': '20111113',
1344 'uploader_id': 'kps1',
1345 },
1346 'add_ie': ['Kaltura'],
1347 },
a01825a5
JH
1348 {
1349 # Kaltura iframe embed
1350 'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
1351 'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
1352 'info_dict': {
1353 'id': '0_f2cfbpwy',
1354 'ext': 'mp4',
1355 'title': 'I. M. Pei: A Centennial Celebration',
1356 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
1357 'upload_date': '20170403',
1358 'uploader_id': 'batchUser',
1359 'timestamp': 1491232186,
1360 },
1361 'add_ie': ['Kaltura'],
1362 },
c21692fa
S
1363 {
1364 # Kaltura iframe embed, more sophisticated
1365 'url': 'http://www.cns.nyu.edu/~eero/math-tools/Videos/lecture-05sep2017.html',
1366 'info_dict': {
1367 'id': '1_9gzouybz',
1368 'ext': 'mp4',
1369 'title': 'lecture-05sep2017',
1370 'description': 'md5:40f347d91fd4ba047e511c5321064b49',
1371 'upload_date': '20170913',
1372 'uploader_id': 'eps2',
1373 'timestamp': 1505340777,
1374 },
1375 'params': {
1376 'skip_download': True,
1377 },
1378 'add_ie': ['Kaltura'],
1379 },
e30991f9
S
1380 {
1381 # meta twitter:player
1382 'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
1383 'info_dict': {
1384 'id': '0_01b42zps',
1385 'ext': 'mp4',
1386 'title': 'Main Twerk (Video)',
1387 'upload_date': '20171208',
1388 'uploader_id': 'sebastian.salinas@thechive.com',
1389 'timestamp': 1512713057,
1390 },
1391 'params': {
1392 'skip_download': True,
1393 },
1394 'add_ie': ['Kaltura'],
1395 },
250b042c
S
1396 # referrer protected EaglePlatform embed
1397 {
1398 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
1399 'info_dict': {
1400 'id': '582306',
1401 'ext': 'mp4',
1402 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
1403 'thumbnail': r're:^https?://.*\.jpg$',
1404 'duration': 3382,
1405 'view_count': int,
1406 },
1407 'params': {
1408 'skip_download': True,
1409 },
135c9c42 1410 },
665e9452 1411 # ClipYou (EaglePlatform) embed (custom URL)
d47ae7f6
S
1412 {
1413 'url': 'http://muz-tv.ru/play/7129/',
4645432d 1414 # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
d47ae7f6
S
1415 'info_dict': {
1416 'id': '12820',
1417 'ext': 'mp4',
1418 'title': "'O Sole Mio",
ec85ded8 1419 'thumbnail': r're:^https?://.*\.jpg$',
d47ae7f6
S
1420 'duration': 216,
1421 'view_count': int,
1422 },
250b042c
S
1423 'params': {
1424 'skip_download': True,
1425 },
2637fadc 1426 'skip': 'This video is unavailable.',
d47ae7f6 1427 },
f8388757
S
1428 # Pladform embed
1429 {
1430 'url': 'http://muz-tv.ru/kinozal/view/7400/',
1431 'info_dict': {
1432 'id': '100183293',
1433 'ext': 'mp4',
62259846 1434 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
f8388757 1435 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
ec85ded8 1436 'thumbnail': r're:^https?://.*\.jpg$',
f8388757
S
1437 'duration': 694,
1438 'age_limit': 0,
1439 },
2637fadc 1440 'skip': 'HTTP Error 404: Not Found',
f8388757 1441 },
c798f15b
S
1442 # Playwire embed
1443 {
1444 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
1445 'info_dict': {
1446 'id': '3519514',
1447 'ext': 'mp4',
1448 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
ec85ded8 1449 'thumbnail': r're:^https?://.*\.png$',
c798f15b
S
1450 'duration': 45.115,
1451 },
1452 },
ad320e9b
NJ
1453 # 5min embed
1454 {
1455 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
1456 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
1457 'info_dict': {
1458 'id': '518726732',
1459 'ext': 'mp4',
1460 'title': 'Facebook Creates "On This Day" | Crunch Report',
2637fadc
RA
1461 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
1462 'timestamp': 1427237531,
1463 'uploader': 'Crunch Report',
1464 'upload_date': '20150324',
1465 },
1466 'params': {
1467 # m3u8 download
1468 'skip_download': True,
ad320e9b
NJ
1469 },
1470 },
a4257017
S
1471 # Crooks and Liars embed
1472 {
1473 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
1474 'info_dict': {
1475 'id': '8RUoRhRi',
1476 'ext': 'mp4',
1477 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1478 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1479 'timestamp': 1428207000,
1480 'upload_date': '20150405',
1481 'uploader': 'Heather',
1482 },
1483 },
1484 # Crooks and Liars external embed
1485 {
1486 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1487 'info_dict': {
1488 'id': 'MTE3MjUtMzQ2MzA',
1489 'ext': 'mp4',
1490 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1491 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1492 'timestamp': 1265032391,
1493 'upload_date': '20100201',
1494 'uploader': 'Heather',
1495 },
1496 },
facecb84 1497 # NBC Sports vplayer embed
a2edf2e7 1498 {
facecb84 1499 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
a2edf2e7 1500 'info_dict': {
facecb84
S
1501 'id': 'ln7x1qSThw4k',
1502 'ext': 'flv',
1503 'title': "PFT Live: New leader in the 'new-look' defense",
1504 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
0738187f
YCH
1505 'uploader': 'NBCU-SPORTS',
1506 'upload_date': '20140107',
1507 'timestamp': 1389118457,
a2edf2e7 1508 },
2637fadc 1509 'skip': 'Invalid Page URL',
418c5cc3 1510 },
de3eb07e
YCH
1511 # NBC News embed
1512 {
1513 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
1514 'md5': '1aa589c675898ae6d37a17913cf68d66',
1515 'info_dict': {
2637fadc 1516 'id': 'x_dtl_oa_LettermanliftPR_160608',
de3eb07e 1517 'ext': 'mp4',
2637fadc 1518 'title': 'David Letterman: A Preview',
de3eb07e 1519 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
2637fadc
RA
1520 'upload_date': '20160609',
1521 'timestamp': 1465431544,
1522 'uploader': 'NBCU-NEWS',
de3eb07e
YCH
1523 },
1524 },
418c5cc3
YCH
1525 # UDN embed
1526 {
811586eb 1527 'url': 'https://video.udn.com/news/300346',
01c58f84 1528 'md5': 'fd2060e988c326991037b9aff9df21a6',
418c5cc3 1529 'info_dict': {
01c58f84 1530 'id': '300346',
418c5cc3 1531 'ext': 'mp4',
01c58f84 1532 'title': '中一中男師變性 全校師生力挺',
ec85ded8 1533 'thumbnail': r're:^https?://.*\.jpg$',
811586eb
YCH
1534 },
1535 'params': {
1536 # m3u8 download
1537 'skip_download': True,
1538 },
2637fadc 1539 'expected_warnings': ['Failed to parse JSON Expecting value'],
edfcf7ab 1540 },
b26733ba
YCH
1541 # Brightcove URL in single quotes
1542 {
1543 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1544 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1545 'info_dict': {
1546 'id': '4255764656001',
1547 'ext': 'mp4',
1548 'title': 'SN Presents: Russell Martin, World Citizen',
1549 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1550 'uploader': 'Rogers Sportsnet',
0738187f
YCH
1551 'uploader_id': '1704050871',
1552 'upload_date': '20150525',
1553 'timestamp': 1432570283,
b26733ba 1554 },
756f574e 1555 },
55adb63e 1556 # Kinja embed
8084be78
S
1557 {
1558 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1559 'info_dict': {
55adb63e 1560 'id': '106351',
8084be78
S
1561 'ext': 'mp4',
1562 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
55adb63e 1563 'description': 'Migrated from OnionStudios',
ec85ded8 1564 'thumbnail': r're:^https?://.*\.jpe?g$',
55adb63e
RA
1565 'uploader': 'clickhole',
1566 'upload_date': '20150527',
1567 'timestamp': 1432744860,
8084be78
S
1568 }
1569 },
b8c1cc1a
S
1570 # SnagFilms embed
1571 {
1572 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1573 'info_dict': {
1574 'id': '74849a00-85a9-11e1-9660-123139220831',
1575 'ext': 'mp4',
1576 'title': '#whilewewatch',
1577 }
1578 },
a5158f38
YCH
1579 # AdobeTVVideo embed
1580 {
1581 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1582 'md5': '43662b577c018ad707a63766462b1e87',
1583 'info_dict': {
1584 'id': '2456',
1585 'ext': 'mp4',
1586 'title': 'New experience with Acrobat DC',
1587 'description': 'New experience with Acrobat DC',
1588 'duration': 248.667,
1589 },
1f812580 1590 },
ed126900 1591 # BrightcoveInPageEmbed embed
1592 {
1593 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1594 'info_dict': {
1595 'id': '4238694884001',
1596 'ext': 'flv',
1597 'title': 'Tabletop: Dread, Last Thoughts',
1598 'description': 'Tabletop: Dread, Last Thoughts',
1599 'duration': 51690,
1600 },
750b9ff0 1601 },
d10fe835
YCH
1602 # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1603 # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1604 {
1605 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1606 'info_dict': {
1607 'id': '4785848093001',
1608 'ext': 'mp4',
1609 'title': 'The Cardinal Pell Interview',
1610 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1611 'uploader': 'GlobeCast Australia - GlobeStream',
0738187f
YCH
1612 'uploader_id': '2733773828001',
1613 'upload_date': '20160304',
1614 'timestamp': 1457083087,
d10fe835
YCH
1615 },
1616 'params': {
1617 # m3u8 downloads
1618 'skip_download': True,
1619 },
1620 },
9edf47df
S
1621 {
1622 # Brightcove embed with whitespace around attribute names
1623 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
1624 'info_dict': {
1625 'id': '3167554373001',
1626 'ext': 'mp4',
1627 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
1628 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
1629 'uploader_id': '1079349493',
1630 'upload_date': '20140207',
1631 'timestamp': 1391810548,
1632 },
1633 'params': {
1634 'skip_download': True,
1635 },
1636 },
134c207e
YCH
1637 # Another form of arte.tv embed
1638 {
1639 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1640 'md5': '850bfe45417ddf221288c88a0cffe2e2',
1641 'info_dict': {
1642 'id': '030273-562_PLUS7-F',
1643 'ext': 'mp4',
1644 'title': 'ARTE Reportage - Nulle part, en France',
1645 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1646 'upload_date': '20160409',
1647 },
1648 },
4a120778
YCH
1649 # Duplicated embedded video URLs
1650 {
1651 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1652 'info_dict': {
1653 'id': '149298443_480_16c25b74_2',
1654 'ext': 'mp4',
1655 'title': 'vs. Blue Orange Spring Game',
1656 'uploader': 'www.hudl.com',
1657 },
1658 },
371ddb14
S
1659 # twitter:player:stream embed
1660 {
1661 'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
1662 'info_dict': {
1663 'id': 'master',
1664 'ext': 'mp4',
1665 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
1666 'uploader': 'www.rtl.be',
1667 },
1668 'params': {
1669 # m3u8 downloads
1670 'skip_download': True,
1671 },
1672 },
32917907
RA
1673 # twitter:player embed
1674 {
1675 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
1676 'md5': 'a3e0df96369831de324f0778e126653c',
1677 'info_dict': {
1678 'id': '4909620399001',
1679 'ext': 'mp4',
1680 'title': 'What Do Black Holes Sound Like?',
1681 'description': 'what do black holes sound like',
1682 'upload_date': '20160524',
1683 'uploader_id': '29913724001',
1684 'timestamp': 1464107587,
1685 'uploader': 'TheAtlantic',
1686 },
1687 'add_ie': ['BrightcoveLegacy'],
fd6ca382
YCH
1688 },
1689 # Facebook <iframe> embed
1690 {
1691 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
dbf0157a 1692 'md5': 'fbcde74f534176ecb015849146dd3aee',
fd6ca382
YCH
1693 'info_dict': {
1694 'id': '599637780109885',
1695 'ext': 'mp4',
1696 'title': 'Facebook video #599637780109885',
1697 },
1698 },
fd1c5fba
S
1699 # Facebook <iframe> embed, plugin video
1700 {
1701 'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
1702 'info_dict': {
1703 'id': '1754168231264132',
1704 'ext': 'mp4',
1705 'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
1706 'uploader': 'Tariq Ramadan (official)',
1707 'timestamp': 1496758379,
1708 'upload_date': '20170606',
1709 },
1710 'params': {
1711 'skip_download': True,
1712 },
1713 },
fd6ca382
YCH
1714 # Facebook API embed
1715 {
1716 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
dbf0157a 1717 'md5': 'a47372ee61b39a7b90287094d447d94e',
fd6ca382
YCH
1718 'info_dict': {
1719 'id': '10153467542406923',
1720 'ext': 'mp4',
1721 'title': 'Facebook video #10153467542406923',
1722 },
7deef1ba
YCH
1723 },
1724 # Wordpress "YouTube Video Importer" plugin
1725 {
1726 'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
dbf0157a 1727 'md5': 'd16797741b560b485194eddda8121b48',
7deef1ba
YCH
1728 'info_dict': {
1729 'id': 'HNTXWDXV9Is',
1730 'ext': 'mp4',
1731 'title': 'Blue Devils Drumline Stanford lot 2016',
1732 'upload_date': '20160627',
1733 'uploader_id': 'GENOCIDE8GENERAL10',
1734 'uploader': 'cylus cyrus',
1735 },
1736 },
81953d1a
RA
1737 {
1738 # video stored on custom kaltura server
1739 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
1740 'md5': '537617d06e64dfed891fa1593c4b30cc',
1741 'info_dict': {
1742 'id': '0_1iotm5bh',
1743 'ext': 'mp4',
1744 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
1745 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
1746 'uploader_id': 'videos.expansion@el-mundo.net',
1747 'upload_date': '20150429',
1748 'timestamp': 1430303472,
1749 },
1750 'add_ie': ['Kaltura'],
1751 },
562de77f
S
1752 {
1753 # multiple kaltura embeds, nsfw
1754 'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html',
1755 'info_dict': {
1756 'id': 'kamila-avec-video-jaime-sadomie',
1757 'title': "Kamila avec vídeo “J'aime sadomie”",
1758 },
1759 'playlist_count': 8,
1760 },
c03adf90
YCH
1761 {
1762 # Non-standard Vimeo embed
1763 'url': 'https://openclassrooms.com/courses/understanding-the-web',
1764 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
1765 'info_dict': {
1766 'id': '148867247',
1767 'ext': 'mp4',
1768 'title': 'Understanding the web - Teaser',
1769 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
1770 'upload_date': '20151214',
1771 'uploader': 'OpenClassrooms',
1772 'uploader_id': 'openclassrooms',
1773 },
1774 'add_ie': ['Vimeo'],
1775 },
a5ff05df
S
1776 {
1777 # generic vimeo embed that requires original URL passed as Referer
1778 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
1779 'only_matching': True,
1780 },
1979969f
S
1781 {
1782 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
1783 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
1784 'info_dict': {
1785 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
1786 'ext': 'mp4',
1787 'title': 'Big Buck Bunny',
1788 'description': 'Royalty free test video',
1789 'timestamp': 1432816365,
1790 'upload_date': '20150528',
1791 'is_live': False,
1792 },
1793 'params': {
1794 'skip_download': True,
1795 },
1796 'add_ie': [ArkenaIE.ie_key()],
1797 },
2a1321a2
S
1798 {
1799 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
1800 'info_dict': {
1801 'id': '1c7141f46c',
1802 'ext': 'mp4',
1803 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
1804 },
1805 'params': {
1806 'skip_download': True,
1807 },
1808 'add_ie': [Vbox7IE.ie_key()],
1809 },
b0c8f2e9
DR
1810 {
1811 # DBTV embeds
1812 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
fd3ec986
S
1813 'info_dict': {
1814 'id': '43254897',
1815 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
1816 },
b0c8f2e9
DR
1817 'playlist_mincount': 3,
1818 },
e186a9ec
S
1819 {
1820 # Videa embeds
1821 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
1822 'info_dict': {
1823 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
1824 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
1825 },
1826 'playlist_mincount': 2,
1827 },
b687c85e
S
1828 {
1829 # 20 minuten embed
1830 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
1831 'info_dict': {
1832 'id': '523629',
1833 'ext': 'mp4',
1834 'title': 'So kommen Sie bei Eis und Schnee sicher an',
1835 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
1836 },
1837 'params': {
1838 'skip_download': True,
1839 },
1840 'add_ie': [TwentyMinutenIE.ie_key()],
6ef3e65a
S
1841 },
1842 {
1843 # VideoPress embed
1844 'url': 'https://en.support.wordpress.com/videopress/',
1845 'info_dict': {
1846 'id': 'OcobLTqC',
1847 'ext': 'm4v',
1848 'title': 'IMG_5786',
1849 'timestamp': 1435711927,
1850 'upload_date': '20150701',
1851 },
1852 'params': {
1853 'skip_download': True,
1854 },
1855 'add_ie': [VideoPressIE.ie_key()],
fef51645 1856 },
eb3079b6
S
1857 {
1858 # Rutube embed
1859 'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
1860 'info_dict': {
1861 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
1862 'ext': 'flv',
1863 'title': 'Магаззино: Казань 2',
1864 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
1865 'uploader': 'Магаззино',
1866 'upload_date': '20170228',
1867 'uploader_id': '996642',
1868 },
1869 'params': {
1870 'skip_download': True,
1871 },
1872 'add_ie': [RutubeIE.ie_key()],
1873 },
fef51645
YCH
1874 {
1875 # ThePlatform embedded with whitespaces in URLs
1876 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
1877 'only_matching': True,
1878 },
97952bdb
JH
1879 {
1880 # Senate ISVP iframe https
1881 'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
1882 'md5': 'fb8c70b0b515e5037981a2492099aab8',
1883 'info_dict': {
1884 'id': 'govtaff020316',
1885 'ext': 'mp4',
1886 'title': 'Integrated Senate Video Player',
1887 },
1888 'add_ie': [SenateISVPIE.ie_key()],
1889 },
ab87c260
S
1890 {
1891 # Limelight embeds (1 channel embed + 4 media embeds)
1892 'url': 'http://www.sedona.com/FacilitatorTraining2017',
1893 'info_dict': {
1894 'id': 'FacilitatorTraining2017',
1895 'title': 'Facilitator Training 2017',
1896 },
1897 'playlist_mincount': 5,
1898 },
eb02940c
S
1899 {
1900 # Limelight embed (LimelightPlayerUtil.embed)
1901 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
1902 'info_dict': {
1903 'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
1904 'ext': 'mp4',
1905 'title': '07448641',
1906 'timestamp': 1499890639,
1907 'upload_date': '20170712',
1908 },
1909 'params': {
1910 'skip_download': True,
1911 },
1912 'add_ie': ['LimelightMedia'],
1913 },
7986c3ab
S
1914 {
1915 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
1916 'info_dict': {
1917 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
1918 'title': 'Standoff with Walnut Creek murder suspect ends',
1919 'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
1920 },
1921 'playlist_mincount': 4,
1922 },
55719459
JH
1923 {
1924 # WashingtonPost embed
1925 'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
1926 'info_dict': {
1927 'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
1928 'ext': 'mp4',
1929 'title': "No one has seen the drama series based on Trump's life \u2014 until now",
1930 'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
1931 'timestamp': 1455216756,
1932 'uploader': 'The Washington Post',
1933 'upload_date': '20160211',
1934 },
1935 'add_ie': [WashingtonPostIE.ie_key()],
1936 },
2b8e6a68
S
1937 {
1938 # Mediaset embed
1939 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
1940 'info_dict': {
1941 'id': '720642',
1942 'ext': 'mp4',
1943 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
1944 },
1945 'params': {
1946 'skip_download': True,
1947 },
1948 'add_ie': [MediasetIE.ie_key()],
1949 },
73cf76a9
S
1950 {
1951 # JOJ.sk embeds
1952 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
1953 'info_dict': {
1954 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
1955 'title': 'Slovenskom sa prehnala vlna silných búrok',
1956 },
1957 'playlist_mincount': 5,
1958 'add_ie': [JojIE.ie_key()],
1959 },
4328ddf8
S
1960 {
1961 # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
1962 'url': 'https://tvrain.ru/amp/418921/',
1963 'md5': 'cc00413936695987e8de148b67d14f1d',
1964 'info_dict': {
1965 'id': '418921',
1966 'ext': 'mp4',
1967 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
1968 },
1969 },
41918eaa 1970 {
1971 # vzaar embed
1663bd6e
S
1972 'url': 'http://help.vzaar.com/article/165-embedding-video',
1973 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
41918eaa 1974 'info_dict': {
1663bd6e 1975 'id': '8707641',
41918eaa 1976 'ext': 'mp4',
1663bd6e 1977 'title': 'Building A Business Online: Principal Chairs Q & A',
41918eaa 1978 },
1979 },
9ce1ac40 1980 {
1981 # multiple HTML5 videos on one page
1982 'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
1983 'info_dict': {
1984 'id': 'keyscenarios',
1985 'title': 'Rescue Kit 14 Free Edition - Getting started',
1986 },
1987 'playlist_count': 4,
0987f2dd
T
1988 },
1989 {
1990 # vshare embed
7a5c1cfe 1991 'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
0987f2dd
T
1992 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
1993 'info_dict': {
1994 'id': '0f64ce6',
1995 'title': 'vl14062007715967',
1996 'ext': 'mp4',
1997 }
2ca7ed41
S
1998 },
1999 {
2000 'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
2001 'md5': 'aecd089f55b1cb5a59032cb049d3a356',
2002 'info_dict': {
2003 'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
2004 'ext': 'mp4',
2005 'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
2006 'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
2007 'timestamp': 1474354800,
2008 'upload_date': '20160920',
2009 }
7d540621
S
2010 },
2011 {
2012 'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
2013 'info_dict': {
2014 'id': '1731611',
2015 'ext': 'mp4',
2016 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
2017 'description': 'md5:eb5f23826a027ba95277d105f248b825',
2018 'timestamp': 1516100691,
2019 'upload_date': '20180116',
2020 },
2021 'params': {
2022 'skip_download': True,
2023 },
2024 'add_ie': [SpringboardPlatformIE.ie_key()],
ea696249 2025 },
4c780fbd
S
2026 {
2027 'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
2028 'info_dict': {
2029 'id': 'vMDE4NzI1Mjgt690b',
2030 'ext': 'mp4',
2031 'title': 'Котята',
2032 },
2033 'add_ie': [YapFilesIE.ie_key()],
2034 'params': {
2035 'skip_download': True,
2036 },
1fc37ca3 2037 },
660a230b
S
2038 {
2039 # CloudflareStream embed
2040 'url': 'https://www.cloudflare.com/products/cloudflare-stream/',
2041 'info_dict': {
2042 'id': '31c9291ab41fac05471db4e73aa11717',
2043 'ext': 'mp4',
2044 'title': '31c9291ab41fac05471db4e73aa11717',
2045 },
2046 'add_ie': [CloudflareStreamIE.ie_key()],
2047 'params': {
2048 'skip_download': True,
2049 },
2050 },
6bd499e8
S
2051 {
2052 # PeerTube embed
2053 'url': 'https://joinpeertube.org/fr/home/',
2054 'info_dict': {
2055 'id': 'home',
2056 'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
2057 },
2058 'playlist_count': 2,
2059 },
aee36ca8
S
2060 {
2061 # Indavideo embed
2062 'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
2063 'info_dict': {
2064 'id': '1693903',
2065 'ext': 'mp4',
2066 'title': 'Így kell otthon hamburgert sütni',
2067 'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
2068 'timestamp': 1426330212,
2069 'upload_date': '20150314',
2070 'uploader': 'StreetKitchen',
2071 'uploader_id': '546363',
2072 },
2073 'add_ie': [IndavideoEmbedIE.ie_key()],
2074 'params': {
2075 'skip_download': True,
2076 },
2077 },
cfd7f2a6
S
2078 {
2079 # APA embed via JWPlatform embed
2080 'url': 'http://www.vol.at/blue-man-group/5593454',
2081 'info_dict': {
2082 'id': 'jjv85FdZ',
2083 'ext': 'mp4',
2084 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
2085 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2086 'thumbnail': r're:^https?://.*\.jpg$',
2087 'duration': 254,
2088 'timestamp': 1519211149,
2089 'upload_date': '20180221',
2090 },
2091 'params': {
2092 'skip_download': True,
2093 },
2094 },
1fc37ca3
SO
2095 {
2096 'url': 'http://share-videos.se/auto/video/83645793?uid=13',
2097 'md5': 'b68d276de422ab07ee1d49388103f457',
2098 'info_dict': {
2099 'id': '83645793',
2100 'title': 'Lock up and get excited',
1fc37ca3 2101 'ext': 'mp4'
d3431dcb
S
2102 },
2103 'skip': 'TODO: fix nested playlists processing in tests',
2104 },
9d1b2138
S
2105 {
2106 # Viqeo embeds
2107 'url': 'https://viqeo.tv/',
2108 'info_dict': {
2109 'id': 'viqeo',
2110 'title': 'All-new video platform',
2111 },
2112 'playlist_count': 6,
2113 },
d78657fd
BM
2114 {
2115 # Squarespace video embed, 2019-08-28
2116 'url': 'http://ootboxford.com',
2117 'info_dict': {
2118 'id': 'Tc7b_JGdZfw',
2119 'title': 'Out of the Blue, at Childish Things 10',
7cb51b5d
S
2120 'ext': 'mp4',
2121 'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
2122 'uploader_id': 'helendouglashouse',
2123 'uploader': 'Helen & Douglas House',
2124 'upload_date': '20140328',
d78657fd
BM
2125 },
2126 'params': {
2127 'skip_download': True,
2128 },
2129 },
29f7c58a 2130 # {
2131 # # Zype embed
2132 # 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
2133 # 'info_dict': {
2134 # 'id': '5b400b834b32992a310622b9',
2135 # 'ext': 'mp4',
2136 # 'title': 'Smoky Barbecue Favorites',
2137 # 'thumbnail': r're:^https?://.*\.jpe?g',
2138 # 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
2139 # 'upload_date': '20170909',
2140 # 'timestamp': 1504915200,
2141 # },
2142 # 'add_ie': [ZypeIE.ie_key()],
2143 # 'params': {
2144 # 'skip_download': True,
2145 # },
2146 # },
e0b6e988
S
2147 {
2148 # videojs embed
2149 'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
2150 'info_dict': {
2151 'id': 'shell',
2152 'ext': 'mp4',
2153 'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
2154 'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
2155 'thumbnail': r're:^https?://.*\.jpg$',
2156 },
2157 'params': {
2158 'skip_download': True,
2159 },
2160 'expected_warnings': ['Failed to download MPD manifest'],
2161 },
5e3da0d4
RA
2162 {
2163 # DailyMotion embed with DM.player
2164 'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804',
2165 'info_dict': {
2166 'id': 'k6aKkGHd9FJs4mtJN39',
2167 'ext': 'mp4',
2168 'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final',
2169 'description': 'This video is private.',
2170 'uploader_id': 'x1jf30l',
2171 'uploader': 'beIN SPORTS USA',
2172 'upload_date': '20190528',
2173 'timestamp': 1559062971,
2174 },
2175 'params': {
2176 'skip_download': True,
2177 },
2178 },
764f5de2
PW
2179 {
2180 # blogger embed
2181 'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html',
2182 'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac',
2183 'info_dict': {
2184 'id': 'BLOGGER-video-3c740e3a49197e16-796',
2185 'ext': 'mp4',
2186 'title': 'Blogger',
2187 'thumbnail': r're:^https?://.*',
2188 },
2189 },
6e6b70d6
S
2190 # {
2191 # # TODO: find another test
2192 # # http://schema.org/VideoObject
2193 # 'url': 'https://flipagram.com/f/nyvTSJMKId',
2194 # 'md5': '888dcf08b7ea671381f00fab74692755',
2195 # 'info_dict': {
2196 # 'id': 'nyvTSJMKId',
2197 # 'ext': 'mp4',
2198 # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
2199 # 'description': '#love for cats.',
2200 # 'timestamp': 1461244995,
2201 # 'upload_date': '20160421',
2202 # },
2203 # 'params': {
2204 # 'force_generic_extractor': True,
2205 # },
29f7c58a 2206 # },
2207 {
2208 # VHX Embed
2209 'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy',
2210 'info_dict': {
2211 'id': '858208',
2212 'ext': 'mp4',
2213 'title': 'Untitled',
2214 'uploader_id': 'user80538407',
2215 'uploader': 'OTT Videos',
2216 },
2217 },
2218 {
2219 # ArcPublishing PoWa video player
2220 'url': 'https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/',
2221 'md5': 'b03b2fac8680e1e5a7cc81a5c27e71b3',
2222 'info_dict': {
2223 'id': '8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
2224 'ext': 'mp4',
2225 'title': 'Senate candidates wave to voters on Anchorage streets',
2226 'description': 'md5:91f51a6511f090617353dc720318b20e',
2227 'timestamp': 1604378735,
2228 'upload_date': '20201103',
2229 'duration': 1581,
2230 },
2231 },
2181983a 2232 {
2233 # MyChannels SDK embed
2234 # https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
2235 'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
2236 'md5': '90c0699c37006ef18e198c032d81739c',
2237 'info_dict': {
2238 'id': '194165',
2239 'ext': 'mp4',
2240 'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
2241 'timestamp': 1611740340,
2242 'upload_date': '20210127',
2243 'duration': 159,
2244 },
2245 },
bc2ca1bb 2246 {
2247 # Simplecast player embed
2248 'url': 'https://www.bio.org/podcast',
2249 'info_dict': {
2250 'id': 'podcast',
2251 'title': 'I AM BIO Podcast | BIO',
2252 },
2253 'playlist_mincount': 52,
2254 },
e4edeb62 2255 {
b73612a2 2256 # Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
2257 'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
2258 'only_matching': True,
2259 }, {
e4edeb62 2260 # WimTv embed player
2261 'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/',
2262 'info_dict': {
2263 'id': 'wearefmi-pt-2-2021',
2264 'title': '#WEAREFMI – PT.2 – 2021 – MsMotorTV',
2265 },
2266 'playlist_count': 1,
a318f59d 2267 }, {
2268 # KVS Player
2269 'url': 'https://www.kvs-demo.com/videos/105/kelis-4th-of-july/',
2270 'info_dict': {
2271 'id': '105',
2272 'display_id': 'kelis-4th-of-july',
2273 'ext': 'mp4',
2274 'title': 'Kelis - 4th Of July',
2275 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
2276 },
2277 'params': {
2278 'skip_download': True,
2279 },
2280 }, {
2281 # KVS Player
2282 'url': 'https://www.kvs-demo.com/embed/105/',
2283 'info_dict': {
2284 'id': '105',
2285 'display_id': 'kelis-4th-of-july',
2286 'ext': 'mp4',
2287 'title': 'Kelis - 4th Of July / Embed Player',
2288 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
2289 },
2290 'params': {
2291 'skip_download': True,
2292 },
2293 }, {
2294 # KVS Player
2295 'url': 'https://thisvid.com/videos/french-boy-pantsed/',
2296 'md5': '3397979512c682f6b85b3b04989df224',
2297 'info_dict': {
2298 'id': '2400174',
2299 'display_id': 'french-boy-pantsed',
2300 'ext': 'mp4',
2301 'title': 'French Boy Pantsed - ThisVid.com',
2302 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
2303 }
2304 }, {
2305 # KVS Player
2306 'url': 'https://thisvid.com/embed/2400174/',
2307 'md5': '3397979512c682f6b85b3b04989df224',
2308 'info_dict': {
2309 'id': '2400174',
2310 'display_id': 'french-boy-pantsed',
2311 'ext': 'mp4',
2312 'title': 'French Boy Pantsed - ThisVid.com',
2313 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
2314 }
2315 }, {
2316 # KVS Player
2317 'url': 'https://youix.com/video/leningrad-zoj/',
2318 'md5': '94f96ba95706dc3880812b27b7d8a2b8',
2319 'info_dict': {
2320 'id': '18485',
2321 'display_id': 'leningrad-zoj',
2322 'ext': 'mp4',
2323 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com',
2324 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
2325 }
2326 }, {
2327 # KVS Player
2328 'url': 'https://youix.com/embed/18485',
2329 'md5': '94f96ba95706dc3880812b27b7d8a2b8',
2330 'info_dict': {
2331 'id': '18485',
2332 'display_id': 'leningrad-zoj',
2333 'ext': 'mp4',
2334 'title': 'Ленинград - ЗОЖ',
2335 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
2336 }
2337 }, {
2338 # KVS Player
2339 'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/',
2340 'md5': '94166bdb26b4cb1fb9214319a629fc51',
2341 'info_dict': {
2342 'id': '21217',
2343 'display_id': '40-nochey-40-nights-2016',
2344 'ext': 'mp4',
2345 'title': '40 ночей (2016) - BogMedia.org',
2346 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
2347 }
e4edeb62 2348 },
9980d3d2 2349 {
2350 # KVS Player (for sites that serve kt_player.js via non-https urls)
2351 'url': 'http://www.camhub.world/embed/389508',
2352 'md5': 'fbe89af4cfb59c8fd9f34a202bb03e32',
2353 'info_dict': {
2354 'id': '389508',
2355 'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source',
2356 'ext': 'mp4',
2357 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер',
2358 'thumbnail': 'http://www.camhub.world/contents/videos_screenshots/389000/389508/preview.mp4.jpg',
2359 }
2360 },
e16fefd8
JL
2361 {
2362 # Reddit-hosted video that will redirect and be processed by RedditIE
2363 # Redirects to https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
2364 'url': 'https://v.redd.it/zv89llsvexdz',
2365 'md5': '87f5f02f6c1582654146f830f21f8662',
2366 'info_dict': {
2367 'id': 'zv89llsvexdz',
2368 'ext': 'mp4',
2369 'timestamp': 1501941939.0,
2370 'title': 'That small heart attack.',
2371 'upload_date': '20170805',
2372 'uploader': 'Antw87'
2373 }
2374 },
2375 {
2376 # 1080p Reddit-hosted video that will redirect and be processed by RedditIE
2377 'url': 'https://v.redd.it/33hgok7dfbz71/',
2378 'md5': '7a1d587940242c9bb3bd6eb320b39258',
2379 'info_dict': {
2380 'id': '33hgok7dfbz71',
2381 'ext': 'mp4',
2382 'title': "The game Didn't want me to Knife that Guy I guess",
2383 'uploader': 'paraf1ve',
2384 'timestamp': 1636788683.0,
2385 'upload_date': '20211113'
2386 }
9f517bb1 2387 },
9c634ef8 2388 {
2389 # MainStreaming player
2390 'url': 'https://www.lactv.it/2021/10/03/lac-news24-la-settimana-03-10-2021/',
2391 'info_dict': {
2392 'id': 'EUlZfGWkGpOd',
2393 'title': 'La Settimana ',
2394 'description': '03 Ottobre ore 02:00',
2395 'ext': 'mp4',
2396 'live_status': 'not_live',
2397 'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
2398 'duration': 1512
2399 }
2400 },
9f517bb1 2401 {
2402 # Multiple gfycat iframe embeds
2403 'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=613422',
2404 'info_dict': {
2405 'title': '재이, 윤, 세은 황금 드레스를 입고 빛난다',
2406 'id': 'board'
2407 },
2408 'playlist_count': 8,
2409 },
2410 {
2411 # Multiple gfycat gifs (direct links)
2412 'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=612199',
2413 'info_dict': {
2414 'title': '옳게 된 크롭 니트 스테이씨 아이사',
2415 'id': 'board'
2416 },
2417 'playlist_count': 6
2418 },
2419 {
2420 # Multiple gfycat embeds, with uppercase "IFR" in urls
2421 'url': 'https://kkzz.kr/?vid=2295',
2422 'info_dict': {
2423 'title': '지방시 앰버서더 에스파 카리나 움짤',
2424 'id': '?vid=2295'
2425 },
2426 'playlist_count': 9
e16fefd8 2427 }
cfe50f04 2428 ]
9b122384 2429
9b122384
PH
def report_following_redirect(self, new_url):
    """Print a screen message saying a redirect to ``new_url`` is being followed."""
    message = '[redirect] Following redirect to %s' % new_url
    self._downloader.to_screen(message)
9b122384 2433
def report_detected(self, name):
    """Emit a debug-level message naming what kind of resource was recognised."""
    message = f'Identified a {name}'
    self._downloader.write_debug(message)
2436
4fc946b5
PH
def _extract_rss(self, url, video_id, doc):
    """Convert a parsed RSS document into a playlist result.

    Every ``./channel/item`` element becomes one ``url_transparent`` entry.
    The entry URL is the ``url`` attribute of the first ``<enclosure>`` that
    has a non-empty one, falling back to the text of the item's ``<link>``.
    Items providing neither are skipped.  iTunes podcast tags, when present,
    supply duration, thumbnail, episode/season numbers and the age limit.

    A feed or item without a ``<title>`` element yields a ``None`` title
    instead of raising ``AttributeError``.

    @param url       URL of the feed; used as the playlist id.
    @param video_id  Not used by this method.
    @param doc       Root element of the parsed feed.
    @returns         A dict with ``_type`` set to ``'playlist'``.
    """
    NS_MAP = {
        'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
    }
    # Value of <itunes:explicit> (lower-cased) -> age limit; anything else -> None
    EXPLICIT_AGE_LIMITS = {
        'true': 18,
        'yes': 18,
        'false': 0,
        'no': 0,
    }

    def itunes(item, key):
        # Text of the <itunes:KEY> child of ``item``, or None when absent
        return xpath_text(
            item, xpath_with_ns('./itunes:%s' % key, NS_MAP),
            default=None)

    entries = []
    for it in doc.findall('./channel/item'):
        # First enclosure carrying a usable URL wins, otherwise try <link>
        next_url = next(
            (e.attrib['url'] for e in it.findall('./enclosure') if e.attrib.get('url')),
            None) or xpath_text(it, 'link', fatal=False)
        if not next_url:
            continue

        duration = itunes(it, 'duration')
        explicit = (itunes(it, 'explicit') or '').lower()

        entries.append({
            '_type': 'url_transparent',
            'url': next_url,
            # xpath_text tolerates a missing <title>, unlike find(...).text
            'title': xpath_text(it, 'title', default=None),
            'description': xpath_text(it, 'description', default=None),
            'timestamp': unified_timestamp(
                xpath_text(it, 'pubDate', default=None)),
            # Plain seconds first, then clock-style durations
            'duration': int_or_none(duration) or parse_duration(duration),
            'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
            'episode': itunes(it, 'title'),
            'episode_number': int_or_none(itunes(it, 'episode')),
            'season_number': int_or_none(itunes(it, 'season')),
            'age_limit': EXPLICIT_AGE_LIMITS.get(explicit),
        })

    return {
        '_type': 'playlist',
        'id': url,
        'title': xpath_text(doc, './channel/title', default=None),
        'description': xpath_text(doc, './channel/description', default=None),
        'entries': entries,
    }
2497
c8e9a235
PH
2498 def _extract_camtasia(self, url, video_id, webpage):
2499 """ Returns None if no camtasia video can be found. """
2500
2501 camtasia_cfg = self._search_regex(
2502 r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
2503 webpage, 'camtasia configuration file', default=None)
2504 if camtasia_cfg is None:
2505 return None
2506
2507 title = self._html_search_meta('DC.title', webpage, fatal=True)
2508
2509 camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
2510 camtasia_cfg = self._download_xml(
2511 camtasia_url, video_id,
2512 note='Downloading camtasia configuration',
2513 errnote='Failed to download camtasia configuration')
2514 fileset_node = camtasia_cfg.find('./playlist/array/fileset')
2515
2516 entries = []
2517 for n in fileset_node.getchildren():
2518 url_n = n.find('./uri')
2519 if url_n is None:
2520 continue
2521
2522 entries.append({
2523 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
2524 'title': '%s - %s' % (title, n.tag),
2525 'url': compat_urlparse.urljoin(url, url_n.text),
2526 'duration': float_or_none(n.find('./duration').text),
2527 })
2528
2529 return {
2530 '_type': 'playlist',
2531 'entries': entries,
2532 'title': title,
2533 }
2534
a318f59d 2535 def _kvs_getrealurl(self, video_url, license_code):
2536 if not video_url.startswith('function/0/'):
2537 return video_url # not obfuscated
2538
2539 url_path, _, url_query = video_url.partition('?')
2540 urlparts = url_path.split('/')[2:]
2541 license = self._kvs_getlicensetoken(license_code)
2542 newmagic = urlparts[5][:32]
2543
2544 for o in range(len(newmagic) - 1, -1, -1):
2545 new = ''
2546 l = (o + sum([int(n) for n in license[o:]])) % 32
2547
2548 for i in range(0, len(newmagic)):
2549 if i == o:
2550 new += newmagic[l]
2551 elif i == l:
2552 new += newmagic[o]
2553 else:
2554 new += newmagic[i]
2555 newmagic = new
2556
2557 urlparts[5] = newmagic + urlparts[5][32:]
2558 return '/'.join(urlparts) + '?' + url_query
2559
2560 def _kvs_getlicensetoken(self, license):
2561 modlicense = license.replace('$', '').replace('0', '1')
2562 center = int(len(modlicense) / 2)
2563 fronthalf = int(modlicense[:center + 1])
2564 backhalf = int(modlicense[center:])
2565
2566 modlicense = str(4 * abs(fronthalf - backhalf))
2567 retval = ''
2568 for o in range(0, center + 1):
2569 for i in range(1, 5):
2570 retval += str((int(license[o + i]) + int(modlicense[o])) % 10)
2571 return retval
2572
9b122384 2573 def _real_extract(self, url):
ebd3c7b3 2574 if url.startswith('//'):
d226c560 2575 return self.url_result(self.http_scheme() + url)
ebd3c7b3 2576
a7130543
JMF
2577 parsed_url = compat_urlparse.urlparse(url)
2578 if not parsed_url.scheme:
a06916d9 2579 default_search = self.get_param('default_search')
04b4d394 2580 if default_search is None:
1f7ccb90 2581 default_search = 'fixup_error'
04b4d394 2582
1f7ccb90 2583 if default_search in ('auto', 'auto_warning', 'fixup_error'):
9c1da4a9 2584 if re.match(r'^[^\s/]+\.[^\s/]+/', url):
6a39ee13 2585 self.report_warning('The url doesn\'t specify the protocol, trying with http')
04b4d394 2586 return self.url_result('http://' + url)
1f7ccb90 2587 elif default_search != 'fixup_error':
9c1fc022 2588 if default_search == 'auto_warning':
0e67ab0d
PH
2589 if re.match(r'^(?:url|URL)$', url):
2590 raise ExtractorError(
7a5c1cfe 2591 'Invalid URL: %r . Call yt-dlp like this: yt-dlp -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
0e67ab0d
PH
2592 expected=True)
2593 else:
6a39ee13 2594 self.report_warning(
7571c02c 2595 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
04b4d394 2596 return self.url_result('ytsearch:' + url)
1f7ccb90
PH
2597
2598 if default_search in ('error', 'fixup_error'):
7571c02c 2599 raise ExtractorError(
b74e86f4 2600 '%r is not a valid URL. '
7a5c1cfe 2601 'Set --default-search "ytsearch" (or run yt-dlp "ytsearch:%s" ) to search YouTube'
b74e86f4 2602 % (url, url), expected=True)
04b4d394 2603 else:
f2f2c0c2
PH
2604 if ':' not in default_search:
2605 default_search += ':'
04b4d394 2606 return self.url_result(default_search + url)
4d54ef20
PH
2607
2608 url, smuggled_data = unsmuggle_url(url)
2609 force_videoid = None
d6e6a422 2610 is_intentional = smuggled_data and smuggled_data.get('to_generic')
4d54ef20
PH
2611 if smuggled_data and 'force_videoid' in smuggled_data:
2612 force_videoid = smuggled_data['force_videoid']
2613 video_id = force_videoid
2614 else:
9dcd6fd3 2615 video_id = self._generic_id(url)
3d83a1ae 2616
79649588 2617 self.to_screen('%s: Requesting header' % video_id)
c1d1facd 2618
ebab4520 2619 head_req = HEADRequest(url)
23be51d8 2620 head_response = self._request_webpage(
ebab4520
PH
2621 head_req, video_id,
2622 note=False, errnote='Could not send HEAD request to %s' % url,
2623 fatal=False)
42393ce2 2624
23be51d8 2625 if head_response is not False:
42393ce2 2626 # Check for redirect
7947a1f7 2627 new_url = head_response.geturl()
42393ce2
PH
2628 if url != new_url:
2629 self.report_following_redirect(new_url)
4d54ef20
PH
2630 if force_videoid:
2631 new_url = smuggle_url(
2632 new_url, {'force_videoid': force_videoid})
cecaaf3f 2633 return self.url_result(new_url)
42393ce2 2634
23be51d8
PH
2635 full_response = None
2636 if head_response is False:
5c2266df 2637 request = sanitized_Request(url)
58bde34a
S
2638 request.add_header('Accept-Encoding', '*')
2639 full_response = self._request_webpage(request, video_id)
23be51d8
PH
2640 head_response = full_response
2641
f930e0c7
S
2642 info_dict = {
2643 'id': video_id,
9dcd6fd3 2644 'title': self._generic_title(url),
29f7c58a 2645 'timestamp': unified_timestamp(head_response.headers.get('Last-Modified'))
f930e0c7
S
2646 }
2647
23be51d8 2648 # Check for direct link to a video
955737b2 2649 content_type = head_response.headers.get('Content-Type', '').lower()
263eff95 2650 m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
23be51d8 2651 if m:
aa9369a2 2652 self.report_detected('direct video link')
4e363703 2653 format_id = compat_str(m.group('format_id'))
c26326c1 2654 subtitles = {}
f930e0c7 2655 if format_id.endswith('mpegurl'):
c26326c1 2656 formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
cf1f13b8 2657 elif format_id.endswith('mpd') or format_id.endswith('dash+xml'):
2658 formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id)
f930e0c7
S
2659 elif format_id == 'f4m':
2660 formats = self._extract_f4m_formats(url, video_id)
eadc3ccd 2661 else:
2662 formats = [{
4e363703 2663 'format_id': format_id,
eadc3ccd 2664 'url': url,
2665 'vcodec': 'none' if m.group('type') == 'audio' else None
2666 }]
de6c51e8 2667 info_dict['direct'] = True
19dbaeec 2668 self._sort_formats(formats)
de6c51e8 2669 info_dict['formats'] = formats
c26326c1 2670 info_dict['subtitles'] = subtitles
f930e0c7 2671 return info_dict
42393ce2 2672
a06916d9 2673 if not self.get_param('test', False) and not is_intentional:
2674 force = self.get_param('force_generic_extractor', False)
6a39ee13 2675 self.report_warning(
2fece970 2676 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
d6e6a422 2677
4e262a88 2678 if not full_response:
5c2266df 2679 request = sanitized_Request(url)
58bde34a
S
2680 # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
2681 # making it impossible to download only chunk of the file (yet we need only 512kB to
7a5c1cfe 2682 # test whether it's HTML or not). According to yt-dlp default Accept-Encoding
58bde34a
S
2683 # that will always result in downloading the whole file that is not desirable.
2684 # Therefore for extraction pass we have to override Accept-Encoding to any in order
2685 # to accept raw bytes and being able to download only a chunk.
2686 # It may probably better to solve this by checking Content-Type for application/octet-stream
2687 # after HEAD request finishes, but not sure if we can rely on this.
2688 request.add_header('Accept-Encoding', '*')
2689 full_response = self._request_webpage(request, video_id)
4e262a88 2690
5940862d
S
2691 first_bytes = full_response.read(512)
2692
2693 # Is it an M3U playlist?
0d769bcb 2694 if first_bytes.startswith(b'#EXTM3U'):
aa9369a2 2695 self.report_detected('M3U playlist')
da1c94ee 2696 info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
19dbaeec 2697 self._sort_formats(info_dict['formats'])
5940862d
S
2698 return info_dict
2699
4e262a88
PH
2700 # Maybe it's a direct link to a video?
2701 # Be careful not to download the whole thing!
61ca9a80 2702 if not is_html(first_bytes):
6a39ee13 2703 self.report_warning(
4e262a88 2704 'URL could be a direct video link, returning it as such.')
f930e0c7 2705 info_dict.update({
4e262a88
PH
2706 'direct': True,
2707 'url': url,
f930e0c7
S
2708 })
2709 return info_dict
4e262a88
PH
2710
2711 webpage = self._webpage_read_content(
2712 full_response, url, video_id, prefix=first_bytes)
2713
2181983a 2714 if '<title>DPG Media Privacy Gate</title>' in webpage:
2715 webpage = self._download_webpage(url, video_id)
2716
9b122384 2717 self.report_extraction(video_id)
887c6acd 2718
1b840245 2719 # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
4fc946b5 2720 try:
61241abb 2721 try:
2722 doc = compat_etree_fromstring(webpage)
2723 except compat_xml_parse_error:
2724 doc = compat_etree_fromstring(webpage.encode('utf-8'))
4fc946b5 2725 if doc.tag == 'rss':
aa9369a2 2726 self.report_detected('RSS feed')
4fc946b5 2727 return self._extract_rss(url, video_id, doc)
cc99a77a 2728 elif doc.tag == 'SmoothStreamingMedia':
7a450a3b 2729 info_dict['formats'], info_dict['subtitles'] = self._parse_ism_formats_and_subtitles(doc, url)
aa9369a2 2730 self.report_detected('ISM manifest')
cc99a77a
S
2731 self._sort_formats(info_dict['formats'])
2732 return info_dict
e5e8d20a 2733 elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
19dbaeec 2734 smil = self._parse_smil(doc, url, video_id)
aa9369a2 2735 self.report_detected('SMIL file')
19dbaeec
S
2736 self._sort_formats(smil['formats'])
2737 return smil
729accb4 2738 elif doc.tag == '{http://xspf.org/ns/0/}playlist':
aa9369a2 2739 self.report_detected('XSPF playlist')
96b8b9ab 2740 return self.playlist_result(
47a5cb77
S
2741 self._parse_xspf(
2742 doc, video_id, xspf_url=url,
7947a1f7 2743 xspf_base_url=full_response.geturl()),
96b8b9ab 2744 video_id)
1b840245 2745 elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
7de27caf 2746 info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
d3f8b76b 2747 doc,
7947a1f7 2748 mpd_base_url=full_response.geturl().rpartition('/')[0],
86f4d14f 2749 mpd_url=url)
aa9369a2 2750 self.report_detected('DASH manifest')
19dbaeec 2751 self._sort_formats(info_dict['formats'])
f930e0c7
S
2752 return info_dict
2753 elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
2754 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
aa9369a2 2755 self.report_detected('F4M manifest')
19dbaeec 2756 self._sort_formats(info_dict['formats'])
f930e0c7 2757 return info_dict
f7300c5c 2758 except compat_xml_parse_error:
4fc946b5
PH
2759 pass
2760
c8e9a235
PH
2761 # Is it a Camtasia project?
2762 camtasia_res = self._extract_camtasia(url, video_id, webpage)
2763 if camtasia_res is not None:
aa9369a2 2764 self.report_detected('Camtasia video')
c8e9a235
PH
2765 return camtasia_res
2766
14390730 2767 # Sometimes the embedded video player is hidden behind percent encoding
067aa17e 2768 # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
14390730 2769 # Unescaping the whole page allows handling those cases in a generic way
29f7c58a 2770 # FIXME: unescaping the whole page may break URLs, commenting out for now.
2771 # There probably should be a second run of generic extractor on unescaped webpage.
2772 # webpage = compat_urllib_parse_unquote(webpage)
1f7659db 2773
7cb51b5d
S
2774 # Unescape squarespace embeds to be detected by generic extractor,
2775 # see https://github.com/ytdl-org/youtube-dl/issues/21294
2776 webpage = re.sub(
2777 r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
2778 lambda x: unescapeHTML(x.group(0)), webpage)
d78657fd 2779
887c6acd
PH
2780 # it's tempting to parse this further, but you would
2781 # have to take into account all the variations like
2782 # Video Title - Site Name
2783 # Site Name | Video Title
2784 # Video Title - Tagline | Site Name
2785 # and so on and so forth; it's just not practical
6f41b2bc
S
2786 video_title = self._og_search_title(
2787 webpage, default=None) or self._html_search_regex(
79649588
PH
2788 r'(?s)<title>(.*?)</title>', webpage, 'video title',
2789 default='video')
ef4fd848 2790
4d805e06
PH
2791 # Try to detect age limit automatically
2792 age_limit = self._rta_search(webpage)
2793 # And then there are the jokers who advertise that they use RTA,
2794 # but actually don't.
2795 AGE_LIMIT_MARKERS = [
197224b7 2796 r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
4d805e06
PH
2797 ]
2798 if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
2799 age_limit = 18
2800
ef4fd848
PH
2801 # video uploader is domain name
2802 video_uploader = self._search_regex(
79649588 2803 r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
887c6acd 2804
6f41b2bc
S
2805 video_description = self._og_search_description(webpage, default=None)
2806 video_thumbnail = self._og_search_thumbnail(webpage, default=None)
2807
b311b0ea
S
2808 info_dict.update({
2809 'title': video_title,
2810 'description': video_description,
2811 'thumbnail': video_thumbnail,
2812 'age_limit': age_limit,
2813 })
2814
aa9369a2 2815 self._downloader.write_debug('Looking for video embeds')
2816
1f4b722b 2817 # Look for Brightcove Legacy Studio embeds
4fcaa4f4 2818 bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
99877772 2819 if bc_urls:
99877772
PH
2820 entries = [{
2821 '_type': 'url',
2822 'url': smuggle_url(bc_url, {'Referer': url}),
3b7d9aa4 2823 'ie_key': 'BrightcoveLegacy'
99877772
PH
2824 } for bc_url in bc_urls]
2825
2826 return {
2827 '_type': 'playlist',
2828 'title': video_title,
2829 'id': video_id,
2830 'entries': entries,
2831 }
cfe50f04 2832
f6519f89 2833 # Look for Brightcove New Studio embeds
0254f93b 2834 bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
f6519f89 2835 if bc_urls:
5399ab3f
S
2836 return self.playlist_from_matches(
2837 bc_urls, video_id, video_title,
2838 getter=lambda x: smuggle_url(x, {'referrer': url}),
2839 ie='BrightcoveNew')
ed126900 2840
4e826cd9
S
2841 # Look for Nexx embeds
2842 nexx_urls = NexxIE._extract_urls(webpage)
2843 if nexx_urls:
2844 return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
2845
3f59b015
S
2846 # Look for Nexx iFrame embeds
2847 nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
2848 if nexx_embed_urls:
2849 return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
2850
4d8819d2
S
2851 # Look for ThePlatform embeds
2852 tp_urls = ThePlatformIE._extract_urls(webpage)
2853 if tp_urls:
46b18f23 2854 return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
4d8819d2 2855
29f7c58a 2856 arc_urls = ArcPublishingIE._extract_urls(webpage)
2857 if arc_urls:
2858 return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
2859
2181983a 2860 mychannels_urls = MedialaanIE._extract_urls(webpage)
2861 if mychannels_urls:
2862 return self.playlist_from_matches(
2863 mychannels_urls, video_id, video_title, ie=MedialaanIE.ie_key())
2864
59b8ab58
PH
2865 # Look for embedded rtl.nl player
2866 matches = re.findall(
2637fadc 2867 r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
59b8ab58
PH
2868 webpage)
2869 if matches:
46b18f23 2870 return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
59b8ab58 2871
09b9c45e
S
2872 vimeo_urls = VimeoIE._extract_urls(url, webpage)
2873 if vimeo_urls:
46b18f23 2874 return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
7115ca84 2875
29f7c58a 2876 vhx_url = VHXEmbedIE._extract_url(webpage)
2877 if vhx_url:
2878 return self.url_result(vhx_url, VHXEmbedIE.ie_key())
2879
df0c8151 2880 # Invidious Instances
2881 # https://github.com/yt-dlp/yt-dlp/issues/195
2882 # https://github.com/iv-org/invidious/pull/1730
2883 youtube_url = self._search_regex(
2884 r'<link rel="alternate" href="(https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
2885 webpage, 'youtube link', default=None)
2886 if youtube_url:
2887 return self.url_result(youtube_url, YoutubeIE.ie_key())
2888
66c9fa36
S
2889 # Look for YouTube embeds
2890 youtube_urls = YoutubeIE._extract_urls(webpage)
2891 if youtube_urls:
46b18f23 2892 return self.playlist_from_matches(
66c9fa36 2893 youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
7deef1ba 2894
ad213a1d 2895 matches = DailymotionIE._extract_urls(webpage)
355e4fd0 2896 if matches:
46b18f23 2897 return self.playlist_from_matches(matches, video_id, video_title)
355e4fd0 2898
8489578d
NJ
2899 # Look for embedded Dailymotion playlist player (#3822)
2900 m = re.search(
2901 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
2902 if m:
2903 playlists = re.findall(
2904 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
2905 if playlists:
46b18f23
JH
2906 return self.playlist_from_matches(
2907 playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
8489578d 2908
71a1db89
S
2909 # Look for DailyMail embeds
2910 dailymail_urls = DailyMailIE._extract_urls(webpage)
2911 if dailymail_urls:
2912 return self.playlist_from_matches(
2913 dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
2914
be7dacf9
S
2915 # Look for Teachable embeds, must be before Wistia
2916 teachable_url = TeachableIE._extract_url(webpage, url)
2917 if teachable_url:
2918 return self.url_result(teachable_url)
2919
ef4fd848 2920 # Look for embedded Wistia player
fda6d237
S
2921 wistia_urls = WistiaIE._extract_urls(webpage)
2922 if wistia_urls:
2923 playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
2924 for entry in playlist['entries']:
2925 entry.update({
2926 '_type': 'url_transparent',
2927 'uploader': video_uploader,
2928 })
2929 return playlist
5f6a1245 2930
bab19a8e
S
2931 # Look for SVT player
2932 svt_url = SVTIE._extract_url(webpage)
2933 if svt_url:
2934 return self.url_result(svt_url, 'SVT')
2935
c19f7764
JMF
2936 # Look for Bandcamp pages with custom domain
2937 mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
2938 if mobj is not None:
2939 burl = unescapeHTML(mobj.group(1))
09804265
JMF
2940 # Don't set the extractor because it can be a track url or an album
2941 return self.url_result(burl)
c19f7764 2942
f25571ff
PH
2943 # Look for embedded Vevo player
2944 mobj = re.search(
2945 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
2946 if mobj is not None:
2947 return self.url_result(mobj.group('url'))
796df3c6
S
2948
2949 # Look for embedded Viddler player
cb454b33
S
2950 mobj = re.search(
2951 r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
2952 webpage)
796df3c6
S
2953 if mobj is not None:
2954 return self.url_result(mobj.group('url'))
f25571ff 2955
3378d67a
S
2956 # Look for NYTimes player
2957 mobj = re.search(
2958 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
2959 webpage)
2960 if mobj is not None:
2961 return self.url_result(mobj.group('url'))
2962
cefdf970
S
2963 # Look for Libsyn player
2964 mobj = re.search(
2965 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
2966 if mobj is not None:
2967 return self.url_result(mobj.group('url'))
2968
c0d0b01f 2969 # Look for Ooyala videos
3089bc74
S
2970 mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage)
2971 or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)
2972 or re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage)
2973 or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)
2974 or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
c0d0b01f 2975 if mobj is not None:
9837cb75
RA
2976 embed_token = self._search_regex(
2977 r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
2978 webpage, 'ooyala embed token', default=None)
2979 return OoyalaIE._build_url_result(smuggle_url(
2980 mobj.group('ec'), {
2981 'domain': url,
2982 'embed_token': embed_token,
2983 }))
c0d0b01f 2984
f076b638 2985 # Look for multiple Ooyala embeds on SBN network websites
2986 mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
2987 if mobj is not None:
2988 embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
2989 if embeds:
46b18f23
JH
2990 return self.playlist_from_matches(
2991 embeds, video_id, video_title,
2992 getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
f076b638 2993
aa94a6d3 2994 # Look for Aparat videos
48099643 2995 mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
aa94a6d3
PH
2996 if mobj is not None:
2997 return self.url_result(mobj.group(1), 'Aparat')
2998
c93c2ab1 2999 # Look for MPORA videos
c3f51436 3000 mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
c93c2ab1
PH
3001 if mobj is not None:
3002 return self.url_result(mobj.group(1), 'Mpora')
5f59ee79 3003
9834872b 3004 # Look for embedded Facebook player
0646e34c
S
3005 facebook_urls = FacebookIE._extract_urls(webpage)
3006 if facebook_urls:
3007 return self.playlist_from_matches(facebook_urls, video_id, video_title)
9834872b 3008
ca97a56e
S
3009 # Look for embedded VK player
3010 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
3011 if mobj is not None:
3012 return self.url_result(mobj.group('url'), 'VK')
3013
33d4fdab 3014 # Look for embedded Odnoklassniki player
416c3ca7
RA
3015 odnoklassniki_url = OdnoklassnikiIE._extract_url(webpage)
3016 if odnoklassniki_url:
3017 return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
33d4fdab 3018
b73612a2 3019 # Look for sibnet embedded player
3020 sibnet_urls = VKIE._extract_sibnet_urls(webpage)
3021 if sibnet_urls:
3022 return self.playlist_from_matches(sibnet_urls, video_id, video_title)
3023
0364fa8b
S
3024 # Look for embedded ivi player
3025 mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
3026 if mobj is not None:
3027 return self.url_result(mobj.group('url'), 'Ivi')
3028
db1f3888
PH
3029 # Look for embedded Huffington Post player
3030 mobj = re.search(
c3f51436 3031 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
db1f3888
PH
3032 if mobj is not None:
3033 return self.url_result(mobj.group('url'), 'HuffPost')
3034
1b86cc41 3035 # Look for embed.ly
3036 mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
3037 if mobj is not None:
3038 return self.url_result(mobj.group('url'))
3039 mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
3040 if mobj is not None:
f7e6f7fa 3041 return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1b86cc41 3042
60cc4dc4
PH
3043 # Look for funnyordie embed
3044 matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
3045 if matches:
46b18f23
JH
3046 return self.playlist_from_matches(
3047 matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
60cc4dc4 3048
bc2ca1bb 3049 # Look for Simplecast embeds
3050 simplecast_urls = SimplecastIE._extract_urls(webpage)
3051 if simplecast_urls:
3052 return self.playlist_from_matches(
3053 simplecast_urls, video_id, video_title)
3054
db546cf8
S
3055 # Look for BBC iPlayer embed
3056 matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
3057 if matches:
46b18f23 3058 return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
db546cf8 3059
93d020dd
S
3060 # Look for embedded RUTV player
3061 rutv_url = RUTVIE._extract_url(webpage)
3062 if rutv_url:
3063 return self.url_result(rutv_url, 'RUTV')
3064
494f20cb 3065 # Look for embedded TVC player
b8599718
S
3066 tvc_url = TVCIE._extract_url(webpage)
3067 if tvc_url:
3068 return self.url_result(tvc_url, 'TVC')
494f20cb 3069
d40a3b5b 3070 # Look for embedded SportBox player
476cf548 3071 sportbox_urls = SportBoxIE._extract_urls(webpage)
d40a3b5b 3072 if sportbox_urls:
476cf548 3073 return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
d40a3b5b 3074
2bb5b6d0
S
3075 # Look for embedded XHamster player
3076 xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
3077 if xhamster_urls:
46b18f23 3078 return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
2bb5b6d0 3079
2c9ca782
S
3080 # Look for embedded TNAFlixNetwork player
3081 tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
3082 if tnaflix_urls:
46b18f23 3083 return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
2c9ca782 3084
b52c9ef1
S
3085 # Look for embedded PornHub player
3086 pornhub_urls = PornHubIE._extract_urls(webpage)
3087 if pornhub_urls:
46b18f23 3088 return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
b52c9ef1 3089
37e7a71c
S
3090 # Look for embedded DrTuber player
3091 drtuber_urls = DrTuberIE._extract_urls(webpage)
3092 if drtuber_urls:
46b18f23 3093 return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
37e7a71c 3094
e28ed498
S
3095 # Look for embedded RedTube player
3096 redtube_urls = RedTubeIE._extract_urls(webpage)
3097 if redtube_urls:
46b18f23 3098 return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
e28ed498 3099
06993715
S
3100 # Look for embedded Tube8 player
3101 tube8_urls = Tube8IE._extract_urls(webpage)
3102 if tube8_urls:
3103 return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
3104
4e7b5bba
S
3105 # Look for embedded Mofosex player
3106 mofosex_urls = MofosexEmbedIE._extract_urls(webpage)
3107 if mofosex_urls:
3108 return self.playlist_from_matches(mofosex_urls, video_id, video_title, ie=MofosexEmbedIE.ie_key())
3109
8fae1a04
S
3110 # Look for embedded Spankwire player
3111 spankwire_urls = SpankwireIE._extract_urls(webpage)
3112 if spankwire_urls:
3113 return self.playlist_from_matches(spankwire_urls, video_id, video_title, ie=SpankwireIE.ie_key())
3114
52c4c515
S
3115 # Look for embedded YouPorn player
3116 youporn_urls = YouPornIE._extract_urls(webpage)
3117 if youporn_urls:
3118 return self.playlist_from_matches(youporn_urls, video_id, video_title, ie=YouPornIE.ie_key())
3119
9872d311
S
3120 # Look for embedded Tvigle player
3121 mobj = re.search(
3122 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
3123 if mobj is not None:
3124 return self.url_result(mobj.group('url'), 'Tvigle')
3125
7e2ede98
JMF
3126 # Look for embedded TED player
3127 mobj = re.search(
d7cc31b6 3128 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
7e2ede98
JMF
3129 if mobj is not None:
3130 return self.url_result(mobj.group('url'), 'TED')
3131
5c386252 3132 # Look for embedded Ustream videos
d77ac737
YCH
3133 ustream_url = UstreamIE._extract_url(webpage)
3134 if ustream_url:
3135 return self.url_result(ustream_url, UstreamIE.ie_key())
5c386252 3136
893f8832 3137 # Look for embedded arte.tv player
8bdd16b4 3138 arte_urls = ArteTVEmbedIE._extract_urls(webpage)
3139 if arte_urls:
3140 return self.playlist_from_matches(arte_urls, video_id, video_title)
893f8832 3141
cbd55ade
S
3142 # Look for embedded francetv player
3143 mobj = re.search(
3144 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
3145 webpage)
3146 if mobj is not None:
3147 return self.url_result(mobj.group('url'))
3148
e6c2d9ad 3149 # Look for embedded Myvi.ru player
6dd94d3a 3150 myvi_url = MyviIE._extract_url(webpage)
e6c2d9ad
S
3151 if myvi_url:
3152 return self.url_result(myvi_url)
3153
dfb1b146 3154 # Look for embedded soundcloud player
548c3957 3155 soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
94aae015 3156 if soundcloud_urls:
548c3957 3157 return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
20991253 3158
027e2312
S
3159 # Look for tunein player
3160 tunein_urls = TuneInBaseIE._extract_urls(webpage)
3161 if tunein_urls:
46b18f23 3162 return self.playlist_from_matches(tunein_urls, video_id, video_title)
027e2312 3163
c5cd249e 3164 # Look for embedded mtvservices player
46fde8a1
S
3165 mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
3166 if mtvservices_url:
3167 return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
c5cd249e 3168
49807b4a
S
3169 # Look for embedded yahoo player
3170 mobj = re.search(
3171 r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
3172 webpage)
3173 if mobj is not None:
3174 return self.url_result(mobj.group('url'), 'Yahoo')
3175
2ef6fcb5
PH
3176 # Look for embedded sbs.com.au player
3177 mobj = re.search(
e98b8e79
PH
3178 r'''(?x)
3179 (?:
3180 <meta\s+property="og:video"\s+content=|
3181 <iframe[^>]+?src=
3182 )
3183 (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2ef6fcb5
PH
3184 webpage)
3185 if mobj is not None:
3186 return self.url_result(mobj.group('url'), 'SBS')
3187
42bdd9d0
PH
3188 # Look for embedded Cinchcast player
3189 mobj = re.search(
3190 r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
3191 webpage)
3192 if mobj is not None:
3193 return self.url_result(mobj.group('url'), 'Cinchcast')
3194
1a94ff68 3195 mobj = re.search(
5263cdfc 3196 r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1a94ff68 3197 webpage)
8001607e
YCH
3198 if not mobj:
3199 mobj = re.search(
1418a043 3200 r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
8001607e 3201 webpage)
1a94ff68
S
3202 if mobj is not None:
3203 return self.url_result(mobj.group('url'), 'MLB')
3204
1419fafd 3205 mobj = re.search(
dd467d33 3206 r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1419fafd
S
3207 webpage)
3208 if mobj is not None:
3209 return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
3210
af63fed7 3211 mobj = re.search(
78d3b3e2 3212 r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
af63fed7
PH
3213 webpage)
3214 if mobj is not None:
3215 return self.url_result(mobj.group('url'), 'Livestream')
3216
255fca5e
S
3217 # Look for Zapiks embed
3218 mobj = re.search(
3219 r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
3220 if mobj is not None:
3221 return self.url_result(mobj.group('url'), 'Zapiks')
3222
e3216b82 3223 # Look for Kaltura embeds
562de77f
S
3224 kaltura_urls = KalturaIE._extract_urls(webpage)
3225 if kaltura_urls:
3226 return self.playlist_from_matches(
3227 kaltura_urls, video_id, video_title,
3228 getter=lambda x: smuggle_url(x, {'source_url': url}),
3229 ie=KalturaIE.ie_key())
e3216b82 3230
665e9452 3231 # Look for EaglePlatform embeds
06a96da1
S
3232 eagleplatform_url = EaglePlatformIE._extract_url(webpage)
3233 if eagleplatform_url:
665e9452 3234 return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
135c9c42 3235
665e9452 3236 # Look for ClipYou (uses EaglePlatform) embeds
d47ae7f6
S
3237 mobj = re.search(
3238 r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
3239 if mobj is not None:
3240 return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
3241
f8388757 3242 # Look for Pladform embeds
45dad7ba
S
3243 pladform_url = PladformIE._extract_url(webpage)
3244 if pladform_url:
3245 return self.url_result(pladform_url)
f8388757 3246
ff18735c
S
3247 # Look for Videomore embeds
3248 videomore_url = VideomoreIE._extract_url(webpage)
3249 if videomore_url:
3250 return self.url_result(videomore_url)
3251
83f1481b
S
3252 # Look for Webcaster embeds
3253 webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
3254 if webcaster_url:
3255 return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
3256
2dcc114f
S
3257 # Look for Playwire embeds
3258 mobj = re.search(
3259 r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
3260 if mobj is not None:
3261 return self.url_result(mobj.group('url'))
3262
ad320e9b
NJ
3263 # Look for 5min embeds
3264 mobj = re.search(
3265 r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
3266 if mobj is not None:
3267 return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
3268
18153f1b
S
3269 # Look for Crooks and Liars embeds
3270 mobj = re.search(
3271 r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
3272 if mobj is not None:
3273 return self.url_result(mobj.group('url'))
3274
a2edf2e7
YCH
3275 # Look for NBC Sports VPlayer embeds
3276 nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
3277 if nbc_sports_url:
3278 return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
3279
de3eb07e
YCH
3280 # Look for NBC News embeds
3281 nbc_news_embed_url = re.search(
3282 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
3283 if nbc_news_embed_url:
3284 return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
3285
653789af 3286 # Look for Google Drive embeds
5b251628 3287 google_drive_url = GoogleDriveIE._extract_url(webpage)
653789af 3288 if google_drive_url:
3289 return self.url_result(google_drive_url, 'GoogleDrive')
3290
418c5cc3
YCH
3291 # Look for UDN embeds
3292 mobj = re.search(
2637fadc 3293 r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
418c5cc3
YCH
3294 if mobj is not None:
3295 return self.url_result(
0a160363 3296 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
418c5cc3 3297
2fe1b5bd
YCH
3298 # Look for Senate ISVP iframe
3299 senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
3300 if senate_isvp_url:
25c3a734 3301 return self.url_result(senate_isvp_url, 'SenateISVP')
2fe1b5bd 3302
55adb63e
RA
3303 # Look for Kinja embeds
3304 kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url)
3305 if kinja_embed_urls:
3306 return self.playlist_from_matches(
3307 kinja_embed_urls, video_id, video_title)
3308
1ac1c4c2
S
3309 # Look for OnionStudios embeds
3310 onionstudios_url = OnionStudiosIE._extract_url(webpage)
3311 if onionstudios_url:
3312 return self.url_result(onionstudios_url)
3313
764f5de2
PW
3314 # Look for Blogger embeds
3315 blogger_urls = BloggerIE._extract_urls(webpage)
3316 if blogger_urls:
3317 return self.playlist_from_matches(blogger_urls, video_id, video_title, ie=BloggerIE.ie_key())
3318
67167920 3319 # Look for ViewLift embeds
3320 viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
3321 if viewlift_url:
3322 return self.url_result(viewlift_url)
eedd20ef 3323
7cb09524 3324 # Look for JWPlatform embeds
b0ead0e0
S
3325 jwplatform_urls = JWPlatformIE._extract_urls(webpage)
3326 if jwplatform_urls:
3327 return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
7cb09524 3328
aecfcd4e
S
3329 # Look for Digiteka embeds
3330 digiteka_url = DigitekaIE._extract_url(webpage)
3331 if digiteka_url:
3332 return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
6aeba407 3333
1979969f
S
3334 # Look for Arkena embeds
3335 arkena_url = ArkenaIE._extract_url(webpage)
3336 if arkena_url:
3337 return self.url_result(arkena_url, ArkenaIE.ie_key())
3338
b1c35797
RA
3339 # Look for Piksel embeds
3340 piksel_url = PikselIE._extract_url(webpage)
3341 if piksel_url:
3342 return self.url_result(piksel_url, PikselIE.ie_key())
3343
1bf996fa 3344 # Look for Limelight embeds
e5d39886
S
3345 limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
3346 if limelight_urls:
3347 return self.playlist_result(
3348 limelight_urls, video_id, video_title, video_description)
3349
7986c3ab
S
3350 # Look for Anvato embeds
3351 anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
3352 if anvato_urls:
3353 return self.playlist_result(
3354 anvato_urls, video_id, video_title, video_description)
3355
a5158f38
YCH
3356 # Look for AdobeTVVideo embeds
3357 mobj = re.search(
3358 r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
3359 webpage)
3360 if mobj is not None:
3361 return self.url_result(
3362 self._proto_relative_url(unescapeHTML(mobj.group(1))),
3363 'AdobeTVVideo')
3364
088e1aac
YCH
3365 # Look for Vine embeds
3366 mobj = re.search(
3367 r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
3368 webpage)
3369 if mobj is not None:
3370 return self.url_result(
3371 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
3372
217d5ae0
RA
3373 # Look for VODPlatform embeds
3374 mobj = re.search(
bd2c211f 3375 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1',
217d5ae0
RA
3376 webpage)
3377 if mobj is not None:
3378 return self.url_result(
93b84045 3379 self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
217d5ae0 3380
7d273a38
RA
3381 # Look for Mangomolo embeds
3382 mobj = re.search(
755541a4
RA
3383 r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//
3384 (?:
3385 admin\.mangomolo\.com/analytics/index\.php/customers/embed|
3386 player\.mangomolo\.com/v1
3387 )/
7d273a38
RA
3388 (?:
3389 video\?.*?\bid=(?P<video_id>\d+)|
755541a4 3390 (?:index|live)\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
7d273a38
RA
3391 ).+?)\1''', webpage)
3392 if mobj is not None:
3393 info = {
3394 '_type': 'url_transparent',
3395 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
3396 'title': video_title,
3397 'description': video_description,
3398 'thumbnail': video_thumbnail,
3399 'uploader': video_uploader,
3400 }
3401 video_id = mobj.group('video_id')
3402 if video_id:
3403 info.update({
3404 'ie_key': 'MangomoloVideo',
3405 'id': video_id,
3406 })
3407 else:
3408 info.update({
3409 'ie_key': 'MangomoloLive',
3410 'id': mobj.group('channel_id'),
3411 })
3412 return info
3413
5a51775a
YCH
3414 # Look for Instagram embeds
3415 instagram_embed_url = InstagramIE._extract_embed_url(webpage)
3416 if instagram_embed_url is not None:
11e60fca
S
3417 return self.url_result(
3418 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
5a51775a 3419
5d39176f
S
3420 # Look for 3Q SDN embeds
3421 threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
3422 if threeqsdn_url:
6f41b2bc
S
3423 return {
3424 '_type': 'url_transparent',
3425 'ie_key': ThreeQSDNIE.ie_key(),
3426 'url': self._proto_relative_url(threeqsdn_url),
3427 'title': video_title,
3428 'description': video_description,
3429 'thumbnail': video_thumbnail,
3430 'uploader': video_uploader,
3431 }
5d39176f 3432
2a1321a2
S
3433 # Look for VBOX7 embeds
3434 vbox7_url = Vbox7IE._extract_url(webpage)
3435 if vbox7_url:
3436 return self.url_result(vbox7_url, Vbox7IE.ie_key())
3437
b0c8f2e9
DR
3438 # Look for DBTV embeds
3439 dbtv_urls = DBTVIE._extract_urls(webpage)
3440 if dbtv_urls:
46b18f23 3441 return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
b0c8f2e9 3442
e186a9ec
S
3443 # Look for Videa embeds
3444 videa_urls = VideaIE._extract_urls(webpage)
3445 if videa_urls:
46b18f23 3446 return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
e186a9ec 3447
b687c85e
S
3448 # Look for 20 minuten embeds
3449 twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
3450 if twentymin_urls:
46b18f23
JH
3451 return self.playlist_from_matches(
3452 twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
b687c85e 3453
6ef3e65a
S
3454 # Look for VideoPress embeds
3455 videopress_urls = VideoPressIE._extract_urls(webpage)
3456 if videopress_urls:
46b18f23
JH
3457 return self.playlist_from_matches(
3458 videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
6ef3e65a 3459
eb3079b6
S
3460 # Look for Rutube embeds
3461 rutube_urls = RutubeIE._extract_urls(webpage)
3462 if rutube_urls:
46b18f23 3463 return self.playlist_from_matches(
2583c0b5 3464 rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
6ef3e65a 3465
55719459
JH
3466 # Look for WashingtonPost embeds
3467 wapo_urls = WashingtonPostIE._extract_urls(webpage)
3468 if wapo_urls:
3469 return self.playlist_from_matches(
3470 wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
3471
5d29af3d 3472 # Look for Mediaset embeds
8fd12a08 3473 mediaset_urls = MediasetIE._extract_urls(self, webpage)
5d29af3d
S
3474 if mediaset_urls:
3475 return self.playlist_from_matches(
3476 mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
3477
73cf76a9
S
3478 # Look for JOJ.sk embeds
3479 joj_urls = JojIE._extract_urls(webpage)
3480 if joj_urls:
3481 return self.playlist_from_matches(
3482 joj_urls, video_id, video_title, ie=JojIE.ie_key())
3483
24e966e8
PH
3484 # Look for megaphone.fm embeds
3485 mpfn_urls = MegaphoneIE._extract_urls(webpage)
3486 if mpfn_urls:
3487 return self.playlist_from_matches(
3488 mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
3489
1663bd6e
S
3490 # Look for vzaar embeds
3491 vzaar_urls = VzaarIE._extract_urls(webpage)
3492 if vzaar_urls:
3493 return self.playlist_from_matches(
3494 vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
3495
26bae2d9
S
3496 channel9_urls = Channel9IE._extract_urls(webpage)
3497 if channel9_urls:
3498 return self.playlist_from_matches(
3499 channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
3500
0987f2dd
T
3501 vshare_urls = VShareIE._extract_urls(webpage)
3502 if vshare_urls:
3503 return self.playlist_from_matches(
3504 vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
3505
8056c854 3506 # Look for Mediasite embeds
2ca7ed41
S
3507 mediasite_urls = MediasiteIE._extract_urls(webpage)
3508 if mediasite_urls:
3509 entries = [
3510 self.url_result(smuggle_url(
3511 compat_urlparse.urljoin(url, mediasite_url),
3512 {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
3513 for mediasite_url in mediasite_urls]
3514 return self.playlist_result(entries, video_id, video_title)
8056c854 3515
7d540621
S
3516 springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
3517 if springboardplatform_urls:
3518 return self.playlist_from_matches(
3519 springboardplatform_urls, video_id, video_title,
3520 ie=SpringboardPlatformIE.ie_key())
3521
4c780fbd
S
3522 yapfiles_urls = YapFilesIE._extract_urls(webpage)
3523 if yapfiles_urls:
3524 return self.playlist_from_matches(
3525 yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
3526
86c8cfc5
S
3527 vice_urls = ViceIE._extract_urls(webpage)
3528 if vice_urls:
3529 return self.playlist_from_matches(
3530 vice_urls, video_id, video_title, ie=ViceIE.ie_key())
3531
178ee883
S
3532 xfileshare_urls = XFileShareIE._extract_urls(webpage)
3533 if xfileshare_urls:
3534 return self.playlist_from_matches(
3535 xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
3536
660a230b
S
3537 cloudflarestream_urls = CloudflareStreamIE._extract_urls(webpage)
3538 if cloudflarestream_urls:
3539 return self.playlist_from_matches(
3540 cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
3541
8b4b400a 3542 peertube_urls = PeerTubeIE._extract_urls(webpage, url)
6bd499e8
S
3543 if peertube_urls:
3544 return self.playlist_from_matches(
3545 peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
3546
aee36ca8
S
3547 indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
3548 if indavideo_urls:
3549 return self.playlist_from_matches(
3550 indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
3551
cfd7f2a6
S
3552 apa_urls = APAIE._extract_urls(webpage)
3553 if apa_urls:
3554 return self.playlist_from_matches(
3555 apa_urls, video_id, video_title, ie=APAIE.ie_key())
3556
f51f526b
S
3557 foxnews_urls = FoxNewsIE._extract_urls(webpage)
3558 if foxnews_urls:
3559 return self.playlist_from_matches(
3560 foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
3561
2e4350ee 3562 sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer(
d3431dcb
S
3563 r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
3564 webpage)]
3565 if sharevideos_urls:
3566 return self.playlist_from_matches(
3567 sharevideos_urls, video_id, video_title)
3568
9d1b2138
S
3569 viqeo_urls = ViqeoIE._extract_urls(webpage)
3570 if viqeo_urls:
3571 return self.playlist_from_matches(
3572 viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
57c68ec4
S
3573
3574 expressen_urls = ExpressenIE._extract_urls(webpage)
3575 if expressen_urls:
3576 return self.playlist_from_matches(
3577 expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
9d1b2138 3578
83852e57
S
3579 zype_urls = ZypeIE._extract_urls(webpage)
3580 if zype_urls:
3581 return self.playlist_from_matches(
3582 zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
3583
feee67ae 3584 gedi_urls = GediDigitalIE._extract_urls(webpage)
902784a2 3585 if gedi_urls:
3586 return self.playlist_from_matches(
feee67ae 3587 gedi_urls, video_id, video_title, ie=GediDigitalIE.ie_key())
902784a2 3588
feee67ae 3589 # Look for RCS media group embeds
a85e131b 3590 rcs_urls = RCSEmbedsIE._extract_urls(webpage)
3591 if rcs_urls:
3592 return self.playlist_from_matches(
3593 rcs_urls, video_id, video_title, ie=RCSEmbedsIE.ie_key())
3594
e4edeb62 3595 wimtv_urls = WimTVIE._extract_urls(webpage)
3596 if wimtv_urls:
3597 return self.playlist_from_matches(
3598 wimtv_urls, video_id, video_title, ie=WimTVIE.ie_key())
3599
097f1663 3600 bitchute_urls = BitChuteIE._extract_urls(webpage)
3601 if bitchute_urls:
3602 return self.playlist_from_matches(
3603 bitchute_urls, video_id, video_title, ie=BitChuteIE.ie_key())
3604
62852977 3605 rumble_urls = RumbleEmbedIE._extract_urls(webpage)
3606 if len(rumble_urls) == 1:
3607 return self.url_result(rumble_urls[0], RumbleEmbedIE.ie_key())
3608 if rumble_urls:
3609 return self.playlist_from_matches(
3610 rumble_urls, video_id, video_title, ie=RumbleEmbedIE.ie_key())
3611
56bb56f3
LL
3612 tvp_urls = TVPEmbedIE._extract_urls(webpage)
3613 if tvp_urls:
3614 return self.playlist_from_matches(tvp_urls, video_id, video_title, ie=TVPEmbedIE.ie_key())
3615
9c634ef8 3616 # Look for MainStreaming embeds
3617 mainstreaming_urls = MainStreamingIE._extract_urls(webpage)
3618 if mainstreaming_urls:
3619 return self.playlist_from_matches(mainstreaming_urls, video_id, video_title, ie=MainStreamingIE.ie_key())
3620
9f517bb1 3621 # Look for Gfycat Embeds
3622 gfycat_urls = GfycatIE._extract_urls(webpage)
3623 if gfycat_urls:
3624 return self.playlist_from_matches(gfycat_urls, video_id, video_title, ie=GfycatIE.ie_key())
9c634ef8 3625
bd264412
YCH
3626 # Look for HTML5 media
3627 entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
3628 if entries:
aa9369a2 3629 self.report_detected('HTML5 media')
9ce1ac40 3630 if len(entries) == 1:
3631 entries[0].update({
bd264412
YCH
3632 'id': video_id,
3633 'title': video_title,
3634 })
9ce1ac40 3635 else:
3636 for num, entry in enumerate(entries, start=1):
3637 entry.update({
3638 'id': '%s-%s' % (video_id, num),
3639 'title': '%s (%d)' % (video_title, num),
3640 })
3641 for entry in entries:
bd264412 3642 self._sort_formats(entry['formats'])
9ce1ac40 3643 return self.playlist_result(entries, video_id, video_title)
bd264412 3644
c73e330e
RU
3645 jwplayer_data = self._find_jwplayer_data(
3646 webpage, video_id, transform_source=js_to_json)
3647 if jwplayer_data:
5e7bbac3 3648 if isinstance(jwplayer_data.get('playlist'), str):
aa9369a2 3649 self.report_detected('JW Player playlist')
5e7bbac3 3650 return {
3651 **info_dict,
3652 '_type': 'url',
3653 'ie_key': JWPlatformIE.ie_key(),
3654 'url': jwplayer_data['playlist'],
3655 }
3d08f63d
MYM
3656 try:
3657 info = self._parse_jwplayer_data(
3658 jwplayer_data, video_id, require_title=False, base_url=url)
aa9369a2 3659 self.report_detected('JW Player data')
3d08f63d
MYM
3660 return merge_dicts(info, info_dict)
3661 except ExtractorError:
067aa17e 3662 # See https://github.com/ytdl-org/youtube-dl/pull/16735
3d08f63d 3663 pass
a4a554a7 3664
63d990d2
S
3665 # Video.js embed
3666 mobj = re.search(
c5b7014a 3667 r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
63d990d2
S
3668 webpage)
3669 if mobj is not None:
3670 sources = self._parse_json(
3671 mobj.group(1), video_id, transform_source=js_to_json,
3672 fatal=False) or []
c5b7014a
S
3673 if not isinstance(sources, list):
3674 sources = [sources]
63d990d2 3675 formats = []
da1c94ee 3676 subtitles = {}
63d990d2 3677 for source in sources:
e0b6e988
S
3678 src = source.get('src')
3679 if not src or not isinstance(src, compat_str):
63d990d2
S
3680 continue
3681 src = compat_urlparse.urljoin(url, src)
3682 src_type = source.get('type')
3683 if isinstance(src_type, compat_str):
3684 src_type = src_type.lower()
3685 ext = determine_ext(src).lower()
3686 if src_type == 'video/youtube':
3687 return self.url_result(src, YoutubeIE.ie_key())
3688 if src_type == 'application/dash+xml' or ext == 'mpd':
da1c94ee
F
3689 fmts, subs = self._extract_mpd_formats_and_subtitles(
3690 src, video_id, mpd_id='dash', fatal=False)
3691 formats.extend(fmts)
3692 self._merge_subtitles(subs, target=subtitles)
63d990d2 3693 elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
da1c94ee 3694 fmts, subs = self._extract_m3u8_formats_and_subtitles(
63d990d2 3695 src, video_id, 'mp4', entry_protocol='m3u8_native',
da1c94ee
F
3696 m3u8_id='hls', fatal=False)
3697 formats.extend(fmts)
3698 self._merge_subtitles(subs, target=subtitles)
63d990d2
S
3699 else:
3700 formats.append({
3701 'url': src,
3089bc74
S
3702 'ext': (mimetype2ext(src_type)
3703 or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
b73612a2 3704 'http_headers': {
3705 'Referer': full_response.geturl(),
3706 },
63d990d2 3707 })
da1c94ee 3708 if formats or subtitles:
aa9369a2 3709 self.report_detected('video.js embed')
63d990d2
S
3710 self._sort_formats(formats)
3711 info_dict['formats'] = formats
da1c94ee 3712 info_dict['subtitles'] = subtitles
63d990d2
S
3713 return info_dict
3714
ff17be3a 3715 # Looking for http://schema.org/VideoObject
fa0b816e 3716 json_ld = self._search_json_ld(webpage, video_id, default={})
ff17be3a 3717 if json_ld.get('url'):
aa9369a2 3718 self.report_detected('JSON LD')
e6ae51c1 3719 if determine_ext(json_ld.get('url')) == 'm3u8':
3720 json_ld['formats'], json_ld['subtitles'] = self._extract_m3u8_formats_and_subtitles(
3721 json_ld['url'], video_id, 'mp4')
3722 json_ld.pop('url')
ff17be3a
S
3723 return merge_dicts(json_ld, info_dict)
3724
def check_video(vurl):
    """Decide whether a candidate URL should be kept as a video URL.

    A URL is accepted outright when either ``YoutubeIE`` or ``RtmpIE``
    reports it as suitable. Otherwise it is accepted only if the path
    component of the URL contains a dot and the extension derived from
    that path is not one of the excluded (non-video) extensions below.
    """
    # URLs claimed by the YouTube or RTMP extractors are always kept.
    if YoutubeIE.suitable(vurl) or RtmpIE.suitable(vurl):
        return True

    # Only the path part of the URL is inspected (query string and
    # fragment are ignored by taking ``.path``).
    url_path = compat_urlparse.urlparse(vurl).path
    path_ext = determine_ext(url_path)

    # Extensions of flash objects, images, subtitles, scripts and XML
    # documents that must not be treated as media.
    rejected_exts = ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
    has_dot = '.' in url_path
    return has_dot and path_ext not in rejected_exts
ced659bb
S
3733
def filter_video(urls):
    """Return a list of the entries of ``urls`` accepted by ``check_video``.

    The relative order of the accepted entries is preserved.
    """
    return [candidate for candidate in urls if check_video(candidate)]
3736
9b122384 3737 # Start with something easy: JW Player in SWFObject
ced659bb 3738 found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
aa9369a2 3739 if found:
3740 self.report_detected('JW Player in SFWObject')
3741 else:
d981cef6 3742 # Look for gorilla-vid style embedding
ced659bb 3743 found = filter_video(re.findall(r'''(?sx)
c0292e8a
PH
3744 (?:
3745 jw_plugins|
3746 JWPlayerOptions|
3747 jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
3748 )
a0f71985
PH
3749 .*?
3750 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
aa9369a2 3751 if found:
3752 self.report_detected('JW Player embed')
a318f59d 3753 if not found:
3754 # Look for generic KVS player
9980d3d2 3755 found = re.search(r'<script [^>]*?src="https?://.+?/kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)".*?>', webpage)
a318f59d 3756 if found:
aa9369a2 3757 self.report_detected('KWS Player')
a318f59d 3758 if found.group('maj_ver') not in ['4', '5']:
3759 self.report_warning('Untested major version (%s) in player engine--Download may fail.' % found.group('ver'))
3760 flashvars = re.search(r'(?ms)<script.*?>.*?var\s+flashvars\s*=\s*(\{.*?\});.*?</script>', webpage)
3761 flashvars = self._parse_json(flashvars.group(1), video_id, transform_source=js_to_json)
3762
3763 # extract the part after the last / as the display_id from the
3764 # canonical URL.
3765 display_id = self._search_regex(
3766 r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>'
3767 r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)',
3768 webpage, 'display_id', fatal=False
3769 )
3770 title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')
3771
3772 thumbnail = flashvars['preview_url']
3773 if thumbnail.startswith('//'):
3774 protocol, _, _ = url.partition('/')
3775 thumbnail = protocol + thumbnail
3776
3777 formats = []
3778 for key in ('video_url', 'video_alt_url', 'video_alt_url2'):
3779 if key in flashvars and '/get_file/' in flashvars[key]:
3780 next_format = {
3781 'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']),
3782 'format_id': flashvars.get(key + '_text', key),
3783 'ext': 'mp4',
3784 }
3785 height = re.search(r'%s_(\d+)p\.mp4(?:/[?].*)?$' % flashvars['video_id'], flashvars[key])
3786 if height:
3787 next_format['height'] = int(height.group(1))
3788 else:
3789 next_format['quality'] = 1
3790 formats.append(next_format)
3791 self._sort_formats(formats)
3792
3793 return {
3794 'id': flashvars['video_id'],
3795 'display_id': display_id,
3796 'title': title,
3797 'thumbnail': thumbnail,
3798 'formats': formats,
3799 }
b30b8698 3800 if not found:
9b122384 3801 # Broaden the search a little bit
ced659bb 3802 found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
aa9369a2 3803 if found:
3804 self.report_detected('video file')
b30b8698
PH
3805 if not found:
3806 # Broaden the findall a little bit: JWPlayer JS loader
ced659bb 3807 found = filter_video(re.findall(
54a9328b 3808 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
aa9369a2 3809 if found:
3810 self.report_detected('JW Player JS loader')
4d805e06
PH
3811 if not found:
3812 # Flow player
ced659bb 3813 found = filter_video(re.findall(r'''(?xs)
4d805e06
PH
3814 flowplayer\("[^"]+",\s*
3815 \{[^}]+?\}\s*,
52585fd6 3816 \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
4d805e06 3817 ["']?url["']?\s*:\s*["']([^"']+)["']
ced659bb 3818 ''', webpage))
aa9369a2 3819 if found:
3820 self.report_detected('Flow Player')
501f13fb
PH
3821 if not found:
3822 # Cinerama player
3823 found = re.findall(
3824 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
aa9369a2 3825 if found:
3826 self.report_detected('Cinerama player')
b30b8698 3827 if not found:
9b122384 3828 # Try to find twitter cards info
371ddb14
S
3829 # twitter:player:stream should be checked before twitter:player since
3830 # it is expected to contain a raw stream (see
3831 # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
ced659bb
S
3832 found = filter_video(re.findall(
3833 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
aa9369a2 3834 if found:
3835 self.report_detected('Twitter card')
b30b8698 3836 if not found:
9b122384
PH
3837 # We look for Open Graph info:
3838 # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
b30b8698 3839 m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
9b122384
PH
3840 # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
3841 if m_video_type is not None:
b73612a2 3842 found = filter_video(re.findall(r'<meta.*?property="og:(?:video|audio)".*?content="(.*?)"', webpage))
aa9369a2 3843 if found:
3844 self.report_detected('Open Graph video info')
b30b8698 3845 if not found:
ed9a25dd 3846 REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
a5a45015 3847 found = re.search(
89ef304b 3848 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
ed9a25dd 3849 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
89ef304b 3850 webpage)
84f81016
S
3851 if not found:
3852 # Look also in Refresh HTTP header
3853 refresh_header = head_response.headers.get('Refresh')
3854 if refresh_header:
6c91a5a7
S
3855 # In python 2 response HTTP headers are bytestrings
3856 if sys.version_info < (3, 0) and isinstance(refresh_header, str):
3857 refresh_header = refresh_header.decode('iso-8859-1')
ed9a25dd 3858 found = re.search(REDIRECT_REGEX, refresh_header)
b30b8698 3859 if found:
b37317d8 3860 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
54b960f3
S
3861 if new_url != url:
3862 self.report_following_redirect(new_url)
3863 return {
3864 '_type': 'url',
3865 'url': new_url,
3866 }
3867 else:
3868 found = None
371ddb14
S
3869
3870 if not found:
3871 # twitter:player is a https URL to iframe player that may or may not
7a5c1cfe 3872 # be supported by yt-dlp thus this is checked the very last (see
371ddb14
S
3873 # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
3874 embed_url = self._html_search_meta('twitter:player', webpage, default=None)
02d01e15 3875 if embed_url and embed_url != url:
aa9369a2 3876 self.report_detected('twitter:player iframe')
371ddb14
S
3877 return self.url_result(embed_url)
3878
b30b8698 3879 if not found:
416c7fcb 3880 raise UnsupportedError(url)
9b122384 3881
b30b8698 3882 entries = []
4a120778 3883 for video_url in orderedSet(found):
949b6497 3884 video_url = unescapeHTML(video_url)
6cc37c69 3885 video_url = video_url.replace('\\/', '/')
b30b8698 3886 video_url = compat_urlparse.urljoin(url, video_url)
f7e6f7fa 3887 video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
9b122384 3888
b30b8698
PH
3889 # Sometimes, jwplayer extraction will result in a YouTube URL
3890 if YoutubeIE.suitable(video_url):
3891 entries.append(self.url_result(video_url, 'Youtube'))
3892 continue
9b122384 3893
b30b8698
PH
3894 # here's a fun little line of code for you:
3895 video_id = os.path.splitext(video_id)[0]
fc9713a1 3896
28602e74
YCH
3897 entry_info_dict = {
3898 'id': video_id,
3899 'uploader': video_uploader,
3900 'title': video_title,
3901 'age_limit': age_limit,
3902 }
3903
5620f840
S
3904 if RtmpIE.suitable(video_url):
3905 entry_info_dict.update({
3906 '_type': 'url_transparent',
3907 'ie_key': RtmpIE.ie_key(),
3908 'url': video_url,
3909 })
3910 entries.append(entry_info_dict)
3911 continue
3912
729accb4
S
3913 ext = determine_ext(video_url)
3914 if ext == 'smil':
da1c94ee 3915 entry_info_dict = {**self._extract_smil_info(video_url, video_id), **entry_info_dict}
729accb4
S
3916 elif ext == 'xspf':
3917 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
750b9ff0 3918 elif ext == 'm3u8':
da1c94ee 3919 entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4')
79a35085 3920 elif ext == 'mpd':
da1c94ee 3921 entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id)
3f2f4a94
S
3922 elif ext == 'f4m':
3923 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
4119a96c 3924 elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
26aae566
S
3925 # Just matching .ism/manifest is not enough to be reliably sure
3926 # whether it's actually an ISM manifest or some other streaming
3927 # manifest since there are various streaming URL formats
3928 # possible (see [1]) as well as some other shenanigans like
3929 # .smil/manifest URLs that actually serve an ISM (see [2]) and
3930 # so on.
3931 # Thus the most reasonable way to solve this is to delegate
3932 # to generic extractor in order to look into the contents of
3933 # the manifest itself.
3934 # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
3935 # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
3936 entry_info_dict = self.url_result(
3937 smuggle_url(video_url, {'to_generic': True}),
3938 GenericIE.ie_key())
d6fd958c 3939 else:
28602e74
YCH
3940 entry_info_dict['url'] = video_url
3941
19dbaeec
S
3942 if entry_info_dict.get('formats'):
3943 self._sort_formats(entry_info_dict['formats'])
3944
28602e74 3945 entries.append(entry_info_dict)
b30b8698
PH
3946
3947 if len(entries) == 1:
669f0e7c 3948 return entries[0]
b30b8698
PH
3949 else:
3950 for num, e in enumerate(entries, start=1):
13d8fbef
JMF
3951 # 'url' results don't have a title
3952 if e.get('title') is not None:
3953 e['title'] = '%s (%d)' % (e['title'], num)
b30b8698
PH
3954 return {
3955 '_type': 'playlist',
3956 'entries': entries,
3957 }