]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/generic.py
release 2018.09.01
[yt-dlp.git] / youtube_dl / extractor / generic.py
CommitLineData
dcdb292f 1# coding: utf-8
cfe50f04 2
79649588
PH
3from __future__ import unicode_literals
4
9b122384
PH
5import os
6import re
6c91a5a7 7import sys
9b122384
PH
8
9from .common import InfoExtractor
fc9713a1 10from .youtube import YoutubeIE
8c25f81b 11from ..compat import (
f7854627 12 compat_etree_fromstring,
4e363703 13 compat_str,
1ddb9456 14 compat_urllib_parse_unquote,
a5caba1e 15 compat_urlparse,
f7300c5c 16 compat_xml_parse_error,
8c25f81b
PH
17)
18from ..utils import (
b759a0d4 19 determine_ext,
9b122384 20 ExtractorError,
c8e9a235 21 float_or_none,
aa94a6d3 22 HEADRequest,
61ca9a80 23 is_html,
a4a554a7 24 js_to_json,
63d990d2 25 KNOWN_EXTENSIONS,
6cc62232 26 merge_dicts,
63d990d2 27 mimetype2ext,
ed2d6a19 28 orderedSet,
5c2266df 29 sanitized_Request,
9d4660ca
PH
30 smuggle_url,
31 unescapeHTML,
42393ce2 32 unified_strdate,
4d54ef20 33 unsmuggle_url,
416c7fcb 34 UnsupportedError,
76c73715 35 xpath_text,
9b122384 36)
b7a8c1bc 37from .commonprotocols import RtmpIE
ed126900 38from .brightcove import (
4fcaa4f4 39 BrightcoveLegacyIE,
5c17f0a6 40 BrightcoveNewIE,
ed126900 41)
3f59b015
S
42from .nexx import (
43 NexxIE,
44 NexxEmbedIE,
45)
a2edf2e7 46from .nbc import NBCSportsVPlayerIE
c0d0b01f 47from .ooyala import OoyalaIE
93d020dd 48from .rutv import RUTVIE
954c1d05 49from .tvc import TVCIE
d40a3b5b 50from .sportbox import SportBoxEmbedIE
cb3ac1c6 51from .smotri import SmotriIE
6dd94d3a 52from .myvi import MyviIE
1419fafd 53from .condenast import CondeNastIE
418c5cc3 54from .udn import UDNEmbedIE
2fe1b5bd 55from .senateisvp import SenateISVPIE
bab19a8e 56from .svt import SVTIE
65d161c4 57from .pornhub import PornHubIE
2bb5b6d0 58from .xhamster import XHamsterEmbedIE
2c9ca782 59from .tnaflix import TNAFlixNetworkEmbedIE
37e7a71c 60from .drtuber import DrTuberIE
e28ed498 61from .redtube import RedTubeIE
06993715 62from .tube8 import Tube8IE
b407e173 63from .vimeo import VimeoIE
3c4fbfec 64from .dailymotion import DailymotionIE
71a1db89 65from .dailymail import DailyMailIE
1ac1c4c2 66from .onionstudios import OnionStudiosIE
67167920 67from .viewlift import ViewLiftEmbedIE
46fde8a1 68from .mtv import MTVServicesEmbeddedIE
45dad7ba 69from .pladform import PladformIE
ff18735c 70from .videomore import VideomoreIE
83f1481b 71from .webcaster import WebcasterFeedIE
5b251628 72from .googledrive import GoogleDriveIE
7cb09524 73from .jwplatform import JWPlatformIE
aecfcd4e 74from .digiteka import DigitekaIE
1979969f 75from .arkena import ArkenaIE
5a51775a 76from .instagram import InstagramIE
b8f67449 77from .liveleak import LiveLeakIE
5d39176f 78from .threeqsdn import ThreeQSDNIE
4d8819d2 79from .theplatform import ThePlatformIE
48a5eabc 80from .vessel import VesselIE
c287f2bc 81from .kaltura import KalturaIE
06a96da1 82from .eagleplatform import EaglePlatformIE
fd6ca382 83from .facebook import FacebookIE
94aae015 84from .soundcloud import SoundcloudIE
027e2312 85from .tunein import TuneInBaseIE
2a1321a2 86from .vbox7 import Vbox7IE
b0c8f2e9 87from .dbtv import DBTVIE
b1c35797 88from .piksel import PikselIE
e186a9ec 89from .videa import VideaIE
b687c85e 90from .twentymin import TwentyMinutenIE
d77ac737 91from .ustream import UstreamIE
17f8deeb 92from .openload import OpenloadIE
6ef3e65a 93from .videopress import VideoPressIE
eb3079b6 94from .rutube import RutubeIE
e5d39886 95from .limelight import LimelightBaseIE
7986c3ab 96from .anvato import AnvatoIE
55719459 97from .washingtonpost import WashingtonPostIE
58bb4402 98from .wistia import WistiaIE
5d29af3d 99from .mediaset import MediasetIE
73cf76a9 100from .joj import JojIE
24e966e8 101from .megaphone import MegaphoneIE
41918eaa 102from .vzaar import VzaarIE
26bae2d9 103from .channel9 import Channel9IE
0987f2dd 104from .vshare import VShareIE
2ca7ed41 105from .mediasite import MediasiteIE
7d540621 106from .springboardplatform import SpringboardPlatformIE
4c780fbd 107from .yapfiles import YapFilesIE
86c8cfc5 108from .vice import ViceIE
178ee883 109from .xfileshare import XFileShareIE
660a230b 110from .cloudflarestream import CloudflareStreamIE
6bd499e8 111from .peertube import PeerTubeIE
aee36ca8 112from .indavideo import IndavideoEmbedIE
cfd7f2a6 113from .apa import APAIE
f51f526b 114from .foxnews import FoxNewsIE
9d1b2138 115from .viqeo import ViqeoIE
57c68ec4 116from .expressen import ExpressenIE
9b122384 117
0838239e 118
9b122384 119class GenericIE(InfoExtractor):
79649588 120 IE_DESC = 'Generic downloader that works on some sites'
9b122384 121 _VALID_URL = r'.*'
79649588 122 IE_NAME = 'generic'
cfe50f04 123 _TESTS = [
c5fa81fe
S
124 # Direct link to a video
125 {
126 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
127 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
128 'info_dict': {
129 'id': 'trailer',
130 'ext': 'mp4',
131 'title': 'trailer',
132 'upload_date': '20100513',
133 }
134 },
c5138a7c 135 # Direct link to media delivered compressed (until Accept-Encoding is *)
c5fa81fe
S
136 {
137 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
138 'md5': '128c42e68b13950268b648275386fc74',
139 'info_dict': {
140 'id': 'FictionJunction-Parallel_Hearts',
141 'ext': 'flac',
142 'title': 'FictionJunction-Parallel_Hearts',
143 'upload_date': '20140522',
144 },
145 'expected_warnings': [
146 'URL could be a direct video link, returning it as such.'
39efc6e3
YCH
147 ],
148 'skip': 'URL invalid',
c5fa81fe
S
149 },
150 # Direct download with broken HEAD
151 {
152 'url': 'http://ai-radio.org:8000/radio.opus',
153 'info_dict': {
154 'id': 'radio',
155 'ext': 'opus',
156 'title': 'radio',
157 },
158 'params': {
159 'skip_download': True, # infinite live stream
160 },
161 'expected_warnings': [
ef0e4e7b
YCH
162 r'501.*Not Implemented',
163 r'400.*Bad Request',
c5fa81fe
S
164 ],
165 },
166 # Direct link with incorrect MIME type
167 {
168 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
169 'md5': '4ccbebe5f36706d85221f204d7eb5913',
170 'info_dict': {
171 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
172 'id': '5_Lennart_Poettering_-_Systemd',
173 'ext': 'webm',
174 'title': '5_Lennart_Poettering_-_Systemd',
175 'upload_date': '20141120',
176 },
177 'expected_warnings': [
178 'URL could be a direct video link, returning it as such.'
179 ]
180 },
181 # RSS feed
182 {
183 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
184 'info_dict': {
185 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
186 'title': 'Zero Punctuation',
187 'description': 're:.*groundbreaking video review series.*'
188 },
189 'playlist_mincount': 11,
190 },
191 # RSS feed with enclosure
192 {
193 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
194 'info_dict': {
195 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
196 'ext': 'm4v',
197 'upload_date': '20150228',
198 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
199 }
200 },
01aec848
BG
201 # RSS feed with enclosures and unsupported link URLs
202 {
203 'url': 'http://www.hellointernet.fm/podcast?format=rss',
204 'info_dict': {
205 'id': 'http://www.hellointernet.fm/podcast?format=rss',
206 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
207 'title': 'Hello Internet',
208 },
209 'playlist_mincount': 100,
210 },
8765222d
S
211 # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
212 {
213 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
214 'info_dict': {
215 'id': 'smil',
216 'ext': 'mp4',
217 'title': 'Automatics, robotics and biocybernetics',
218 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
e327b736 219 'upload_date': '20130627',
8765222d
S
220 'formats': 'mincount:16',
221 'subtitles': 'mincount:1',
222 },
223 'params': {
224 'force_generic_extractor': True,
225 'skip_download': True,
226 },
227 },
228 # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
229 {
230 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
231 'info_dict': {
232 'id': 'hds',
233 'ext': 'flv',
234 'title': 'hds',
235 'formats': 'mincount:1',
236 },
237 'params': {
238 'skip_download': True,
239 },
240 },
241 # SMIL from https://www.restudy.dk/video/play/id/1637
242 {
243 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
244 'info_dict': {
245 'id': 'video_1637',
246 'ext': 'flv',
247 'title': 'video_1637',
248 'formats': 'mincount:3',
249 },
250 'params': {
251 'skip_download': True,
252 },
253 },
254 # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
255 {
256 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
257 'info_dict': {
258 'id': 'smil-service',
259 'ext': 'flv',
260 'title': 'smil-service',
261 'formats': 'mincount:1',
262 },
263 'params': {
264 'skip_download': True,
265 },
266 },
267 # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
268 {
269 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
270 'info_dict': {
271 'id': '4719370',
272 'ext': 'mp4',
273 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
274 'formats': 'mincount:3',
275 },
276 'params': {
277 'skip_download': True,
278 },
279 },
1de5cd3b
S
280 # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
281 {
282 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
283 'info_dict': {
284 'id': 'mZlp2ctYIUEB',
285 'ext': 'mp4',
286 'title': 'Tikibad ontruimd wegens brand',
287 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
ec85ded8 288 'thumbnail': r're:^https?://.*\.jpg$',
1de5cd3b
S
289 'duration': 33,
290 },
291 'params': {
292 'skip_download': True,
293 },
294 },
9d939cec
S
295 # MPD from http://dash-mse-test.appspot.com/media.html
296 {
297 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
298 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
299 'info_dict': {
300 'id': 'car-20120827-manifest',
301 'ext': 'mp4',
302 'title': 'car-20120827-manifest',
303 'formats': 'mincount:9',
0738187f 304 'upload_date': '20130904',
9d939cec
S
305 },
306 'params': {
307 'format': 'bestvideo',
308 },
309 },
20938f76
S
310 # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
311 {
312 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
313 'info_dict': {
314 'id': 'content',
315 'ext': 'mp4',
316 'title': 'content',
317 'formats': 'mincount:8',
318 },
319 'params': {
320 # m3u8 downloads
321 'skip_download': True,
39efc6e3
YCH
322 },
323 'skip': 'video gone',
20938f76 324 },
edd9b71c
S
325 # m3u8 served with Content-Type: text/plain
326 {
327 'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
328 'info_dict': {
329 'id': 'index',
330 'ext': 'mp4',
331 'title': 'index',
332 'upload_date': '20140720',
333 'formats': 'mincount:11',
334 },
335 'params': {
336 # m3u8 downloads
337 'skip_download': True,
39efc6e3
YCH
338 },
339 'skip': 'video gone',
edd9b71c 340 },
c5fa81fe
S
341 # google redirect
342 {
343 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
344 'info_dict': {
345 'id': 'cmQHVoWB5FY',
346 'ext': 'mp4',
347 'upload_date': '20130224',
348 'uploader_id': 'TheVerge',
ec85ded8 349 'description': r're:^Chris Ziegler takes a look at the\.*',
c5fa81fe
S
350 'uploader': 'The Verge',
351 'title': 'First Firefox OS phones side-by-side',
352 },
353 'params': {
354 'skip_download': False,
355 }
356 },
6c91a5a7
S
357 {
358 # redirect in Refresh HTTP header
359 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
360 'info_dict': {
361 'id': 'pO8h3EaFRdo',
362 'ext': 'mp4',
363 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
364 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
365 'upload_date': '20150917',
366 'uploader_id': 'brtvofficial',
367 'uploader': 'Boiler Room',
368 },
369 'params': {
370 'skip_download': False,
371 },
372 },
cfe50f04 373 {
79649588 374 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
d360a146 375 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
79649588 376 'info_dict': {
d360a146
S
377 'id': '13601338388002',
378 'ext': 'mp4',
79649588
PH
379 'uploader': 'www.hodiho.fr',
380 'title': 'R\u00e9gis plante sa Jeep',
cfe50f04
JMF
381 }
382 },
c19f7764
JMF
383 # bandcamp page with custom domain
384 {
79649588
PH
385 'add_ie': ['Bandcamp'],
386 'url': 'http://bronyrock.com/track/the-pony-mash',
79649588 387 'info_dict': {
fd50bf62
S
388 'id': '3235767654',
389 'ext': 'mp3',
79649588
PH
390 'title': 'The Pony Mash',
391 'uploader': 'M_Pallante',
c19f7764 392 },
79649588 393 'skip': 'There is a limit of 200 free downloads / month for the test song',
c19f7764 394 },
eeb165e6 395 {
53a664ed
S
396 # embedded brightcove video
397 # it also tests brightcove videos that need to set the 'Referer'
398 # in the http requests
3b7d9aa4 399 'add_ie': ['BrightcoveLegacy'],
79649588
PH
400 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
401 'info_dict': {
402 'id': '2765128793001',
403 'ext': 'mp4',
404 'title': 'Le cours de bourse : l’analyse technique',
405 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
406 'uploader': 'BFM BUSINESS',
eeb165e6 407 },
79649588
PH
408 'params': {
409 'skip_download': True,
eeb165e6
JMF
410 },
411 },
53a664ed
S
412 {
413 # embedded with itemprop embedURL and video id spelled as `idVideo`
414 'add_id': ['BrightcoveLegacy'],
415 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
416 'info_dict': {
417 'id': '5255628253001',
418 'ext': 'mp4',
419 'title': 'md5:37c519b1128915607601e75a87995fc0',
420 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
421 'uploader': 'BFM BUSINESS',
422 'uploader_id': '876450612001',
423 'timestamp': 1482255315,
424 'upload_date': '20161220',
425 },
426 'params': {
427 'skip_download': True,
428 },
429 },
17ab4d3b
PH
430 {
431 # https://github.com/rg3/youtube-dl/issues/2253
432 'url': 'http://bcove.me/i6nfkrc3',
17ab4d3b
PH
433 'md5': '0ba9446db037002366bab3b3eb30c88c',
434 'info_dict': {
fd50bf62
S
435 'id': '3101154703001',
436 'ext': 'mp4',
17ab4d3b
PH
437 'title': 'Still no power',
438 'uploader': 'thestar.com',
439 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
440 },
3b7d9aa4 441 'add_ie': ['BrightcoveLegacy'],
39efc6e3 442 'skip': 'video gone',
17ab4d3b 443 },
0479c625
S
444 {
445 'url': 'http://www.championat.com/video/football/v/87/87499.html',
446 'md5': 'fb973ecf6e4a78a67453647444222983',
447 'info_dict': {
448 'id': '3414141473001',
449 'ext': 'mp4',
450 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
451 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
452 'uploader': 'Championat',
453 },
454 },
bdf97017 455 {
37aab278 456 # https://github.com/rg3/youtube-dl/issues/3541
3b7d9aa4 457 'add_ie': ['BrightcoveLegacy'],
bdf97017
NJ
458 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
459 'info_dict': {
460 'id': '3866516442001',
37aab278 461 'ext': 'mp4',
bdf97017
NJ
462 'title': 'Leer mij vrouwen kennen: Aflevering 1',
463 'description': 'Leer mij vrouwen kennen: Aflevering 1',
464 'uploader': 'SBS Broadcasting',
465 },
37aab278 466 'skip': 'Restricted to Netherlands',
bdf97017 467 'params': {
37aab278 468 'skip_download': True, # m3u8 download
bdf97017
NJ
469 },
470 },
06d0ad9a
YCH
471 {
472 # Brightcove video in <iframe>
473 'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
474 'md5': '36d74ef5e37c8b4a2ce92880d208b968',
475 'info_dict': {
476 'id': '5360463607001',
477 'ext': 'mp4',
478 'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活',
479 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
480 'uploader': 'United Nations',
481 'uploader_id': '1362235914001',
482 'timestamp': 1489593889,
483 'upload_date': '20170315',
484 },
485 'add_ie': ['BrightcoveLegacy'],
486 },
16e2c8f7
YCH
487 {
488 # Brightcove with alternative playerID key
489 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
490 'info_dict': {
491 'id': 'nmeth.2062_SV1',
492 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
493 },
494 'playlist': [{
495 'info_dict': {
496 'id': '2228375078001',
497 'ext': 'mp4',
498 'title': 'nmeth.2062-sv1',
499 'description': 'nmeth.2062-sv1',
500 'timestamp': 1363357591,
501 'upload_date': '20130315',
502 'uploader': 'Nature Publishing Group',
503 'uploader_id': '1964492299001',
504 },
505 }],
506 },
40158f55
JH
507 {
508 # Brightcove with UUID in videoPlayer
509 'url': 'http://www8.hp.com/cn/zh/home.html',
510 'info_dict': {
511 'id': '5255815316001',
512 'ext': 'mp4',
513 'title': 'Sprocket Video - China',
514 'description': 'Sprocket Video - China',
515 'uploader': 'HP-Video Gallery',
516 'timestamp': 1482263210,
517 'upload_date': '20161220',
518 'uploader_id': '1107601872001',
519 },
520 'params': {
521 'skip_download': True, # m3u8 download
522 },
523 'skip': 'video rotates...weekly?',
524 },
525 {
526 # Brightcove:new type [2].
527 'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
528 'md5': '2b35148fcf48da41c9fb4591650784f3',
529 'info_dict': {
530 'id': '5348741021001',
531 'ext': 'mp4',
532 'upload_date': '20170306',
533 'uploader_id': '4191638492001',
534 'timestamp': 1488769918,
535 'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
536
537 },
538 },
539 {
540 # Alternative brightcove <video> attributes
541 'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
542 'info_dict': {
543 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
544 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
545 },
546 'playlist': [{
547 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
548 'info_dict': {
549 'id': '5311302538001',
550 'ext': 'mp4',
551 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
552 'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
553 'timestamp': 1486321708,
554 'upload_date': '20170205',
555 'uploader_id': '800000640001',
556 },
557 'only_matching': True,
558 }],
559 },
b68a812e
S
560 {
561 # Brightcove with UUID in videoPlayer
562 'url': 'http://www8.hp.com/cn/zh/home.html',
563 'info_dict': {
564 'id': '5255815316001',
565 'ext': 'mp4',
566 'title': 'Sprocket Video - China',
567 'description': 'Sprocket Video - China',
568 'uploader': 'HP-Video Gallery',
569 'timestamp': 1482263210,
570 'upload_date': '20161220',
571 'uploader_id': '1107601872001',
572 },
573 'params': {
574 'skip_download': True, # m3u8 download
575 },
576 },
c0d0b01f
JMF
577 # ooyala video
578 {
79649588 579 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
87830900 580 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
79649588
PH
581 'info_dict': {
582 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
583 'ext': 'mp4',
3486df38 584 'title': '2cc213299525360.mov', # that's what we get
53e06b25 585 'duration': 238.231,
c0d0b01f 586 },
87830900 587 'add_ie': ['Ooyala'],
c0d0b01f 588 },
bf94d763
S
589 {
590 # ooyala video embedded with http://player.ooyala.com/iframe.js
591 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
592 'info_dict': {
593 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
594 'ext': 'mp4',
595 'title': '"Steve Jobs: Man in the Machine" trailer',
596 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
53e06b25 597 'duration': 135.427,
bf94d763
S
598 },
599 'params': {
600 'skip_download': True,
601 },
39efc6e3 602 'skip': 'movie expired',
bf94d763 603 },
198d4cb4
GR
604 # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
605 {
606 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
607 'info_dict': {
608 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
609 'ext': 'mp4',
610 'title': 'Steampunk Fest Comes to Honesdale',
611 'duration': 43.276,
612 },
613 'params': {
614 'skip_download': True,
615 }
616 },
1b86cc41 617 # embed.ly video
618 {
619 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
620 'info_dict': {
621 'id': '9ODmcdjQcHQ',
622 'ext': 'mp4',
0a5bce56
PH
623 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
624 'upload_date': '20140225',
625 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
626 'uploader': 'Tested',
627 'uploader_id': 'testedcom',
1b86cc41 628 },
629 # No need to test YoutubeIE here
630 'params': {
631 'skip_download': True,
632 },
633 },
60cc4dc4
PH
634 # funnyordie embed
635 {
636 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
60cc4dc4
PH
637 'info_dict': {
638 'id': '18e820ec3f',
639 'ext': 'mp4',
640 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
641 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
93d020dd 642 },
39efc6e3
YCH
643 # HEAD requests lead to endless 301, while GET is OK
644 'expected_warnings': ['301'],
60cc4dc4 645 },
93d020dd
S
646 # RUTV embed
647 {
648 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
649 'info_dict': {
650 'id': '776940',
651 'ext': 'mp4',
652 'title': 'Охотское море стало целиком российским',
653 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
654 },
655 'params': {
656 # m3u8 download
657 'skip_download': True,
658 },
aab74fa1 659 },
f37bdbe5
S
660 # TVC embed
661 {
662 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
663 'info_dict': {
664 'id': '55304',
665 'ext': 'mp4',
666 'title': 'Дошкольное воспитание',
667 },
668 },
b827a601
S
669 # SportBox embed
670 {
671 'url': 'http://www.vestifinance.ru/articles/25753',
672 'info_dict': {
673 'id': '25753',
05d1e7aa 674 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
b827a601
S
675 },
676 'playlist': [{
677 'info_dict': {
678 'id': '370908',
679 'title': 'Госзаказ. День 3',
680 'ext': 'mp4',
681 }
682 }, {
683 'info_dict': {
684 'id': '370905',
685 'title': 'Госзаказ. День 2',
686 'ext': 'mp4',
687 }
688 }, {
689 'info_dict': {
690 'id': '370902',
691 'title': 'Госзаказ. День 1',
692 'ext': 'mp4',
693 }
694 }],
695 'params': {
696 # m3u8 download
697 'skip_download': True,
698 },
699 },
bf20b9c5
S
700 # Myvi.ru embed
701 {
702 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
703 'info_dict': {
704 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
705 'ext': 'mp4',
706 'title': 'Ужастики, русский трейлер (2015)',
ec85ded8 707 'thumbnail': r're:^https?://.*\.jpg$',
bf20b9c5
S
708 'duration': 153,
709 }
710 },
c76799c5
S
711 # XHamster embed
712 {
713 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
714 'info_dict': {
715 'id': 'showthread',
716 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
717 },
718 'playlist_mincount': 7,
39efc6e3
YCH
719 # This forum no longer allows <iframe> syntax;
720 # HTML tags are now displayed as-is
721 'skip': 'No videos on this page',
c76799c5 722 },
aab74fa1
PH
723 # Embedded TED video
724 {
725 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
a8eb5a8e 726 'md5': '65fdff94098e4a607385a60c5177c638',
aab74fa1 727 'info_dict': {
a8eb5a8e 728 'id': '1969',
aab74fa1 729 'ext': 'mp4',
a8eb5a8e
PH
730 'title': 'Hidden miracles of the natural world',
731 'uploader': 'Louie Schwartzberg',
732 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
aab74fa1 733 }
60cc4dc4 734 },
d95e35d6
S
735 # nowvideo embed hidden behind percent encoding
736 {
737 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
738 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
739 'info_dict': {
740 'id': '06e53103ca9aa',
741 'ext': 'flv',
742 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
743 'description': 'No description',
744 },
0f2a2ba1 745 },
893f8832
PH
746 # arte embed
747 {
748 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
749 'md5': '7653032cbb25bf6c80d80f217055fa43',
750 'info_dict': {
751 'id': '048195-004_PLUS7-F',
752 'ext': 'flv',
753 'title': 'X:enius',
754 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
755 'upload_date': '20140320',
756 },
757 'params': {
758 'skip_download': 'Requires rtmpdump'
39efc6e3
YCH
759 },
760 'skip': 'video gone',
893f8832 761 },
cbd55ade
S
762 # francetv embed
763 {
764 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
765 'info_dict': {
766 'id': 'EV_30231',
767 'ext': 'mp4',
768 'title': 'Alcaline, le concert avec Calogero',
769 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
770 'upload_date': '20150226',
771 'timestamp': 1424989860,
772 'duration': 5400,
773 },
774 'params': {
775 # m3u8 downloads
776 'skip_download': True,
777 },
778 'expected_warnings': [
779 'Forbidden'
780 ]
781 },
fa35cdad
PH
782 # Condé Nast embed
783 {
784 'url': 'http://www.wired.com/2014/04/honda-asimo/',
785 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
786 'info_dict': {
787 'id': '53501be369702d3275860000',
788 'ext': 'mp4',
789 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
790 }
ebd3c7b3
PH
791 },
792 # Dailymotion embed
793 {
794 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
795 'md5': '441aeeb82eb72c422c7f14ec533999cd',
796 'info_dict': {
797 'id': 'k2mm4bCdJ6CQ2i7c8o2',
798 'ext': 'mp4',
799 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
0738187f 800 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
ebd3c7b3 801 'uploader': 'Spi0n',
0738187f
YCH
802 'uploader_id': 'xgditw',
803 'upload_date': '20140425',
804 'timestamp': 1398441542,
ebd3c7b3
PH
805 },
806 'add_ie': ['Dailymotion'],
2b88feed 807 },
71a1db89
S
808 # DailyMail embed
809 {
810 'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
811 'info_dict': {
812 'id': '1495629',
813 'ext': 'mp4',
814 'title': 'Care worker punches elderly dementia patient in head 11 times',
815 'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
816 },
817 'add_ie': ['DailyMail'],
818 'params': {
819 'skip_download': True,
820 },
821 },
2b88feed
PH
822 # YouTube embed
823 {
824 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
825 'info_dict': {
826 'id': 'FXRb4ykk4S0',
827 'ext': 'mp4',
828 'title': 'The NBL Auction 2014',
829 'uploader': 'BADMINTON England',
830 'uploader_id': 'BADMINTONEvents',
831 'upload_date': '20140603',
832 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
833 },
834 'add_ie': ['Youtube'],
835 'params': {
836 'skip_download': True,
837 }
838 },
c5cd249e
JMF
839 # MTVServices embed
840 {
1fa309da
YCH
841 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
842 'md5': 'ca1aef97695ef2c1d6973256a57e5252',
c5cd249e 843 'info_dict': {
1fa309da 844 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
c5cd249e 845 'ext': 'mp4',
1fa309da
YCH
846 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
847 'description': 'Two valets share their love for movie star Liam Neesons.',
05d1e7aa
YCH
848 'timestamp': 1349922600,
849 'upload_date': '20121011',
c5cd249e
JMF
850 },
851 },
61013473 852 # YouTube embed via <data-embed-url="">
853 {
854 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
61013473 855 'info_dict': {
a8eb5a8e 856 'id': '4vAffPZIT44',
61013473 857 'ext': 'mp4',
a8eb5a8e 858 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
ed2d6a19
PH
859 'uploader': 'Gameloft',
860 'uploader_id': 'gameloft',
a8eb5a8e
PH
861 'upload_date': '20140828',
862 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
ed2d6a19
PH
863 },
864 'params': {
865 'skip_download': True,
61013473 866 }
c8e9a235 867 },
61568e50
JH
868 # YouTube <object> embed
869 {
870 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
871 'md5': '516718101ec834f74318df76259fb3cc',
872 'info_dict': {
873 'id': 'msN87y-iEx0',
874 'ext': 'webm',
875 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
876 'upload_date': '20080526',
877 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
878 'uploader': 'Christopher Sykes',
879 'uploader_id': 'ChristopherJSykes',
880 },
881 'add_ie': ['Youtube'],
882 },
c8e9a235
PH
883 # Camtasia studio
884 {
885 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
886 'playlist': [{
887 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
888 'info_dict': {
889 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
890 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
891 'ext': 'flv',
892 'duration': 2235.90,
893 }
894 }, {
895 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
896 'info_dict': {
897 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
898 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
899 'ext': 'flv',
900 'duration': 2235.93,
901 }
902 }],
903 'info_dict': {
904 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
905 }
4d805e06
PH
906 },
907 # Flowplayer
908 {
909 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
910 'md5': '9d65602bf31c6e20014319c7d07fba27',
911 'info_dict': {
912 'id': '5123ea6d5e5a7',
913 'ext': 'mp4',
914 'age_limit': 18,
915 'uploader': 'www.handjobhub.com',
d6d9186f 916 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
4d805e06 917 }
0990305d 918 },
22a6f150
PH
919 # Multiple brightcove videos
920 # https://github.com/rg3/youtube-dl/issues/2283
921 {
922 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
923 'info_dict': {
924 'id': 'always-never',
925 'title': 'Always / Never - The New Yorker',
926 },
927 'playlist_count': 3,
928 'params': {
929 'extract_flat': False,
930 'skip_download': True,
931 }
1a94ff68
S
932 },
933 # MLB embed
934 {
935 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
936 'md5': '96f09a37e44da40dd083e12d9a683327',
937 'info_dict': {
938 'id': '33322633',
939 'ext': 'mp4',
940 'title': 'Ump changes call to ball',
941 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
942 'duration': 48,
943 'timestamp': 1401537900,
944 'upload_date': '20140531',
ec85ded8 945 'thumbnail': r're:^https?://.*\.jpg$',
1a94ff68
S
946 },
947 },
746c67d7
NJ
948 # Wistia embed
949 {
6c114b12
S
950 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
951 'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
746c67d7 952 'info_dict': {
6c114b12 953 'id': '6e2wtrbdaf',
746c67d7 954 'ext': 'mov',
6c114b12
S
955 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
956 'description': 'a Paywall Videos video from Remilon',
957 'duration': 644.072,
958 'uploader': 'study.com',
959 'timestamp': 1459678540,
960 'upload_date': '20160403',
961 'filesize': 24687186,
746c67d7
NJ
962 },
963 },
52cffcb1 964 {
965 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
966 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
967 'info_dict': {
968 'id': 'uxjb0lwrcz',
969 'ext': 'mp4',
6c114b12 970 'title': 'Conversation about Hexagonal Rails Part 1',
0738187f 971 'description': 'a Martin Fowler video from ThoughtWorks',
52cffcb1 972 'duration': 1715.0,
85d7b765 973 'uploader': 'thoughtworks.wistia.com',
0738187f 974 'timestamp': 1401832161,
6c114b12 975 'upload_date': '20140603',
70b7e3fb 976 },
52cffcb1 977 },
7ded6545
S
978 # Wistia standard embed (async)
979 {
980 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
981 'info_dict': {
982 'id': '807fafadvk',
983 'ext': 'mp4',
984 'title': 'Drip Brennan Dunn Workshop',
985 'description': 'a JV Webinars video from getdrip-1',
986 'duration': 4986.95,
7ded6545 987 'timestamp': 1463607249,
6c114b12 988 'upload_date': '20160518',
7ded6545
S
989 },
990 'params': {
991 'skip_download': True,
992 }
993 },
ac645ac7
PH
994 # Soundcloud embed
995 {
996 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
997 'info_dict': {
998 'id': '174391317',
999 'ext': 'mp3',
1000 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
1001 'uploader': 'Sophos Security',
1002 'title': 'Chet Chat 171 - Oct 29, 2014',
1003 'upload_date': '20141029',
1004 }
af63fed7 1005 },
db19df6c
S
1006 # Soundcloud multiple embeds
1007 {
1008 'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
1009 'info_dict': {
1010 'id': '52809',
1011 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
1012 },
1013 'playlist_mincount': 7,
1014 },
027e2312
S
1015 # TuneIn station embed
1016 {
1017 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
1018 'info_dict': {
1019 'id': '204146',
1020 'ext': 'mp3',
1021 'title': 'CNRV',
1022 'location': 'Paris, France',
1023 'is_live': True,
1024 },
1025 'params': {
1026 # Live stream
1027 'skip_download': True,
1028 },
1029 },
af63fed7
PH
1030 # Livestream embed
1031 {
1032 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
1033 'info_dict': {
1034 'id': '67864563',
1035 'ext': 'flv',
1036 'upload_date': '20141112',
1037 'title': 'Rosetta #CometLanding webcast HL 10',
1038 }
1039 },
78d3b3e2
YCH
1040 # Another Livestream embed, without 'new.' in URL
1041 {
1042 'url': 'https://www.freespeech.org/',
1043 'info_dict': {
1044 'id': '123537347',
1045 'ext': 'mp4',
1046 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
1047 },
1048 'params': {
1049 # Live stream
1050 'skip_download': True,
1051 },
1052 },
65f3a228
PH
1053 # LazyYT
1054 {
e8e4cc5a 1055 'url': 'https://skiplagged.com/',
65f3a228 1056 'info_dict': {
e8e4cc5a
JH
1057 'id': 'skiplagged',
1058 'title': 'Skiplagged: The smart way to find cheap flights',
65f3a228 1059 },
e8e4cc5a
JH
1060 'playlist_mincount': 1,
1061 'add_ie': ['Youtube'],
4e262a88 1062 },
42bdd9d0
PH
1063 # Cinchcast embed
1064 {
1065 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
1066 'info_dict': {
1067 'id': '7141703',
1068 'ext': 'mp3',
1069 'upload_date': '20141126',
1070 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
1071 }
1072 },
501f13fb
PH
1073 # Cinerama player
1074 {
1075 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
1076 'info_dict': {
1077 'id': '730m_DandD_1901_512k',
1078 'ext': 'mp4',
1079 'uploader': 'www.abc.net.au',
1080 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
1081 }
796df3c6
S
1082 },
1083 # embedded viddler video
1084 {
1085 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
1086 'info_dict': {
1087 'id': '4d03aad9',
1088 'ext': 'mp4',
1089 'uploader': 'deadspin',
1090 'title': 'WALL-TO-GORTAT',
1091 'timestamp': 1422285291,
1092 'upload_date': '20150126',
1093 },
1094 'add_ie': ['Viddler'],
a0f71985 1095 },
2051acde
S
1096 # Libsyn embed
1097 {
1098 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
1099 'info_dict': {
1100 'id': '3377616',
1101 'ext': 'mp3',
1102 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
1103 'description': 'md5:601cb790edd05908957dae8aaa866465',
1104 'upload_date': '20150220',
1105 },
326fa4e6 1106 'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
2051acde 1107 },
a0f71985
PH
1108 # jwplayer YouTube
1109 {
1110 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
1111 'info_dict': {
1112 'id': 'Mrj4DVp2zeA',
1113 'ext': 'mp4',
f37e3f99 1114 'upload_date': '20150212',
a0f71985 1115 'uploader': 'The National Archives UK',
2637fadc 1116 'description': 'md5:8078af856dca76edc42910b61273dbbf',
a0f71985
PH
1117 'uploader_id': 'NationalArchives08',
1118 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
1119 },
59b8ab58 1120 },
5620f840
S
1121 # jwplayer rtmp
1122 {
6899b1d9 1123 'url': 'http://www.suffolk.edu/sjc/live.php',
5620f840 1124 'info_dict': {
6899b1d9 1125 'id': 'live',
5620f840
S
1126 'ext': 'flv',
1127 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
1128 'uploader': 'www.suffolk.edu',
1129 },
1130 'params': {
1131 'skip_download': True,
2637fadc 1132 },
6899b1d9 1133 'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
5620f840 1134 },
a4a554a7
YCH
1135 # Complex jwplayer
1136 {
1137 'url': 'http://www.indiedb.com/games/king-machine/videos',
1138 'info_dict': {
1139 'id': 'videos',
1140 'ext': 'mp4',
1141 'title': 'king machine trailer 1',
2637fadc 1142 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
a4a554a7
YCH
1143 'thumbnail': r're:^https?://.*\.jpg$',
1144 },
1145 },
03486dbb
RU
1146 {
1147 # JWPlayer config passed as variable
1148 'url': 'http://www.txxx.com/videos/3326530/ariele/',
1149 'info_dict': {
1150 'id': '3326530_hq',
1151 'ext': 'mp4',
1152 'title': 'ARIELE | Tube Cup',
1153 'uploader': 'www.txxx.com',
1154 'age_limit': 18,
1155 },
1156 'params': {
1157 'skip_download': True,
1158 }
1159 },
939be9ad
JH
1160 {
1161 # JWPlatform iframe
1162 'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
1163 'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
1164 'info_dict': {
1165 'id': 'O0c5JcKT',
1166 'ext': 'mp4',
1167 'upload_date': '20171122',
1168 'timestamp': 1511366290,
1169 'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
1170 },
805f5bf7 1171 'add_ie': [JWPlatformIE.ie_key()],
939be9ad 1172 },
63d990d2 1173 {
c5b7014a 1174 # Video.js embed, multiple formats
63d990d2
S
1175 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
1176 'info_dict': {
1177 'id': 'yygqldloqIk',
1178 'ext': 'mp4',
1179 'title': 'SolidWorks. Урок 6 Настройка чертежа',
1180 'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
1181 'upload_date': '20130314',
1182 'uploader': 'PROстое3D',
1183 'uploader_id': 'PROstoe3D',
1184 },
1185 'params': {
1186 'skip_download': True,
1187 },
1188 },
c5b7014a
S
1189 {
1190 # Video.js embed, single format
1191 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
1192 'info_dict': {
1193 'id': 'watch',
1194 'ext': 'mp4',
1195 'title': 'Step 1 - Good Foundation',
1196 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
1197 },
1198 'params': {
1199 'skip_download': True,
1200 },
1201 },
59b8ab58
PH
1202 # rtl.nl embed
1203 {
1204 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
1205 'playlist_mincount': 5,
1206 'info_dict': {
1207 'id': 'aanslagen-kopenhagen',
2637fadc 1208 'title': 'Aanslagen Kopenhagen',
59b8ab58 1209 }
255fca5e
S
1210 },
1211 # Zapiks embed
1212 {
1213 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
1214 'info_dict': {
1215 'id': '118046',
1216 'ext': 'mp4',
1217 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
1218 }
1219 },
66e568de
S
1220 # Kaltura embed (different embed code)
1221 {
1222 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
1223 'info_dict': {
1224 'id': '1_a52wc67y',
1225 'ext': 'flv',
1226 'upload_date': '20150127',
1227 'uploader_id': 'PremierMedia',
1228 'timestamp': int,
1229 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
1230 },
1231 },
87703231
YCH
1232 # Kaltura embed with single quotes
1233 {
1234 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
1235 'info_dict': {
1236 'id': '0_izeg5utt',
1237 'ext': 'mp4',
1238 'title': '35871',
1239 'timestamp': 1355743100,
1240 'upload_date': '20121217',
e30991f9 1241 'uploader_id': 'cplapp@learn360.com',
87703231
YCH
1242 },
1243 'add_ie': ['Kaltura'],
1244 },
427cd050
S
1245 {
1246 # Kaltura embedded via quoted entry_id
1247 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
1248 'info_dict': {
1249 'id': '0_utuok90b',
1250 'ext': 'mp4',
1251 'title': '06_matthew_brender_raj_dutt',
1252 'timestamp': 1466638791,
1253 'upload_date': '20160622',
1254 },
1255 'add_ie': ['Kaltura'],
1256 'expected_warnings': [
1257 'Could not send HEAD request'
1258 ],
1259 'params': {
1260 'skip_download': True,
1261 }
1262 },
8ab7e6c4
YCH
1263 {
1264 # Kaltura embedded, some fileExt broken (#11480)
1265 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
1266 'info_dict': {
1267 'id': '1_sgtvehim',
1268 'ext': 'mp4',
1269 'title': 'Our "Standard Models" of particle physics and cosmology',
1270 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
1271 'timestamp': 1321158993,
1272 'upload_date': '20111113',
1273 'uploader_id': 'kps1',
1274 },
1275 'add_ie': ['Kaltura'],
1276 },
a01825a5
JH
1277 {
1278 # Kaltura iframe embed
1279 'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
1280 'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
1281 'info_dict': {
1282 'id': '0_f2cfbpwy',
1283 'ext': 'mp4',
1284 'title': 'I. M. Pei: A Centennial Celebration',
1285 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
1286 'upload_date': '20170403',
1287 'uploader_id': 'batchUser',
1288 'timestamp': 1491232186,
1289 },
1290 'add_ie': ['Kaltura'],
1291 },
c21692fa
S
1292 {
1293 # Kaltura iframe embed, more sophisticated
1294 'url': 'http://www.cns.nyu.edu/~eero/math-tools/Videos/lecture-05sep2017.html',
1295 'info_dict': {
1296 'id': '1_9gzouybz',
1297 'ext': 'mp4',
1298 'title': 'lecture-05sep2017',
1299 'description': 'md5:40f347d91fd4ba047e511c5321064b49',
1300 'upload_date': '20170913',
1301 'uploader_id': 'eps2',
1302 'timestamp': 1505340777,
1303 },
1304 'params': {
1305 'skip_download': True,
1306 },
1307 'add_ie': ['Kaltura'],
1308 },
e30991f9
S
1309 {
1310 # meta twitter:player
1311 'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
1312 'info_dict': {
1313 'id': '0_01b42zps',
1314 'ext': 'mp4',
1315 'title': 'Main Twerk (Video)',
1316 'upload_date': '20171208',
1317 'uploader_id': 'sebastian.salinas@thechive.com',
1318 'timestamp': 1512713057,
1319 },
1320 'params': {
1321 'skip_download': True,
1322 },
1323 'add_ie': ['Kaltura'],
1324 },
250b042c
S
1325 # referrer protected EaglePlatform embed
1326 {
1327 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
1328 'info_dict': {
1329 'id': '582306',
1330 'ext': 'mp4',
1331 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
1332 'thumbnail': r're:^https?://.*\.jpg$',
1333 'duration': 3382,
1334 'view_count': int,
1335 },
1336 'params': {
1337 'skip_download': True,
1338 },
135c9c42 1339 },
665e9452 1340 # ClipYou (EaglePlatform) embed (custom URL)
d47ae7f6
S
1341 {
1342 'url': 'http://muz-tv.ru/play/7129/',
4645432d 1343 # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
d47ae7f6
S
1344 'info_dict': {
1345 'id': '12820',
1346 'ext': 'mp4',
1347 'title': "'O Sole Mio",
ec85ded8 1348 'thumbnail': r're:^https?://.*\.jpg$',
d47ae7f6
S
1349 'duration': 216,
1350 'view_count': int,
1351 },
250b042c
S
1352 'params': {
1353 'skip_download': True,
1354 },
2637fadc 1355 'skip': 'This video is unavailable.',
d47ae7f6 1356 },
f8388757
S
1357 # Pladform embed
1358 {
1359 'url': 'http://muz-tv.ru/kinozal/view/7400/',
1360 'info_dict': {
1361 'id': '100183293',
1362 'ext': 'mp4',
62259846 1363 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
f8388757 1364 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
ec85ded8 1365 'thumbnail': r're:^https?://.*\.jpg$',
f8388757
S
1366 'duration': 694,
1367 'age_limit': 0,
1368 },
2637fadc 1369 'skip': 'HTTP Error 404: Not Found',
f8388757 1370 },
c798f15b
S
1371 # Playwire embed
1372 {
1373 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
1374 'info_dict': {
1375 'id': '3519514',
1376 'ext': 'mp4',
1377 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
ec85ded8 1378 'thumbnail': r're:^https?://.*\.png$',
c798f15b
S
1379 'duration': 45.115,
1380 },
1381 },
ad320e9b
NJ
1382 # 5min embed
1383 {
1384 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
1385 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
1386 'info_dict': {
1387 'id': '518726732',
1388 'ext': 'mp4',
1389 'title': 'Facebook Creates "On This Day" | Crunch Report',
2637fadc
RA
1390 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
1391 'timestamp': 1427237531,
1392 'uploader': 'Crunch Report',
1393 'upload_date': '20150324',
1394 },
1395 'params': {
1396 # m3u8 download
1397 'skip_download': True,
ad320e9b
NJ
1398 },
1399 },
a4257017
S
1400 # Crooks and Liars embed
1401 {
1402 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
1403 'info_dict': {
1404 'id': '8RUoRhRi',
1405 'ext': 'mp4',
1406 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1407 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1408 'timestamp': 1428207000,
1409 'upload_date': '20150405',
1410 'uploader': 'Heather',
1411 },
1412 },
1413 # Crooks and Liars external embed
1414 {
1415 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1416 'info_dict': {
1417 'id': 'MTE3MjUtMzQ2MzA',
1418 'ext': 'mp4',
1419 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1420 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1421 'timestamp': 1265032391,
1422 'upload_date': '20100201',
1423 'uploader': 'Heather',
1424 },
1425 },
facecb84 1426 # NBC Sports vplayer embed
a2edf2e7 1427 {
facecb84 1428 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
a2edf2e7 1429 'info_dict': {
facecb84
S
1430 'id': 'ln7x1qSThw4k',
1431 'ext': 'flv',
1432 'title': "PFT Live: New leader in the 'new-look' defense",
1433 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
0738187f
YCH
1434 'uploader': 'NBCU-SPORTS',
1435 'upload_date': '20140107',
1436 'timestamp': 1389118457,
a2edf2e7 1437 },
2637fadc 1438 'skip': 'Invalid Page URL',
418c5cc3 1439 },
de3eb07e
YCH
1440 # NBC News embed
1441 {
1442 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
1443 'md5': '1aa589c675898ae6d37a17913cf68d66',
1444 'info_dict': {
2637fadc 1445 'id': 'x_dtl_oa_LettermanliftPR_160608',
de3eb07e 1446 'ext': 'mp4',
2637fadc 1447 'title': 'David Letterman: A Preview',
de3eb07e 1448 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
2637fadc
RA
1449 'upload_date': '20160609',
1450 'timestamp': 1465431544,
1451 'uploader': 'NBCU-NEWS',
de3eb07e
YCH
1452 },
1453 },
418c5cc3
YCH
1454 # UDN embed
1455 {
811586eb 1456 'url': 'https://video.udn.com/news/300346',
01c58f84 1457 'md5': 'fd2060e988c326991037b9aff9df21a6',
418c5cc3 1458 'info_dict': {
01c58f84 1459 'id': '300346',
418c5cc3 1460 'ext': 'mp4',
01c58f84 1461 'title': '中一中男師變性 全校師生力挺',
ec85ded8 1462 'thumbnail': r're:^https?://.*\.jpg$',
811586eb
YCH
1463 },
1464 'params': {
1465 # m3u8 download
1466 'skip_download': True,
1467 },
2637fadc 1468 'expected_warnings': ['Failed to parse JSON Expecting value'],
edfcf7ab 1469 },
b26733ba
YCH
1470 # Brightcove URL in single quotes
1471 {
1472 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1473 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1474 'info_dict': {
1475 'id': '4255764656001',
1476 'ext': 'mp4',
1477 'title': 'SN Presents: Russell Martin, World Citizen',
1478 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1479 'uploader': 'Rogers Sportsnet',
0738187f
YCH
1480 'uploader_id': '1704050871',
1481 'upload_date': '20150525',
1482 'timestamp': 1432570283,
b26733ba 1483 },
756f574e 1484 },
8084be78
S
1485 # OnionStudios embed
1486 {
1487 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1488 'info_dict': {
1489 'id': '2855',
1490 'ext': 'mp4',
1491 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
ec85ded8 1492 'thumbnail': r're:^https?://.*\.jpe?g$',
8084be78
S
1493 'uploader': 'ClickHole',
1494 'uploader_id': 'clickhole',
1495 }
1496 },
b8c1cc1a
S
1497 # SnagFilms embed
1498 {
1499 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1500 'info_dict': {
1501 'id': '74849a00-85a9-11e1-9660-123139220831',
1502 'ext': 'mp4',
1503 'title': '#whilewewatch',
1504 }
1505 },
a5158f38
YCH
1506 # AdobeTVVideo embed
1507 {
1508 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1509 'md5': '43662b577c018ad707a63766462b1e87',
1510 'info_dict': {
1511 'id': '2456',
1512 'ext': 'mp4',
1513 'title': 'New experience with Acrobat DC',
1514 'description': 'New experience with Acrobat DC',
1515 'duration': 248.667,
1516 },
1f812580 1517 },
ed126900 1518 # BrightcoveInPageEmbed embed
1519 {
1520 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1521 'info_dict': {
1522 'id': '4238694884001',
1523 'ext': 'flv',
1524 'title': 'Tabletop: Dread, Last Thoughts',
1525 'description': 'Tabletop: Dread, Last Thoughts',
1526 'duration': 51690,
1527 },
750b9ff0 1528 },
d10fe835
YCH
1529 # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1530 # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1531 {
1532 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1533 'info_dict': {
1534 'id': '4785848093001',
1535 'ext': 'mp4',
1536 'title': 'The Cardinal Pell Interview',
1537 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1538 'uploader': 'GlobeCast Australia - GlobeStream',
0738187f
YCH
1539 'uploader_id': '2733773828001',
1540 'upload_date': '20160304',
1541 'timestamp': 1457083087,
d10fe835
YCH
1542 },
1543 'params': {
1544 # m3u8 downloads
1545 'skip_download': True,
1546 },
1547 },
9edf47df
S
1548 {
1549 # Brightcove embed with whitespace around attribute names
1550 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
1551 'info_dict': {
1552 'id': '3167554373001',
1553 'ext': 'mp4',
1554 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
1555 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
1556 'uploader_id': '1079349493',
1557 'upload_date': '20140207',
1558 'timestamp': 1391810548,
1559 },
1560 'params': {
1561 'skip_download': True,
1562 },
1563 },
134c207e
YCH
1564 # Another form of arte.tv embed
1565 {
1566 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1567 'md5': '850bfe45417ddf221288c88a0cffe2e2',
1568 'info_dict': {
1569 'id': '030273-562_PLUS7-F',
1570 'ext': 'mp4',
1571 'title': 'ARTE Reportage - Nulle part, en France',
1572 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1573 'upload_date': '20160409',
1574 },
1575 },
b8f67449
KM
1576 # LiveLeak embed
1577 {
1578 'url': 'http://www.wykop.pl/link/3088787/',
09747ba7 1579 'md5': '7619da8c820e835bef21a1efa2a0fc71',
b8f67449
KM
1580 'info_dict': {
1581 'id': '874_1459135191',
1582 'ext': 'mp4',
1583 'title': 'Man shows poor quality of new apartment building',
1584 'description': 'The wall is like a sand pile.',
1585 'uploader': 'Lake8737',
09747ba7
YCH
1586 },
1587 'add_ie': [LiveLeakIE.ie_key()],
1588 },
1589 # Another LiveLeak embed pattern (#13336)
1590 {
1591 'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
1592 'info_dict': {
1593 'id': '2eb_1496309988',
1594 'ext': 'mp4',
1595 'title': 'Thief robs place where everyone was armed',
1596 'description': 'md5:694d73ee79e535953cf2488562288eee',
1597 'uploader': 'brazilwtf',
1598 },
1599 'add_ie': [LiveLeakIE.ie_key()],
b8f67449 1600 },
4a120778
YCH
1601 # Duplicated embedded video URLs
1602 {
1603 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1604 'info_dict': {
1605 'id': '149298443_480_16c25b74_2',
1606 'ext': 'mp4',
1607 'title': 'vs. Blue Orange Spring Game',
1608 'uploader': 'www.hudl.com',
1609 },
1610 },
371ddb14
S
1611 # twitter:player:stream embed
1612 {
1613 'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
1614 'info_dict': {
1615 'id': 'master',
1616 'ext': 'mp4',
1617 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
1618 'uploader': 'www.rtl.be',
1619 },
1620 'params': {
1621 # m3u8 downloads
1622 'skip_download': True,
1623 },
1624 },
32917907
RA
1625 # twitter:player embed
1626 {
1627 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
1628 'md5': 'a3e0df96369831de324f0778e126653c',
1629 'info_dict': {
1630 'id': '4909620399001',
1631 'ext': 'mp4',
1632 'title': 'What Do Black Holes Sound Like?',
1633 'description': 'what do black holes sound like',
1634 'upload_date': '20160524',
1635 'uploader_id': '29913724001',
1636 'timestamp': 1464107587,
1637 'uploader': 'TheAtlantic',
1638 },
1639 'add_ie': ['BrightcoveLegacy'],
fd6ca382
YCH
1640 },
1641 # Facebook <iframe> embed
1642 {
1643 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
dbf0157a 1644 'md5': 'fbcde74f534176ecb015849146dd3aee',
fd6ca382
YCH
1645 'info_dict': {
1646 'id': '599637780109885',
1647 'ext': 'mp4',
1648 'title': 'Facebook video #599637780109885',
1649 },
1650 },
fd1c5fba
S
1651 # Facebook <iframe> embed, plugin video
1652 {
1653 'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
1654 'info_dict': {
1655 'id': '1754168231264132',
1656 'ext': 'mp4',
1657 'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
1658 'uploader': 'Tariq Ramadan (official)',
1659 'timestamp': 1496758379,
1660 'upload_date': '20170606',
1661 },
1662 'params': {
1663 'skip_download': True,
1664 },
1665 },
fd6ca382
YCH
1666 # Facebook API embed
1667 {
1668 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
dbf0157a 1669 'md5': 'a47372ee61b39a7b90287094d447d94e',
fd6ca382
YCH
1670 'info_dict': {
1671 'id': '10153467542406923',
1672 'ext': 'mp4',
1673 'title': 'Facebook video #10153467542406923',
1674 },
7deef1ba
YCH
1675 },
1676 # Wordpress "YouTube Video Importer" plugin
1677 {
1678 'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
dbf0157a 1679 'md5': 'd16797741b560b485194eddda8121b48',
7deef1ba
YCH
1680 'info_dict': {
1681 'id': 'HNTXWDXV9Is',
1682 'ext': 'mp4',
1683 'title': 'Blue Devils Drumline Stanford lot 2016',
1684 'upload_date': '20160627',
1685 'uploader_id': 'GENOCIDE8GENERAL10',
1686 'uploader': 'cylus cyrus',
1687 },
1688 },
81953d1a
RA
1689 {
1690 # video stored on custom kaltura server
1691 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
1692 'md5': '537617d06e64dfed891fa1593c4b30cc',
1693 'info_dict': {
1694 'id': '0_1iotm5bh',
1695 'ext': 'mp4',
1696 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
1697 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
1698 'uploader_id': 'videos.expansion@el-mundo.net',
1699 'upload_date': '20150429',
1700 'timestamp': 1430303472,
1701 },
1702 'add_ie': ['Kaltura'],
1703 },
c03adf90
YCH
1704 {
1705 # Non-standard Vimeo embed
1706 'url': 'https://openclassrooms.com/courses/understanding-the-web',
1707 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
1708 'info_dict': {
1709 'id': '148867247',
1710 'ext': 'mp4',
1711 'title': 'Understanding the web - Teaser',
1712 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
1713 'upload_date': '20151214',
1714 'uploader': 'OpenClassrooms',
1715 'uploader_id': 'openclassrooms',
1716 },
1717 'add_ie': ['Vimeo'],
1718 },
a5ff05df
S
1719 {
1720 # generic vimeo embed that requires original URL passed as Referer
1721 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
1722 'only_matching': True,
1723 },
1979969f
S
1724 {
1725 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
1726 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
1727 'info_dict': {
1728 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
1729 'ext': 'mp4',
1730 'title': 'Big Buck Bunny',
1731 'description': 'Royalty free test video',
1732 'timestamp': 1432816365,
1733 'upload_date': '20150528',
1734 'is_live': False,
1735 },
1736 'params': {
1737 'skip_download': True,
1738 },
1739 'add_ie': [ArkenaIE.ie_key()],
1740 },
2a1321a2
S
1741 {
1742 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
1743 'info_dict': {
1744 'id': '1c7141f46c',
1745 'ext': 'mp4',
1746 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
1747 },
1748 'params': {
1749 'skip_download': True,
1750 },
1751 'add_ie': [Vbox7IE.ie_key()],
1752 },
b0c8f2e9
DR
1753 {
1754 # DBTV embeds
1755 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
fd3ec986
S
1756 'info_dict': {
1757 'id': '43254897',
1758 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
1759 },
b0c8f2e9
DR
1760 'playlist_mincount': 3,
1761 },
e186a9ec
S
1762 {
1763 # Videa embeds
1764 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
1765 'info_dict': {
1766 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
1767 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
1768 },
1769 'playlist_mincount': 2,
1770 },
b687c85e
S
1771 {
1772 # 20 minuten embed
1773 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
1774 'info_dict': {
1775 'id': '523629',
1776 'ext': 'mp4',
1777 'title': 'So kommen Sie bei Eis und Schnee sicher an',
1778 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
1779 },
1780 'params': {
1781 'skip_download': True,
1782 },
1783 'add_ie': [TwentyMinutenIE.ie_key()],
6ef3e65a
S
1784 },
1785 {
1786 # VideoPress embed
1787 'url': 'https://en.support.wordpress.com/videopress/',
1788 'info_dict': {
1789 'id': 'OcobLTqC',
1790 'ext': 'm4v',
1791 'title': 'IMG_5786',
1792 'timestamp': 1435711927,
1793 'upload_date': '20150701',
1794 },
1795 'params': {
1796 'skip_download': True,
1797 },
1798 'add_ie': [VideoPressIE.ie_key()],
fef51645 1799 },
eb3079b6
S
1800 {
1801 # Rutube embed
1802 'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
1803 'info_dict': {
1804 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
1805 'ext': 'flv',
1806 'title': 'Магаззино: Казань 2',
1807 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
1808 'uploader': 'Магаззино',
1809 'upload_date': '20170228',
1810 'uploader_id': '996642',
1811 },
1812 'params': {
1813 'skip_download': True,
1814 },
1815 'add_ie': [RutubeIE.ie_key()],
1816 },
fef51645
YCH
1817 {
1818 # ThePlatform embedded with whitespaces in URLs
1819 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
1820 'only_matching': True,
1821 },
97952bdb
JH
1822 {
1823 # Senate ISVP iframe https
1824 'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
1825 'md5': 'fb8c70b0b515e5037981a2492099aab8',
1826 'info_dict': {
1827 'id': 'govtaff020316',
1828 'ext': 'mp4',
1829 'title': 'Integrated Senate Video Player',
1830 },
1831 'add_ie': [SenateISVPIE.ie_key()],
1832 },
ab87c260
S
1833 {
1834 # Limelight embeds (1 channel embed + 4 media embeds)
1835 'url': 'http://www.sedona.com/FacilitatorTraining2017',
1836 'info_dict': {
1837 'id': 'FacilitatorTraining2017',
1838 'title': 'Facilitator Training 2017',
1839 },
1840 'playlist_mincount': 5,
1841 },
eb02940c
S
1842 {
1843 # Limelight embed (LimelightPlayerUtil.embed)
1844 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
1845 'info_dict': {
1846 'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
1847 'ext': 'mp4',
1848 'title': '07448641',
1849 'timestamp': 1499890639,
1850 'upload_date': '20170712',
1851 },
1852 'params': {
1853 'skip_download': True,
1854 },
1855 'add_ie': ['LimelightMedia'],
1856 },
7986c3ab
S
1857 {
1858 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
1859 'info_dict': {
1860 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
1861 'title': 'Standoff with Walnut Creek murder suspect ends',
1862 'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
1863 },
1864 'playlist_mincount': 4,
1865 },
55719459
JH
1866 {
1867 # WashingtonPost embed
1868 'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
1869 'info_dict': {
1870 'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
1871 'ext': 'mp4',
1872 'title': "No one has seen the drama series based on Trump's life \u2014 until now",
1873 'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
1874 'timestamp': 1455216756,
1875 'uploader': 'The Washington Post',
1876 'upload_date': '20160211',
1877 },
1878 'add_ie': [WashingtonPostIE.ie_key()],
1879 },
2b8e6a68
S
1880 {
1881 # Mediaset embed
1882 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
1883 'info_dict': {
1884 'id': '720642',
1885 'ext': 'mp4',
1886 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
1887 },
1888 'params': {
1889 'skip_download': True,
1890 },
1891 'add_ie': [MediasetIE.ie_key()],
1892 },
73cf76a9
S
1893 {
1894 # JOJ.sk embeds
1895 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
1896 'info_dict': {
1897 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
1898 'title': 'Slovenskom sa prehnala vlna silných búrok',
1899 },
1900 'playlist_mincount': 5,
1901 'add_ie': [JojIE.ie_key()],
1902 },
4328ddf8
S
1903 {
1904 # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
1905 'url': 'https://tvrain.ru/amp/418921/',
1906 'md5': 'cc00413936695987e8de148b67d14f1d',
1907 'info_dict': {
1908 'id': '418921',
1909 'ext': 'mp4',
1910 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
1911 },
1912 },
41918eaa 1913 {
1914 # vzaar embed
1663bd6e
S
1915 'url': 'http://help.vzaar.com/article/165-embedding-video',
1916 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
41918eaa 1917 'info_dict': {
1663bd6e 1918 'id': '8707641',
41918eaa 1919 'ext': 'mp4',
1663bd6e 1920 'title': 'Building A Business Online: Principal Chairs Q & A',
41918eaa 1921 },
1922 },
9ce1ac40 1923 {
1924 # multiple HTML5 videos on one page
1925 'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
1926 'info_dict': {
1927 'id': 'keyscenarios',
1928 'title': 'Rescue Kit 14 Free Edition - Getting started',
1929 },
1930 'playlist_count': 4,
0987f2dd
T
1931 },
1932 {
1933 # vshare embed
1934 'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
1935 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
1936 'info_dict': {
1937 'id': '0f64ce6',
1938 'title': 'vl14062007715967',
1939 'ext': 'mp4',
1940 }
2ca7ed41
S
1941 },
1942 {
1943 'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
1944 'md5': 'aecd089f55b1cb5a59032cb049d3a356',
1945 'info_dict': {
1946 'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
1947 'ext': 'mp4',
1948 'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
1949 'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
1950 'timestamp': 1474354800,
1951 'upload_date': '20160920',
1952 }
7d540621
S
1953 },
1954 {
1955 'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
1956 'info_dict': {
1957 'id': '1731611',
1958 'ext': 'mp4',
1959 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
1960 'description': 'md5:eb5f23826a027ba95277d105f248b825',
1961 'timestamp': 1516100691,
1962 'upload_date': '20180116',
1963 },
1964 'params': {
1965 'skip_download': True,
1966 },
1967 'add_ie': [SpringboardPlatformIE.ie_key()],
ea696249
S
1968 },
1969 {
1970 'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
1971 'info_dict': {
1972 'id': 'uPDB5I9wfp8',
1973 'ext': 'webm',
1974 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
1975 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
1976 'upload_date': '20160219',
1977 'uploader': 'Pocoyo - Português (BR)',
1978 'uploader_id': 'PocoyoBrazil',
1979 },
1980 'add_ie': [YoutubeIE.ie_key()],
1981 'params': {
1982 'skip_download': True,
1983 },
4c780fbd
S
1984 },
1985 {
1986 'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
1987 'info_dict': {
1988 'id': 'vMDE4NzI1Mjgt690b',
1989 'ext': 'mp4',
1990 'title': 'Котята',
1991 },
1992 'add_ie': [YapFilesIE.ie_key()],
1993 'params': {
1994 'skip_download': True,
1995 },
1fc37ca3 1996 },
660a230b
S
1997 {
1998 # CloudflareStream embed
1999 'url': 'https://www.cloudflare.com/products/cloudflare-stream/',
2000 'info_dict': {
2001 'id': '31c9291ab41fac05471db4e73aa11717',
2002 'ext': 'mp4',
2003 'title': '31c9291ab41fac05471db4e73aa11717',
2004 },
2005 'add_ie': [CloudflareStreamIE.ie_key()],
2006 'params': {
2007 'skip_download': True,
2008 },
2009 },
6bd499e8
S
2010 {
2011 # PeerTube embed
2012 'url': 'https://joinpeertube.org/fr/home/',
2013 'info_dict': {
2014 'id': 'home',
2015 'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
2016 },
2017 'playlist_count': 2,
2018 },
aee36ca8
S
2019 {
2020 # Indavideo embed
2021 'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
2022 'info_dict': {
2023 'id': '1693903',
2024 'ext': 'mp4',
2025 'title': 'Így kell otthon hamburgert sütni',
2026 'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
2027 'timestamp': 1426330212,
2028 'upload_date': '20150314',
2029 'uploader': 'StreetKitchen',
2030 'uploader_id': '546363',
2031 },
2032 'add_ie': [IndavideoEmbedIE.ie_key()],
2033 'params': {
2034 'skip_download': True,
2035 },
2036 },
cfd7f2a6
S
2037 {
2038 # APA embed via JWPlatform embed
2039 'url': 'http://www.vol.at/blue-man-group/5593454',
2040 'info_dict': {
2041 'id': 'jjv85FdZ',
2042 'ext': 'mp4',
2043 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
2044 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2045 'thumbnail': r're:^https?://.*\.jpg$',
2046 'duration': 254,
2047 'timestamp': 1519211149,
2048 'upload_date': '20180221',
2049 },
2050 'params': {
2051 'skip_download': True,
2052 },
2053 },
1fc37ca3
SO
2054 {
2055 'url': 'http://share-videos.se/auto/video/83645793?uid=13',
2056 'md5': 'b68d276de422ab07ee1d49388103f457',
2057 'info_dict': {
2058 'id': '83645793',
2059 'title': 'Lock up and get excited',
1fc37ca3 2060 'ext': 'mp4'
d3431dcb
S
2061 },
2062 'skip': 'TODO: fix nested playlists processing in tests',
2063 },
9d1b2138
S
2064 {
2065 # Viqeo embeds
2066 'url': 'https://viqeo.tv/',
2067 'info_dict': {
2068 'id': 'viqeo',
2069 'title': 'All-new video platform',
2070 },
2071 'playlist_count': 6,
2072 },
e0b6e988
S
2073 {
2074 # videojs embed
2075 'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
2076 'info_dict': {
2077 'id': 'shell',
2078 'ext': 'mp4',
2079 'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
2080 'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
2081 'thumbnail': r're:^https?://.*\.jpg$',
2082 },
2083 'params': {
2084 'skip_download': True,
2085 },
2086 'expected_warnings': ['Failed to download MPD manifest'],
2087 },
6e6b70d6
S
2088 # {
2089 # # TODO: find another test
2090 # # http://schema.org/VideoObject
2091 # 'url': 'https://flipagram.com/f/nyvTSJMKId',
2092 # 'md5': '888dcf08b7ea671381f00fab74692755',
2093 # 'info_dict': {
2094 # 'id': 'nyvTSJMKId',
2095 # 'ext': 'mp4',
2096 # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
2097 # 'description': '#love for cats.',
2098 # 'timestamp': 1461244995,
2099 # 'upload_date': '20160421',
2100 # },
2101 # 'params': {
2102 # 'force_generic_extractor': True,
2103 # },
2104 # }
cfe50f04 2105 ]
9b122384 2106
9b122384
PH
def report_following_redirect(self, new_url):
    """Tell the user that the request was redirected and is continued at new_url."""
    notice = '[redirect] Following redirect to %s' % new_url
    self._downloader.to_screen(notice)
9b122384 2110
4fc946b5
PH
2111 def _extract_rss(self, url, video_id, doc):
2112 playlist_title = doc.find('./channel/title').text
2113 playlist_desc_el = doc.find('./channel/description')
2114 playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
2115
76c73715
PH
2116 entries = []
2117 for it in doc.findall('./channel/item'):
01aec848
BG
2118 next_url = None
2119 enclosure_nodes = it.findall('./enclosure')
2120 for e in enclosure_nodes:
2121 next_url = e.attrib.get('url')
2122 if next_url:
2123 break
2124
76c73715 2125 if not next_url:
01aec848 2126 next_url = xpath_text(it, 'link', fatal=False)
76c73715
PH
2127
2128 if not next_url:
2129 continue
2130
2131 entries.append({
413c1f8e 2132 '_type': 'url_transparent',
76c73715
PH
2133 'url': next_url,
2134 'title': it.find('title').text,
2135 })
4fc946b5
PH
2136
2137 return {
2138 '_type': 'playlist',
2139 'id': url,
2140 'title': playlist_title,
2141 'description': playlist_desc,
2142 'entries': entries,
2143 }
2144
c8e9a235
PH
2145 def _extract_camtasia(self, url, video_id, webpage):
2146 """ Returns None if no camtasia video can be found. """
2147
2148 camtasia_cfg = self._search_regex(
2149 r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
2150 webpage, 'camtasia configuration file', default=None)
2151 if camtasia_cfg is None:
2152 return None
2153
2154 title = self._html_search_meta('DC.title', webpage, fatal=True)
2155
2156 camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
2157 camtasia_cfg = self._download_xml(
2158 camtasia_url, video_id,
2159 note='Downloading camtasia configuration',
2160 errnote='Failed to download camtasia configuration')
2161 fileset_node = camtasia_cfg.find('./playlist/array/fileset')
2162
2163 entries = []
2164 for n in fileset_node.getchildren():
2165 url_n = n.find('./uri')
2166 if url_n is None:
2167 continue
2168
2169 entries.append({
2170 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
2171 'title': '%s - %s' % (title, n.tag),
2172 'url': compat_urlparse.urljoin(url, url_n.text),
2173 'duration': float_or_none(n.find('./duration').text),
2174 })
2175
2176 return {
2177 '_type': 'playlist',
2178 'entries': entries,
2179 'title': title,
2180 }
2181
9b122384 2182 def _real_extract(self, url):
ebd3c7b3
PH
2183 if url.startswith('//'):
2184 return {
2185 '_type': 'url',
20991253 2186 'url': self.http_scheme() + url,
ebd3c7b3
PH
2187 }
2188
a7130543
JMF
2189 parsed_url = compat_urlparse.urlparse(url)
2190 if not parsed_url.scheme:
04b4d394
PH
2191 default_search = self._downloader.params.get('default_search')
2192 if default_search is None:
1f7ccb90 2193 default_search = 'fixup_error'
04b4d394 2194
1f7ccb90 2195 if default_search in ('auto', 'auto_warning', 'fixup_error'):
04b4d394
PH
2196 if '/' in url:
2197 self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
2198 return self.url_result('http://' + url)
1f7ccb90 2199 elif default_search != 'fixup_error':
9c1fc022 2200 if default_search == 'auto_warning':
0e67ab0d
PH
2201 if re.match(r'^(?:url|URL)$', url):
2202 raise ExtractorError(
2203 'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
2204 expected=True)
2205 else:
2206 self._downloader.report_warning(
7571c02c 2207 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
04b4d394 2208 return self.url_result('ytsearch:' + url)
1f7ccb90
PH
2209
2210 if default_search in ('error', 'fixup_error'):
7571c02c 2211 raise ExtractorError(
b74e86f4
PH
2212 '%r is not a valid URL. '
2213 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
2214 % (url, url), expected=True)
04b4d394 2215 else:
f2f2c0c2
PH
2216 if ':' not in default_search:
2217 default_search += ':'
04b4d394 2218 return self.url_result(default_search + url)
4d54ef20
PH
2219
2220 url, smuggled_data = unsmuggle_url(url)
2221 force_videoid = None
d6e6a422 2222 is_intentional = smuggled_data and smuggled_data.get('to_generic')
4d54ef20
PH
2223 if smuggled_data and 'force_videoid' in smuggled_data:
2224 force_videoid = smuggled_data['force_videoid']
2225 video_id = force_videoid
2226 else:
9dcd6fd3 2227 video_id = self._generic_id(url)
3d83a1ae 2228
79649588 2229 self.to_screen('%s: Requesting header' % video_id)
c1d1facd 2230
ebab4520 2231 head_req = HEADRequest(url)
23be51d8 2232 head_response = self._request_webpage(
ebab4520
PH
2233 head_req, video_id,
2234 note=False, errnote='Could not send HEAD request to %s' % url,
2235 fatal=False)
42393ce2 2236
23be51d8 2237 if head_response is not False:
42393ce2 2238 # Check for redirect
5551d771 2239 new_url = compat_str(head_response.geturl())
42393ce2
PH
2240 if url != new_url:
2241 self.report_following_redirect(new_url)
4d54ef20
PH
2242 if force_videoid:
2243 new_url = smuggle_url(
2244 new_url, {'force_videoid': force_videoid})
cecaaf3f 2245 return self.url_result(new_url)
42393ce2 2246
23be51d8
PH
2247 full_response = None
2248 if head_response is False:
5c2266df 2249 request = sanitized_Request(url)
58bde34a
S
2250 request.add_header('Accept-Encoding', '*')
2251 full_response = self._request_webpage(request, video_id)
23be51d8
PH
2252 head_response = full_response
2253
f930e0c7
S
2254 info_dict = {
2255 'id': video_id,
9dcd6fd3 2256 'title': self._generic_title(url),
303dcdb9 2257 'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
f930e0c7
S
2258 }
2259
23be51d8 2260 # Check for direct link to a video
955737b2 2261 content_type = head_response.headers.get('Content-Type', '').lower()
263eff95 2262 m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
23be51d8 2263 if m:
4e363703 2264 format_id = compat_str(m.group('format_id'))
f930e0c7 2265 if format_id.endswith('mpegurl'):
eadc3ccd 2266 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
f930e0c7
S
2267 elif format_id == 'f4m':
2268 formats = self._extract_f4m_formats(url, video_id)
eadc3ccd 2269 else:
2270 formats = [{
4e363703 2271 'format_id': format_id,
eadc3ccd 2272 'url': url,
2273 'vcodec': 'none' if m.group('type') == 'audio' else None
2274 }]
de6c51e8 2275 info_dict['direct'] = True
19dbaeec 2276 self._sort_formats(formats)
de6c51e8 2277 info_dict['formats'] = formats
f930e0c7 2278 return info_dict
42393ce2 2279
d6e6a422 2280 if not self._downloader.params.get('test', False) and not is_intentional:
2fece970
S
2281 force = self._downloader.params.get('force_generic_extractor', False)
2282 self._downloader.report_warning(
2283 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
d6e6a422 2284
4e262a88 2285 if not full_response:
5c2266df 2286 request = sanitized_Request(url)
58bde34a
S
2287 # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
2288 # making it impossible to download only chunk of the file (yet we need only 512kB to
2289 # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
2290 # that will always result in downloading the whole file that is not desirable.
2291 # Therefore for extraction pass we have to override Accept-Encoding to any in order
2292 # to accept raw bytes and being able to download only a chunk.
2293 # It may probably better to solve this by checking Content-Type for application/octet-stream
2294 # after HEAD request finishes, but not sure if we can rely on this.
2295 request.add_header('Accept-Encoding', '*')
2296 full_response = self._request_webpage(request, video_id)
4e262a88 2297
5940862d
S
2298 first_bytes = full_response.read(512)
2299
2300 # Is it an M3U playlist?
0d769bcb 2301 if first_bytes.startswith(b'#EXTM3U'):
5940862d 2302 info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
19dbaeec 2303 self._sort_formats(info_dict['formats'])
5940862d
S
2304 return info_dict
2305
4e262a88
PH
2306 # Maybe it's a direct link to a video?
2307 # Be careful not to download the whole thing!
61ca9a80 2308 if not is_html(first_bytes):
4e262a88
PH
2309 self._downloader.report_warning(
2310 'URL could be a direct video link, returning it as such.')
f930e0c7 2311 info_dict.update({
4e262a88
PH
2312 'direct': True,
2313 'url': url,
f930e0c7
S
2314 })
2315 return info_dict
4e262a88
PH
2316
2317 webpage = self._webpage_read_content(
2318 full_response, url, video_id, prefix=first_bytes)
2319
9b122384 2320 self.report_extraction(video_id)
887c6acd 2321
1b840245 2322 # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
4fc946b5 2323 try:
f7854627 2324 doc = compat_etree_fromstring(webpage.encode('utf-8'))
4fc946b5
PH
2325 if doc.tag == 'rss':
2326 return self._extract_rss(url, video_id, doc)
cc99a77a
S
2327 elif doc.tag == 'SmoothStreamingMedia':
2328 info_dict['formats'] = self._parse_ism_formats(doc, url)
2329 self._sort_formats(info_dict['formats'])
2330 return info_dict
e5e8d20a 2331 elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
19dbaeec
S
2332 smil = self._parse_smil(doc, url, video_id)
2333 self._sort_formats(smil['formats'])
2334 return smil
729accb4 2335 elif doc.tag == '{http://xspf.org/ns/0/}playlist':
96b8b9ab 2336 return self.playlist_result(
47a5cb77
S
2337 self._parse_xspf(
2338 doc, video_id, xspf_url=url,
2339 xspf_base_url=compat_str(full_response.geturl())),
96b8b9ab 2340 video_id)
1b840245 2341 elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
f930e0c7 2342 info_dict['formats'] = self._parse_mpd_formats(
d3f8b76b 2343 doc,
5551d771 2344 mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
86f4d14f 2345 mpd_url=url)
19dbaeec 2346 self._sort_formats(info_dict['formats'])
f930e0c7
S
2347 return info_dict
2348 elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
2349 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
19dbaeec 2350 self._sort_formats(info_dict['formats'])
f930e0c7 2351 return info_dict
f7300c5c 2352 except compat_xml_parse_error:
4fc946b5
PH
2353 pass
2354
c8e9a235
PH
2355 # Is it a Camtasia project?
2356 camtasia_res = self._extract_camtasia(url, video_id, webpage)
2357 if camtasia_res is not None:
2358 return camtasia_res
2359
14390730
S
2360 # Sometimes embedded video player is hidden behind percent encoding
2361 # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
2362 # Unescaping the whole page allows to handle those cases in a generic way
45eedbe5 2363 webpage = compat_urllib_parse_unquote(webpage)
1f7659db 2364
887c6acd
PH
2365 # it's tempting to parse this further, but you would
2366 # have to take into account all the variations like
2367 # Video Title - Site Name
2368 # Site Name | Video Title
2369 # Video Title - Tagline | Site Name
2370 # and so on and so forth; it's just not practical
6f41b2bc
S
2371 video_title = self._og_search_title(
2372 webpage, default=None) or self._html_search_regex(
79649588
PH
2373 r'(?s)<title>(.*?)</title>', webpage, 'video title',
2374 default='video')
ef4fd848 2375
4d805e06
PH
2376 # Try to detect age limit automatically
2377 age_limit = self._rta_search(webpage)
2378 # And then there are the jokers who advertise that they use RTA,
2379 # but actually don't.
2380 AGE_LIMIT_MARKERS = [
197224b7 2381 r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
4d805e06
PH
2382 ]
2383 if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
2384 age_limit = 18
2385
ef4fd848
PH
2386 # video uploader is domain name
2387 video_uploader = self._search_regex(
79649588 2388 r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
887c6acd 2389
6f41b2bc
S
2390 video_description = self._og_search_description(webpage, default=None)
2391 video_thumbnail = self._og_search_thumbnail(webpage, default=None)
2392
b311b0ea
S
2393 info_dict.update({
2394 'title': video_title,
2395 'description': video_description,
2396 'thumbnail': video_thumbnail,
2397 'age_limit': age_limit,
2398 })
2399
1f4b722b 2400 # Look for Brightcove Legacy Studio embeds
4fcaa4f4 2401 bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
99877772 2402 if bc_urls:
99877772
PH
2403 entries = [{
2404 '_type': 'url',
2405 'url': smuggle_url(bc_url, {'Referer': url}),
3b7d9aa4 2406 'ie_key': 'BrightcoveLegacy'
99877772
PH
2407 } for bc_url in bc_urls]
2408
2409 return {
2410 '_type': 'playlist',
2411 'title': video_title,
2412 'id': video_id,
2413 'entries': entries,
2414 }
cfe50f04 2415
f6519f89 2416 # Look for Brightcove New Studio embeds
0254f93b 2417 bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
f6519f89 2418 if bc_urls:
5399ab3f
S
2419 return self.playlist_from_matches(
2420 bc_urls, video_id, video_title,
2421 getter=lambda x: smuggle_url(x, {'referrer': url}),
2422 ie='BrightcoveNew')
ed126900 2423
4e826cd9
S
2424 # Look for Nexx embeds
2425 nexx_urls = NexxIE._extract_urls(webpage)
2426 if nexx_urls:
2427 return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
2428
3f59b015
S
2429 # Look for Nexx iFrame embeds
2430 nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
2431 if nexx_embed_urls:
2432 return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
2433
4d8819d2
S
2434 # Look for ThePlatform embeds
2435 tp_urls = ThePlatformIE._extract_urls(webpage)
2436 if tp_urls:
46b18f23 2437 return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
4d8819d2 2438
48a5eabc
S
2439 # Look for Vessel embeds
2440 vessel_urls = VesselIE._extract_urls(webpage)
2441 if vessel_urls:
46b18f23 2442 return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
48a5eabc 2443
59b8ab58
PH
2444 # Look for embedded rtl.nl player
2445 matches = re.findall(
2637fadc 2446 r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
59b8ab58
PH
2447 webpage)
2448 if matches:
46b18f23 2449 return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
59b8ab58 2450
09b9c45e
S
2451 vimeo_urls = VimeoIE._extract_urls(url, webpage)
2452 if vimeo_urls:
46b18f23 2453 return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
7115ca84 2454
a1b85269
YCH
2455 vid_me_embed_url = self._search_regex(
2456 r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
2457 webpage, 'vid.me embed', default=None)
2458 if vid_me_embed_url is not None:
2459 return self.url_result(vid_me_embed_url, 'Vidme')
2460
66c9fa36
S
2461 # Look for YouTube embeds
2462 youtube_urls = YoutubeIE._extract_urls(webpage)
2463 if youtube_urls:
46b18f23 2464 return self.playlist_from_matches(
66c9fa36 2465 youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
7deef1ba 2466
ad213a1d 2467 matches = DailymotionIE._extract_urls(webpage)
355e4fd0 2468 if matches:
46b18f23 2469 return self.playlist_from_matches(matches, video_id, video_title)
355e4fd0 2470
8489578d
NJ
2471 # Look for embedded Dailymotion playlist player (#3822)
2472 m = re.search(
2473 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
2474 if m:
2475 playlists = re.findall(
2476 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
2477 if playlists:
46b18f23
JH
2478 return self.playlist_from_matches(
2479 playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
8489578d 2480
71a1db89
S
2481 # Look for DailyMail embeds
2482 dailymail_urls = DailyMailIE._extract_urls(webpage)
2483 if dailymail_urls:
2484 return self.playlist_from_matches(
2485 dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
2486
ef4fd848 2487 # Look for embedded Wistia player
58bb4402
S
2488 wistia_url = WistiaIE._extract_url(webpage)
2489 if wistia_url:
ef4fd848
PH
2490 return {
2491 '_type': 'url_transparent',
58bb4402
S
2492 'url': self._proto_relative_url(wistia_url),
2493 'ie_key': WistiaIE.ie_key(),
ef4fd848 2494 'uploader': video_uploader,
ef4fd848 2495 }
5f6a1245 2496
bab19a8e
S
2497 # Look for SVT player
2498 svt_url = SVTIE._extract_url(webpage)
2499 if svt_url:
2500 return self.url_result(svt_url, 'SVT')
2501
c19f7764
JMF
2502 # Look for Bandcamp pages with custom domain
2503 mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
2504 if mobj is not None:
2505 burl = unescapeHTML(mobj.group(1))
09804265
JMF
2506 # Don't set the extractor because it can be a track url or an album
2507 return self.url_result(burl)
c19f7764 2508
f25571ff
PH
2509 # Look for embedded Vevo player
2510 mobj = re.search(
2511 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
2512 if mobj is not None:
2513 return self.url_result(mobj.group('url'))
796df3c6
S
2514
2515 # Look for embedded Viddler player
cb454b33
S
2516 mobj = re.search(
2517 r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
2518 webpage)
796df3c6
S
2519 if mobj is not None:
2520 return self.url_result(mobj.group('url'))
f25571ff 2521
3378d67a
S
2522 # Look for NYTimes player
2523 mobj = re.search(
2524 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
2525 webpage)
2526 if mobj is not None:
2527 return self.url_result(mobj.group('url'))
2528
cefdf970
S
2529 # Look for Libsyn player
2530 mobj = re.search(
2531 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
2532 if mobj is not None:
2533 return self.url_result(mobj.group('url'))
2534
c0d0b01f 2535 # Look for Ooyala videos
8a37aa15 2536 mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
f076b638 2537 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
198d4cb4 2538 re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
edfcf7ab
YCH
2539 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
2540 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
c0d0b01f 2541 if mobj is not None:
9837cb75
RA
2542 embed_token = self._search_regex(
2543 r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
2544 webpage, 'ooyala embed token', default=None)
2545 return OoyalaIE._build_url_result(smuggle_url(
2546 mobj.group('ec'), {
2547 'domain': url,
2548 'embed_token': embed_token,
2549 }))
c0d0b01f 2550
f076b638 2551 # Look for multiple Ooyala embeds on SBN network websites
2552 mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
2553 if mobj is not None:
2554 embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
2555 if embeds:
46b18f23
JH
2556 return self.playlist_from_matches(
2557 embeds, video_id, video_title,
2558 getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
f076b638 2559
aa94a6d3 2560 # Look for Aparat videos
48099643 2561 mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
aa94a6d3
PH
2562 if mobj is not None:
2563 return self.url_result(mobj.group(1), 'Aparat')
2564
c93c2ab1 2565 # Look for MPORA videos
c3f51436 2566 mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
c93c2ab1
PH
2567 if mobj is not None:
2568 return self.url_result(mobj.group(1), 'Mpora')
5f59ee79 2569
15c0e8e7 2570 # Look for embedded NovaMov-based player
8f89e687 2571 mobj = re.search(
8dfa187b 2572 r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
15c0e8e7
S
2573 (?P<url>http://(?:(?:embed|www)\.)?
2574 (?:novamov\.com|
2575 nowvideo\.(?:ch|sx|eu|at|ag|co)|
2576 videoweed\.(?:es|com)|
2577 movshare\.(?:net|sx|ag)|
2578 divxstage\.(?:eu|net|ch|co|at|ag))
2579 /embed\.php.+?)\1''', webpage)
8f89e687 2580 if mobj is not None:
15c0e8e7 2581 return self.url_result(mobj.group('url'))
50f56607 2582
9834872b 2583 # Look for embedded Facebook player
0646e34c
S
2584 facebook_urls = FacebookIE._extract_urls(webpage)
2585 if facebook_urls:
2586 return self.playlist_from_matches(facebook_urls, video_id, video_title)
9834872b 2587
ca97a56e
S
2588 # Look for embedded VK player
2589 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
2590 if mobj is not None:
2591 return self.url_result(mobj.group('url'), 'VK')
2592
33d4fdab
S
2593 # Look for embedded Odnoklassniki player
2594 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
2595 if mobj is not None:
2596 return self.url_result(mobj.group('url'), 'Odnoklassniki')
2597
0364fa8b
S
2598 # Look for embedded ivi player
2599 mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
2600 if mobj is not None:
2601 return self.url_result(mobj.group('url'), 'Ivi')
2602
db1f3888
PH
2603 # Look for embedded Huffington Post player
2604 mobj = re.search(
c3f51436 2605 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
db1f3888
PH
2606 if mobj is not None:
2607 return self.url_result(mobj.group('url'), 'HuffPost')
2608
1b86cc41 2609 # Look for embed.ly
2610 mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
2611 if mobj is not None:
2612 return self.url_result(mobj.group('url'))
2613 mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
2614 if mobj is not None:
f7e6f7fa 2615 return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1b86cc41 2616
60cc4dc4
PH
2617 # Look for funnyordie embed
2618 matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
2619 if matches:
46b18f23
JH
2620 return self.playlist_from_matches(
2621 matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
60cc4dc4 2622
db546cf8
S
2623 # Look for BBC iPlayer embed
2624 matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
2625 if matches:
46b18f23 2626 return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
db546cf8 2627
93d020dd
S
2628 # Look for embedded RUTV player
2629 rutv_url = RUTVIE._extract_url(webpage)
2630 if rutv_url:
2631 return self.url_result(rutv_url, 'RUTV')
2632
494f20cb 2633 # Look for embedded TVC player
b8599718
S
2634 tvc_url = TVCIE._extract_url(webpage)
2635 if tvc_url:
2636 return self.url_result(tvc_url, 'TVC')
494f20cb 2637
d40a3b5b
S
2638 # Look for embedded SportBox player
2639 sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
2640 if sportbox_urls:
46b18f23 2641 return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
d40a3b5b 2642
2bb5b6d0
S
2643 # Look for embedded XHamster player
2644 xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
2645 if xhamster_urls:
46b18f23 2646 return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
2bb5b6d0 2647
2c9ca782
S
2648 # Look for embedded TNAFlixNetwork player
2649 tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
2650 if tnaflix_urls:
46b18f23 2651 return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
2c9ca782 2652
b52c9ef1
S
2653 # Look for embedded PornHub player
2654 pornhub_urls = PornHubIE._extract_urls(webpage)
2655 if pornhub_urls:
46b18f23 2656 return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
b52c9ef1 2657
37e7a71c
S
2658 # Look for embedded DrTuber player
2659 drtuber_urls = DrTuberIE._extract_urls(webpage)
2660 if drtuber_urls:
46b18f23 2661 return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
37e7a71c 2662
e28ed498
S
2663 # Look for embedded RedTube player
2664 redtube_urls = RedTubeIE._extract_urls(webpage)
2665 if redtube_urls:
46b18f23 2666 return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
e28ed498 2667
06993715
S
2668 # Look for embedded Tube8 player
2669 tube8_urls = Tube8IE._extract_urls(webpage)
2670 if tube8_urls:
2671 return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
2672
9872d311
S
2673 # Look for embedded Tvigle player
2674 mobj = re.search(
2675 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
2676 if mobj is not None:
2677 return self.url_result(mobj.group('url'), 'Tvigle')
2678
7e2ede98
JMF
2679 # Look for embedded TED player
2680 mobj = re.search(
d7cc31b6 2681 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
7e2ede98
JMF
2682 if mobj is not None:
2683 return self.url_result(mobj.group('url'), 'TED')
2684
5c386252 2685 # Look for embedded Ustream videos
d77ac737
YCH
2686 ustream_url = UstreamIE._extract_url(webpage)
2687 if ustream_url:
2688 return self.url_result(ustream_url, UstreamIE.ie_key())
5c386252 2689
893f8832
PH
2690 # Look for embedded arte.tv player
2691 mobj = re.search(
134c207e 2692 r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
893f8832
PH
2693 webpage)
2694 if mobj is not None:
2695 return self.url_result(mobj.group('url'), 'ArteTVEmbed')
2696
cbd55ade
S
2697 # Look for embedded francetv player
2698 mobj = re.search(
2699 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
2700 webpage)
2701 if mobj is not None:
2702 return self.url_result(mobj.group('url'))
2703
cb3ac1c6
S
2704 # Look for embedded smotri.com player
2705 smotri_url = SmotriIE._extract_url(webpage)
2706 if smotri_url:
2707 return self.url_result(smotri_url, 'Smotri')
2708
e6c2d9ad 2709 # Look for embedded Myvi.ru player
6dd94d3a 2710 myvi_url = MyviIE._extract_url(webpage)
e6c2d9ad
S
2711 if myvi_url:
2712 return self.url_result(myvi_url)
2713
dfb1b146 2714 # Look for embedded soundcloud player
94aae015
S
2715 soundcloud_urls = SoundcloudIE._extract_urls(webpage)
2716 if soundcloud_urls:
46b18f23 2717 return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
20991253 2718
027e2312
S
2719 # Look for tunein player
2720 tunein_urls = TuneInBaseIE._extract_urls(webpage)
2721 if tunein_urls:
46b18f23 2722 return self.playlist_from_matches(tunein_urls, video_id, video_title)
027e2312 2723
c5cd249e 2724 # Look for embedded mtvservices player
46fde8a1
S
2725 mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
2726 if mtvservices_url:
2727 return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
c5cd249e 2728
49807b4a
S
2729 # Look for embedded yahoo player
2730 mobj = re.search(
2731 r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
2732 webpage)
2733 if mobj is not None:
2734 return self.url_result(mobj.group('url'), 'Yahoo')
2735
2ef6fcb5
PH
2736 # Look for embedded sbs.com.au player
2737 mobj = re.search(
e98b8e79
PH
2738 r'''(?x)
2739 (?:
2740 <meta\s+property="og:video"\s+content=|
2741 <iframe[^>]+?src=
2742 )
2743 (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2ef6fcb5
PH
2744 webpage)
2745 if mobj is not None:
2746 return self.url_result(mobj.group('url'), 'SBS')
2747
42bdd9d0
PH
2748 # Look for embedded Cinchcast player
2749 mobj = re.search(
2750 r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
2751 webpage)
2752 if mobj is not None:
2753 return self.url_result(mobj.group('url'), 'Cinchcast')
2754
1a94ff68 2755 mobj = re.search(
5263cdfc 2756 r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1a94ff68 2757 webpage)
8001607e
YCH
2758 if not mobj:
2759 mobj = re.search(
2760 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
2761 webpage)
1a94ff68
S
2762 if mobj is not None:
2763 return self.url_result(mobj.group('url'), 'MLB')
2764
1419fafd 2765 mobj = re.search(
dd467d33 2766 r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1419fafd
S
2767 webpage)
2768 if mobj is not None:
2769 return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
2770
af63fed7 2771 mobj = re.search(
78d3b3e2 2772 r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
af63fed7
PH
2773 webpage)
2774 if mobj is not None:
2775 return self.url_result(mobj.group('url'), 'Livestream')
2776
255fca5e
S
2777 # Look for Zapiks embed
2778 mobj = re.search(
2779 r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
2780 if mobj is not None:
2781 return self.url_result(mobj.group('url'), 'Zapiks')
2782
e3216b82 2783 # Look for Kaltura embeds
c287f2bc
S
2784 kaltura_url = KalturaIE._extract_url(webpage)
2785 if kaltura_url:
2786 return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
e3216b82 2787
665e9452 2788 # Look for EaglePlatform embeds
06a96da1
S
2789 eagleplatform_url = EaglePlatformIE._extract_url(webpage)
2790 if eagleplatform_url:
665e9452 2791 return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
135c9c42 2792
665e9452 2793 # Look for ClipYou (uses EaglePlatform) embeds
d47ae7f6
S
2794 mobj = re.search(
2795 r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
2796 if mobj is not None:
2797 return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
2798
f8388757 2799 # Look for Pladform embeds
45dad7ba
S
2800 pladform_url = PladformIE._extract_url(webpage)
2801 if pladform_url:
2802 return self.url_result(pladform_url)
f8388757 2803
ff18735c
S
2804 # Look for Videomore embeds
2805 videomore_url = VideomoreIE._extract_url(webpage)
2806 if videomore_url:
2807 return self.url_result(videomore_url)
2808
83f1481b
S
2809 # Look for Webcaster embeds
2810 webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
2811 if webcaster_url:
2812 return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
2813
2dcc114f
S
2814 # Look for Playwire embeds
2815 mobj = re.search(
2816 r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
2817 if mobj is not None:
2818 return self.url_result(mobj.group('url'))
2819
ad320e9b
NJ
2820 # Look for 5min embeds
2821 mobj = re.search(
2822 r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
2823 if mobj is not None:
2824 return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
2825
18153f1b
S
2826 # Look for Crooks and Liars embeds
2827 mobj = re.search(
2828 r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
2829 if mobj is not None:
2830 return self.url_result(mobj.group('url'))
2831
a2edf2e7
YCH
2832 # Look for NBC Sports VPlayer embeds
2833 nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
2834 if nbc_sports_url:
2835 return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
2836
de3eb07e
YCH
2837 # Look for NBC News embeds
2838 nbc_news_embed_url = re.search(
2839 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
2840 if nbc_news_embed_url:
2841 return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
2842
653789af 2843 # Look for Google Drive embeds
5b251628 2844 google_drive_url = GoogleDriveIE._extract_url(webpage)
653789af 2845 if google_drive_url:
2846 return self.url_result(google_drive_url, 'GoogleDrive')
2847
418c5cc3
YCH
2848 # Look for UDN embeds
2849 mobj = re.search(
2637fadc 2850 r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
418c5cc3
YCH
2851 if mobj is not None:
2852 return self.url_result(
0a160363 2853 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
418c5cc3 2854
2fe1b5bd
YCH
2855 # Look for Senate ISVP iframe
2856 senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
2857 if senate_isvp_url:
25c3a734 2858 return self.url_result(senate_isvp_url, 'SenateISVP')
2fe1b5bd 2859
1ac1c4c2
S
2860 # Look for OnionStudios embeds
2861 onionstudios_url = OnionStudiosIE._extract_url(webpage)
2862 if onionstudios_url:
2863 return self.url_result(onionstudios_url)
2864
67167920 2865 # Look for ViewLift embeds
2866 viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
2867 if viewlift_url:
2868 return self.url_result(viewlift_url)
eedd20ef 2869
7cb09524 2870 # Look for JWPlatform embeds
b0ead0e0
S
2871 jwplatform_urls = JWPlatformIE._extract_urls(webpage)
2872 if jwplatform_urls:
2873 return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
7cb09524 2874
aecfcd4e
S
2875 # Look for Digiteka embeds
2876 digiteka_url = DigitekaIE._extract_url(webpage)
2877 if digiteka_url:
2878 return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
6aeba407 2879
1979969f
S
2880 # Look for Arkena embeds
2881 arkena_url = ArkenaIE._extract_url(webpage)
2882 if arkena_url:
2883 return self.url_result(arkena_url, ArkenaIE.ie_key())
2884
b1c35797
RA
2885 # Look for Piksel embeds
2886 piksel_url = PikselIE._extract_url(webpage)
2887 if piksel_url:
2888 return self.url_result(piksel_url, PikselIE.ie_key())
2889
1bf996fa 2890 # Look for Limelight embeds
e5d39886
S
2891 limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
2892 if limelight_urls:
2893 return self.playlist_result(
2894 limelight_urls, video_id, video_title, video_description)
2895
7986c3ab
S
2896 # Look for Anvato embeds
2897 anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
2898 if anvato_urls:
2899 return self.playlist_result(
2900 anvato_urls, video_id, video_title, video_description)
2901
a5158f38
YCH
2902 # Look for AdobeTVVideo embeds
2903 mobj = re.search(
2904 r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
2905 webpage)
2906 if mobj is not None:
2907 return self.url_result(
2908 self._proto_relative_url(unescapeHTML(mobj.group(1))),
2909 'AdobeTVVideo')
2910
088e1aac
YCH
2911 # Look for Vine embeds
2912 mobj = re.search(
2913 r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
2914 webpage)
2915 if mobj is not None:
2916 return self.url_result(
2917 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
2918
217d5ae0
RA
2919 # Look for VODPlatform embeds
2920 mobj = re.search(
93b84045 2921 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
217d5ae0
RA
2922 webpage)
2923 if mobj is not None:
2924 return self.url_result(
93b84045 2925 self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
217d5ae0 2926
7d273a38
RA
2927 # Look for Mangomolo embeds
2928 mobj = re.search(
d05ef09d 2929 r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
7d273a38
RA
2930 (?:
2931 video\?.*?\bid=(?P<video_id>\d+)|
2932 index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
2933 ).+?)\1''', webpage)
2934 if mobj is not None:
2935 info = {
2936 '_type': 'url_transparent',
2937 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
2938 'title': video_title,
2939 'description': video_description,
2940 'thumbnail': video_thumbnail,
2941 'uploader': video_uploader,
2942 }
2943 video_id = mobj.group('video_id')
2944 if video_id:
2945 info.update({
2946 'ie_key': 'MangomoloVideo',
2947 'id': video_id,
2948 })
2949 else:
2950 info.update({
2951 'ie_key': 'MangomoloLive',
2952 'id': mobj.group('channel_id'),
2953 })
2954 return info
2955
5a51775a
YCH
2956 # Look for Instagram embeds
2957 instagram_embed_url = InstagramIE._extract_embed_url(webpage)
2958 if instagram_embed_url is not None:
11e60fca
S
2959 return self.url_result(
2960 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
5a51775a 2961
b8f67449 2962 # Look for LiveLeak embeds
09747ba7
YCH
2963 liveleak_urls = LiveLeakIE._extract_urls(webpage)
2964 if liveleak_urls:
2965 return self.playlist_from_matches(liveleak_urls, video_id, video_title)
b8f67449 2966
5d39176f
S
2967 # Look for 3Q SDN embeds
2968 threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2969 if threeqsdn_url:
6f41b2bc
S
2970 return {
2971 '_type': 'url_transparent',
2972 'ie_key': ThreeQSDNIE.ie_key(),
2973 'url': self._proto_relative_url(threeqsdn_url),
2974 'title': video_title,
2975 'description': video_description,
2976 'thumbnail': video_thumbnail,
2977 'uploader': video_uploader,
2978 }
5d39176f 2979
2a1321a2
S
2980 # Look for VBOX7 embeds
2981 vbox7_url = Vbox7IE._extract_url(webpage)
2982 if vbox7_url:
2983 return self.url_result(vbox7_url, Vbox7IE.ie_key())
2984
b0c8f2e9
DR
2985 # Look for DBTV embeds
2986 dbtv_urls = DBTVIE._extract_urls(webpage)
2987 if dbtv_urls:
46b18f23 2988 return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
b0c8f2e9 2989
e186a9ec
S
2990 # Look for Videa embeds
2991 videa_urls = VideaIE._extract_urls(webpage)
2992 if videa_urls:
46b18f23 2993 return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
e186a9ec 2994
b687c85e
S
2995 # Look for 20 minuten embeds
2996 twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
2997 if twentymin_urls:
46b18f23
JH
2998 return self.playlist_from_matches(
2999 twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
b687c85e 3000
17f8deeb
S
3001 # Look for Openload embeds
3002 openload_urls = OpenloadIE._extract_urls(webpage)
3003 if openload_urls:
46b18f23
JH
3004 return self.playlist_from_matches(
3005 openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
17f8deeb 3006
6ef3e65a
S
3007 # Look for VideoPress embeds
3008 videopress_urls = VideoPressIE._extract_urls(webpage)
3009 if videopress_urls:
46b18f23
JH
3010 return self.playlist_from_matches(
3011 videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
6ef3e65a 3012
eb3079b6
S
3013 # Look for Rutube embeds
3014 rutube_urls = RutubeIE._extract_urls(webpage)
3015 if rutube_urls:
46b18f23 3016 return self.playlist_from_matches(
2583c0b5 3017 rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
6ef3e65a 3018
55719459
JH
3019 # Look for WashingtonPost embeds
3020 wapo_urls = WashingtonPostIE._extract_urls(webpage)
3021 if wapo_urls:
3022 return self.playlist_from_matches(
3023 wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
3024
5d29af3d
S
3025 # Look for Mediaset embeds
3026 mediaset_urls = MediasetIE._extract_urls(webpage)
3027 if mediaset_urls:
3028 return self.playlist_from_matches(
3029 mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
3030
73cf76a9
S
3031 # Look for JOJ.sk embeds
3032 joj_urls = JojIE._extract_urls(webpage)
3033 if joj_urls:
3034 return self.playlist_from_matches(
3035 joj_urls, video_id, video_title, ie=JojIE.ie_key())
3036
24e966e8
PH
3037 # Look for megaphone.fm embeds
3038 mpfn_urls = MegaphoneIE._extract_urls(webpage)
3039 if mpfn_urls:
3040 return self.playlist_from_matches(
3041 mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
3042
1663bd6e
S
3043 # Look for vzaar embeds
3044 vzaar_urls = VzaarIE._extract_urls(webpage)
3045 if vzaar_urls:
3046 return self.playlist_from_matches(
3047 vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
3048
26bae2d9
S
3049 channel9_urls = Channel9IE._extract_urls(webpage)
3050 if channel9_urls:
3051 return self.playlist_from_matches(
3052 channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
3053
0987f2dd
T
3054 vshare_urls = VShareIE._extract_urls(webpage)
3055 if vshare_urls:
3056 return self.playlist_from_matches(
3057 vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
3058
8056c854 3059 # Look for Mediasite embeds
2ca7ed41
S
3060 mediasite_urls = MediasiteIE._extract_urls(webpage)
3061 if mediasite_urls:
3062 entries = [
3063 self.url_result(smuggle_url(
3064 compat_urlparse.urljoin(url, mediasite_url),
3065 {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
3066 for mediasite_url in mediasite_urls]
3067 return self.playlist_result(entries, video_id, video_title)
8056c854 3068
7d540621
S
3069 springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
3070 if springboardplatform_urls:
3071 return self.playlist_from_matches(
3072 springboardplatform_urls, video_id, video_title,
3073 ie=SpringboardPlatformIE.ie_key())
3074
4c780fbd
S
3075 yapfiles_urls = YapFilesIE._extract_urls(webpage)
3076 if yapfiles_urls:
3077 return self.playlist_from_matches(
3078 yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
3079
86c8cfc5
S
3080 vice_urls = ViceIE._extract_urls(webpage)
3081 if vice_urls:
3082 return self.playlist_from_matches(
3083 vice_urls, video_id, video_title, ie=ViceIE.ie_key())
3084
178ee883
S
3085 xfileshare_urls = XFileShareIE._extract_urls(webpage)
3086 if xfileshare_urls:
3087 return self.playlist_from_matches(
3088 xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
3089
660a230b
S
3090 cloudflarestream_urls = CloudflareStreamIE._extract_urls(webpage)
3091 if cloudflarestream_urls:
3092 return self.playlist_from_matches(
3093 cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
3094
8b4b400a 3095 peertube_urls = PeerTubeIE._extract_urls(webpage, url)
6bd499e8
S
3096 if peertube_urls:
3097 return self.playlist_from_matches(
3098 peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
3099
aee36ca8
S
3100 indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
3101 if indavideo_urls:
3102 return self.playlist_from_matches(
3103 indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
3104
cfd7f2a6
S
3105 apa_urls = APAIE._extract_urls(webpage)
3106 if apa_urls:
3107 return self.playlist_from_matches(
3108 apa_urls, video_id, video_title, ie=APAIE.ie_key())
3109
f51f526b
S
3110 foxnews_urls = FoxNewsIE._extract_urls(webpage)
3111 if foxnews_urls:
3112 return self.playlist_from_matches(
3113 foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
3114
d3431dcb
S
3115 sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
3116 r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
3117 webpage)]
3118 if sharevideos_urls:
3119 return self.playlist_from_matches(
3120 sharevideos_urls, video_id, video_title)
3121
9d1b2138
S
3122 viqeo_urls = ViqeoIE._extract_urls(webpage)
3123 if viqeo_urls:
3124 return self.playlist_from_matches(
3125 viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
57c68ec4
S
3126
3127 expressen_urls = ExpressenIE._extract_urls(webpage)
3128 if expressen_urls:
3129 return self.playlist_from_matches(
3130 expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
9d1b2138 3131
bd264412
YCH
3132 # Look for HTML5 media
3133 entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
3134 if entries:
9ce1ac40 3135 if len(entries) == 1:
3136 entries[0].update({
bd264412
YCH
3137 'id': video_id,
3138 'title': video_title,
3139 })
9ce1ac40 3140 else:
3141 for num, entry in enumerate(entries, start=1):
3142 entry.update({
3143 'id': '%s-%s' % (video_id, num),
3144 'title': '%s (%d)' % (video_title, num),
3145 })
3146 for entry in entries:
bd264412 3147 self._sort_formats(entry['formats'])
9ce1ac40 3148 return self.playlist_result(entries, video_id, video_title)
bd264412 3149
c73e330e
RU
3150 jwplayer_data = self._find_jwplayer_data(
3151 webpage, video_id, transform_source=js_to_json)
3152 if jwplayer_data:
4f06c1c9 3153 info = self._parse_jwplayer_data(
3e943cfe 3154 jwplayer_data, video_id, require_title=False, base_url=url)
b311b0ea 3155 return merge_dicts(info, info_dict)
a4a554a7 3156
63d990d2
S
3157 # Video.js embed
3158 mobj = re.search(
c5b7014a 3159 r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
63d990d2
S
3160 webpage)
3161 if mobj is not None:
3162 sources = self._parse_json(
3163 mobj.group(1), video_id, transform_source=js_to_json,
3164 fatal=False) or []
c5b7014a
S
3165 if not isinstance(sources, list):
3166 sources = [sources]
63d990d2
S
3167 formats = []
3168 for source in sources:
e0b6e988
S
3169 src = source.get('src')
3170 if not src or not isinstance(src, compat_str):
63d990d2
S
3171 continue
3172 src = compat_urlparse.urljoin(url, src)
3173 src_type = source.get('type')
3174 if isinstance(src_type, compat_str):
3175 src_type = src_type.lower()
3176 ext = determine_ext(src).lower()
3177 if src_type == 'video/youtube':
3178 return self.url_result(src, YoutubeIE.ie_key())
3179 if src_type == 'application/dash+xml' or ext == 'mpd':
3180 formats.extend(self._extract_mpd_formats(
3181 src, video_id, mpd_id='dash', fatal=False))
3182 elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
3183 formats.extend(self._extract_m3u8_formats(
3184 src, video_id, 'mp4', entry_protocol='m3u8_native',
3185 m3u8_id='hls', fatal=False))
3186 else:
3187 formats.append({
3188 'url': src,
3189 'ext': (mimetype2ext(src_type) or
3190 ext if ext in KNOWN_EXTENSIONS else 'mp4'),
3191 })
3192 if formats:
3193 self._sort_formats(formats)
3194 info_dict['formats'] = formats
3195 return info_dict
3196
ff17be3a
S
3197 # Looking for http://schema.org/VideoObject
3198 json_ld = self._search_json_ld(
3199 webpage, video_id, default={}, expected_type='VideoObject')
3200 if json_ld.get('url'):
3201 return merge_dicts(json_ld, info_dict)
3202
def check_video(vurl):
    """Tell whether ``vurl`` should be kept as a candidate video URL.

    A URL is accepted outright when either the YouTube or the RTMP
    extractor reports it as suitable.  Otherwise it is accepted only if
    its path component contains a dot and the extension derived from that
    path is not one of the known non-video extensions listed below
    (flash, images, subtitles, scripts, XML).
    """
    if YoutubeIE.suitable(vurl) or RtmpIE.suitable(vurl):
        return True
    path = compat_urlparse.urlparse(vurl).path
    extension = determine_ext(path)
    # A path without any dot cannot carry a file extension at all.
    if '.' not in path:
        return False
    return extension not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
ced659bb
S
3211
def filter_video(urls):
    """Return a list of the entries of ``urls`` accepted by check_video, in order."""
    return [candidate for candidate in urls if check_video(candidate)]
3214
9b122384 3215 # Start with something easy: JW Player in SWFObject
ced659bb 3216 found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
b30b8698 3217 if not found:
d981cef6 3218 # Look for gorilla-vid style embedding
ced659bb 3219 found = filter_video(re.findall(r'''(?sx)
c0292e8a
PH
3220 (?:
3221 jw_plugins|
3222 JWPlayerOptions|
3223 jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
3224 )
a0f71985
PH
3225 .*?
3226 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
b30b8698 3227 if not found:
9b122384 3228 # Broaden the search a little bit
ced659bb 3229 found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
b30b8698
PH
3230 if not found:
3231 # Broaden the findall a little bit: JWPlayer JS loader
ced659bb 3232 found = filter_video(re.findall(
54a9328b 3233 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
4d805e06
PH
3234 if not found:
3235 # Flow player
ced659bb 3236 found = filter_video(re.findall(r'''(?xs)
4d805e06
PH
3237 flowplayer\("[^"]+",\s*
3238 \{[^}]+?\}\s*,
52585fd6 3239 \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
4d805e06 3240 ["']?url["']?\s*:\s*["']([^"']+)["']
ced659bb 3241 ''', webpage))
501f13fb
PH
3242 if not found:
3243 # Cinerama player
3244 found = re.findall(
3245 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
b30b8698 3246 if not found:
9b122384 3247 # Try to find twitter cards info
371ddb14
S
3248 # twitter:player:stream should be checked before twitter:player since
3249 # it is expected to contain a raw stream (see
3250 # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
ced659bb
S
3251 found = filter_video(re.findall(
3252 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
b30b8698 3253 if not found:
9b122384
PH
3254 # We look for Open Graph info:
3255 # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
b30b8698 3256 m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
9b122384
PH
3257 # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
3258 if m_video_type is not None:
ced659bb 3259 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
b30b8698 3260 if not found:
ed9a25dd 3261 REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
a5a45015 3262 found = re.search(
89ef304b 3263 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
ed9a25dd 3264 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
89ef304b 3265 webpage)
84f81016
S
3266 if not found:
3267 # Look also in Refresh HTTP header
3268 refresh_header = head_response.headers.get('Refresh')
3269 if refresh_header:
6c91a5a7
S
3270 # In python 2 response HTTP headers are bytestrings
3271 if sys.version_info < (3, 0) and isinstance(refresh_header, str):
3272 refresh_header = refresh_header.decode('iso-8859-1')
ed9a25dd 3273 found = re.search(REDIRECT_REGEX, refresh_header)
b30b8698 3274 if found:
b37317d8 3275 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
54b960f3
S
3276 if new_url != url:
3277 self.report_following_redirect(new_url)
3278 return {
3279 '_type': 'url',
3280 'url': new_url,
3281 }
3282 else:
3283 found = None
371ddb14
S
3284
3285 if not found:
3286 # twitter:player is an https URL to an iframe player that may or may not
3287 # be supported by youtube-dl, so it is checked last of all (see
3288 # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
3289 embed_url = self._html_search_meta('twitter:player', webpage, default=None)
02d01e15 3290 if embed_url and embed_url != url:
371ddb14
S
3291 return self.url_result(embed_url)
3292
b30b8698 3293 if not found:
416c7fcb 3294 raise UnsupportedError(url)
9b122384 3295
b30b8698 3296 entries = []
4a120778 3297 for video_url in orderedSet(found):
949b6497 3298 video_url = unescapeHTML(video_url)
6cc37c69 3299 video_url = video_url.replace('\\/', '/')
b30b8698 3300 video_url = compat_urlparse.urljoin(url, video_url)
f7e6f7fa 3301 video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
9b122384 3302
b30b8698
PH
3303 # Sometimes, jwplayer extraction will result in a YouTube URL
3304 if YoutubeIE.suitable(video_url):
3305 entries.append(self.url_result(video_url, 'Youtube'))
3306 continue
9b122384 3307
b30b8698
PH
3308 # here's a fun little line of code for you:
3309 video_id = os.path.splitext(video_id)[0]
fc9713a1 3310
28602e74
YCH
3311 entry_info_dict = {
3312 'id': video_id,
3313 'uploader': video_uploader,
3314 'title': video_title,
3315 'age_limit': age_limit,
3316 }
3317
5620f840
S
3318 if RtmpIE.suitable(video_url):
3319 entry_info_dict.update({
3320 '_type': 'url_transparent',
3321 'ie_key': RtmpIE.ie_key(),
3322 'url': video_url,
3323 })
3324 entries.append(entry_info_dict)
3325 continue
3326
729accb4
S
3327 ext = determine_ext(video_url)
3328 if ext == 'smil':
28602e74 3329 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
729accb4
S
3330 elif ext == 'xspf':
3331 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
750b9ff0
YCH
3332 elif ext == 'm3u8':
3333 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
79a35085
S
3334 elif ext == 'mpd':
3335 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
3f2f4a94
S
3336 elif ext == 'f4m':
3337 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
4119a96c 3338 elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
26aae566
S
3339 # Just matching .ism/manifest is not enough to be reliably sure
3340 # whether it's actually an ISM manifest or some other streaming
3341 # manifest since there are various streaming URL formats
3342 # possible (see [1]) as well as some other shenanigans like
3343 # .smil/manifest URLs that actually serve an ISM (see [2]) and
3344 # so on.
3345 # Thus the most reasonable way to solve this is to delegate
3346 # to generic extractor in order to look into the contents of
3347 # the manifest itself.
3348 # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
3349 # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
3350 entry_info_dict = self.url_result(
3351 smuggle_url(video_url, {'to_generic': True}),
3352 GenericIE.ie_key())
d6fd958c 3353 else:
28602e74
YCH
3354 entry_info_dict['url'] = video_url
3355
19dbaeec
S
3356 if entry_info_dict.get('formats'):
3357 self._sort_formats(entry_info_dict['formats'])
3358
28602e74 3359 entries.append(entry_info_dict)
b30b8698
PH
3360
3361 if len(entries) == 1:
669f0e7c 3362 return entries[0]
b30b8698
PH
3363 else:
3364 for num, e in enumerate(entries, start=1):
13d8fbef
JMF
3365 # 'url' results don't have a title
3366 if e.get('title') is not None:
3367 e['title'] = '%s (%d)' % (e['title'], num)
b30b8698
PH
3368 return {
3369 '_type': 'playlist',
3370 'entries': entries,
3371 }