]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/generic.py
[extractor/generic] Add support for async wistia embeds (Closes #9549)
[yt-dlp.git] / youtube_dl / extractor / generic.py
CommitLineData
cfe50f04
JMF
1# encoding: utf-8
2
79649588
PH
3from __future__ import unicode_literals
4
9b122384
PH
5import os
6import re
6c91a5a7 7import sys
9b122384
PH
8
9from .common import InfoExtractor
fc9713a1 10from .youtube import YoutubeIE
8c25f81b 11from ..compat import (
f7854627 12 compat_etree_fromstring,
1ddb9456 13 compat_urllib_parse_unquote,
a5caba1e 14 compat_urlparse,
f7300c5c 15 compat_xml_parse_error,
8c25f81b
PH
16)
17from ..utils import (
b759a0d4 18 determine_ext,
9b122384 19 ExtractorError,
c8e9a235 20 float_or_none,
aa94a6d3 21 HEADRequest,
61ca9a80 22 is_html,
ed2d6a19 23 orderedSet,
5c2266df 24 sanitized_Request,
9d4660ca
PH
25 smuggle_url,
26 unescapeHTML,
42393ce2 27 unified_strdate,
4d54ef20 28 unsmuggle_url,
416c7fcb 29 UnsupportedError,
42393ce2 30 url_basename,
76c73715 31 xpath_text,
9b122384 32)
ed126900 33from .brightcove import (
4fcaa4f4 34 BrightcoveLegacyIE,
5c17f0a6 35 BrightcoveNewIE,
ed126900 36)
a2edf2e7 37from .nbc import NBCSportsVPlayerIE
c0d0b01f 38from .ooyala import OoyalaIE
93d020dd 39from .rutv import RUTVIE
954c1d05 40from .tvc import TVCIE
d40a3b5b 41from .sportbox import SportBoxEmbedIE
cb3ac1c6 42from .smotri import SmotriIE
6dd94d3a 43from .myvi import MyviIE
1419fafd 44from .condenast import CondeNastIE
418c5cc3 45from .udn import UDNEmbedIE
2fe1b5bd 46from .senateisvp import SenateISVPIE
bab19a8e 47from .svt import SVTIE
65d161c4 48from .pornhub import PornHubIE
2bb5b6d0 49from .xhamster import XHamsterEmbedIE
2c9ca782 50from .tnaflix import TNAFlixNetworkEmbedIE
b407e173 51from .vimeo import VimeoIE
756f574e 52from .dailymotion import DailymotionCloudIE
1ac1c4c2 53from .onionstudios import OnionStudiosIE
67167920 54from .viewlift import ViewLiftEmbedIE
efd712c6 55from .screenwavemedia import ScreenwaveMediaIE
46fde8a1 56from .mtv import MTVServicesEmbeddedIE
45dad7ba 57from .pladform import PladformIE
ff18735c 58from .videomore import VideomoreIE
5b251628 59from .googledrive import GoogleDriveIE
7cb09524 60from .jwplatform import JWPlatformIE
aecfcd4e 61from .digiteka import DigitekaIE
5a51775a 62from .instagram import InstagramIE
b8f67449 63from .liveleak import LiveLeakIE
5d39176f 64from .threeqsdn import ThreeQSDNIE
9b122384 65
0838239e 66
9b122384 67class GenericIE(InfoExtractor):
79649588 68 IE_DESC = 'Generic downloader that works on some sites'
9b122384 69 _VALID_URL = r'.*'
79649588 70 IE_NAME = 'generic'
cfe50f04 71 _TESTS = [
c5fa81fe
S
72 # Direct link to a video
73 {
74 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
75 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
76 'info_dict': {
77 'id': 'trailer',
78 'ext': 'mp4',
79 'title': 'trailer',
80 'upload_date': '20100513',
81 }
82 },
c5138a7c 83 # Direct link to media delivered compressed (until Accept-Encoding is *)
c5fa81fe
S
84 {
85 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
86 'md5': '128c42e68b13950268b648275386fc74',
87 'info_dict': {
88 'id': 'FictionJunction-Parallel_Hearts',
89 'ext': 'flac',
90 'title': 'FictionJunction-Parallel_Hearts',
91 'upload_date': '20140522',
92 },
93 'expected_warnings': [
94 'URL could be a direct video link, returning it as such.'
95 ]
96 },
97 # Direct download with broken HEAD
98 {
99 'url': 'http://ai-radio.org:8000/radio.opus',
100 'info_dict': {
101 'id': 'radio',
102 'ext': 'opus',
103 'title': 'radio',
104 },
105 'params': {
106 'skip_download': True, # infinite live stream
107 },
108 'expected_warnings': [
ef0e4e7b
YCH
109 r'501.*Not Implemented',
110 r'400.*Bad Request',
c5fa81fe
S
111 ],
112 },
113 # Direct link with incorrect MIME type
114 {
115 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
116 'md5': '4ccbebe5f36706d85221f204d7eb5913',
117 'info_dict': {
118 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
119 'id': '5_Lennart_Poettering_-_Systemd',
120 'ext': 'webm',
121 'title': '5_Lennart_Poettering_-_Systemd',
122 'upload_date': '20141120',
123 },
124 'expected_warnings': [
125 'URL could be a direct video link, returning it as such.'
126 ]
127 },
128 # RSS feed
129 {
130 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
131 'info_dict': {
132 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
133 'title': 'Zero Punctuation',
134 'description': 're:.*groundbreaking video review series.*'
135 },
136 'playlist_mincount': 11,
137 },
138 # RSS feed with enclosure
139 {
140 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
141 'info_dict': {
142 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
143 'ext': 'm4v',
144 'upload_date': '20150228',
145 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
146 }
147 },
8765222d
S
148 # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
149 {
150 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
151 'info_dict': {
152 'id': 'smil',
153 'ext': 'mp4',
154 'title': 'Automatics, robotics and biocybernetics',
155 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
e327b736 156 'upload_date': '20130627',
8765222d
S
157 'formats': 'mincount:16',
158 'subtitles': 'mincount:1',
159 },
160 'params': {
161 'force_generic_extractor': True,
162 'skip_download': True,
163 },
164 },
165 # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
166 {
167 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
168 'info_dict': {
169 'id': 'hds',
170 'ext': 'flv',
171 'title': 'hds',
172 'formats': 'mincount:1',
173 },
174 'params': {
175 'skip_download': True,
176 },
177 },
178 # SMIL from https://www.restudy.dk/video/play/id/1637
179 {
180 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
181 'info_dict': {
182 'id': 'video_1637',
183 'ext': 'flv',
184 'title': 'video_1637',
185 'formats': 'mincount:3',
186 },
187 'params': {
188 'skip_download': True,
189 },
190 },
191 # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
192 {
193 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
194 'info_dict': {
195 'id': 'smil-service',
196 'ext': 'flv',
197 'title': 'smil-service',
198 'formats': 'mincount:1',
199 },
200 'params': {
201 'skip_download': True,
202 },
203 },
204 # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
205 {
206 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
207 'info_dict': {
208 'id': '4719370',
209 'ext': 'mp4',
210 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
211 'formats': 'mincount:3',
212 },
213 'params': {
214 'skip_download': True,
215 },
216 },
1de5cd3b
S
217 # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
218 {
219 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
220 'info_dict': {
221 'id': 'mZlp2ctYIUEB',
222 'ext': 'mp4',
223 'title': 'Tikibad ontruimd wegens brand',
224 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
225 'thumbnail': 're:^https?://.*\.jpg$',
226 'duration': 33,
227 },
228 'params': {
229 'skip_download': True,
230 },
231 },
9d939cec
S
232 # MPD from http://dash-mse-test.appspot.com/media.html
233 {
234 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
235 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
236 'info_dict': {
237 'id': 'car-20120827-manifest',
238 'ext': 'mp4',
239 'title': 'car-20120827-manifest',
240 'formats': 'mincount:9',
0738187f 241 'upload_date': '20130904',
9d939cec
S
242 },
243 'params': {
244 'format': 'bestvideo',
245 },
246 },
20938f76
S
247 # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
248 {
249 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
250 'info_dict': {
251 'id': 'content',
252 'ext': 'mp4',
253 'title': 'content',
254 'formats': 'mincount:8',
255 },
256 'params': {
257 # m3u8 downloads
258 'skip_download': True,
259 }
260 },
edd9b71c
S
261 # m3u8 served with Content-Type: text/plain
262 {
263 'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
264 'info_dict': {
265 'id': 'index',
266 'ext': 'mp4',
267 'title': 'index',
268 'upload_date': '20140720',
269 'formats': 'mincount:11',
270 },
271 'params': {
272 # m3u8 downloads
273 'skip_download': True,
274 }
275 },
c5fa81fe
S
276 # google redirect
277 {
278 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
279 'info_dict': {
280 'id': 'cmQHVoWB5FY',
281 'ext': 'mp4',
282 'upload_date': '20130224',
283 'uploader_id': 'TheVerge',
284 'description': 're:^Chris Ziegler takes a look at the\.*',
285 'uploader': 'The Verge',
286 'title': 'First Firefox OS phones side-by-side',
287 },
288 'params': {
289 'skip_download': False,
290 }
291 },
6c91a5a7
S
292 {
293 # redirect in Refresh HTTP header
294 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
295 'info_dict': {
296 'id': 'pO8h3EaFRdo',
297 'ext': 'mp4',
298 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
299 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
300 'upload_date': '20150917',
301 'uploader_id': 'brtvofficial',
302 'uploader': 'Boiler Room',
303 },
304 'params': {
305 'skip_download': False,
306 },
307 },
cfe50f04 308 {
79649588 309 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
d360a146 310 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
79649588 311 'info_dict': {
d360a146
S
312 'id': '13601338388002',
313 'ext': 'mp4',
79649588
PH
314 'uploader': 'www.hodiho.fr',
315 'title': 'R\u00e9gis plante sa Jeep',
cfe50f04
JMF
316 }
317 },
c19f7764
JMF
318 # bandcamp page with custom domain
319 {
79649588
PH
320 'add_ie': ['Bandcamp'],
321 'url': 'http://bronyrock.com/track/the-pony-mash',
79649588 322 'info_dict': {
fd50bf62
S
323 'id': '3235767654',
324 'ext': 'mp3',
79649588
PH
325 'title': 'The Pony Mash',
326 'uploader': 'M_Pallante',
c19f7764 327 },
79649588 328 'skip': 'There is a limit of 200 free downloads / month for the test song',
c19f7764 329 },
eeb165e6 330 # embedded brightcove video
dd5bcdc4
JMF
331 # it also tests brightcove videos that need to set the 'Referer' in the
332 # http requests
eeb165e6 333 {
3b7d9aa4 334 'add_ie': ['BrightcoveLegacy'],
79649588
PH
335 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
336 'info_dict': {
337 'id': '2765128793001',
338 'ext': 'mp4',
339 'title': 'Le cours de bourse : l’analyse technique',
340 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
341 'uploader': 'BFM BUSINESS',
eeb165e6 342 },
79649588
PH
343 'params': {
344 'skip_download': True,
eeb165e6
JMF
345 },
346 },
17ab4d3b
PH
347 {
348 # https://github.com/rg3/youtube-dl/issues/2253
349 'url': 'http://bcove.me/i6nfkrc3',
17ab4d3b
PH
350 'md5': '0ba9446db037002366bab3b3eb30c88c',
351 'info_dict': {
fd50bf62
S
352 'id': '3101154703001',
353 'ext': 'mp4',
17ab4d3b
PH
354 'title': 'Still no power',
355 'uploader': 'thestar.com',
356 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
357 },
3b7d9aa4 358 'add_ie': ['BrightcoveLegacy'],
17ab4d3b 359 },
0479c625
S
360 {
361 'url': 'http://www.championat.com/video/football/v/87/87499.html',
362 'md5': 'fb973ecf6e4a78a67453647444222983',
363 'info_dict': {
364 'id': '3414141473001',
365 'ext': 'mp4',
366 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
367 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
368 'uploader': 'Championat',
369 },
370 },
bdf97017 371 {
37aab278 372 # https://github.com/rg3/youtube-dl/issues/3541
3b7d9aa4 373 'add_ie': ['BrightcoveLegacy'],
bdf97017
NJ
374 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
375 'info_dict': {
376 'id': '3866516442001',
37aab278 377 'ext': 'mp4',
bdf97017
NJ
378 'title': 'Leer mij vrouwen kennen: Aflevering 1',
379 'description': 'Leer mij vrouwen kennen: Aflevering 1',
380 'uploader': 'SBS Broadcasting',
381 },
37aab278 382 'skip': 'Restricted to Netherlands',
bdf97017 383 'params': {
37aab278 384 'skip_download': True, # m3u8 download
bdf97017
NJ
385 },
386 },
c0d0b01f
JMF
387 # ooyala video
388 {
79649588 389 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
87830900 390 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
79649588
PH
391 'info_dict': {
392 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
393 'ext': 'mp4',
3486df38 394 'title': '2cc213299525360.mov', # that's what we get
53e06b25 395 'duration': 238.231,
c0d0b01f 396 },
87830900 397 'add_ie': ['Ooyala'],
c0d0b01f 398 },
bf94d763
S
399 {
400 # ooyala video embedded with http://player.ooyala.com/iframe.js
401 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
402 'info_dict': {
403 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
404 'ext': 'mp4',
405 'title': '"Steve Jobs: Man in the Machine" trailer',
406 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
53e06b25 407 'duration': 135.427,
bf94d763
S
408 },
409 'params': {
410 'skip_download': True,
411 },
412 },
1b86cc41 413 # embed.ly video
414 {
415 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
416 'info_dict': {
417 'id': '9ODmcdjQcHQ',
418 'ext': 'mp4',
0a5bce56
PH
419 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
420 'upload_date': '20140225',
421 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
422 'uploader': 'Tested',
423 'uploader_id': 'testedcom',
1b86cc41 424 },
425 # No need to test YoutubeIE here
426 'params': {
427 'skip_download': True,
428 },
429 },
60cc4dc4
PH
430 # funnyordie embed
431 {
432 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
60cc4dc4
PH
433 'info_dict': {
434 'id': '18e820ec3f',
435 'ext': 'mp4',
436 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
437 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
93d020dd 438 },
60cc4dc4 439 },
93d020dd
S
440 # RUTV embed
441 {
442 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
443 'info_dict': {
444 'id': '776940',
445 'ext': 'mp4',
446 'title': 'Охотское море стало целиком российским',
447 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
448 },
449 'params': {
450 # m3u8 download
451 'skip_download': True,
452 },
aab74fa1 453 },
f37bdbe5
S
454 # TVC embed
455 {
456 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
457 'info_dict': {
458 'id': '55304',
459 'ext': 'mp4',
460 'title': 'Дошкольное воспитание',
461 },
462 },
b827a601
S
463 # SportBox embed
464 {
465 'url': 'http://www.vestifinance.ru/articles/25753',
466 'info_dict': {
467 'id': '25753',
468 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
469 },
470 'playlist': [{
471 'info_dict': {
472 'id': '370908',
473 'title': 'Госзаказ. День 3',
474 'ext': 'mp4',
475 }
476 }, {
477 'info_dict': {
478 'id': '370905',
479 'title': 'Госзаказ. День 2',
480 'ext': 'mp4',
481 }
482 }, {
483 'info_dict': {
484 'id': '370902',
485 'title': 'Госзаказ. День 1',
486 'ext': 'mp4',
487 }
488 }],
489 'params': {
490 # m3u8 download
491 'skip_download': True,
492 },
493 },
bf20b9c5
S
494 # Myvi.ru embed
495 {
496 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
497 'info_dict': {
498 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
499 'ext': 'mp4',
500 'title': 'Ужастики, русский трейлер (2015)',
501 'thumbnail': 're:^https?://.*\.jpg$',
502 'duration': 153,
503 }
504 },
c76799c5
S
505 # XHamster embed
506 {
507 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
508 'info_dict': {
509 'id': 'showthread',
510 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
511 },
512 'playlist_mincount': 7,
513 },
aab74fa1
PH
514 # Embedded TED video
515 {
516 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
a8eb5a8e 517 'md5': '65fdff94098e4a607385a60c5177c638',
aab74fa1 518 'info_dict': {
a8eb5a8e 519 'id': '1969',
aab74fa1 520 'ext': 'mp4',
a8eb5a8e
PH
521 'title': 'Hidden miracles of the natural world',
522 'uploader': 'Louie Schwartzberg',
523 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
aab74fa1 524 }
60cc4dc4 525 },
dfb1b146 526 # Embedded Ustream video
5c386252 527 {
528 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
529 'md5': '27b99cdb639c9b12a79bca876a073417',
530 'info_dict': {
ca6aada4 531 'id': '45734260',
532 'ext': 'flv',
533 'uploader': 'AU SPA: The NSA and Privacy',
5c386252 534 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
535 }
536 },
d95e35d6
S
537 # nowvideo embed hidden behind percent encoding
538 {
539 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
540 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
541 'info_dict': {
542 'id': '06e53103ca9aa',
543 'ext': 'flv',
544 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
545 'description': 'No description',
546 },
0f2a2ba1 547 },
893f8832
PH
548 # arte embed
549 {
550 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
551 'md5': '7653032cbb25bf6c80d80f217055fa43',
552 'info_dict': {
553 'id': '048195-004_PLUS7-F',
554 'ext': 'flv',
555 'title': 'X:enius',
556 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
557 'upload_date': '20140320',
558 },
559 'params': {
560 'skip_download': 'Requires rtmpdump'
561 }
562 },
cbd55ade
S
563 # francetv embed
564 {
565 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
566 'info_dict': {
567 'id': 'EV_30231',
568 'ext': 'mp4',
569 'title': 'Alcaline, le concert avec Calogero',
570 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
571 'upload_date': '20150226',
572 'timestamp': 1424989860,
573 'duration': 5400,
574 },
575 'params': {
576 # m3u8 downloads
577 'skip_download': True,
578 },
579 'expected_warnings': [
580 'Forbidden'
581 ]
582 },
fa35cdad
PH
583 # Condé Nast embed
584 {
585 'url': 'http://www.wired.com/2014/04/honda-asimo/',
586 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
587 'info_dict': {
588 'id': '53501be369702d3275860000',
589 'ext': 'mp4',
590 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
591 }
ebd3c7b3
PH
592 },
593 # Dailymotion embed
594 {
595 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
596 'md5': '441aeeb82eb72c422c7f14ec533999cd',
597 'info_dict': {
598 'id': 'k2mm4bCdJ6CQ2i7c8o2',
599 'ext': 'mp4',
600 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
0738187f 601 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
ebd3c7b3 602 'uploader': 'Spi0n',
0738187f
YCH
603 'uploader_id': 'xgditw',
604 'upload_date': '20140425',
605 'timestamp': 1398441542,
ebd3c7b3
PH
606 },
607 'add_ie': ['Dailymotion'],
2b88feed
PH
608 },
609 # YouTube embed
610 {
611 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
612 'info_dict': {
613 'id': 'FXRb4ykk4S0',
614 'ext': 'mp4',
615 'title': 'The NBL Auction 2014',
616 'uploader': 'BADMINTON England',
617 'uploader_id': 'BADMINTONEvents',
618 'upload_date': '20140603',
619 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
620 },
621 'add_ie': ['Youtube'],
622 'params': {
623 'skip_download': True,
624 }
625 },
c5cd249e
JMF
626 # MTVServices embed
627 {
628 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
629 'md5': '35727f82f58c76d996fc188f9755b0d5',
630 'info_dict': {
631 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
632 'ext': 'mp4',
633 'title': 'Review',
634 'description': 'Mario\'s life in the fast lane has never looked so good.',
635 },
636 },
61013473 637 # YouTube embed via <data-embed-url="">
638 {
639 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
61013473 640 'info_dict': {
a8eb5a8e 641 'id': '4vAffPZIT44',
61013473 642 'ext': 'mp4',
a8eb5a8e 643 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
ed2d6a19
PH
644 'uploader': 'Gameloft',
645 'uploader_id': 'gameloft',
a8eb5a8e
PH
646 'upload_date': '20140828',
647 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
ed2d6a19
PH
648 },
649 'params': {
650 'skip_download': True,
61013473 651 }
c8e9a235
PH
652 },
653 # Camtasia studio
654 {
655 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
656 'playlist': [{
657 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
658 'info_dict': {
659 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
660 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
661 'ext': 'flv',
662 'duration': 2235.90,
663 }
664 }, {
665 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
666 'info_dict': {
667 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
668 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
669 'ext': 'flv',
670 'duration': 2235.93,
671 }
672 }],
673 'info_dict': {
674 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
675 }
4d805e06
PH
676 },
677 # Flowplayer
678 {
679 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
680 'md5': '9d65602bf31c6e20014319c7d07fba27',
681 'info_dict': {
682 'id': '5123ea6d5e5a7',
683 'ext': 'mp4',
684 'age_limit': 18,
685 'uploader': 'www.handjobhub.com',
d6d9186f 686 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
4d805e06 687 }
0990305d 688 },
22a6f150
PH
689 # Multiple brightcove videos
690 # https://github.com/rg3/youtube-dl/issues/2283
691 {
692 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
693 'info_dict': {
694 'id': 'always-never',
695 'title': 'Always / Never - The New Yorker',
696 },
697 'playlist_count': 3,
698 'params': {
699 'extract_flat': False,
700 'skip_download': True,
701 }
1a94ff68
S
702 },
703 # MLB embed
704 {
705 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
706 'md5': '96f09a37e44da40dd083e12d9a683327',
707 'info_dict': {
708 'id': '33322633',
709 'ext': 'mp4',
710 'title': 'Ump changes call to ball',
711 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
712 'duration': 48,
713 'timestamp': 1401537900,
714 'upload_date': '20140531',
715 'thumbnail': 're:^https?://.*\.jpg$',
716 },
717 },
746c67d7
NJ
718 # Wistia embed
719 {
720 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
721 'md5': '8788b683c777a5cf25621eaf286d0c23',
722 'info_dict': {
723 'id': '1cfaf6b7ea',
724 'ext': 'mov',
725 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
726 'duration': 643.0,
727 'filesize': 182808282,
728 'uploader': 'education-portal.com',
729 },
730 },
52cffcb1 731 {
732 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
733 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
734 'info_dict': {
735 'id': 'uxjb0lwrcz',
736 'ext': 'mp4',
85d7b765 737 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
0738187f 738 'description': 'a Martin Fowler video from ThoughtWorks',
52cffcb1 739 'duration': 1715.0,
85d7b765 740 'uploader': 'thoughtworks.wistia.com',
0738187f
YCH
741 'upload_date': '20140603',
742 'timestamp': 1401832161,
70b7e3fb 743 },
52cffcb1 744 },
ac645ac7
PH
745 # Soundcloud embed
746 {
747 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
748 'info_dict': {
749 'id': '174391317',
750 'ext': 'mp3',
751 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
752 'uploader': 'Sophos Security',
753 'title': 'Chet Chat 171 - Oct 29, 2014',
754 'upload_date': '20141029',
755 }
af63fed7
PH
756 },
757 # Livestream embed
758 {
759 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
760 'info_dict': {
761 'id': '67864563',
762 'ext': 'flv',
763 'upload_date': '20141112',
764 'title': 'Rosetta #CometLanding webcast HL 10',
765 }
766 },
65f3a228
PH
767 # LazyYT
768 {
769 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
770 'info_dict': {
11e611a7 771 'id': '1986',
65f3a228
PH
772 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
773 },
774 'playlist_mincount': 2,
4e262a88 775 },
42bdd9d0
PH
776 # Cinchcast embed
777 {
778 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
779 'info_dict': {
780 'id': '7141703',
781 'ext': 'mp3',
782 'upload_date': '20141126',
783 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
784 }
785 },
501f13fb
PH
786 # Cinerama player
787 {
788 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
789 'info_dict': {
790 'id': '730m_DandD_1901_512k',
791 'ext': 'mp4',
792 'uploader': 'www.abc.net.au',
793 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
794 }
796df3c6
S
795 },
796 # embedded viddler video
797 {
798 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
799 'info_dict': {
800 'id': '4d03aad9',
801 'ext': 'mp4',
802 'uploader': 'deadspin',
803 'title': 'WALL-TO-GORTAT',
804 'timestamp': 1422285291,
805 'upload_date': '20150126',
806 },
807 'add_ie': ['Viddler'],
a0f71985 808 },
2051acde
S
809 # Libsyn embed
810 {
811 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
812 'info_dict': {
813 'id': '3377616',
814 'ext': 'mp3',
815 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
816 'description': 'md5:601cb790edd05908957dae8aaa866465',
817 'upload_date': '20150220',
818 },
819 },
a0f71985
PH
820 # jwplayer YouTube
821 {
822 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
823 'info_dict': {
824 'id': 'Mrj4DVp2zeA',
825 'ext': 'mp4',
f37e3f99 826 'upload_date': '20150212',
a0f71985
PH
827 'uploader': 'The National Archives UK',
828 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
829 'uploader_id': 'NationalArchives08',
830 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
831 },
59b8ab58
PH
832 },
833 # rtl.nl embed
834 {
835 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
836 'playlist_mincount': 5,
837 'info_dict': {
838 'id': 'aanslagen-kopenhagen',
839 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
840 }
255fca5e
S
841 },
842 # Zapiks embed
843 {
844 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
845 'info_dict': {
846 'id': '118046',
847 'ext': 'mp4',
848 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
849 }
850 },
e3216b82
NJ
851 # Kaltura embed
852 {
853 'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
854 'info_dict': {
855 'id': '1_eergr3h1',
856 'ext': 'mp4',
857 'upload_date': '20150226',
858 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
859 'timestamp': int,
860 'title': 'John Carlson Postgame 2/25/15',
861 },
862 },
66e568de
S
863 # Kaltura embed (different embed code)
864 {
865 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
866 'info_dict': {
867 'id': '1_a52wc67y',
868 'ext': 'flv',
869 'upload_date': '20150127',
870 'uploader_id': 'PremierMedia',
871 'timestamp': int,
872 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
873 },
874 },
6da620de
S
875 # Kaltura embed protected with referrer
876 {
877 'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
878 'info_dict': {
879 'id': '1_g4fbemnq',
880 'ext': 'mp4',
881 'title': 'Violetta - Achter De Schermen - Ruggero',
882 'description': 'Achter de schermen met Ruggero',
883 'timestamp': 1435133761,
884 'upload_date': '20150624',
885 'uploader_id': 'echojecka',
886 },
887 },
135c9c42
S
888 # Eagle.Platform embed (generic URL)
889 {
890 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
4645432d 891 # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
135c9c42
S
892 'info_dict': {
893 'id': '227304',
894 'ext': 'mp4',
895 'title': 'Навальный вышел на свободу',
896 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
897 'thumbnail': 're:^https?://.*\.jpg$',
898 'duration': 87,
899 'view_count': int,
900 'age_limit': 0,
901 },
902 },
d47ae7f6
S
903 # ClipYou (Eagle.Platform) embed (custom URL)
904 {
905 'url': 'http://muz-tv.ru/play/7129/',
4645432d 906 # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
d47ae7f6
S
907 'info_dict': {
908 'id': '12820',
909 'ext': 'mp4',
910 'title': "'O Sole Mio",
911 'thumbnail': 're:^https?://.*\.jpg$',
912 'duration': 216,
913 'view_count': int,
914 },
915 },
f8388757
S
916 # Pladform embed
917 {
918 'url': 'http://muz-tv.ru/kinozal/view/7400/',
919 'info_dict': {
920 'id': '100183293',
921 'ext': 'mp4',
62259846 922 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
f8388757
S
923 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
924 'thumbnail': 're:^https?://.*\.jpg$',
925 'duration': 694,
926 'age_limit': 0,
927 },
928 },
c798f15b
S
929 # Playwire embed
930 {
931 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
932 'info_dict': {
933 'id': '3519514',
934 'ext': 'mp4',
935 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
936 'thumbnail': 're:^https?://.*\.png$',
937 'duration': 45.115,
938 },
939 },
ad320e9b
NJ
940 # 5min embed
941 {
942 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
943 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
944 'info_dict': {
945 'id': '518726732',
946 'ext': 'mp4',
947 'title': 'Facebook Creates "On This Day" | Crunch Report',
948 },
949 },
dc455a5f
S
950 # SVT embed
951 {
952 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
953 'info_dict': {
954 'id': '2900353',
955 'ext': 'flv',
956 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
957 'duration': 27,
958 'age_limit': 0,
959 },
960 },
a4257017
S
961 # Crooks and Liars embed
962 {
963 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
964 'info_dict': {
965 'id': '8RUoRhRi',
966 'ext': 'mp4',
967 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
968 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
969 'timestamp': 1428207000,
970 'upload_date': '20150405',
971 'uploader': 'Heather',
972 },
973 },
974 # Crooks and Liars external embed
975 {
976 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
977 'info_dict': {
978 'id': 'MTE3MjUtMzQ2MzA',
979 'ext': 'mp4',
980 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
981 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
982 'timestamp': 1265032391,
983 'upload_date': '20100201',
984 'uploader': 'Heather',
985 },
986 },
facecb84 987 # NBC Sports vplayer embed
a2edf2e7 988 {
facecb84 989 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
a2edf2e7 990 'info_dict': {
facecb84
S
991 'id': 'ln7x1qSThw4k',
992 'ext': 'flv',
993 'title': "PFT Live: New leader in the 'new-look' defense",
994 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
0738187f
YCH
995 'uploader': 'NBCU-SPORTS',
996 'upload_date': '20140107',
997 'timestamp': 1389118457,
a2edf2e7 998 },
418c5cc3
YCH
999 },
1000 # UDN embed
1001 {
1002 'url': 'http://www.udn.com/news/story/7314/822787',
01c58f84 1003 'md5': 'fd2060e988c326991037b9aff9df21a6',
418c5cc3 1004 'info_dict': {
01c58f84 1005 'id': '300346',
418c5cc3 1006 'ext': 'mp4',
01c58f84 1007 'title': '中一中男師變性 全校師生力挺',
418c5cc3
YCH
1008 'thumbnail': 're:^https?://.*\.jpg$',
1009 }
edfcf7ab
YCH
1010 },
1011 # Ooyala embed
1012 {
1013 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1014 'info_dict': {
1015 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1016 'ext': 'mp4',
cce9d15d 1017 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
edfcf7ab 1018 'title': 'This is what separates the Excel masters from the wannabes',
53e06b25 1019 'duration': 191.933,
edfcf7ab
YCH
1020 },
1021 'params': {
1022 # m3u8 downloads
1023 'skip_download': True,
1024 }
d6fd958c
YCH
1025 },
1026 # Contains a SMIL manifest
1027 {
1028 'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
1029 'info_dict': {
1030 'id': 'file',
1031 'ext': 'flv',
1032 'title': '+ Football: Lottery Champions League Europe',
1033 'uploader': 'www.telewebion.com',
1034 },
1035 'params': {
1036 # rtmpe downloads
1037 'skip_download': True,
1038 }
b26733ba
YCH
1039 },
1040 # Brightcove URL in single quotes
1041 {
1042 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1043 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1044 'info_dict': {
1045 'id': '4255764656001',
1046 'ext': 'mp4',
1047 'title': 'SN Presents: Russell Martin, World Citizen',
1048 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1049 'uploader': 'Rogers Sportsnet',
0738187f
YCH
1050 'uploader_id': '1704050871',
1051 'upload_date': '20150525',
1052 'timestamp': 1432570283,
b26733ba 1053 },
756f574e
YCH
1054 },
1055 # Dailymotion Cloud video
1056 {
1057 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1058 'md5': '49444254273501a64675a7e68c502681',
1059 'info_dict': {
1060 'id': '5585de919473990de4bee11b',
1061 'ext': 'mp4',
1062 'title': 'Le débat',
1063 'thumbnail': 're:^https?://.*\.jpe?g$',
1064 }
a5158f38 1065 },
8084be78
S
1066 # OnionStudios embed
1067 {
1068 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1069 'info_dict': {
1070 'id': '2855',
1071 'ext': 'mp4',
1072 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1073 'thumbnail': 're:^https?://.*\.jpe?g$',
1074 'uploader': 'ClickHole',
1075 'uploader_id': 'clickhole',
1076 }
1077 },
b8c1cc1a
S
1078 # SnagFilms embed
1079 {
1080 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1081 'info_dict': {
1082 'id': '74849a00-85a9-11e1-9660-123139220831',
1083 'ext': 'mp4',
1084 'title': '#whilewewatch',
1085 }
1086 },
a5158f38
YCH
1087 # AdobeTVVideo embed
1088 {
1089 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1090 'md5': '43662b577c018ad707a63766462b1e87',
1091 'info_dict': {
1092 'id': '2456',
1093 'ext': 'mp4',
1094 'title': 'New experience with Acrobat DC',
1095 'description': 'New experience with Acrobat DC',
1096 'duration': 248.667,
1097 },
1f812580
S
1098 },
1099 # ScreenwaveMedia embed
1100 {
1101 'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1102 'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1103 'info_dict': {
1104 'id': 'cinemasnob-55d26273809dd',
1105 'ext': 'mp4',
1106 'title': 'cinemasnob',
1107 },
ed126900 1108 },
1109 # BrightcoveInPageEmbed embed
1110 {
1111 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1112 'info_dict': {
1113 'id': '4238694884001',
1114 'ext': 'flv',
1115 'title': 'Tabletop: Dread, Last Thoughts',
1116 'description': 'Tabletop: Dread, Last Thoughts',
1117 'duration': 51690,
1118 },
750b9ff0
YCH
1119 },
1120 # JWPlayer with M3U8
1121 {
1122 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
1123 'info_dict': {
1124 'id': 'playlist',
1125 'ext': 'mp4',
1126 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
1127 'uploader': 'ren.tv',
1128 },
1129 'params': {
1130 # m3u8 downloads
1131 'skip_download': True,
1132 }
d10fe835
YCH
1133 },
1134 # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1135 # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1136 {
1137 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1138 'info_dict': {
1139 'id': '4785848093001',
1140 'ext': 'mp4',
1141 'title': 'The Cardinal Pell Interview',
1142 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1143 'uploader': 'GlobeCast Australia - GlobeStream',
0738187f
YCH
1144 'uploader_id': '2733773828001',
1145 'upload_date': '20160304',
1146 'timestamp': 1457083087,
d10fe835
YCH
1147 },
1148 'params': {
1149 # m3u8 downloads
1150 'skip_download': True,
1151 },
1152 },
134c207e
YCH
1153 # Another form of arte.tv embed
1154 {
1155 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1156 'md5': '850bfe45417ddf221288c88a0cffe2e2',
1157 'info_dict': {
1158 'id': '030273-562_PLUS7-F',
1159 'ext': 'mp4',
1160 'title': 'ARTE Reportage - Nulle part, en France',
1161 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1162 'upload_date': '20160409',
1163 },
1164 },
b8f67449
KM
1165 # LiveLeak embed
1166 {
1167 'url': 'http://www.wykop.pl/link/3088787/',
1168 'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
1169 'info_dict': {
1170 'id': '874_1459135191',
1171 'ext': 'mp4',
1172 'title': 'Man shows poor quality of new apartment building',
1173 'description': 'The wall is like a sand pile.',
1174 'uploader': 'Lake8737',
1175 }
1176 },
cfe50f04 1177 ]
9b122384 1178
9b122384
PH
def report_following_redirect(self, new_url):
    """Print a '[redirect]' notice naming the URL that is followed next."""
    notice = '[redirect] Following redirect to %s' % new_url
    self._downloader.to_screen(notice)
9b122384 1182
4fc946b5
PH
1183 def _extract_rss(self, url, video_id, doc):
1184 playlist_title = doc.find('./channel/title').text
1185 playlist_desc_el = doc.find('./channel/description')
1186 playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1187
76c73715
PH
1188 entries = []
1189 for it in doc.findall('./channel/item'):
1190 next_url = xpath_text(it, 'link', fatal=False)
1191 if not next_url:
1192 enclosure_nodes = it.findall('./enclosure')
1193 for e in enclosure_nodes:
1194 next_url = e.attrib.get('url')
1195 if next_url:
1196 break
1197
1198 if not next_url:
1199 continue
1200
1201 entries.append({
1202 '_type': 'url',
1203 'url': next_url,
1204 'title': it.find('title').text,
1205 })
4fc946b5
PH
1206
1207 return {
1208 '_type': 'playlist',
1209 'id': url,
1210 'title': playlist_title,
1211 'description': playlist_desc,
1212 'entries': entries,
1213 }
1214
c8e9a235
PH
1215 def _extract_camtasia(self, url, video_id, webpage):
1216 """ Returns None if no camtasia video can be found. """
1217
1218 camtasia_cfg = self._search_regex(
1219 r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1220 webpage, 'camtasia configuration file', default=None)
1221 if camtasia_cfg is None:
1222 return None
1223
1224 title = self._html_search_meta('DC.title', webpage, fatal=True)
1225
1226 camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1227 camtasia_cfg = self._download_xml(
1228 camtasia_url, video_id,
1229 note='Downloading camtasia configuration',
1230 errnote='Failed to download camtasia configuration')
1231 fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1232
1233 entries = []
1234 for n in fileset_node.getchildren():
1235 url_n = n.find('./uri')
1236 if url_n is None:
1237 continue
1238
1239 entries.append({
1240 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1241 'title': '%s - %s' % (title, n.tag),
1242 'url': compat_urlparse.urljoin(url, url_n.text),
1243 'duration': float_or_none(n.find('./duration').text),
1244 })
1245
1246 return {
1247 '_type': 'playlist',
1248 'entries': entries,
1249 'title': title,
1250 }
1251
9b122384 1252 def _real_extract(self, url):
ebd3c7b3
PH
1253 if url.startswith('//'):
1254 return {
1255 '_type': 'url',
20991253 1256 'url': self.http_scheme() + url,
ebd3c7b3
PH
1257 }
1258
a7130543
JMF
1259 parsed_url = compat_urlparse.urlparse(url)
1260 if not parsed_url.scheme:
04b4d394
PH
1261 default_search = self._downloader.params.get('default_search')
1262 if default_search is None:
1f7ccb90 1263 default_search = 'fixup_error'
04b4d394 1264
1f7ccb90 1265 if default_search in ('auto', 'auto_warning', 'fixup_error'):
04b4d394
PH
1266 if '/' in url:
1267 self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1268 return self.url_result('http://' + url)
1f7ccb90 1269 elif default_search != 'fixup_error':
9c1fc022 1270 if default_search == 'auto_warning':
0e67ab0d
PH
1271 if re.match(r'^(?:url|URL)$', url):
1272 raise ExtractorError(
1273 'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
1274 expected=True)
1275 else:
1276 self._downloader.report_warning(
7571c02c 1277 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
04b4d394 1278 return self.url_result('ytsearch:' + url)
1f7ccb90
PH
1279
1280 if default_search in ('error', 'fixup_error'):
7571c02c 1281 raise ExtractorError(
b74e86f4
PH
1282 '%r is not a valid URL. '
1283 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
1284 % (url, url), expected=True)
04b4d394 1285 else:
f2f2c0c2
PH
1286 if ':' not in default_search:
1287 default_search += ':'
04b4d394 1288 return self.url_result(default_search + url)
4d54ef20
PH
1289
1290 url, smuggled_data = unsmuggle_url(url)
1291 force_videoid = None
d6e6a422 1292 is_intentional = smuggled_data and smuggled_data.get('to_generic')
4d54ef20
PH
1293 if smuggled_data and 'force_videoid' in smuggled_data:
1294 force_videoid = smuggled_data['force_videoid']
1295 video_id = force_videoid
1296 else:
1ddb9456 1297 video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
a7130543 1298
79649588 1299 self.to_screen('%s: Requesting header' % video_id)
c1d1facd 1300
ebab4520 1301 head_req = HEADRequest(url)
23be51d8 1302 head_response = self._request_webpage(
ebab4520
PH
1303 head_req, video_id,
1304 note=False, errnote='Could not send HEAD request to %s' % url,
1305 fatal=False)
42393ce2 1306
23be51d8 1307 if head_response is not False:
42393ce2 1308 # Check for redirect
23be51d8 1309 new_url = head_response.geturl()
42393ce2
PH
1310 if url != new_url:
1311 self.report_following_redirect(new_url)
4d54ef20
PH
1312 if force_videoid:
1313 new_url = smuggle_url(
1314 new_url, {'force_videoid': force_videoid})
cecaaf3f 1315 return self.url_result(new_url)
42393ce2 1316
23be51d8
PH
1317 full_response = None
1318 if head_response is False:
5c2266df 1319 request = sanitized_Request(url)
58bde34a
S
1320 request.add_header('Accept-Encoding', '*')
1321 full_response = self._request_webpage(request, video_id)
23be51d8
PH
1322 head_response = full_response
1323
f930e0c7
S
1324 info_dict = {
1325 'id': video_id,
1326 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
303dcdb9 1327 'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
f930e0c7
S
1328 }
1329
23be51d8 1330 # Check for direct link to a video
955737b2 1331 content_type = head_response.headers.get('Content-Type', '').lower()
263eff95 1332 m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
23be51d8 1333 if m:
f930e0c7
S
1334 format_id = m.group('format_id')
1335 if format_id.endswith('mpegurl'):
eadc3ccd 1336 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
f930e0c7
S
1337 elif format_id == 'f4m':
1338 formats = self._extract_f4m_formats(url, video_id)
eadc3ccd 1339 else:
1340 formats = [{
1341 'format_id': m.group('format_id'),
1342 'url': url,
1343 'vcodec': 'none' if m.group('type') == 'audio' else None
1344 }]
de6c51e8 1345 info_dict['direct'] = True
19dbaeec 1346 self._sort_formats(formats)
de6c51e8 1347 info_dict['formats'] = formats
f930e0c7 1348 return info_dict
42393ce2 1349
d6e6a422 1350 if not self._downloader.params.get('test', False) and not is_intentional:
2fece970
S
1351 force = self._downloader.params.get('force_generic_extractor', False)
1352 self._downloader.report_warning(
1353 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
d6e6a422 1354
4e262a88 1355 if not full_response:
5c2266df 1356 request = sanitized_Request(url)
58bde34a
S
1357 # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1358 # making it impossible to download only a chunk of the file (yet we need only the first
1359 # 512 bytes to test whether it's HTML or not). With youtube-dl's default Accept-Encoding
1360 # this will always result in downloading the whole file, which is not desirable.
1361 # Therefore for the extraction pass we have to override Accept-Encoding to any in order
1362 # to accept raw bytes and be able to download only a chunk.
1363 # It would probably be better to solve this by checking Content-Type for application/octet-stream
1364 # after the HEAD request finishes, but we are not sure if we can rely on this.
1365 request.add_header('Accept-Encoding', '*')
1366 full_response = self._request_webpage(request, video_id)
4e262a88 1367
5940862d
S
1368 first_bytes = full_response.read(512)
1369
1370 # Is it an M3U playlist?
0d769bcb 1371 if first_bytes.startswith(b'#EXTM3U'):
5940862d 1372 info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
19dbaeec 1373 self._sort_formats(info_dict['formats'])
5940862d
S
1374 return info_dict
1375
4e262a88
PH
1376 # Maybe it's a direct link to a video?
1377 # Be careful not to download the whole thing!
61ca9a80 1378 if not is_html(first_bytes):
4e262a88
PH
1379 self._downloader.report_warning(
1380 'URL could be a direct video link, returning it as such.')
f930e0c7 1381 info_dict.update({
4e262a88
PH
1382 'direct': True,
1383 'url': url,
f930e0c7
S
1384 })
1385 return info_dict
4e262a88
PH
1386
1387 webpage = self._webpage_read_content(
1388 full_response, url, video_id, prefix=first_bytes)
1389
9b122384 1390 self.report_extraction(video_id)
887c6acd 1391
1b840245 1392 # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
4fc946b5 1393 try:
f7854627 1394 doc = compat_etree_fromstring(webpage.encode('utf-8'))
4fc946b5
PH
1395 if doc.tag == 'rss':
1396 return self._extract_rss(url, video_id, doc)
e5e8d20a 1397 elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
19dbaeec
S
1398 smil = self._parse_smil(doc, url, video_id)
1399 self._sort_formats(smil['formats'])
1400 return smil
729accb4
S
1401 elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1402 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1b840245 1403 elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
f930e0c7
S
1404 info_dict['formats'] = self._parse_mpd_formats(
1405 doc, video_id, mpd_base_url=url.rpartition('/')[0])
19dbaeec 1406 self._sort_formats(info_dict['formats'])
f930e0c7
S
1407 return info_dict
1408 elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1409 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
19dbaeec 1410 self._sort_formats(info_dict['formats'])
f930e0c7 1411 return info_dict
f7300c5c 1412 except compat_xml_parse_error:
4fc946b5
PH
1413 pass
1414
c8e9a235
PH
1415 # Is it a Camtasia project?
1416 camtasia_res = self._extract_camtasia(url, video_id, webpage)
1417 if camtasia_res is not None:
1418 return camtasia_res
1419
14390730
S
1420 # Sometimes embedded video player is hidden behind percent encoding
1421 # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1422 # Unescaping the whole page allows to handle those cases in a generic way
45eedbe5 1423 webpage = compat_urllib_parse_unquote(webpage)
1f7659db 1424
887c6acd
PH
1425 # it's tempting to parse this further, but you would
1426 # have to take into account all the variations like
1427 # Video Title - Site Name
1428 # Site Name | Video Title
1429 # Video Title - Tagline | Site Name
1430 # and so on and so forth; it's just not practical
6f41b2bc
S
1431 video_title = self._og_search_title(
1432 webpage, default=None) or self._html_search_regex(
79649588
PH
1433 r'(?s)<title>(.*?)</title>', webpage, 'video title',
1434 default='video')
ef4fd848 1435
4d805e06
PH
1436 # Try to detect age limit automatically
1437 age_limit = self._rta_search(webpage)
1438 # And then there are the jokers who advertise that they use RTA,
1439 # but actually don't.
1440 AGE_LIMIT_MARKERS = [
1441 r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1442 ]
1443 if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1444 age_limit = 18
1445
ef4fd848
PH
1446 # video uploader is domain name
1447 video_uploader = self._search_regex(
79649588 1448 r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
887c6acd 1449
6f41b2bc
S
1450 video_description = self._og_search_description(webpage, default=None)
1451 video_thumbnail = self._og_search_thumbnail(webpage, default=None)
1452
ed2d6a19 1453 # Helper method
83992676 1454 def _playlist_from_matches(matches, getter=None, ie=None):
3b2f933b 1455 urlrs = orderedSet(
83992676 1456 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
3b2f933b 1457 for m in matches)
ed2d6a19
PH
1458 return self.playlist_result(
1459 urlrs, playlist_id=video_id, playlist_title=video_title)
1460
1f4b722b 1461 # Look for Brightcove Legacy Studio embeds
4fcaa4f4 1462 bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
99877772 1463 if bc_urls:
79649588 1464 self.to_screen('Brightcove video detected.')
99877772
PH
1465 entries = [{
1466 '_type': 'url',
1467 'url': smuggle_url(bc_url, {'Referer': url}),
3b7d9aa4 1468 'ie_key': 'BrightcoveLegacy'
99877772
PH
1469 } for bc_url in bc_urls]
1470
1471 return {
1472 '_type': 'playlist',
1473 'title': video_title,
1474 'id': video_id,
1475 'entries': entries,
1476 }
cfe50f04 1477
f6519f89
S
1478 # Look for Brightcove New Studio embeds
1479 bc_urls = BrightcoveNewIE._extract_urls(webpage)
1480 if bc_urls:
1481 return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
ed126900 1482
59b8ab58
PH
1483 # Look for embedded rtl.nl player
1484 matches = re.findall(
97b570a9 1485 r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
59b8ab58
PH
1486 webpage)
1487 if matches:
1488 return _playlist_from_matches(matches, ie='RtlNl')
1489
b407e173
YCH
1490 vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1491 if vimeo_url is not None:
1492 return self.url_result(vimeo_url)
7115ca84 1493
a1b85269
YCH
1494 vid_me_embed_url = self._search_regex(
1495 r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1496 webpage, 'vid.me embed', default=None)
1497 if vid_me_embed_url is not None:
1498 return self.url_result(vid_me_embed_url, 'Vidme')
1499
53c1d3ef 1500 # Look for embedded YouTube player
1f9da904 1501 matches = re.findall(r'''(?x)
2b88feed
PH
1502 (?:
1503 <iframe[^>]+?src=|
c71dfccc 1504 data-video-url=|
2b88feed 1505 <embed[^>]+?src=|
a7e97f6d
PH
1506 embedSWF\(?:\s*|
1507 new\s+SWFObject\(
2b88feed
PH
1508 )
1509 (["\'])
1bf5423e 1510 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
6b08cdf6 1511 (?:embed|v|p)/.+?)
1f9da904 1512 \1''', webpage)
887c6acd 1513 if matches:
ed2d6a19 1514 return _playlist_from_matches(
3b2f933b 1515 matches, lambda m: unescapeHTML(m[1]))
53c1d3ef 1516
65f3a228
PH
1517 # Look for lazyYT YouTube embed
1518 matches = re.findall(
1519 r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1520 if matches:
1521 return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1522
355e4fd0
PH
1523 # Look for embedded Dailymotion player
1524 matches = re.findall(
5a490592 1525 r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
355e4fd0 1526 if matches:
ed2d6a19
PH
1527 return _playlist_from_matches(
1528 matches, lambda m: unescapeHTML(m[1]))
355e4fd0 1529
8489578d
NJ
1530 # Look for embedded Dailymotion playlist player (#3822)
1531 m = re.search(
1532 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1533 if m:
1534 playlists = re.findall(
1535 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1536 if playlists:
1537 return _playlist_from_matches(
1538 playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1539
ef4fd848
PH
1540 # Look for embedded Wistia player
1541 match = re.search(
281d3f1d 1542 r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
ef4fd848 1543 if match:
9471c444
NJ
1544 embed_url = self._proto_relative_url(
1545 unescapeHTML(match.group('url')))
ef4fd848
PH
1546 return {
1547 '_type': 'url_transparent',
9471c444 1548 'url': embed_url,
ef4fd848
PH
1549 'ie_key': 'Wistia',
1550 'uploader': video_uploader,
1551 'title': video_title,
1552 'id': video_id,
1553 }
5f6a1245 1554
9471c444 1555 match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
746c67d7
NJ
1556 if match:
1557 return {
1558 '_type': 'url_transparent',
1559 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1560 'ie_key': 'Wistia',
1561 'uploader': video_uploader,
1562 'title': video_title,
1563 'id': match.group('id')
1564 }
ef4fd848 1565
aa5957ac
S
1566 match = re.search(
1567 r'''(?sx)
1568 <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
1569 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
1570 ''', webpage)
1571 if match:
1572 return self.url_result(self._proto_relative_url(
1573 'wistia:%s' % match.group('id')), 'Wistia')
1574
bab19a8e
S
1575 # Look for SVT player
1576 svt_url = SVTIE._extract_url(webpage)
1577 if svt_url:
1578 return self.url_result(svt_url, 'SVT')
1579
fa35cdad
PH
1580 # Look for embedded condenast player
1581 matches = re.findall(
1582 r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1583 webpage)
1584 if matches:
1585 return {
1586 '_type': 'playlist',
1587 'entries': [{
1588 '_type': 'url',
1589 'ie_key': 'CondeNast',
1590 'url': ma,
1591 } for ma in matches],
1592 'title': video_title,
1593 'id': video_id,
1594 }
1595
c19f7764
JMF
1596 # Look for Bandcamp pages with custom domain
1597 mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1598 if mobj is not None:
1599 burl = unescapeHTML(mobj.group(1))
09804265
JMF
1600 # Don't set the extractor because it can be a track url or an album
1601 return self.url_result(burl)
c19f7764 1602
f25571ff
PH
1603 # Look for embedded Vevo player
1604 mobj = re.search(
1605 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1606 if mobj is not None:
1607 return self.url_result(mobj.group('url'))
796df3c6
S
1608
1609 # Look for embedded Viddler player
cb454b33
S
1610 mobj = re.search(
1611 r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1612 webpage)
796df3c6
S
1613 if mobj is not None:
1614 return self.url_result(mobj.group('url'))
f25571ff 1615
3378d67a
S
1616 # Look for NYTimes player
1617 mobj = re.search(
1618 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1619 webpage)
1620 if mobj is not None:
1621 return self.url_result(mobj.group('url'))
1622
cefdf970
S
1623 # Look for Libsyn player
1624 mobj = re.search(
1625 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1626 if mobj is not None:
1627 return self.url_result(mobj.group('url'))
1628
c0d0b01f 1629 # Look for Ooyala videos
8a37aa15 1630 mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
f076b638 1631 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
edfcf7ab
YCH
1632 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1633 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
c0d0b01f 1634 if mobj is not None:
cce9d15d 1635 return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
c0d0b01f 1636
f076b638 1637 # Look for multiple Ooyala embeds on SBN network websites
1638 mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1639 if mobj is not None:
1640 embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1641 if embeds:
1642 return _playlist_from_matches(
cce9d15d 1643 embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
f076b638 1644
aa94a6d3 1645 # Look for Aparat videos
48099643 1646 mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
aa94a6d3
PH
1647 if mobj is not None:
1648 return self.url_result(mobj.group(1), 'Aparat')
1649
c93c2ab1 1650 # Look for MPORA videos
c3f51436 1651 mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
c93c2ab1
PH
1652 if mobj is not None:
1653 return self.url_result(mobj.group(1), 'Mpora')
5f59ee79 1654
15c0e8e7 1655 # Look for embedded NovaMov-based player
8f89e687 1656 mobj = re.search(
8dfa187b 1657 r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
15c0e8e7
S
1658 (?P<url>http://(?:(?:embed|www)\.)?
1659 (?:novamov\.com|
1660 nowvideo\.(?:ch|sx|eu|at|ag|co)|
1661 videoweed\.(?:es|com)|
1662 movshare\.(?:net|sx|ag)|
1663 divxstage\.(?:eu|net|ch|co|at|ag))
1664 /embed\.php.+?)\1''', webpage)
8f89e687 1665 if mobj is not None:
15c0e8e7 1666 return self.url_result(mobj.group('url'))
50f56607 1667
9834872b
PH
1668 # Look for embedded Facebook player
1669 mobj = re.search(
db1f3888 1670 r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
9834872b
PH
1671 if mobj is not None:
1672 return self.url_result(mobj.group('url'), 'Facebook')
1673
ca97a56e
S
1674 # Look for embedded VK player
1675 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1676 if mobj is not None:
1677 return self.url_result(mobj.group('url'), 'VK')
1678
33d4fdab
S
1679 # Look for embedded Odnoklassniki player
1680 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
1681 if mobj is not None:
1682 return self.url_result(mobj.group('url'), 'Odnoklassniki')
1683
0364fa8b
S
1684 # Look for embedded ivi player
1685 mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1686 if mobj is not None:
1687 return self.url_result(mobj.group('url'), 'Ivi')
1688
db1f3888
PH
1689 # Look for embedded Huffington Post player
1690 mobj = re.search(
c3f51436 1691 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
db1f3888
PH
1692 if mobj is not None:
1693 return self.url_result(mobj.group('url'), 'HuffPost')
1694
1b86cc41 1695 # Look for embed.ly
1696 mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1697 if mobj is not None:
1698 return self.url_result(mobj.group('url'))
1699 mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1700 if mobj is not None:
f7e6f7fa 1701 return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1b86cc41 1702
60cc4dc4
PH
1703 # Look for funnyordie embed
1704 matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1705 if matches:
ed2d6a19
PH
1706 return _playlist_from_matches(
1707 matches, getter=unescapeHTML, ie='FunnyOrDie')
60cc4dc4 1708
db546cf8
S
1709 # Look for BBC iPlayer embed
1710 matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1711 if matches:
476eae0c 1712 return _playlist_from_matches(matches, ie='BBCCoUk')
db546cf8 1713
93d020dd
S
1714 # Look for embedded RUTV player
1715 rutv_url = RUTVIE._extract_url(webpage)
1716 if rutv_url:
1717 return self.url_result(rutv_url, 'RUTV')
1718
494f20cb 1719 # Look for embedded TVC player
b8599718
S
1720 tvc_url = TVCIE._extract_url(webpage)
1721 if tvc_url:
1722 return self.url_result(tvc_url, 'TVC')
494f20cb 1723
d40a3b5b
S
1724 # Look for embedded SportBox player
1725 sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1726 if sportbox_urls:
1727 return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1728
78e2b74b 1729 # Look for embedded PornHub player
65d161c4
S
1730 pornhub_url = PornHubIE._extract_url(webpage)
1731 if pornhub_url:
1732 return self.url_result(pornhub_url, 'PornHub')
1733
2bb5b6d0
S
1734 # Look for embedded XHamster player
1735 xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1736 if xhamster_urls:
1737 return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1738
2c9ca782
S
1739 # Look for embedded TNAFlixNetwork player
1740 tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
1741 if tnaflix_urls:
1742 return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
1743
9872d311
S
1744 # Look for embedded Tvigle player
1745 mobj = re.search(
1746 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1747 if mobj is not None:
1748 return self.url_result(mobj.group('url'), 'Tvigle')
1749
7e2ede98
JMF
1750 # Look for embedded TED player
1751 mobj = re.search(
d7cc31b6 1752 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
7e2ede98
JMF
1753 if mobj is not None:
1754 return self.url_result(mobj.group('url'), 'TED')
1755
5c386252 1756 # Look for embedded Ustream videos
1757 mobj = re.search(
1758 r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1759 if mobj is not None:
1760 return self.url_result(mobj.group('url'), 'Ustream')
1761
893f8832
PH
1762 # Look for embedded arte.tv player
1763 mobj = re.search(
134c207e 1764 r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
893f8832
PH
1765 webpage)
1766 if mobj is not None:
1767 return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1768
cbd55ade
S
1769 # Look for embedded francetv player
1770 mobj = re.search(
1771 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1772 webpage)
1773 if mobj is not None:
1774 return self.url_result(mobj.group('url'))
1775
cb3ac1c6
S
1776 # Look for embedded smotri.com player
1777 smotri_url = SmotriIE._extract_url(webpage)
1778 if smotri_url:
1779 return self.url_result(smotri_url, 'Smotri')
1780
e6c2d9ad 1781 # Look for embedded Myvi.ru player
6dd94d3a 1782 myvi_url = MyviIE._extract_url(webpage)
e6c2d9ad
S
1783 if myvi_url:
1784 return self.url_result(myvi_url)
1785
dfb1b146 1786 # Look for embedded soundcloud player
20991253 1787 mobj = re.search(
ac645ac7 1788 r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
20991253
PH
1789 webpage)
1790 if mobj is not None:
1791 url = unescapeHTML(mobj.group('url'))
1792 return self.url_result(url)
1793
826ec77f
PH
1794 # Look for embedded vulture.com player
1795 mobj = re.search(
1796 r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1797 webpage)
1798 if mobj is not None:
1799 url = unescapeHTML(mobj.group('url'))
1800 return self.url_result(url, ie='Vulture')
1801
c5cd249e 1802 # Look for embedded mtvservices player
46fde8a1
S
1803 mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1804 if mtvservices_url:
1805 return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
c5cd249e 1806
49807b4a
S
1807 # Look for embedded yahoo player
1808 mobj = re.search(
1809 r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1810 webpage)
1811 if mobj is not None:
1812 return self.url_result(mobj.group('url'), 'Yahoo')
1813
2ef6fcb5
PH
1814 # Look for embedded sbs.com.au player
1815 mobj = re.search(
e98b8e79
PH
1816 r'''(?x)
1817 (?:
1818 <meta\s+property="og:video"\s+content=|
1819 <iframe[^>]+?src=
1820 )
1821 (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2ef6fcb5
PH
1822 webpage)
1823 if mobj is not None:
1824 return self.url_result(mobj.group('url'), 'SBS')
1825
42bdd9d0
PH
1826 # Look for embedded Cinchcast player
1827 mobj = re.search(
1828 r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1829 webpage)
1830 if mobj is not None:
1831 return self.url_result(mobj.group('url'), 'Cinchcast')
1832
1a94ff68 1833 mobj = re.search(
5263cdfc 1834 r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1a94ff68 1835 webpage)
8001607e
YCH
1836 if not mobj:
1837 mobj = re.search(
1838 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1839 webpage)
1a94ff68
S
1840 if mobj is not None:
1841 return self.url_result(mobj.group('url'), 'MLB')
1842
1419fafd 1843 mobj = re.search(
dd467d33 1844 r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1419fafd
S
1845 webpage)
1846 if mobj is not None:
1847 return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1848
af63fed7
PH
1849 mobj = re.search(
1850 r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1851 webpage)
1852 if mobj is not None:
1853 return self.url_result(mobj.group('url'), 'Livestream')
1854
255fca5e
S
1855 # Look for Zapiks embed
1856 mobj = re.search(
1857 r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1858 if mobj is not None:
1859 return self.url_result(mobj.group('url'), 'Zapiks')
1860
e3216b82 1861 # Look for Kaltura embeds
6a5d6de1 1862 mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
1c31a5b0 1863 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
e3216b82 1864 if mobj is not None:
5b5fae5f
S
1865 return self.url_result(smuggle_url(
1866 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
1867 {'source_url': url}), 'Kaltura')
e3216b82 1868
135c9c42
S
1869 # Look for Eagle.Platform embeds
1870 mobj = re.search(
1871 r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1872 if mobj is not None:
1873 return self.url_result(mobj.group('url'), 'EaglePlatform')
1874
d47ae7f6
S
1875 # Look for ClipYou (uses Eagle.Platform) embeds
1876 mobj = re.search(
1877 r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1878 if mobj is not None:
1879 return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1880
f8388757 1881 # Look for Pladform embeds
45dad7ba
S
1882 pladform_url = PladformIE._extract_url(webpage)
1883 if pladform_url:
1884 return self.url_result(pladform_url)
f8388757 1885
ff18735c
S
1886 # Look for Videomore embeds
1887 videomore_url = VideomoreIE._extract_url(webpage)
1888 if videomore_url:
1889 return self.url_result(videomore_url)
1890
2dcc114f
S
1891 # Look for Playwire embeds
1892 mobj = re.search(
1893 r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1894 if mobj is not None:
1895 return self.url_result(mobj.group('url'))
1896
ad320e9b
NJ
1897 # Look for 5min embeds
1898 mobj = re.search(
1899 r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1900 if mobj is not None:
1901 return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1902
18153f1b
S
1903 # Look for Crooks and Liars embeds
1904 mobj = re.search(
1905 r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1906 if mobj is not None:
1907 return self.url_result(mobj.group('url'))
1908
a2edf2e7
YCH
1909 # Look for NBC Sports VPlayer embeds
1910 nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1911 if nbc_sports_url:
1912 return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1913
653789af 1914 # Look for Google Drive embeds
5b251628 1915 google_drive_url = GoogleDriveIE._extract_url(webpage)
653789af 1916 if google_drive_url:
1917 return self.url_result(google_drive_url, 'GoogleDrive')
1918
418c5cc3
YCH
1919 # Look for UDN embeds
1920 mobj = re.search(
c39fd7b1 1921 r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
418c5cc3
YCH
1922 if mobj is not None:
1923 return self.url_result(
0a160363 1924 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
418c5cc3 1925
2fe1b5bd
YCH
1926 # Look for Senate ISVP iframe
1927 senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1928 if senate_isvp_url:
25c3a734 1929 return self.url_result(senate_isvp_url, 'SenateISVP')
2fe1b5bd 1930
756f574e
YCH
1931 # Look for Dailymotion Cloud videos
1932 dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1933 if dmcloud_url:
1934 return self.url_result(dmcloud_url, 'DailymotionCloud')
1935
1ac1c4c2
S
1936 # Look for OnionStudios embeds
1937 onionstudios_url = OnionStudiosIE._extract_url(webpage)
1938 if onionstudios_url:
1939 return self.url_result(onionstudios_url)
1940
67167920 1941 # Look for ViewLift embeds
1942 viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
1943 if viewlift_url:
1944 return self.url_result(viewlift_url)
eedd20ef 1945
7cb09524 1946 # Look for JWPlatform embeds
1947 jwplatform_url = JWPlatformIE._extract_url(webpage)
1948 if jwplatform_url:
1949 return self.url_result(jwplatform_url, 'JWPlatform')
1950
8ca31a0e 1951 # Look for ScreenwaveMedia embeds
efd712c6 1952 mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
8ca31a0e 1953 if mobj is not None:
efd712c6 1954 return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
8ca31a0e 1955
aecfcd4e
S
1956 # Look for Digiteka embeds
1957 digiteka_url = DigitekaIE._extract_url(webpage)
1958 if digiteka_url:
1959 return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
6aeba407 1960
1bf996fa 1961 # Look for Limelight embeds
1962 mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
1963 if mobj:
1964 lm = {
1965 'Media': 'media',
1966 'Channel': 'channel',
1967 'ChannelList': 'channel_list',
1968 }
1969 return self.url_result('limelight:%s:%s' % (
1970 lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
1971
a5158f38
YCH
1972 # Look for AdobeTVVideo embeds
1973 mobj = re.search(
1974 r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1975 webpage)
1976 if mobj is not None:
1977 return self.url_result(
1978 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1979 'AdobeTVVideo')
1980
088e1aac
YCH
1981 # Look for Vine embeds
1982 mobj = re.search(
1983 r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
1984 webpage)
1985 if mobj is not None:
1986 return self.url_result(
1987 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
1988
5a51775a
YCH
1989 # Look for Instagram embeds
1990 instagram_embed_url = InstagramIE._extract_embed_url(webpage)
1991 if instagram_embed_url is not None:
11e60fca
S
1992 return self.url_result(
1993 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
5a51775a 1994
b8f67449
KM
1995 # Look for LiveLeak embeds
1996 liveleak_url = LiveLeakIE._extract_url(webpage)
1997 if liveleak_url:
1998 return self.url_result(liveleak_url, 'LiveLeak')
1999
5d39176f
S
2000 # Look for 3Q SDN embeds
2001 threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2002 if threeqsdn_url:
6f41b2bc
S
2003 return {
2004 '_type': 'url_transparent',
2005 'ie_key': ThreeQSDNIE.ie_key(),
2006 'url': self._proto_relative_url(threeqsdn_url),
2007 'title': video_title,
2008 'description': video_description,
2009 'thumbnail': video_thumbnail,
2010 'uploader': video_uploader,
2011 }
5d39176f 2012
def check_video(vurl):
    """Tell whether vurl should be kept as a candidate media URL.

    Anything YoutubeIE declares suitable is accepted outright. Any other
    URL is accepted only if its path component contains a dot and the
    extension reported by determine_ext() is not one of the rejected
    (flash / image / subtitle) extensions listed below.
    """
    if YoutubeIE.suitable(vurl):
        return True

    rejected_exts = ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
    vpath = compat_urlparse.urlparse(vurl).path
    vext = determine_ext(vpath)
    # A path without any dot is never treated as a direct media link.
    if '.' not in vpath:
        return False
    return vext not in rejected_exts
2019
def filter_video(urls):
    """Return a list of the entries of urls accepted by check_video(), in order."""
    return [candidate for candidate in urls if check_video(candidate)]
2022
9b122384 2023 # Start with something easy: JW Player in SWFObject
ced659bb 2024 found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
b30b8698 2025 if not found:
d981cef6 2026 # Look for gorilla-vid style embedding
ced659bb 2027 found = filter_video(re.findall(r'''(?sx)
c0292e8a
PH
2028 (?:
2029 jw_plugins|
2030 JWPlayerOptions|
2031 jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
2032 )
a0f71985
PH
2033 .*?
2034 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
b30b8698 2035 if not found:
9b122384 2036 # Broaden the search a little bit
ced659bb 2037 found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
b30b8698
PH
2038 if not found:
2039 # Broaden the findall a little bit: JWPlayer JS loader
ced659bb 2040 found = filter_video(re.findall(
54a9328b 2041 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
4d805e06
PH
2042 if not found:
2043 # Flow player
ced659bb 2044 found = filter_video(re.findall(r'''(?xs)
4d805e06
PH
2045 flowplayer\("[^"]+",\s*
2046 \{[^}]+?\}\s*,
52585fd6 2047 \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
4d805e06 2048 ["']?url["']?\s*:\s*["']([^"']+)["']
ced659bb 2049 ''', webpage))
501f13fb
PH
2050 if not found:
2051 # Cinerama player
2052 found = re.findall(
2053 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
b30b8698 2054 if not found:
9b122384 2055 # Try to find twitter cards info
ced659bb
S
2056 found = filter_video(re.findall(
2057 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
b30b8698 2058 if not found:
9b122384
PH
2059 # We look for Open Graph info:
2060 # We have to match any number of spaces between elements, some sites try to align them (e.g. statigr.am)
b30b8698 2061 m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
9b122384
PH
2062 # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
2063 if m_video_type is not None:
ced659bb 2064 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
b30b8698 2065 if not found:
7fea7156 2066 # HTML5 video
12439dd5 2067 found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
b30b8698 2068 if not found:
ed9a25dd 2069 REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
a5a45015 2070 found = re.search(
89ef304b 2071 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
ed9a25dd 2072 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
89ef304b 2073 webpage)
84f81016
S
2074 if not found:
2075 # Look also in Refresh HTTP header
2076 refresh_header = head_response.headers.get('Refresh')
2077 if refresh_header:
6c91a5a7
S
2078 # In python 2 response HTTP headers are bytestrings
2079 if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2080 refresh_header = refresh_header.decode('iso-8859-1')
ed9a25dd 2081 found = re.search(REDIRECT_REGEX, refresh_header)
b30b8698 2082 if found:
b37317d8 2083 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
89ef304b
PH
2084 self.report_following_redirect(new_url)
2085 return {
2086 '_type': 'url',
2087 'url': new_url,
2088 }
b30b8698 2089 if not found:
416c7fcb 2090 raise UnsupportedError(url)
9b122384 2091
b30b8698
PH
2092 entries = []
2093 for video_url in found:
949b6497 2094 video_url = unescapeHTML(video_url)
6cc37c69 2095 video_url = video_url.replace('\\/', '/')
b30b8698 2096 video_url = compat_urlparse.urljoin(url, video_url)
f7e6f7fa 2097 video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
9b122384 2098
b30b8698
PH
2099 # Sometimes, jwplayer extraction will result in a YouTube URL
2100 if YoutubeIE.suitable(video_url):
2101 entries.append(self.url_result(video_url, 'Youtube'))
2102 continue
9b122384 2103
b30b8698
PH
2104 # here's a fun little line of code for you:
2105 video_id = os.path.splitext(video_id)[0]
fc9713a1 2106
28602e74
YCH
2107 entry_info_dict = {
2108 'id': video_id,
2109 'uploader': video_uploader,
2110 'title': video_title,
2111 'age_limit': age_limit,
2112 }
2113
729accb4
S
2114 ext = determine_ext(video_url)
2115 if ext == 'smil':
28602e74 2116 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
729accb4
S
2117 elif ext == 'xspf':
2118 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
750b9ff0
YCH
2119 elif ext == 'm3u8':
2120 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
79a35085
S
2121 elif ext == 'mpd':
2122 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
3f2f4a94
S
2123 elif ext == 'f4m':
2124 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
d6fd958c 2125 else:
28602e74
YCH
2126 entry_info_dict['url'] = video_url
2127
19dbaeec
S
2128 if entry_info_dict.get('formats'):
2129 self._sort_formats(entry_info_dict['formats'])
2130
28602e74 2131 entries.append(entry_info_dict)
b30b8698
PH
2132
2133 if len(entries) == 1:
669f0e7c 2134 return entries[0]
b30b8698
PH
2135 else:
2136 for num, e in enumerate(entries, start=1):
13d8fbef
JMF
2137 # 'url' results don't have a title
2138 if e.get('title') is not None:
2139 e['title'] = '%s (%d)' % (e['title'], num)
b30b8698
PH
2140 return {
2141 '_type': 'playlist',
2142 'entries': entries,
2143 }