]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/generic.py
[downloader/f4m] Do not update fragment list while test
[yt-dlp.git] / youtube_dl / extractor / generic.py
CommitLineData
cfe50f04
JMF
1# encoding: utf-8
2
79649588
PH
3from __future__ import unicode_literals
4
9b122384
PH
5import os
6import re
6c91a5a7 7import sys
9b122384
PH
8
9from .common import InfoExtractor
fc9713a1 10from .youtube import YoutubeIE
8c25f81b 11from ..compat import (
f7854627 12 compat_etree_fromstring,
1ddb9456 13 compat_urllib_parse_unquote,
a5caba1e 14 compat_urlparse,
f7300c5c 15 compat_xml_parse_error,
8c25f81b
PH
16)
17from ..utils import (
b759a0d4 18 determine_ext,
9b122384 19 ExtractorError,
c8e9a235 20 float_or_none,
aa94a6d3 21 HEADRequest,
61ca9a80 22 is_html,
ed2d6a19 23 orderedSet,
5c2266df 24 sanitized_Request,
9d4660ca
PH
25 smuggle_url,
26 unescapeHTML,
42393ce2 27 unified_strdate,
4d54ef20 28 unsmuggle_url,
416c7fcb 29 UnsupportedError,
42393ce2 30 url_basename,
76c73715 31 xpath_text,
9b122384 32)
ed126900 33from .brightcove import (
4fcaa4f4 34 BrightcoveLegacyIE,
5c17f0a6 35 BrightcoveNewIE,
ed126900 36)
a2edf2e7 37from .nbc import NBCSportsVPlayerIE
c0d0b01f 38from .ooyala import OoyalaIE
93d020dd 39from .rutv import RUTVIE
954c1d05 40from .tvc import TVCIE
d40a3b5b 41from .sportbox import SportBoxEmbedIE
cb3ac1c6 42from .smotri import SmotriIE
6dd94d3a 43from .myvi import MyviIE
1419fafd 44from .condenast import CondeNastIE
418c5cc3 45from .udn import UDNEmbedIE
2fe1b5bd 46from .senateisvp import SenateISVPIE
bab19a8e 47from .svt import SVTIE
65d161c4 48from .pornhub import PornHubIE
2bb5b6d0 49from .xhamster import XHamsterEmbedIE
b407e173 50from .vimeo import VimeoIE
756f574e 51from .dailymotion import DailymotionCloudIE
1ac1c4c2 52from .onionstudios import OnionStudiosIE
eedd20ef 53from .snagfilms import SnagFilmsEmbedIE
efd712c6 54from .screenwavemedia import ScreenwaveMediaIE
46fde8a1 55from .mtv import MTVServicesEmbeddedIE
45dad7ba 56from .pladform import PladformIE
ff18735c 57from .videomore import VideomoreIE
5b251628 58from .googledrive import GoogleDriveIE
7cb09524 59from .jwplatform import JWPlatformIE
aecfcd4e 60from .digiteka import DigitekaIE
9b122384 61
0838239e 62
9b122384 63class GenericIE(InfoExtractor):
79649588 64 IE_DESC = 'Generic downloader that works on some sites'
9b122384 65 _VALID_URL = r'.*'
79649588 66 IE_NAME = 'generic'
cfe50f04 67 _TESTS = [
c5fa81fe
S
68 # Direct link to a video
69 {
70 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
71 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
72 'info_dict': {
73 'id': 'trailer',
74 'ext': 'mp4',
75 'title': 'trailer',
76 'upload_date': '20100513',
77 }
78 },
c5138a7c 79 # Direct link to media delivered compressed (until Accept-Encoding is *)
c5fa81fe
S
80 {
81 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
82 'md5': '128c42e68b13950268b648275386fc74',
83 'info_dict': {
84 'id': 'FictionJunction-Parallel_Hearts',
85 'ext': 'flac',
86 'title': 'FictionJunction-Parallel_Hearts',
87 'upload_date': '20140522',
88 },
89 'expected_warnings': [
90 'URL could be a direct video link, returning it as such.'
91 ]
92 },
93 # Direct download with broken HEAD
94 {
95 'url': 'http://ai-radio.org:8000/radio.opus',
96 'info_dict': {
97 'id': 'radio',
98 'ext': 'opus',
99 'title': 'radio',
100 },
101 'params': {
102 'skip_download': True, # infinite live stream
103 },
104 'expected_warnings': [
105 r'501.*Not Implemented'
106 ],
107 },
108 # Direct link with incorrect MIME type
109 {
110 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
111 'md5': '4ccbebe5f36706d85221f204d7eb5913',
112 'info_dict': {
113 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
114 'id': '5_Lennart_Poettering_-_Systemd',
115 'ext': 'webm',
116 'title': '5_Lennart_Poettering_-_Systemd',
117 'upload_date': '20141120',
118 },
119 'expected_warnings': [
120 'URL could be a direct video link, returning it as such.'
121 ]
122 },
123 # RSS feed
124 {
125 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
126 'info_dict': {
127 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
128 'title': 'Zero Punctuation',
129 'description': 're:.*groundbreaking video review series.*'
130 },
131 'playlist_mincount': 11,
132 },
133 # RSS feed with enclosure
134 {
135 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
136 'info_dict': {
137 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
138 'ext': 'm4v',
139 'upload_date': '20150228',
140 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
141 }
142 },
8765222d
S
143 # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
144 {
145 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
146 'info_dict': {
147 'id': 'smil',
148 'ext': 'mp4',
149 'title': 'Automatics, robotics and biocybernetics',
150 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
e327b736 151 'upload_date': '20130627',
8765222d
S
152 'formats': 'mincount:16',
153 'subtitles': 'mincount:1',
154 },
155 'params': {
156 'force_generic_extractor': True,
157 'skip_download': True,
158 },
159 },
160 # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
161 {
162 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
163 'info_dict': {
164 'id': 'hds',
165 'ext': 'flv',
166 'title': 'hds',
167 'formats': 'mincount:1',
168 },
169 'params': {
170 'skip_download': True,
171 },
172 },
173 # SMIL from https://www.restudy.dk/video/play/id/1637
174 {
175 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
176 'info_dict': {
177 'id': 'video_1637',
178 'ext': 'flv',
179 'title': 'video_1637',
180 'formats': 'mincount:3',
181 },
182 'params': {
183 'skip_download': True,
184 },
185 },
186 # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
187 {
188 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
189 'info_dict': {
190 'id': 'smil-service',
191 'ext': 'flv',
192 'title': 'smil-service',
193 'formats': 'mincount:1',
194 },
195 'params': {
196 'skip_download': True,
197 },
198 },
199 # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
200 {
201 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
202 'info_dict': {
203 'id': '4719370',
204 'ext': 'mp4',
205 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
206 'formats': 'mincount:3',
207 },
208 'params': {
209 'skip_download': True,
210 },
211 },
1de5cd3b
S
212 # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
213 {
214 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
215 'info_dict': {
216 'id': 'mZlp2ctYIUEB',
217 'ext': 'mp4',
218 'title': 'Tikibad ontruimd wegens brand',
219 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
220 'thumbnail': 're:^https?://.*\.jpg$',
221 'duration': 33,
222 },
223 'params': {
224 'skip_download': True,
225 },
226 },
c5fa81fe
S
227 # google redirect
228 {
229 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
230 'info_dict': {
231 'id': 'cmQHVoWB5FY',
232 'ext': 'mp4',
233 'upload_date': '20130224',
234 'uploader_id': 'TheVerge',
235 'description': 're:^Chris Ziegler takes a look at the\.*',
236 'uploader': 'The Verge',
237 'title': 'First Firefox OS phones side-by-side',
238 },
239 'params': {
240 'skip_download': False,
241 }
242 },
6c91a5a7
S
243 {
244 # redirect in Refresh HTTP header
245 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
246 'info_dict': {
247 'id': 'pO8h3EaFRdo',
248 'ext': 'mp4',
249 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
250 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
251 'upload_date': '20150917',
252 'uploader_id': 'brtvofficial',
253 'uploader': 'Boiler Room',
254 },
255 'params': {
256 'skip_download': False,
257 },
258 },
cfe50f04 259 {
79649588 260 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
d360a146 261 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
79649588 262 'info_dict': {
d360a146
S
263 'id': '13601338388002',
264 'ext': 'mp4',
79649588
PH
265 'uploader': 'www.hodiho.fr',
266 'title': 'R\u00e9gis plante sa Jeep',
cfe50f04
JMF
267 }
268 },
c19f7764
JMF
269 # bandcamp page with custom domain
270 {
79649588
PH
271 'add_ie': ['Bandcamp'],
272 'url': 'http://bronyrock.com/track/the-pony-mash',
79649588 273 'info_dict': {
fd50bf62
S
274 'id': '3235767654',
275 'ext': 'mp3',
79649588
PH
276 'title': 'The Pony Mash',
277 'uploader': 'M_Pallante',
c19f7764 278 },
79649588 279 'skip': 'There is a limit of 200 free downloads / month for the test song',
c19f7764 280 },
eeb165e6 281 # embedded brightcove video
dd5bcdc4
JMF
282 # it also tests brightcove videos that need to set the 'Referer' in the
283 # http requests
eeb165e6 284 {
3b7d9aa4 285 'add_ie': ['BrightcoveLegacy'],
79649588
PH
286 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
287 'info_dict': {
288 'id': '2765128793001',
289 'ext': 'mp4',
290 'title': 'Le cours de bourse : l’analyse technique',
291 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
292 'uploader': 'BFM BUSINESS',
eeb165e6 293 },
79649588
PH
294 'params': {
295 'skip_download': True,
eeb165e6
JMF
296 },
297 },
17ab4d3b
PH
298 {
299 # https://github.com/rg3/youtube-dl/issues/2253
300 'url': 'http://bcove.me/i6nfkrc3',
17ab4d3b
PH
301 'md5': '0ba9446db037002366bab3b3eb30c88c',
302 'info_dict': {
fd50bf62
S
303 'id': '3101154703001',
304 'ext': 'mp4',
17ab4d3b
PH
305 'title': 'Still no power',
306 'uploader': 'thestar.com',
307 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
308 },
3b7d9aa4 309 'add_ie': ['BrightcoveLegacy'],
17ab4d3b 310 },
0479c625
S
311 {
312 'url': 'http://www.championat.com/video/football/v/87/87499.html',
313 'md5': 'fb973ecf6e4a78a67453647444222983',
314 'info_dict': {
315 'id': '3414141473001',
316 'ext': 'mp4',
317 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
318 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
319 'uploader': 'Championat',
320 },
321 },
bdf97017 322 {
37aab278 323 # https://github.com/rg3/youtube-dl/issues/3541
3b7d9aa4 324 'add_ie': ['BrightcoveLegacy'],
bdf97017
NJ
325 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
326 'info_dict': {
327 'id': '3866516442001',
37aab278 328 'ext': 'mp4',
bdf97017
NJ
329 'title': 'Leer mij vrouwen kennen: Aflevering 1',
330 'description': 'Leer mij vrouwen kennen: Aflevering 1',
331 'uploader': 'SBS Broadcasting',
332 },
37aab278 333 'skip': 'Restricted to Netherlands',
bdf97017 334 'params': {
37aab278 335 'skip_download': True, # m3u8 download
bdf97017
NJ
336 },
337 },
c0d0b01f
JMF
338 # ooyala video
339 {
79649588 340 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
87830900 341 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
79649588
PH
342 'info_dict': {
343 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
344 'ext': 'mp4',
3486df38 345 'title': '2cc213299525360.mov', # that's what we get
53e06b25 346 'duration': 238.231,
c0d0b01f 347 },
87830900 348 'add_ie': ['Ooyala'],
c0d0b01f 349 },
bf94d763
S
350 {
351 # ooyala video embedded with http://player.ooyala.com/iframe.js
352 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
353 'info_dict': {
354 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
355 'ext': 'mp4',
356 'title': '"Steve Jobs: Man in the Machine" trailer',
357 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
53e06b25 358 'duration': 135.427,
bf94d763
S
359 },
360 'params': {
361 'skip_download': True,
362 },
363 },
f076b638 364 # multiple ooyala embeds on SBN network websites
365 {
366 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
367 'info_dict': {
368 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
369 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
370 },
371 'playlist_mincount': 3,
372 'params': {
373 'skip_download': True,
374 },
375 'add_ie': ['Ooyala'],
376 },
1b86cc41 377 # embed.ly video
378 {
379 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
380 'info_dict': {
381 'id': '9ODmcdjQcHQ',
382 'ext': 'mp4',
0a5bce56
PH
383 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
384 'upload_date': '20140225',
385 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
386 'uploader': 'Tested',
387 'uploader_id': 'testedcom',
1b86cc41 388 },
389 # No need to test YoutubeIE here
390 'params': {
391 'skip_download': True,
392 },
393 },
60cc4dc4
PH
394 # funnyordie embed
395 {
396 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
60cc4dc4
PH
397 'info_dict': {
398 'id': '18e820ec3f',
399 'ext': 'mp4',
400 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
401 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
93d020dd 402 },
60cc4dc4 403 },
93d020dd
S
404 # RUTV embed
405 {
406 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
407 'info_dict': {
408 'id': '776940',
409 'ext': 'mp4',
410 'title': 'Охотское море стало целиком российским',
411 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
412 },
413 'params': {
414 # m3u8 download
415 'skip_download': True,
416 },
aab74fa1 417 },
f37bdbe5
S
418 # TVC embed
419 {
420 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
421 'info_dict': {
422 'id': '55304',
423 'ext': 'mp4',
424 'title': 'Дошкольное воспитание',
425 },
426 },
b827a601
S
427 # SportBox embed
428 {
429 'url': 'http://www.vestifinance.ru/articles/25753',
430 'info_dict': {
431 'id': '25753',
432 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
433 },
434 'playlist': [{
435 'info_dict': {
436 'id': '370908',
437 'title': 'Госзаказ. День 3',
438 'ext': 'mp4',
439 }
440 }, {
441 'info_dict': {
442 'id': '370905',
443 'title': 'Госзаказ. День 2',
444 'ext': 'mp4',
445 }
446 }, {
447 'info_dict': {
448 'id': '370902',
449 'title': 'Госзаказ. День 1',
450 'ext': 'mp4',
451 }
452 }],
453 'params': {
454 # m3u8 download
455 'skip_download': True,
456 },
457 },
bf20b9c5
S
458 # Myvi.ru embed
459 {
460 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
461 'info_dict': {
462 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
463 'ext': 'mp4',
464 'title': 'Ужастики, русский трейлер (2015)',
465 'thumbnail': 're:^https?://.*\.jpg$',
466 'duration': 153,
467 }
468 },
c76799c5
S
469 # XHamster embed
470 {
471 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
472 'info_dict': {
473 'id': 'showthread',
474 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
475 },
476 'playlist_mincount': 7,
477 },
aab74fa1
PH
478 # Embedded TED video
479 {
480 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
a8eb5a8e 481 'md5': '65fdff94098e4a607385a60c5177c638',
aab74fa1 482 'info_dict': {
a8eb5a8e 483 'id': '1969',
aab74fa1 484 'ext': 'mp4',
a8eb5a8e
PH
485 'title': 'Hidden miracles of the natural world',
486 'uploader': 'Louie Schwartzberg',
487 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
aab74fa1 488 }
60cc4dc4 489 },
dfb1b146 490 # Embedded Ustream video
5c386252 491 {
492 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
493 'md5': '27b99cdb639c9b12a79bca876a073417',
494 'info_dict': {
ca6aada4 495 'id': '45734260',
496 'ext': 'flv',
497 'uploader': 'AU SPA: The NSA and Privacy',
5c386252 498 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
499 }
500 },
d95e35d6
S
501 # nowvideo embed hidden behind percent encoding
502 {
503 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
504 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
505 'info_dict': {
506 'id': '06e53103ca9aa',
507 'ext': 'flv',
508 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
509 'description': 'No description',
510 },
0f2a2ba1 511 },
893f8832
PH
512 # arte embed
513 {
514 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
515 'md5': '7653032cbb25bf6c80d80f217055fa43',
516 'info_dict': {
517 'id': '048195-004_PLUS7-F',
518 'ext': 'flv',
519 'title': 'X:enius',
520 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
521 'upload_date': '20140320',
522 },
523 'params': {
524 'skip_download': 'Requires rtmpdump'
525 }
526 },
cbd55ade
S
527 # francetv embed
528 {
529 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
530 'info_dict': {
531 'id': 'EV_30231',
532 'ext': 'mp4',
533 'title': 'Alcaline, le concert avec Calogero',
534 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
535 'upload_date': '20150226',
536 'timestamp': 1424989860,
537 'duration': 5400,
538 },
539 'params': {
540 # m3u8 downloads
541 'skip_download': True,
542 },
543 'expected_warnings': [
544 'Forbidden'
545 ]
546 },
fa35cdad
PH
547 # Condé Nast embed
548 {
549 'url': 'http://www.wired.com/2014/04/honda-asimo/',
550 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
551 'info_dict': {
552 'id': '53501be369702d3275860000',
553 'ext': 'mp4',
554 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
555 }
ebd3c7b3
PH
556 },
557 # Dailymotion embed
558 {
559 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
560 'md5': '441aeeb82eb72c422c7f14ec533999cd',
561 'info_dict': {
562 'id': 'k2mm4bCdJ6CQ2i7c8o2',
563 'ext': 'mp4',
564 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
565 'uploader': 'Spi0n',
566 },
567 'add_ie': ['Dailymotion'],
2b88feed
PH
568 },
569 # YouTube embed
570 {
571 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
572 'info_dict': {
573 'id': 'FXRb4ykk4S0',
574 'ext': 'mp4',
575 'title': 'The NBL Auction 2014',
576 'uploader': 'BADMINTON England',
577 'uploader_id': 'BADMINTONEvents',
578 'upload_date': '20140603',
579 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
580 },
581 'add_ie': ['Youtube'],
582 'params': {
583 'skip_download': True,
584 }
585 },
c5cd249e
JMF
586 # MTVServices embed
587 {
588 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
589 'md5': '35727f82f58c76d996fc188f9755b0d5',
590 'info_dict': {
591 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
592 'ext': 'mp4',
593 'title': 'Review',
594 'description': 'Mario\'s life in the fast lane has never looked so good.',
595 },
596 },
61013473 597 # YouTube embed via <data-embed-url="">
598 {
599 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
61013473 600 'info_dict': {
a8eb5a8e 601 'id': '4vAffPZIT44',
61013473 602 'ext': 'mp4',
a8eb5a8e 603 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
ed2d6a19
PH
604 'uploader': 'Gameloft',
605 'uploader_id': 'gameloft',
a8eb5a8e
PH
606 'upload_date': '20140828',
607 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
ed2d6a19
PH
608 },
609 'params': {
610 'skip_download': True,
61013473 611 }
c8e9a235
PH
612 },
613 # Camtasia studio
614 {
615 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
616 'playlist': [{
617 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
618 'info_dict': {
619 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
620 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
621 'ext': 'flv',
622 'duration': 2235.90,
623 }
624 }, {
625 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
626 'info_dict': {
627 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
628 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
629 'ext': 'flv',
630 'duration': 2235.93,
631 }
632 }],
633 'info_dict': {
634 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
635 }
4d805e06
PH
636 },
637 # Flowplayer
638 {
639 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
640 'md5': '9d65602bf31c6e20014319c7d07fba27',
641 'info_dict': {
642 'id': '5123ea6d5e5a7',
643 'ext': 'mp4',
644 'age_limit': 18,
645 'uploader': 'www.handjobhub.com',
d6d9186f 646 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
4d805e06 647 }
0990305d 648 },
22a6f150
PH
649 # Multiple brightcove videos
650 # https://github.com/rg3/youtube-dl/issues/2283
651 {
652 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
653 'info_dict': {
654 'id': 'always-never',
655 'title': 'Always / Never - The New Yorker',
656 },
657 'playlist_count': 3,
658 'params': {
659 'extract_flat': False,
660 'skip_download': True,
661 }
1a94ff68
S
662 },
663 # MLB embed
664 {
665 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
666 'md5': '96f09a37e44da40dd083e12d9a683327',
667 'info_dict': {
668 'id': '33322633',
669 'ext': 'mp4',
670 'title': 'Ump changes call to ball',
671 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
672 'duration': 48,
673 'timestamp': 1401537900,
674 'upload_date': '20140531',
675 'thumbnail': 're:^https?://.*\.jpg$',
676 },
677 },
746c67d7
NJ
678 # Wistia embed
679 {
680 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
681 'md5': '8788b683c777a5cf25621eaf286d0c23',
682 'info_dict': {
683 'id': '1cfaf6b7ea',
684 'ext': 'mov',
685 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
686 'duration': 643.0,
687 'filesize': 182808282,
688 'uploader': 'education-portal.com',
689 },
690 },
52cffcb1 691 {
692 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
693 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
694 'info_dict': {
695 'id': 'uxjb0lwrcz',
696 'ext': 'mp4',
85d7b765 697 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
52cffcb1 698 'duration': 1715.0,
85d7b765 699 'uploader': 'thoughtworks.wistia.com',
70b7e3fb 700 },
52cffcb1 701 },
ac645ac7
PH
702 # Soundcloud embed
703 {
704 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
705 'info_dict': {
706 'id': '174391317',
707 'ext': 'mp3',
708 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
709 'uploader': 'Sophos Security',
710 'title': 'Chet Chat 171 - Oct 29, 2014',
711 'upload_date': '20141029',
712 }
af63fed7
PH
713 },
714 # Livestream embed
715 {
716 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
717 'info_dict': {
718 'id': '67864563',
719 'ext': 'flv',
720 'upload_date': '20141112',
721 'title': 'Rosetta #CometLanding webcast HL 10',
722 }
723 },
65f3a228
PH
724 # LazyYT
725 {
726 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
727 'info_dict': {
11e611a7 728 'id': '1986',
65f3a228
PH
729 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
730 },
731 'playlist_mincount': 2,
4e262a88 732 },
42bdd9d0
PH
733 # Cinchcast embed
734 {
735 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
736 'info_dict': {
737 'id': '7141703',
738 'ext': 'mp3',
739 'upload_date': '20141126',
740 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
741 }
742 },
501f13fb
PH
743 # Cinerama player
744 {
745 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
746 'info_dict': {
747 'id': '730m_DandD_1901_512k',
748 'ext': 'mp4',
749 'uploader': 'www.abc.net.au',
750 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
751 }
796df3c6
S
752 },
753 # embedded viddler video
754 {
755 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
756 'info_dict': {
757 'id': '4d03aad9',
758 'ext': 'mp4',
759 'uploader': 'deadspin',
760 'title': 'WALL-TO-GORTAT',
761 'timestamp': 1422285291,
762 'upload_date': '20150126',
763 },
764 'add_ie': ['Viddler'],
a0f71985 765 },
2051acde
S
766 # Libsyn embed
767 {
768 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
769 'info_dict': {
770 'id': '3377616',
771 'ext': 'mp3',
772 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
773 'description': 'md5:601cb790edd05908957dae8aaa866465',
774 'upload_date': '20150220',
775 },
776 },
a0f71985
PH
777 # jwplayer YouTube
778 {
779 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
780 'info_dict': {
781 'id': 'Mrj4DVp2zeA',
782 'ext': 'mp4',
f37e3f99 783 'upload_date': '20150212',
a0f71985
PH
784 'uploader': 'The National Archives UK',
785 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
786 'uploader_id': 'NationalArchives08',
787 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
788 },
59b8ab58
PH
789 },
790 # rtl.nl embed
791 {
792 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
793 'playlist_mincount': 5,
794 'info_dict': {
795 'id': 'aanslagen-kopenhagen',
796 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
797 }
255fca5e
S
798 },
799 # Zapiks embed
800 {
801 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
802 'info_dict': {
803 'id': '118046',
804 'ext': 'mp4',
805 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
806 }
807 },
e3216b82
NJ
808 # Kaltura embed
809 {
810 'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
811 'info_dict': {
812 'id': '1_eergr3h1',
813 'ext': 'mp4',
814 'upload_date': '20150226',
815 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
816 'timestamp': int,
817 'title': 'John Carlson Postgame 2/25/15',
818 },
819 },
66e568de
S
820 # Kaltura embed (different embed code)
821 {
822 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
823 'info_dict': {
824 'id': '1_a52wc67y',
825 'ext': 'flv',
826 'upload_date': '20150127',
827 'uploader_id': 'PremierMedia',
828 'timestamp': int,
829 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
830 },
831 },
6da620de
S
832 # Kaltura embed protected with referrer
833 {
834 'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
835 'info_dict': {
836 'id': '1_g4fbemnq',
837 'ext': 'mp4',
838 'title': 'Violetta - Achter De Schermen - Ruggero',
839 'description': 'Achter de schermen met Ruggero',
840 'timestamp': 1435133761,
841 'upload_date': '20150624',
842 'uploader_id': 'echojecka',
843 },
844 },
135c9c42
S
845 # Eagle.Platform embed (generic URL)
846 {
847 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
848 'info_dict': {
849 'id': '227304',
850 'ext': 'mp4',
851 'title': 'Навальный вышел на свободу',
852 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
853 'thumbnail': 're:^https?://.*\.jpg$',
854 'duration': 87,
855 'view_count': int,
856 'age_limit': 0,
857 },
858 },
d47ae7f6
S
859 # ClipYou (Eagle.Platform) embed (custom URL)
860 {
861 'url': 'http://muz-tv.ru/play/7129/',
862 'info_dict': {
863 'id': '12820',
864 'ext': 'mp4',
865 'title': "'O Sole Mio",
866 'thumbnail': 're:^https?://.*\.jpg$',
867 'duration': 216,
868 'view_count': int,
869 },
870 },
f8388757
S
871 # Pladform embed
872 {
873 'url': 'http://muz-tv.ru/kinozal/view/7400/',
874 'info_dict': {
875 'id': '100183293',
876 'ext': 'mp4',
62259846 877 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
f8388757
S
878 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
879 'thumbnail': 're:^https?://.*\.jpg$',
880 'duration': 694,
881 'age_limit': 0,
882 },
883 },
c798f15b
S
884 # Playwire embed
885 {
886 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
887 'info_dict': {
888 'id': '3519514',
889 'ext': 'mp4',
890 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
891 'thumbnail': 're:^https?://.*\.png$',
892 'duration': 45.115,
893 },
894 },
ad320e9b
NJ
895 # 5min embed
896 {
897 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
898 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
899 'info_dict': {
900 'id': '518726732',
901 'ext': 'mp4',
902 'title': 'Facebook Creates "On This Day" | Crunch Report',
903 },
904 },
dc455a5f
S
905 # SVT embed
906 {
907 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
908 'info_dict': {
909 'id': '2900353',
910 'ext': 'flv',
911 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
912 'duration': 27,
913 'age_limit': 0,
914 },
915 },
a4257017
S
916 # Crooks and Liars embed
917 {
918 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
919 'info_dict': {
920 'id': '8RUoRhRi',
921 'ext': 'mp4',
922 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
923 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
924 'timestamp': 1428207000,
925 'upload_date': '20150405',
926 'uploader': 'Heather',
927 },
928 },
929 # Crooks and Liars external embed
930 {
931 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
932 'info_dict': {
933 'id': 'MTE3MjUtMzQ2MzA',
934 'ext': 'mp4',
935 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
936 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
937 'timestamp': 1265032391,
938 'upload_date': '20100201',
939 'uploader': 'Heather',
940 },
941 },
facecb84 942 # NBC Sports vplayer embed
a2edf2e7 943 {
facecb84 944 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
a2edf2e7 945 'info_dict': {
facecb84
S
946 'id': 'ln7x1qSThw4k',
947 'ext': 'flv',
948 'title': "PFT Live: New leader in the 'new-look' defense",
949 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
a2edf2e7 950 },
418c5cc3
YCH
951 },
952 # UDN embed
953 {
954 'url': 'http://www.udn.com/news/story/7314/822787',
01c58f84 955 'md5': 'fd2060e988c326991037b9aff9df21a6',
418c5cc3 956 'info_dict': {
01c58f84 957 'id': '300346',
418c5cc3 958 'ext': 'mp4',
01c58f84 959 'title': '中一中男師變性 全校師生力挺',
418c5cc3
YCH
960 'thumbnail': 're:^https?://.*\.jpg$',
961 }
edfcf7ab
YCH
962 },
963 # Ooyala embed
964 {
965 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
966 'info_dict': {
967 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
968 'ext': 'mp4',
cce9d15d 969 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
edfcf7ab 970 'title': 'This is what separates the Excel masters from the wannabes',
53e06b25 971 'duration': 191.933,
edfcf7ab
YCH
972 },
973 'params': {
974 # m3u8 downloads
975 'skip_download': True,
976 }
d6fd958c
YCH
977 },
978 # Contains a SMIL manifest
979 {
980 'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
981 'info_dict': {
982 'id': 'file',
983 'ext': 'flv',
984 'title': '+ Football: Lottery Champions League Europe',
985 'uploader': 'www.telewebion.com',
986 },
987 'params': {
988 # rtmpe downloads
989 'skip_download': True,
990 }
b26733ba
YCH
991 },
992 # Brightcove URL in single quotes
993 {
994 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
995 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
996 'info_dict': {
997 'id': '4255764656001',
998 'ext': 'mp4',
999 'title': 'SN Presents: Russell Martin, World Citizen',
1000 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1001 'uploader': 'Rogers Sportsnet',
1002 },
756f574e
YCH
1003 },
1004 # Dailymotion Cloud video
1005 {
1006 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1007 'md5': '49444254273501a64675a7e68c502681',
1008 'info_dict': {
1009 'id': '5585de919473990de4bee11b',
1010 'ext': 'mp4',
1011 'title': 'Le débat',
1012 'thumbnail': 're:^https?://.*\.jpe?g$',
1013 }
a5158f38 1014 },
8084be78
S
1015 # OnionStudios embed
1016 {
1017 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1018 'info_dict': {
1019 'id': '2855',
1020 'ext': 'mp4',
1021 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1022 'thumbnail': 're:^https?://.*\.jpe?g$',
1023 'uploader': 'ClickHole',
1024 'uploader_id': 'clickhole',
1025 }
1026 },
b8c1cc1a
S
1027 # SnagFilms embed
1028 {
1029 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1030 'info_dict': {
1031 'id': '74849a00-85a9-11e1-9660-123139220831',
1032 'ext': 'mp4',
1033 'title': '#whilewewatch',
1034 }
1035 },
a5158f38
YCH
1036 # AdobeTVVideo embed
1037 {
1038 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1039 'md5': '43662b577c018ad707a63766462b1e87',
1040 'info_dict': {
1041 'id': '2456',
1042 'ext': 'mp4',
1043 'title': 'New experience with Acrobat DC',
1044 'description': 'New experience with Acrobat DC',
1045 'duration': 248.667,
1046 },
1f812580
S
1047 },
1048 # ScreenwaveMedia embed
1049 {
1050 'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1051 'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1052 'info_dict': {
1053 'id': 'cinemasnob-55d26273809dd',
1054 'ext': 'mp4',
1055 'title': 'cinemasnob',
1056 },
ed126900 1057 },
1058 # BrightcoveInPageEmbed embed
1059 {
1060 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1061 'info_dict': {
1062 'id': '4238694884001',
1063 'ext': 'flv',
1064 'title': 'Tabletop: Dread, Last Thoughts',
1065 'description': 'Tabletop: Dread, Last Thoughts',
1066 'duration': 51690,
1067 },
750b9ff0
YCH
1068 },
1069 # JWPlayer with M3U8
1070 {
1071 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
1072 'info_dict': {
1073 'id': 'playlist',
1074 'ext': 'mp4',
1075 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
1076 'uploader': 'ren.tv',
1077 },
1078 'params': {
1079 # m3u8 downloads
1080 'skip_download': True,
1081 }
76c73715 1082 }
cfe50f04 1083 ]
9b122384 1084
9b122384
PH
def report_following_redirect(self, new_url):
    """Announce via the downloader's to_screen that a redirect to new_url is being followed."""
    message = '[redirect] Following redirect to %s' % new_url
    self._downloader.to_screen(message)
9b122384 1088
4fc946b5
PH
1089 def _extract_rss(self, url, video_id, doc):
1090 playlist_title = doc.find('./channel/title').text
1091 playlist_desc_el = doc.find('./channel/description')
1092 playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1093
76c73715
PH
1094 entries = []
1095 for it in doc.findall('./channel/item'):
1096 next_url = xpath_text(it, 'link', fatal=False)
1097 if not next_url:
1098 enclosure_nodes = it.findall('./enclosure')
1099 for e in enclosure_nodes:
1100 next_url = e.attrib.get('url')
1101 if next_url:
1102 break
1103
1104 if not next_url:
1105 continue
1106
1107 entries.append({
1108 '_type': 'url',
1109 'url': next_url,
1110 'title': it.find('title').text,
1111 })
4fc946b5
PH
1112
1113 return {
1114 '_type': 'playlist',
1115 'id': url,
1116 'title': playlist_title,
1117 'description': playlist_desc,
1118 'entries': entries,
1119 }
1120
c8e9a235
PH
def _extract_camtasia(self, url, video_id, webpage):
    """Extract a Camtasia project referenced by webpage as a playlist.

    Returns None if no camtasia video can be found, i.e. when the page
    contains no fo.addVariable("csConfigFile", ...) call. Otherwise the
    referenced configuration XML is downloaded and every child of
    ./playlist/array/fileset that has a <uri> becomes one playlist entry,
    titled '<DC.title> - <child tag>'.
    """

    cfg_path = self._search_regex(
        r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
        webpage, 'camtasia configuration file', default=None)
    if cfg_path is None:
        return None

    title = self._html_search_meta('DC.title', webpage, fatal=True)

    # Resolve the configuration path against the page URL
    camtasia_url = compat_urlparse.urljoin(url, cfg_path)
    camtasia_cfg = self._download_xml(
        camtasia_url, video_id,
        note='Downloading camtasia configuration',
        errnote='Failed to download camtasia configuration')
    fileset_node = camtasia_cfg.find('./playlist/array/fileset')

    entries = []
    # Iterate the element directly: Element.getchildren() was deprecated
    # and is gone in Python 3.9+
    for n in fileset_node:
        url_n = n.find('./uri')
        if url_n is None:
            continue

        # A child without <duration> simply gets no duration
        duration_n = n.find('./duration')
        duration = None if duration_n is None else float_or_none(duration_n.text)

        entries.append({
            'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
            'title': '%s - %s' % (title, n.tag),
            'url': compat_urlparse.urljoin(url, url_n.text),
            'duration': duration,
        })

    return {
        '_type': 'playlist',
        'entries': entries,
        'title': title,
    }
1157
9b122384 1158 def _real_extract(self, url):
ebd3c7b3
PH
1159 if url.startswith('//'):
1160 return {
1161 '_type': 'url',
20991253 1162 'url': self.http_scheme() + url,
ebd3c7b3
PH
1163 }
1164
a7130543
JMF
1165 parsed_url = compat_urlparse.urlparse(url)
1166 if not parsed_url.scheme:
04b4d394
PH
1167 default_search = self._downloader.params.get('default_search')
1168 if default_search is None:
1f7ccb90 1169 default_search = 'fixup_error'
04b4d394 1170
1f7ccb90 1171 if default_search in ('auto', 'auto_warning', 'fixup_error'):
04b4d394
PH
1172 if '/' in url:
1173 self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1174 return self.url_result('http://' + url)
1f7ccb90 1175 elif default_search != 'fixup_error':
9c1fc022 1176 if default_search == 'auto_warning':
0e67ab0d
PH
1177 if re.match(r'^(?:url|URL)$', url):
1178 raise ExtractorError(
1179 'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
1180 expected=True)
1181 else:
1182 self._downloader.report_warning(
7571c02c 1183 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
04b4d394 1184 return self.url_result('ytsearch:' + url)
1f7ccb90
PH
1185
1186 if default_search in ('error', 'fixup_error'):
7571c02c 1187 raise ExtractorError(
b74e86f4
PH
1188 '%r is not a valid URL. '
1189 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
1190 % (url, url), expected=True)
04b4d394 1191 else:
f2f2c0c2
PH
1192 if ':' not in default_search:
1193 default_search += ':'
04b4d394 1194 return self.url_result(default_search + url)
4d54ef20
PH
1195
1196 url, smuggled_data = unsmuggle_url(url)
1197 force_videoid = None
d6e6a422 1198 is_intentional = smuggled_data and smuggled_data.get('to_generic')
4d54ef20
PH
1199 if smuggled_data and 'force_videoid' in smuggled_data:
1200 force_videoid = smuggled_data['force_videoid']
1201 video_id = force_videoid
1202 else:
1ddb9456 1203 video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
a7130543 1204
79649588 1205 self.to_screen('%s: Requesting header' % video_id)
c1d1facd 1206
ebab4520 1207 head_req = HEADRequest(url)
23be51d8 1208 head_response = self._request_webpage(
ebab4520
PH
1209 head_req, video_id,
1210 note=False, errnote='Could not send HEAD request to %s' % url,
1211 fatal=False)
42393ce2 1212
23be51d8 1213 if head_response is not False:
42393ce2 1214 # Check for redirect
23be51d8 1215 new_url = head_response.geturl()
42393ce2
PH
1216 if url != new_url:
1217 self.report_following_redirect(new_url)
4d54ef20
PH
1218 if force_videoid:
1219 new_url = smuggle_url(
1220 new_url, {'force_videoid': force_videoid})
cecaaf3f 1221 return self.url_result(new_url)
42393ce2 1222
23be51d8
PH
1223 full_response = None
1224 if head_response is False:
5c2266df 1225 request = sanitized_Request(url)
58bde34a
S
1226 request.add_header('Accept-Encoding', '*')
1227 full_response = self._request_webpage(request, video_id)
23be51d8
PH
1228 head_response = full_response
1229
1230 # Check for direct link to a video
1231 content_type = head_response.headers.get('Content-Type', '')
1232 m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
1233 if m:
1234 upload_date = unified_strdate(
1235 head_response.headers.get('Last-Modified'))
1236 return {
1237 'id': video_id,
1ddb9456 1238 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
ccdd0ffb 1239 'direct': True,
23be51d8
PH
1240 'formats': [{
1241 'format_id': m.group('format_id'),
1242 'url': url,
1243 'vcodec': 'none' if m.group('type') == 'audio' else None
1244 }],
1245 'upload_date': upload_date,
1246 }
42393ce2 1247
d6e6a422 1248 if not self._downloader.params.get('test', False) and not is_intentional:
2fece970
S
1249 force = self._downloader.params.get('force_generic_extractor', False)
1250 self._downloader.report_warning(
1251 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
d6e6a422 1252
4e262a88 1253 if not full_response:
5c2266df 1254 request = sanitized_Request(url)
58bde34a
S
1255 # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1256 # making it impossible to download only chunk of the file (yet we need only 512kB to
1257 # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1258 # that will always result in downloading the whole file that is not desirable.
1259 # Therefore for extraction pass we have to override Accept-Encoding to any in order
1260 # to accept raw bytes and being able to download only a chunk.
1261 # It may probably better to solve this by checking Content-Type for application/octet-stream
1262 # after HEAD request finishes, but not sure if we can rely on this.
1263 request.add_header('Accept-Encoding', '*')
1264 full_response = self._request_webpage(request, video_id)
4e262a88
PH
1265
1266 # Maybe it's a direct link to a video?
1267 # Be careful not to download the whole thing!
1268 first_bytes = full_response.read(512)
61ca9a80 1269 if not is_html(first_bytes):
4e262a88
PH
1270 self._downloader.report_warning(
1271 'URL could be a direct video link, returning it as such.')
1272 upload_date = unified_strdate(
1273 head_response.headers.get('Last-Modified'))
1274 return {
1275 'id': video_id,
1ddb9456 1276 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
4e262a88
PH
1277 'direct': True,
1278 'url': url,
1279 'upload_date': upload_date,
1280 }
1281
1282 webpage = self._webpage_read_content(
1283 full_response, url, video_id, prefix=first_bytes)
1284
9b122384 1285 self.report_extraction(video_id)
887c6acd 1286
0791ac1b 1287 # Is it an RSS feed, a SMIL file or a XSPF playlist?
4fc946b5 1288 try:
f7854627 1289 doc = compat_etree_fromstring(webpage.encode('utf-8'))
4fc946b5
PH
1290 if doc.tag == 'rss':
1291 return self._extract_rss(url, video_id, doc)
e5e8d20a
S
1292 elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1293 return self._parse_smil(doc, url, video_id)
729accb4
S
1294 elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1295 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
f7300c5c 1296 except compat_xml_parse_error:
4fc946b5
PH
1297 pass
1298
c8e9a235
PH
1299 # Is it a Camtasia project?
1300 camtasia_res = self._extract_camtasia(url, video_id, webpage)
1301 if camtasia_res is not None:
1302 return camtasia_res
1303
14390730
S
1304 # Sometimes embedded video player is hidden behind percent encoding
1305 # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1306 # Unescaping the whole page allows to handle those cases in a generic way
45eedbe5 1307 webpage = compat_urllib_parse_unquote(webpage)
1f7659db 1308
887c6acd
PH
1309 # it's tempting to parse this further, but you would
1310 # have to take into account all the variations like
1311 # Video Title - Site Name
1312 # Site Name | Video Title
1313 # Video Title - Tagline | Site Name
1314 # and so on and so forth; it's just not practical
ef4fd848 1315 video_title = self._html_search_regex(
79649588
PH
1316 r'(?s)<title>(.*?)</title>', webpage, 'video title',
1317 default='video')
ef4fd848 1318
4d805e06
PH
1319 # Try to detect age limit automatically
1320 age_limit = self._rta_search(webpage)
1321 # And then there are the jokers who advertise that they use RTA,
1322 # but actually don't.
1323 AGE_LIMIT_MARKERS = [
1324 r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1325 ]
1326 if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1327 age_limit = 18
1328
ef4fd848
PH
1329 # video uploader is domain name
1330 video_uploader = self._search_regex(
79649588 1331 r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
887c6acd 1332
ed2d6a19 1333 # Helper method
83992676 1334 def _playlist_from_matches(matches, getter=None, ie=None):
3b2f933b 1335 urlrs = orderedSet(
83992676 1336 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
3b2f933b 1337 for m in matches)
ed2d6a19
PH
1338 return self.playlist_result(
1339 urlrs, playlist_id=video_id, playlist_title=video_title)
1340
1f4b722b 1341 # Look for Brightcove Legacy Studio embeds
4fcaa4f4 1342 bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
99877772 1343 if bc_urls:
79649588 1344 self.to_screen('Brightcove video detected.')
99877772
PH
1345 entries = [{
1346 '_type': 'url',
1347 'url': smuggle_url(bc_url, {'Referer': url}),
3b7d9aa4 1348 'ie_key': 'BrightcoveLegacy'
99877772
PH
1349 } for bc_url in bc_urls]
1350
1351 return {
1352 '_type': 'playlist',
1353 'title': video_title,
1354 'id': video_id,
1355 'entries': entries,
1356 }
cfe50f04 1357
f6519f89
S
1358 # Look for Brightcove New Studio embeds
1359 bc_urls = BrightcoveNewIE._extract_urls(webpage)
1360 if bc_urls:
1361 return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
ed126900 1362
59b8ab58
PH
1363 # Look for embedded rtl.nl player
1364 matches = re.findall(
97b570a9 1365 r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
59b8ab58
PH
1366 webpage)
1367 if matches:
1368 return _playlist_from_matches(matches, ie='RtlNl')
1369
b407e173
YCH
1370 vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1371 if vimeo_url is not None:
1372 return self.url_result(vimeo_url)
7115ca84 1373
a1b85269
YCH
1374 vid_me_embed_url = self._search_regex(
1375 r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1376 webpage, 'vid.me embed', default=None)
1377 if vid_me_embed_url is not None:
1378 return self.url_result(vid_me_embed_url, 'Vidme')
1379
53c1d3ef 1380 # Look for embedded YouTube player
1f9da904 1381 matches = re.findall(r'''(?x)
2b88feed
PH
1382 (?:
1383 <iframe[^>]+?src=|
c71dfccc 1384 data-video-url=|
2b88feed 1385 <embed[^>]+?src=|
a7e97f6d
PH
1386 embedSWF\(?:\s*|
1387 new\s+SWFObject\(
2b88feed
PH
1388 )
1389 (["\'])
1bf5423e 1390 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
6b08cdf6 1391 (?:embed|v|p)/.+?)
1f9da904 1392 \1''', webpage)
887c6acd 1393 if matches:
ed2d6a19 1394 return _playlist_from_matches(
3b2f933b 1395 matches, lambda m: unescapeHTML(m[1]))
53c1d3ef 1396
65f3a228
PH
1397 # Look for lazyYT YouTube embed
1398 matches = re.findall(
1399 r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1400 if matches:
1401 return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1402
355e4fd0
PH
1403 # Look for embedded Dailymotion player
1404 matches = re.findall(
1fa2b984 1405 r'<(?:embed|iframe)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
355e4fd0 1406 if matches:
ed2d6a19
PH
1407 return _playlist_from_matches(
1408 matches, lambda m: unescapeHTML(m[1]))
355e4fd0 1409
8489578d
NJ
1410 # Look for embedded Dailymotion playlist player (#3822)
1411 m = re.search(
1412 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1413 if m:
1414 playlists = re.findall(
1415 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1416 if playlists:
1417 return _playlist_from_matches(
1418 playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1419
ef4fd848
PH
1420 # Look for embedded Wistia player
1421 match = re.search(
281d3f1d 1422 r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
ef4fd848 1423 if match:
9471c444
NJ
1424 embed_url = self._proto_relative_url(
1425 unescapeHTML(match.group('url')))
ef4fd848
PH
1426 return {
1427 '_type': 'url_transparent',
9471c444 1428 'url': embed_url,
ef4fd848
PH
1429 'ie_key': 'Wistia',
1430 'uploader': video_uploader,
1431 'title': video_title,
1432 'id': video_id,
1433 }
5f6a1245 1434
9471c444 1435 match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
746c67d7
NJ
1436 if match:
1437 return {
1438 '_type': 'url_transparent',
1439 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1440 'ie_key': 'Wistia',
1441 'uploader': video_uploader,
1442 'title': video_title,
1443 'id': match.group('id')
1444 }
ef4fd848 1445
bab19a8e
S
1446 # Look for SVT player
1447 svt_url = SVTIE._extract_url(webpage)
1448 if svt_url:
1449 return self.url_result(svt_url, 'SVT')
1450
fa35cdad
PH
1451 # Look for embedded condenast player
1452 matches = re.findall(
1453 r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1454 webpage)
1455 if matches:
1456 return {
1457 '_type': 'playlist',
1458 'entries': [{
1459 '_type': 'url',
1460 'ie_key': 'CondeNast',
1461 'url': ma,
1462 } for ma in matches],
1463 'title': video_title,
1464 'id': video_id,
1465 }
1466
c19f7764
JMF
1467 # Look for Bandcamp pages with custom domain
1468 mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1469 if mobj is not None:
1470 burl = unescapeHTML(mobj.group(1))
09804265
JMF
1471 # Don't set the extractor because it can be a track url or an album
1472 return self.url_result(burl)
c19f7764 1473
f25571ff
PH
1474 # Look for embedded Vevo player
1475 mobj = re.search(
1476 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1477 if mobj is not None:
1478 return self.url_result(mobj.group('url'))
796df3c6
S
1479
1480 # Look for embedded Viddler player
cb454b33
S
1481 mobj = re.search(
1482 r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1483 webpage)
796df3c6
S
1484 if mobj is not None:
1485 return self.url_result(mobj.group('url'))
f25571ff 1486
3378d67a
S
1487 # Look for NYTimes player
1488 mobj = re.search(
1489 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1490 webpage)
1491 if mobj is not None:
1492 return self.url_result(mobj.group('url'))
1493
cefdf970
S
1494 # Look for Libsyn player
1495 mobj = re.search(
1496 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1497 if mobj is not None:
1498 return self.url_result(mobj.group('url'))
1499
c0d0b01f 1500 # Look for Ooyala videos
8a37aa15 1501 mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
f076b638 1502 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
edfcf7ab
YCH
1503 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1504 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
c0d0b01f 1505 if mobj is not None:
cce9d15d 1506 return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
c0d0b01f 1507
f076b638 1508 # Look for multiple Ooyala embeds on SBN network websites
1509 mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1510 if mobj is not None:
1511 embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1512 if embeds:
1513 return _playlist_from_matches(
cce9d15d 1514 embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
f076b638 1515
aa94a6d3 1516 # Look for Aparat videos
48099643 1517 mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
aa94a6d3
PH
1518 if mobj is not None:
1519 return self.url_result(mobj.group(1), 'Aparat')
1520
c93c2ab1 1521 # Look for MPORA videos
c3f51436 1522 mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
c93c2ab1
PH
1523 if mobj is not None:
1524 return self.url_result(mobj.group(1), 'Mpora')
5f59ee79 1525
15c0e8e7 1526 # Look for embedded NovaMov-based player
8f89e687 1527 mobj = re.search(
8dfa187b 1528 r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
15c0e8e7
S
1529 (?P<url>http://(?:(?:embed|www)\.)?
1530 (?:novamov\.com|
1531 nowvideo\.(?:ch|sx|eu|at|ag|co)|
1532 videoweed\.(?:es|com)|
1533 movshare\.(?:net|sx|ag)|
1534 divxstage\.(?:eu|net|ch|co|at|ag))
1535 /embed\.php.+?)\1''', webpage)
8f89e687 1536 if mobj is not None:
15c0e8e7 1537 return self.url_result(mobj.group('url'))
50f56607 1538
9834872b
PH
1539 # Look for embedded Facebook player
1540 mobj = re.search(
db1f3888 1541 r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
9834872b
PH
1542 if mobj is not None:
1543 return self.url_result(mobj.group('url'), 'Facebook')
1544
ca97a56e
S
1545 # Look for embedded VK player
1546 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1547 if mobj is not None:
1548 return self.url_result(mobj.group('url'), 'VK')
1549
0364fa8b
S
1550 # Look for embedded ivi player
1551 mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1552 if mobj is not None:
1553 return self.url_result(mobj.group('url'), 'Ivi')
1554
db1f3888
PH
1555 # Look for embedded Huffington Post player
1556 mobj = re.search(
c3f51436 1557 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
db1f3888
PH
1558 if mobj is not None:
1559 return self.url_result(mobj.group('url'), 'HuffPost')
1560
1b86cc41 1561 # Look for embed.ly
1562 mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1563 if mobj is not None:
1564 return self.url_result(mobj.group('url'))
1565 mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1566 if mobj is not None:
f7e6f7fa 1567 return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1b86cc41 1568
60cc4dc4
PH
1569 # Look for funnyordie embed
1570 matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1571 if matches:
ed2d6a19
PH
1572 return _playlist_from_matches(
1573 matches, getter=unescapeHTML, ie='FunnyOrDie')
60cc4dc4 1574
db546cf8
S
1575 # Look for BBC iPlayer embed
1576 matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1577 if matches:
476eae0c 1578 return _playlist_from_matches(matches, ie='BBCCoUk')
db546cf8 1579
93d020dd
S
1580 # Look for embedded RUTV player
1581 rutv_url = RUTVIE._extract_url(webpage)
1582 if rutv_url:
1583 return self.url_result(rutv_url, 'RUTV')
1584
494f20cb 1585 # Look for embedded TVC player
b8599718
S
1586 tvc_url = TVCIE._extract_url(webpage)
1587 if tvc_url:
1588 return self.url_result(tvc_url, 'TVC')
494f20cb 1589
d40a3b5b
S
1590 # Look for embedded SportBox player
1591 sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1592 if sportbox_urls:
1593 return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1594
78e2b74b 1595 # Look for embedded PornHub player
65d161c4
S
1596 pornhub_url = PornHubIE._extract_url(webpage)
1597 if pornhub_url:
1598 return self.url_result(pornhub_url, 'PornHub')
1599
2bb5b6d0
S
1600 # Look for embedded XHamster player
1601 xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1602 if xhamster_urls:
1603 return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1604
9872d311
S
1605 # Look for embedded Tvigle player
1606 mobj = re.search(
1607 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1608 if mobj is not None:
1609 return self.url_result(mobj.group('url'), 'Tvigle')
1610
7e2ede98
JMF
1611 # Look for embedded TED player
1612 mobj = re.search(
d7cc31b6 1613 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
7e2ede98
JMF
1614 if mobj is not None:
1615 return self.url_result(mobj.group('url'), 'TED')
1616
5c386252 1617 # Look for embedded Ustream videos
1618 mobj = re.search(
1619 r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1620 if mobj is not None:
1621 return self.url_result(mobj.group('url'), 'Ustream')
1622
893f8832
PH
1623 # Look for embedded arte.tv player
1624 mobj = re.search(
1625 r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1626 webpage)
1627 if mobj is not None:
1628 return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1629
cbd55ade
S
1630 # Look for embedded francetv player
1631 mobj = re.search(
1632 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1633 webpage)
1634 if mobj is not None:
1635 return self.url_result(mobj.group('url'))
1636
cb3ac1c6
S
1637 # Look for embedded smotri.com player
1638 smotri_url = SmotriIE._extract_url(webpage)
1639 if smotri_url:
1640 return self.url_result(smotri_url, 'Smotri')
1641
e6c2d9ad 1642 # Look for embedded Myvi.ru player
6dd94d3a 1643 myvi_url = MyviIE._extract_url(webpage)
e6c2d9ad
S
1644 if myvi_url:
1645 return self.url_result(myvi_url)
1646
dfb1b146 1647 # Look for embedded soundcloud player
20991253 1648 mobj = re.search(
ac645ac7 1649 r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
20991253
PH
1650 webpage)
1651 if mobj is not None:
1652 url = unescapeHTML(mobj.group('url'))
1653 return self.url_result(url)
1654
826ec77f
PH
1655 # Look for embedded vulture.com player
1656 mobj = re.search(
1657 r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1658 webpage)
1659 if mobj is not None:
1660 url = unescapeHTML(mobj.group('url'))
1661 return self.url_result(url, ie='Vulture')
1662
c5cd249e 1663 # Look for embedded mtvservices player
46fde8a1
S
1664 mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1665 if mtvservices_url:
1666 return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
c5cd249e 1667
49807b4a
S
1668 # Look for embedded yahoo player
1669 mobj = re.search(
1670 r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1671 webpage)
1672 if mobj is not None:
1673 return self.url_result(mobj.group('url'), 'Yahoo')
1674
2ef6fcb5
PH
1675 # Look for embedded sbs.com.au player
1676 mobj = re.search(
e98b8e79
PH
1677 r'''(?x)
1678 (?:
1679 <meta\s+property="og:video"\s+content=|
1680 <iframe[^>]+?src=
1681 )
1682 (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2ef6fcb5
PH
1683 webpage)
1684 if mobj is not None:
1685 return self.url_result(mobj.group('url'), 'SBS')
1686
42bdd9d0
PH
1687 # Look for embedded Cinchcast player
1688 mobj = re.search(
1689 r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1690 webpage)
1691 if mobj is not None:
1692 return self.url_result(mobj.group('url'), 'Cinchcast')
1693
1a94ff68 1694 mobj = re.search(
5263cdfc 1695 r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1a94ff68 1696 webpage)
8001607e
YCH
1697 if not mobj:
1698 mobj = re.search(
1699 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1700 webpage)
1a94ff68
S
1701 if mobj is not None:
1702 return self.url_result(mobj.group('url'), 'MLB')
1703
1419fafd 1704 mobj = re.search(
dd467d33 1705 r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1419fafd
S
1706 webpage)
1707 if mobj is not None:
1708 return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1709
af63fed7
PH
1710 mobj = re.search(
1711 r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1712 webpage)
1713 if mobj is not None:
1714 return self.url_result(mobj.group('url'), 'Livestream')
1715
255fca5e
S
1716 # Look for Zapiks embed
1717 mobj = re.search(
1718 r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1719 if mobj is not None:
1720 return self.url_result(mobj.group('url'), 'Zapiks')
1721
e3216b82 1722 # Look for Kaltura embeds
6a5d6de1 1723 mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
1c31a5b0 1724 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
e3216b82 1725 if mobj is not None:
5b5fae5f
S
1726 return self.url_result(smuggle_url(
1727 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
1728 {'source_url': url}), 'Kaltura')
e3216b82 1729
135c9c42
S
1730 # Look for Eagle.Platform embeds
1731 mobj = re.search(
1732 r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1733 if mobj is not None:
1734 return self.url_result(mobj.group('url'), 'EaglePlatform')
1735
d47ae7f6
S
1736 # Look for ClipYou (uses Eagle.Platform) embeds
1737 mobj = re.search(
1738 r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1739 if mobj is not None:
1740 return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1741
f8388757 1742 # Look for Pladform embeds
45dad7ba
S
1743 pladform_url = PladformIE._extract_url(webpage)
1744 if pladform_url:
1745 return self.url_result(pladform_url)
f8388757 1746
ff18735c
S
1747 # Look for Videomore embeds
1748 videomore_url = VideomoreIE._extract_url(webpage)
1749 if videomore_url:
1750 return self.url_result(videomore_url)
1751
2dcc114f
S
1752 # Look for Playwire embeds
1753 mobj = re.search(
1754 r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1755 if mobj is not None:
1756 return self.url_result(mobj.group('url'))
1757
ad320e9b
NJ
1758 # Look for 5min embeds
1759 mobj = re.search(
1760 r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1761 if mobj is not None:
1762 return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1763
18153f1b
S
1764 # Look for Crooks and Liars embeds
1765 mobj = re.search(
1766 r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1767 if mobj is not None:
1768 return self.url_result(mobj.group('url'))
1769
a2edf2e7
YCH
1770 # Look for NBC Sports VPlayer embeds
1771 nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1772 if nbc_sports_url:
1773 return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1774
653789af 1775 # Look for Google Drive embeds
5b251628 1776 google_drive_url = GoogleDriveIE._extract_url(webpage)
653789af 1777 if google_drive_url:
1778 return self.url_result(google_drive_url, 'GoogleDrive')
1779
418c5cc3
YCH
1780 # Look for UDN embeds
1781 mobj = re.search(
c39fd7b1 1782 r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
418c5cc3
YCH
1783 if mobj is not None:
1784 return self.url_result(
0a160363 1785 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
418c5cc3 1786
2fe1b5bd
YCH
1787 # Look for Senate ISVP iframe
1788 senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1789 if senate_isvp_url:
25c3a734 1790 return self.url_result(senate_isvp_url, 'SenateISVP')
2fe1b5bd 1791
756f574e
YCH
1792 # Look for Dailymotion Cloud videos
1793 dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1794 if dmcloud_url:
1795 return self.url_result(dmcloud_url, 'DailymotionCloud')
1796
1ac1c4c2
S
1797 # Look for OnionStudios embeds
1798 onionstudios_url = OnionStudiosIE._extract_url(webpage)
1799 if onionstudios_url:
1800 return self.url_result(onionstudios_url)
1801
eedd20ef
S
1802 # Look for SnagFilms embeds
1803 snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1804 if snagfilms_url:
1805 return self.url_result(snagfilms_url)
1806
7cb09524 1807 # Look for JWPlatform embeds
1808 jwplatform_url = JWPlatformIE._extract_url(webpage)
1809 if jwplatform_url:
1810 return self.url_result(jwplatform_url, 'JWPlatform')
1811
8ca31a0e 1812 # Look for ScreenwaveMedia embeds
efd712c6 1813 mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
8ca31a0e 1814 if mobj is not None:
efd712c6 1815 return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
8ca31a0e 1816
aecfcd4e
S
1817 # Look for Digiteka embeds
1818 digiteka_url = DigitekaIE._extract_url(webpage)
1819 if digiteka_url:
1820 return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
6aeba407 1821
a5158f38
YCH
1822 # Look for AdobeTVVideo embeds
1823 mobj = re.search(
1824 r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1825 webpage)
1826 if mobj is not None:
1827 return self.url_result(
1828 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1829 'AdobeTVVideo')
1830
def check_video(vurl):
    """Tell whether a candidate URL should be kept as a media link.

    Any URL that YoutubeIE reports as suitable is accepted outright.
    Otherwise the URL's path component must contain a dot and its
    extension must not be one of the rejected ones listed below.
    """
    # Extensions that disqualify a candidate (Flash, image and
    # subtitle-style file types).
    rejected_exts = ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
    if not YoutubeIE.suitable(vurl):
        url_path = compat_urlparse.urlparse(vurl).path
        extension = determine_ext(url_path)
        if '.' not in url_path:
            return False
        return extension not in rejected_exts
    return True
1837
def filter_video(urls):
    """Return a list of the entries of urls that check_video accepts, in their original order."""
    return [candidate for candidate in urls if check_video(candidate)]
1840
9b122384 1841 # Start with something easy: JW Player in SWFObject
ced659bb 1842 found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
b30b8698 1843 if not found:
d981cef6 1844 # Look for gorilla-vid style embedding
ced659bb 1845 found = filter_video(re.findall(r'''(?sx)
c0292e8a
PH
1846 (?:
1847 jw_plugins|
1848 JWPlayerOptions|
1849 jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1850 )
a0f71985
PH
1851 .*?
1852 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
b30b8698 1853 if not found:
9b122384 1854 # Broaden the search a little bit
ced659bb 1855 found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
b30b8698
PH
1856 if not found:
1857 # Broaden the findall a little bit: JWPlayer JS loader
ced659bb 1858 found = filter_video(re.findall(
54a9328b 1859 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
4d805e06
PH
1860 if not found:
1861 # Flow player
ced659bb 1862 found = filter_video(re.findall(r'''(?xs)
4d805e06
PH
1863 flowplayer\("[^"]+",\s*
1864 \{[^}]+?\}\s*,
52585fd6 1865 \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
4d805e06 1866 ["']?url["']?\s*:\s*["']([^"']+)["']
ced659bb 1867 ''', webpage))
501f13fb
PH
1868 if not found:
1869 # Cinerama player
1870 found = re.findall(
1871 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
b30b8698 1872 if not found:
9b122384 1873 # Try to find twitter cards info
ced659bb
S
1874 found = filter_video(re.findall(
1875 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
b30b8698 1876 if not found:
9b122384
PH
1877 # We look for Open Graph info:
1878 # We have to match any number of spaces between elements, since some sites try to align them (e.g. statigr.am)
b30b8698 1879 m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
9b122384
PH
1880 # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1881 if m_video_type is not None:
ced659bb 1882 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
b30b8698 1883 if not found:
7fea7156 1884 # HTML5 video
12439dd5 1885 found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
b30b8698 1886 if not found:
ed9a25dd 1887 REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
a5a45015 1888 found = re.search(
89ef304b 1889 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
ed9a25dd 1890 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
89ef304b 1891 webpage)
84f81016
S
1892 if not found:
1893 # Look also in Refresh HTTP header
1894 refresh_header = head_response.headers.get('Refresh')
1895 if refresh_header:
6c91a5a7
S
1896 # In python 2 response HTTP headers are bytestrings
1897 if sys.version_info < (3, 0) and isinstance(refresh_header, str):
1898 refresh_header = refresh_header.decode('iso-8859-1')
ed9a25dd 1899 found = re.search(REDIRECT_REGEX, refresh_header)
b30b8698 1900 if found:
b37317d8 1901 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
89ef304b
PH
1902 self.report_following_redirect(new_url)
1903 return {
1904 '_type': 'url',
1905 'url': new_url,
1906 }
b30b8698 1907 if not found:
416c7fcb 1908 raise UnsupportedError(url)
9b122384 1909
b30b8698
PH
1910 entries = []
1911 for video_url in found:
6cc37c69 1912 video_url = video_url.replace('\\/', '/')
b30b8698 1913 video_url = compat_urlparse.urljoin(url, video_url)
f7e6f7fa 1914 video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
9b122384 1915
b30b8698
PH
1916 # Sometimes, jwplayer extraction will result in a YouTube URL
1917 if YoutubeIE.suitable(video_url):
1918 entries.append(self.url_result(video_url, 'Youtube'))
1919 continue
9b122384 1920
b30b8698
PH
1921 # here's a fun little line of code for you:
1922 video_id = os.path.splitext(video_id)[0]
fc9713a1 1923
28602e74
YCH
1924 entry_info_dict = {
1925 'id': video_id,
1926 'uploader': video_uploader,
1927 'title': video_title,
1928 'age_limit': age_limit,
1929 }
1930
729accb4
S
1931 ext = determine_ext(video_url)
1932 if ext == 'smil':
28602e74 1933 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
729accb4
S
1934 elif ext == 'xspf':
1935 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
750b9ff0
YCH
1936 elif ext == 'm3u8':
1937 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
d6fd958c 1938 else:
28602e74
YCH
1939 entry_info_dict['url'] = video_url
1940
1941 entries.append(entry_info_dict)
b30b8698
PH
1942
1943 if len(entries) == 1:
669f0e7c 1944 return entries[0]
b30b8698
PH
1945 else:
1946 for num, e in enumerate(entries, start=1):
13d8fbef
JMF
1947 # 'url' results don't have a title
1948 if e.get('title') is not None:
1949 e['title'] = '%s (%d)' % (e['title'], num)
b30b8698
PH
1950 return {
1951 '_type': 'playlist',
1952 'entries': entries,
1953 }