jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/generic.py
[mdr] PEP 8
[yt-dlp.git] / youtube_dl / extractor / generic.py
CommitLineData
cfe50f04
JMF
1# encoding: utf-8
2
79649588
PH
3from __future__ import unicode_literals
4
9b122384
PH
5import os
6import re
6c91a5a7 7import sys
9b122384
PH
8
9from .common import InfoExtractor
fc9713a1 10from .youtube import YoutubeIE
8c25f81b 11from ..compat import (
1ddb9456
S
12 compat_urllib_parse_unquote,
13 compat_urllib_request,
a5caba1e 14 compat_urlparse,
f7300c5c 15 compat_xml_parse_error,
8c25f81b
PH
16)
17from ..utils import (
b759a0d4 18 determine_ext,
9b122384 19 ExtractorError,
c8e9a235 20 float_or_none,
aa94a6d3 21 HEADRequest,
61ca9a80 22 is_html,
ed2d6a19 23 orderedSet,
bcf89ce6 24 parse_xml,
9d4660ca
PH
25 smuggle_url,
26 unescapeHTML,
42393ce2 27 unified_strdate,
4d54ef20 28 unsmuggle_url,
416c7fcb 29 UnsupportedError,
42393ce2 30 url_basename,
76c73715 31 xpath_text,
9b122384 32)
cfe50f04 33from .brightcove import BrightcoveIE
a2edf2e7 34from .nbc import NBCSportsVPlayerIE
c0d0b01f 35from .ooyala import OoyalaIE
93d020dd 36from .rutv import RUTVIE
954c1d05 37from .tvc import TVCIE
d40a3b5b 38from .sportbox import SportBoxEmbedIE
cb3ac1c6 39from .smotri import SmotriIE
6dd94d3a 40from .myvi import MyviIE
1419fafd 41from .condenast import CondeNastIE
418c5cc3 42from .udn import UDNEmbedIE
2fe1b5bd 43from .senateisvp import SenateISVPIE
0954cd8a 44from .bliptv import BlipTVIE
bab19a8e 45from .svt import SVTIE
65d161c4 46from .pornhub import PornHubIE
2bb5b6d0 47from .xhamster import XHamsterEmbedIE
b407e173 48from .vimeo import VimeoIE
756f574e 49from .dailymotion import DailymotionCloudIE
1ac1c4c2 50from .onionstudios import OnionStudiosIE
eedd20ef 51from .snagfilms import SnagFilmsEmbedIE
efd712c6 52from .screenwavemedia import ScreenwaveMediaIE
46fde8a1 53from .mtv import MTVServicesEmbeddedIE
9b122384 54
0838239e 55
9b122384 56class GenericIE(InfoExtractor):
79649588 57 IE_DESC = 'Generic downloader that works on some sites'
9b122384 58 _VALID_URL = r'.*'
79649588 59 IE_NAME = 'generic'
cfe50f04 60 _TESTS = [
c5fa81fe
S
61 # Direct link to a video
62 {
63 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
64 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
65 'info_dict': {
66 'id': 'trailer',
67 'ext': 'mp4',
68 'title': 'trailer',
69 'upload_date': '20100513',
70 }
71 },
c5138a7c 72 # Direct link to media delivered compressed (until Accept-Encoding is *)
c5fa81fe
S
73 {
74 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
75 'md5': '128c42e68b13950268b648275386fc74',
76 'info_dict': {
77 'id': 'FictionJunction-Parallel_Hearts',
78 'ext': 'flac',
79 'title': 'FictionJunction-Parallel_Hearts',
80 'upload_date': '20140522',
81 },
82 'expected_warnings': [
83 'URL could be a direct video link, returning it as such.'
84 ]
85 },
86 # Direct download with broken HEAD
87 {
88 'url': 'http://ai-radio.org:8000/radio.opus',
89 'info_dict': {
90 'id': 'radio',
91 'ext': 'opus',
92 'title': 'radio',
93 },
94 'params': {
95 'skip_download': True, # infinite live stream
96 },
97 'expected_warnings': [
98 r'501.*Not Implemented'
99 ],
100 },
101 # Direct link with incorrect MIME type
102 {
103 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
104 'md5': '4ccbebe5f36706d85221f204d7eb5913',
105 'info_dict': {
106 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
107 'id': '5_Lennart_Poettering_-_Systemd',
108 'ext': 'webm',
109 'title': '5_Lennart_Poettering_-_Systemd',
110 'upload_date': '20141120',
111 },
112 'expected_warnings': [
113 'URL could be a direct video link, returning it as such.'
114 ]
115 },
116 # RSS feed
117 {
118 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
119 'info_dict': {
120 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
121 'title': 'Zero Punctuation',
122 'description': 're:.*groundbreaking video review series.*'
123 },
124 'playlist_mincount': 11,
125 },
126 # RSS feed with enclosure
127 {
128 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
129 'info_dict': {
130 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
131 'ext': 'm4v',
132 'upload_date': '20150228',
133 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
134 }
135 },
8765222d
S
136 # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
137 {
138 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
139 'info_dict': {
140 'id': 'smil',
141 'ext': 'mp4',
142 'title': 'Automatics, robotics and biocybernetics',
143 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
144 'formats': 'mincount:16',
145 'subtitles': 'mincount:1',
146 },
147 'params': {
148 'force_generic_extractor': True,
149 'skip_download': True,
150 },
151 },
152 # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
153 {
154 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
155 'info_dict': {
156 'id': 'hds',
157 'ext': 'flv',
158 'title': 'hds',
159 'formats': 'mincount:1',
160 },
161 'params': {
162 'skip_download': True,
163 },
164 },
165 # SMIL from https://www.restudy.dk/video/play/id/1637
166 {
167 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
168 'info_dict': {
169 'id': 'video_1637',
170 'ext': 'flv',
171 'title': 'video_1637',
172 'formats': 'mincount:3',
173 },
174 'params': {
175 'skip_download': True,
176 },
177 },
178 # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
179 {
180 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
181 'info_dict': {
182 'id': 'smil-service',
183 'ext': 'flv',
184 'title': 'smil-service',
185 'formats': 'mincount:1',
186 },
187 'params': {
188 'skip_download': True,
189 },
190 },
191 # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
192 {
193 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
194 'info_dict': {
195 'id': '4719370',
196 'ext': 'mp4',
197 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
198 'formats': 'mincount:3',
199 },
200 'params': {
201 'skip_download': True,
202 },
203 },
1de5cd3b
S
204 # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
205 {
206 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
207 'info_dict': {
208 'id': 'mZlp2ctYIUEB',
209 'ext': 'mp4',
210 'title': 'Tikibad ontruimd wegens brand',
211 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
212 'thumbnail': 're:^https?://.*\.jpg$',
213 'duration': 33,
214 },
215 'params': {
216 'skip_download': True,
217 },
218 },
c5fa81fe
S
219 # google redirect
220 {
221 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
222 'info_dict': {
223 'id': 'cmQHVoWB5FY',
224 'ext': 'mp4',
225 'upload_date': '20130224',
226 'uploader_id': 'TheVerge',
227 'description': 're:^Chris Ziegler takes a look at the\.*',
228 'uploader': 'The Verge',
229 'title': 'First Firefox OS phones side-by-side',
230 },
231 'params': {
232 'skip_download': False,
233 }
234 },
6c91a5a7
S
235 {
236 # redirect in Refresh HTTP header
237 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
238 'info_dict': {
239 'id': 'pO8h3EaFRdo',
240 'ext': 'mp4',
241 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
242 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
243 'upload_date': '20150917',
244 'uploader_id': 'brtvofficial',
245 'uploader': 'Boiler Room',
246 },
247 'params': {
248 'skip_download': False,
249 },
250 },
cfe50f04 251 {
79649588 252 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
d360a146 253 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
79649588 254 'info_dict': {
d360a146
S
255 'id': '13601338388002',
256 'ext': 'mp4',
79649588
PH
257 'uploader': 'www.hodiho.fr',
258 'title': 'R\u00e9gis plante sa Jeep',
cfe50f04
JMF
259 }
260 },
c19f7764
JMF
261 # bandcamp page with custom domain
262 {
79649588
PH
263 'add_ie': ['Bandcamp'],
264 'url': 'http://bronyrock.com/track/the-pony-mash',
79649588 265 'info_dict': {
fd50bf62
S
266 'id': '3235767654',
267 'ext': 'mp3',
79649588
PH
268 'title': 'The Pony Mash',
269 'uploader': 'M_Pallante',
c19f7764 270 },
79649588 271 'skip': 'There is a limit of 200 free downloads / month for the test song',
c19f7764 272 },
eeb165e6 273 # embedded brightcove video
dd5bcdc4
JMF
274 # it also tests brightcove videos that need to set the 'Referer' in the
275 # http requests
eeb165e6 276 {
79649588
PH
277 'add_ie': ['Brightcove'],
278 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
279 'info_dict': {
280 'id': '2765128793001',
281 'ext': 'mp4',
282 'title': 'Le cours de bourse : l’analyse technique',
283 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
284 'uploader': 'BFM BUSINESS',
eeb165e6 285 },
79649588
PH
286 'params': {
287 'skip_download': True,
eeb165e6
JMF
288 },
289 },
17ab4d3b
PH
290 {
291 # https://github.com/rg3/youtube-dl/issues/2253
292 'url': 'http://bcove.me/i6nfkrc3',
17ab4d3b
PH
293 'md5': '0ba9446db037002366bab3b3eb30c88c',
294 'info_dict': {
fd50bf62
S
295 'id': '3101154703001',
296 'ext': 'mp4',
17ab4d3b
PH
297 'title': 'Still no power',
298 'uploader': 'thestar.com',
299 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
300 },
301 'add_ie': ['Brightcove'],
302 },
0479c625
S
303 {
304 'url': 'http://www.championat.com/video/football/v/87/87499.html',
305 'md5': 'fb973ecf6e4a78a67453647444222983',
306 'info_dict': {
307 'id': '3414141473001',
308 'ext': 'mp4',
309 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
310 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
311 'uploader': 'Championat',
312 },
313 },
bdf97017 314 {
37aab278 315 # https://github.com/rg3/youtube-dl/issues/3541
bdf97017
NJ
316 'add_ie': ['Brightcove'],
317 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
318 'info_dict': {
319 'id': '3866516442001',
37aab278 320 'ext': 'mp4',
bdf97017
NJ
321 'title': 'Leer mij vrouwen kennen: Aflevering 1',
322 'description': 'Leer mij vrouwen kennen: Aflevering 1',
323 'uploader': 'SBS Broadcasting',
324 },
37aab278 325 'skip': 'Restricted to Netherlands',
bdf97017 326 'params': {
37aab278 327 'skip_download': True, # m3u8 download
bdf97017
NJ
328 },
329 },
c0d0b01f
JMF
330 # ooyala video
331 {
79649588 332 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
87830900 333 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
79649588
PH
334 'info_dict': {
335 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
336 'ext': 'mp4',
3486df38 337 'title': '2cc213299525360.mov', # that's what we get
c0d0b01f 338 },
87830900 339 'add_ie': ['Ooyala'],
c0d0b01f 340 },
bf94d763
S
341 {
342 # ooyala video embedded with http://player.ooyala.com/iframe.js
343 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
344 'info_dict': {
345 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
346 'ext': 'mp4',
347 'title': '"Steve Jobs: Man in the Machine" trailer',
348 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
349 },
350 'params': {
351 'skip_download': True,
352 },
353 },
f076b638 354 # multiple ooyala embeds on SBN network websites
355 {
356 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
357 'info_dict': {
358 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
359 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
360 },
361 'playlist_mincount': 3,
362 'params': {
363 'skip_download': True,
364 },
365 'add_ie': ['Ooyala'],
366 },
1b86cc41 367 # embed.ly video
368 {
369 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
370 'info_dict': {
371 'id': '9ODmcdjQcHQ',
372 'ext': 'mp4',
0a5bce56
PH
373 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
374 'upload_date': '20140225',
375 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
376 'uploader': 'Tested',
377 'uploader_id': 'testedcom',
1b86cc41 378 },
379 # No need to test YoutubeIE here
380 'params': {
381 'skip_download': True,
382 },
383 },
60cc4dc4
PH
384 # funnyordie embed
385 {
386 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
60cc4dc4
PH
387 'info_dict': {
388 'id': '18e820ec3f',
389 'ext': 'mp4',
390 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
391 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
93d020dd 392 },
60cc4dc4 393 },
93d020dd
S
394 # RUTV embed
395 {
396 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
397 'info_dict': {
398 'id': '776940',
399 'ext': 'mp4',
400 'title': 'Охотское море стало целиком российским',
401 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
402 },
403 'params': {
404 # m3u8 download
405 'skip_download': True,
406 },
aab74fa1 407 },
f37bdbe5
S
408 # TVC embed
409 {
410 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
411 'info_dict': {
412 'id': '55304',
413 'ext': 'mp4',
414 'title': 'Дошкольное воспитание',
415 },
416 },
b827a601
S
417 # SportBox embed
418 {
419 'url': 'http://www.vestifinance.ru/articles/25753',
420 'info_dict': {
421 'id': '25753',
422 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
423 },
424 'playlist': [{
425 'info_dict': {
426 'id': '370908',
427 'title': 'Госзаказ. День 3',
428 'ext': 'mp4',
429 }
430 }, {
431 'info_dict': {
432 'id': '370905',
433 'title': 'Госзаказ. День 2',
434 'ext': 'mp4',
435 }
436 }, {
437 'info_dict': {
438 'id': '370902',
439 'title': 'Госзаказ. День 1',
440 'ext': 'mp4',
441 }
442 }],
443 'params': {
444 # m3u8 download
445 'skip_download': True,
446 },
447 },
bf20b9c5
S
448 # Myvi.ru embed
449 {
450 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
451 'info_dict': {
452 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
453 'ext': 'mp4',
454 'title': 'Ужастики, русский трейлер (2015)',
455 'thumbnail': 're:^https?://.*\.jpg$',
456 'duration': 153,
457 }
458 },
c76799c5
S
459 # XHamster embed
460 {
461 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
462 'info_dict': {
463 'id': 'showthread',
464 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
465 },
466 'playlist_mincount': 7,
467 },
aab74fa1
PH
468 # Embedded TED video
469 {
470 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
a8eb5a8e 471 'md5': '65fdff94098e4a607385a60c5177c638',
aab74fa1 472 'info_dict': {
a8eb5a8e 473 'id': '1969',
aab74fa1 474 'ext': 'mp4',
a8eb5a8e
PH
475 'title': 'Hidden miracles of the natural world',
476 'uploader': 'Louie Schwartzberg',
477 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
aab74fa1 478 }
60cc4dc4 479 },
5c386252 480 # Embedded Ustream video
481 {
482 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
483 'md5': '27b99cdb639c9b12a79bca876a073417',
484 'info_dict': {
ca6aada4 485 'id': '45734260',
486 'ext': 'flv',
487 'uploader': 'AU SPA: The NSA and Privacy',
5c386252 488 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
489 }
490 },
d95e35d6
S
491 # nowvideo embed hidden behind percent encoding
492 {
493 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
494 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
495 'info_dict': {
496 'id': '06e53103ca9aa',
497 'ext': 'flv',
498 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
499 'description': 'No description',
500 },
0f2a2ba1 501 },
893f8832
PH
502 # arte embed
503 {
504 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
505 'md5': '7653032cbb25bf6c80d80f217055fa43',
506 'info_dict': {
507 'id': '048195-004_PLUS7-F',
508 'ext': 'flv',
509 'title': 'X:enius',
510 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
511 'upload_date': '20140320',
512 },
513 'params': {
514 'skip_download': 'Requires rtmpdump'
515 }
516 },
cbd55ade
S
517 # francetv embed
518 {
519 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
520 'info_dict': {
521 'id': 'EV_30231',
522 'ext': 'mp4',
523 'title': 'Alcaline, le concert avec Calogero',
524 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
525 'upload_date': '20150226',
526 'timestamp': 1424989860,
527 'duration': 5400,
528 },
529 'params': {
530 # m3u8 downloads
531 'skip_download': True,
532 },
533 'expected_warnings': [
534 'Forbidden'
535 ]
536 },
fa35cdad
PH
537 # Condé Nast embed
538 {
539 'url': 'http://www.wired.com/2014/04/honda-asimo/',
540 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
541 'info_dict': {
542 'id': '53501be369702d3275860000',
543 'ext': 'mp4',
544 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
545 }
ebd3c7b3
PH
546 },
547 # Dailymotion embed
548 {
549 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
550 'md5': '441aeeb82eb72c422c7f14ec533999cd',
551 'info_dict': {
552 'id': 'k2mm4bCdJ6CQ2i7c8o2',
553 'ext': 'mp4',
554 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
555 'uploader': 'Spi0n',
556 },
557 'add_ie': ['Dailymotion'],
2b88feed
PH
558 },
559 # YouTube embed
560 {
561 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
562 'info_dict': {
563 'id': 'FXRb4ykk4S0',
564 'ext': 'mp4',
565 'title': 'The NBL Auction 2014',
566 'uploader': 'BADMINTON England',
567 'uploader_id': 'BADMINTONEvents',
568 'upload_date': '20140603',
569 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
570 },
571 'add_ie': ['Youtube'],
572 'params': {
573 'skip_download': True,
574 }
575 },
c5cd249e
JMF
576 # MTVServices embed
577 {
578 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
579 'md5': '35727f82f58c76d996fc188f9755b0d5',
580 'info_dict': {
581 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
582 'ext': 'mp4',
583 'title': 'Review',
584 'description': 'Mario\'s life in the fast lane has never looked so good.',
585 },
586 },
61013473 587 # YouTube embed via <data-embed-url="">
588 {
589 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
61013473 590 'info_dict': {
a8eb5a8e 591 'id': '4vAffPZIT44',
61013473 592 'ext': 'mp4',
a8eb5a8e 593 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
ed2d6a19
PH
594 'uploader': 'Gameloft',
595 'uploader_id': 'gameloft',
a8eb5a8e
PH
596 'upload_date': '20140828',
597 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
ed2d6a19
PH
598 },
599 'params': {
600 'skip_download': True,
61013473 601 }
c8e9a235
PH
602 },
603 # Camtasia studio
604 {
605 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
606 'playlist': [{
607 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
608 'info_dict': {
609 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
610 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
611 'ext': 'flv',
612 'duration': 2235.90,
613 }
614 }, {
615 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
616 'info_dict': {
617 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
618 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
619 'ext': 'flv',
620 'duration': 2235.93,
621 }
622 }],
623 'info_dict': {
624 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
625 }
4d805e06
PH
626 },
627 # Flowplayer
628 {
629 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
630 'md5': '9d65602bf31c6e20014319c7d07fba27',
631 'info_dict': {
632 'id': '5123ea6d5e5a7',
633 'ext': 'mp4',
634 'age_limit': 18,
635 'uploader': 'www.handjobhub.com',
d6d9186f 636 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
4d805e06 637 }
0990305d 638 },
22a6f150
PH
639 # Multiple brightcove videos
640 # https://github.com/rg3/youtube-dl/issues/2283
641 {
642 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
643 'info_dict': {
644 'id': 'always-never',
645 'title': 'Always / Never - The New Yorker',
646 },
647 'playlist_count': 3,
648 'params': {
649 'extract_flat': False,
650 'skip_download': True,
651 }
1a94ff68
S
652 },
653 # MLB embed
654 {
655 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
656 'md5': '96f09a37e44da40dd083e12d9a683327',
657 'info_dict': {
658 'id': '33322633',
659 'ext': 'mp4',
660 'title': 'Ump changes call to ball',
661 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
662 'duration': 48,
663 'timestamp': 1401537900,
664 'upload_date': '20140531',
665 'thumbnail': 're:^https?://.*\.jpg$',
666 },
667 },
746c67d7
NJ
668 # Wistia embed
669 {
670 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
671 'md5': '8788b683c777a5cf25621eaf286d0c23',
672 'info_dict': {
673 'id': '1cfaf6b7ea',
674 'ext': 'mov',
675 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
676 'duration': 643.0,
677 'filesize': 182808282,
678 'uploader': 'education-portal.com',
679 },
680 },
52cffcb1 681 {
682 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
683 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
684 'info_dict': {
685 'id': 'uxjb0lwrcz',
686 'ext': 'mp4',
85d7b765 687 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
52cffcb1 688 'duration': 1715.0,
85d7b765 689 'uploader': 'thoughtworks.wistia.com',
70b7e3fb 690 },
52cffcb1 691 },
ac645ac7
PH
692 # Soundcloud embed
693 {
694 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
695 'info_dict': {
696 'id': '174391317',
697 'ext': 'mp3',
698 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
699 'uploader': 'Sophos Security',
700 'title': 'Chet Chat 171 - Oct 29, 2014',
701 'upload_date': '20141029',
702 }
af63fed7
PH
703 },
704 # Livestream embed
705 {
706 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
707 'info_dict': {
708 'id': '67864563',
709 'ext': 'flv',
710 'upload_date': '20141112',
711 'title': 'Rosetta #CometLanding webcast HL 10',
712 }
713 },
65f3a228
PH
714 # LazyYT
715 {
716 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
717 'info_dict': {
11e611a7 718 'id': '1986',
65f3a228
PH
719 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
720 },
721 'playlist_mincount': 2,
4e262a88 722 },
42bdd9d0
PH
723 # Cinchcast embed
724 {
725 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
726 'info_dict': {
727 'id': '7141703',
728 'ext': 'mp3',
729 'upload_date': '20141126',
730 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
731 }
732 },
501f13fb
PH
733 # Cinerama player
734 {
735 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
736 'info_dict': {
737 'id': '730m_DandD_1901_512k',
738 'ext': 'mp4',
739 'uploader': 'www.abc.net.au',
740 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
741 }
796df3c6
S
742 },
743 # embedded viddler video
744 {
745 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
746 'info_dict': {
747 'id': '4d03aad9',
748 'ext': 'mp4',
749 'uploader': 'deadspin',
750 'title': 'WALL-TO-GORTAT',
751 'timestamp': 1422285291,
752 'upload_date': '20150126',
753 },
754 'add_ie': ['Viddler'],
a0f71985 755 },
2051acde
S
756 # Libsyn embed
757 {
758 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
759 'info_dict': {
760 'id': '3377616',
761 'ext': 'mp3',
762 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
763 'description': 'md5:601cb790edd05908957dae8aaa866465',
764 'upload_date': '20150220',
765 },
766 },
a0f71985
PH
767 # jwplayer YouTube
768 {
769 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
770 'info_dict': {
771 'id': 'Mrj4DVp2zeA',
772 'ext': 'mp4',
f37e3f99 773 'upload_date': '20150212',
a0f71985
PH
774 'uploader': 'The National Archives UK',
775 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
776 'uploader_id': 'NationalArchives08',
777 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
778 },
59b8ab58
PH
779 },
780 # rtl.nl embed
781 {
782 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
783 'playlist_mincount': 5,
784 'info_dict': {
785 'id': 'aanslagen-kopenhagen',
786 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
787 }
255fca5e
S
788 },
789 # Zapiks embed
790 {
791 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
792 'info_dict': {
793 'id': '118046',
794 'ext': 'mp4',
795 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
796 }
797 },
e3216b82
NJ
798 # Kaltura embed
799 {
800 'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
801 'info_dict': {
802 'id': '1_eergr3h1',
803 'ext': 'mp4',
804 'upload_date': '20150226',
805 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
806 'timestamp': int,
807 'title': 'John Carlson Postgame 2/25/15',
808 },
809 },
66e568de
S
810 # Kaltura embed (different embed code)
811 {
812 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
813 'info_dict': {
814 'id': '1_a52wc67y',
815 'ext': 'flv',
816 'upload_date': '20150127',
817 'uploader_id': 'PremierMedia',
818 'timestamp': int,
819 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
820 },
821 },
135c9c42
S
822 # Eagle.Platform embed (generic URL)
823 {
824 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
825 'info_dict': {
826 'id': '227304',
827 'ext': 'mp4',
828 'title': 'Навальный вышел на свободу',
829 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
830 'thumbnail': 're:^https?://.*\.jpg$',
831 'duration': 87,
832 'view_count': int,
833 'age_limit': 0,
834 },
835 },
d47ae7f6
S
836 # ClipYou (Eagle.Platform) embed (custom URL)
837 {
838 'url': 'http://muz-tv.ru/play/7129/',
839 'info_dict': {
840 'id': '12820',
841 'ext': 'mp4',
842 'title': "'O Sole Mio",
843 'thumbnail': 're:^https?://.*\.jpg$',
844 'duration': 216,
845 'view_count': int,
846 },
847 },
f8388757
S
848 # Pladform embed
849 {
850 'url': 'http://muz-tv.ru/kinozal/view/7400/',
851 'info_dict': {
852 'id': '100183293',
853 'ext': 'mp4',
62259846 854 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
f8388757
S
855 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
856 'thumbnail': 're:^https?://.*\.jpg$',
857 'duration': 694,
858 'age_limit': 0,
859 },
860 },
c798f15b
S
861 # Playwire embed
862 {
863 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
864 'info_dict': {
865 'id': '3519514',
866 'ext': 'mp4',
867 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
868 'thumbnail': 're:^https?://.*\.png$',
869 'duration': 45.115,
870 },
871 },
ad320e9b
NJ
872 # 5min embed
873 {
874 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
875 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
876 'info_dict': {
877 'id': '518726732',
878 'ext': 'mp4',
879 'title': 'Facebook Creates "On This Day" | Crunch Report',
880 },
881 },
dc455a5f
S
882 # SVT embed
883 {
884 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
885 'info_dict': {
886 'id': '2900353',
887 'ext': 'flv',
888 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
889 'duration': 27,
890 'age_limit': 0,
891 },
892 },
a4257017
S
893 # Crooks and Liars embed
894 {
895 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
896 'info_dict': {
897 'id': '8RUoRhRi',
898 'ext': 'mp4',
899 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
900 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
901 'timestamp': 1428207000,
902 'upload_date': '20150405',
903 'uploader': 'Heather',
904 },
905 },
906 # Crooks and Liars external embed
907 {
908 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
909 'info_dict': {
910 'id': 'MTE3MjUtMzQ2MzA',
911 'ext': 'mp4',
912 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
913 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
914 'timestamp': 1265032391,
915 'upload_date': '20100201',
916 'uploader': 'Heather',
917 },
918 },
facecb84 919 # NBC Sports vplayer embed
a2edf2e7 920 {
facecb84 921 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
a2edf2e7 922 'info_dict': {
facecb84
S
923 'id': 'ln7x1qSThw4k',
924 'ext': 'flv',
925 'title': "PFT Live: New leader in the 'new-look' defense",
926 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
a2edf2e7 927 },
418c5cc3
YCH
928 },
929 # UDN embed
930 {
931 'url': 'http://www.udn.com/news/story/7314/822787',
01c58f84 932 'md5': 'fd2060e988c326991037b9aff9df21a6',
418c5cc3 933 'info_dict': {
01c58f84 934 'id': '300346',
418c5cc3 935 'ext': 'mp4',
01c58f84 936 'title': '中一中男師變性 全校師生力挺',
418c5cc3
YCH
937 'thumbnail': 're:^https?://.*\.jpg$',
938 }
edfcf7ab
YCH
939 },
940 # Ooyala embed
941 {
942 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
943 'info_dict': {
944 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
945 'ext': 'mp4',
946 'description': 'VIDEO: Index/Match versus VLOOKUP.',
947 'title': 'This is what separates the Excel masters from the wannabes',
948 },
949 'params': {
950 # m3u8 downloads
951 'skip_download': True,
952 }
d6fd958c
YCH
953 },
954 # Contains a SMIL manifest
955 {
956 'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
957 'info_dict': {
958 'id': 'file',
959 'ext': 'flv',
960 'title': '+ Football: Lottery Champions League Europe',
961 'uploader': 'www.telewebion.com',
962 },
963 'params': {
964 # rtmpe downloads
965 'skip_download': True,
966 }
b26733ba
YCH
967 },
968 # Brightcove URL in single quotes
969 {
970 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
971 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
972 'info_dict': {
973 'id': '4255764656001',
974 'ext': 'mp4',
975 'title': 'SN Presents: Russell Martin, World Citizen',
976 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
977 'uploader': 'Rogers Sportsnet',
978 },
756f574e
YCH
979 },
980 # Dailymotion Cloud video
981 {
982 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
983 'md5': '49444254273501a64675a7e68c502681',
984 'info_dict': {
985 'id': '5585de919473990de4bee11b',
986 'ext': 'mp4',
987 'title': 'Le débat',
988 'thumbnail': 're:^https?://.*\.jpe?g$',
989 }
a5158f38 990 },
8084be78
S
991 # OnionStudios embed
992 {
993 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
994 'info_dict': {
995 'id': '2855',
996 'ext': 'mp4',
997 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
998 'thumbnail': 're:^https?://.*\.jpe?g$',
999 'uploader': 'ClickHole',
1000 'uploader_id': 'clickhole',
1001 }
1002 },
b8c1cc1a
S
1003 # SnagFilms embed
1004 {
1005 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1006 'info_dict': {
1007 'id': '74849a00-85a9-11e1-9660-123139220831',
1008 'ext': 'mp4',
1009 'title': '#whilewewatch',
1010 }
1011 },
a5158f38
YCH
1012 # AdobeTVVideo embed
1013 {
1014 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1015 'md5': '43662b577c018ad707a63766462b1e87',
1016 'info_dict': {
1017 'id': '2456',
1018 'ext': 'mp4',
1019 'title': 'New experience with Acrobat DC',
1020 'description': 'New experience with Acrobat DC',
1021 'duration': 248.667,
1022 },
1f812580
S
1023 },
1024 # ScreenwaveMedia embed
1025 {
1026 'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1027 'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1028 'info_dict': {
1029 'id': 'cinemasnob-55d26273809dd',
1030 'ext': 'mp4',
1031 'title': 'cinemasnob',
1032 },
76c73715 1033 }
cfe50f04 1034 ]
9b122384 1035
9b122384
PH
def report_following_redirect(self, new_url):
    """Tell the user that a redirect to new_url is being followed."""
    message = '[redirect] Following redirect to %s' % new_url
    self._downloader.to_screen(message)
9b122384 1039
4fc946b5
PH
def _extract_rss(self, url, video_id, doc):
    """Build a playlist result from a parsed RSS document.

    doc is the root element of the feed. Every ./channel/item element
    contributes one 'url' entry whose target is the text of the item's
    <link> or, failing that, the 'url' attribute of the first <enclosure>
    that has one. Items with neither are skipped.

    video_id is not used here; the feed URL itself becomes the playlist id.
    Titles and the description are None when the feed omits them.
    """
    # Element.find() returns None for a missing node, so reading `.text`
    # off it directly would raise AttributeError on feeds without a
    # channel or item <title>. Go through xpath_text (non-fatal) instead,
    # exactly as is done for <link> below.
    playlist_title = xpath_text(doc, './channel/title', fatal=False)
    playlist_desc = xpath_text(doc, './channel/description', fatal=False)

    entries = []
    for item in doc.findall('./channel/item'):
        next_url = xpath_text(item, 'link', fatal=False)
        if not next_url:
            # No usable <link>: fall back to the first enclosure with a URL
            for enclosure in item.findall('./enclosure'):
                next_url = enclosure.attrib.get('url')
                if next_url:
                    break

        if not next_url:
            continue

        entries.append({
            '_type': 'url',
            'url': next_url,
            'title': xpath_text(item, 'title', fatal=False),
        })

    return {
        '_type': 'playlist',
        'id': url,
        'title': playlist_title,
        'description': playlist_desc,
        'entries': entries,
    }
1071
c8e9a235
PH
def _extract_camtasia(self, url, video_id, webpage):
    """Extract a playlist from a page embedding a Camtasia project.

    Looks for the "csConfigFile" flash variable in webpage, downloads the
    XML configuration it points to (resolved relative to url) and emits one
    entry per child of ./playlist/array/fileset that carries a <uri>.

    Returns None if no camtasia video can be found, i.e. when the page has
    no configuration reference or the configuration has no fileset node.
    A missing 'DC.title' meta tag is treated as fatal.
    """

    camtasia_cfg = self._search_regex(
        r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
        webpage, 'camtasia configuration file', default=None)
    if camtasia_cfg is None:
        return None

    title = self._html_search_meta('DC.title', webpage, fatal=True)

    camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
    camtasia_cfg = self._download_xml(
        camtasia_url, video_id,
        note='Downloading camtasia configuration',
        errnote='Failed to download camtasia configuration')
    fileset_node = camtasia_cfg.find('./playlist/array/fileset')
    if fileset_node is None:
        # Element.find() returns None when nothing matches; honour the
        # documented "None if nothing found" contract instead of crashing.
        return None

    entries = []
    # Iterate the element itself: Element.getchildren() was deprecated and
    # then removed in Python 3.9, while direct iteration works everywhere.
    for n in fileset_node:
        url_n = n.find('./uri')
        if url_n is None or not url_n.text:
            # Without a URI text there is nothing to download for this node
            continue

        duration_n = n.find('./duration')

        entries.append({
            'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
            'title': '%s - %s' % (title, n.tag),
            'url': compat_urlparse.urljoin(url, url_n.text),
            # The duration element is optional; leave it unset when absent
            'duration': None if duration_n is None else float_or_none(duration_n.text),
        })

    return {
        '_type': 'playlist',
        'entries': entries,
        'title': title,
    }
1108
9b122384 1109 def _real_extract(self, url):
ebd3c7b3
PH
1110 if url.startswith('//'):
1111 return {
1112 '_type': 'url',
20991253 1113 'url': self.http_scheme() + url,
ebd3c7b3
PH
1114 }
1115
a7130543
JMF
1116 parsed_url = compat_urlparse.urlparse(url)
1117 if not parsed_url.scheme:
04b4d394
PH
1118 default_search = self._downloader.params.get('default_search')
1119 if default_search is None:
1f7ccb90 1120 default_search = 'fixup_error'
04b4d394 1121
1f7ccb90 1122 if default_search in ('auto', 'auto_warning', 'fixup_error'):
04b4d394
PH
1123 if '/' in url:
1124 self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1125 return self.url_result('http://' + url)
1f7ccb90 1126 elif default_search != 'fixup_error':
9c1fc022 1127 if default_search == 'auto_warning':
0e67ab0d
PH
1128 if re.match(r'^(?:url|URL)$', url):
1129 raise ExtractorError(
1130 'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
1131 expected=True)
1132 else:
1133 self._downloader.report_warning(
7571c02c 1134 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
04b4d394 1135 return self.url_result('ytsearch:' + url)
1f7ccb90
PH
1136
1137 if default_search in ('error', 'fixup_error'):
7571c02c 1138 raise ExtractorError(
b74e86f4
PH
1139 '%r is not a valid URL. '
1140 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
1141 % (url, url), expected=True)
04b4d394 1142 else:
f2f2c0c2
PH
1143 if ':' not in default_search:
1144 default_search += ':'
04b4d394 1145 return self.url_result(default_search + url)
4d54ef20
PH
1146
1147 url, smuggled_data = unsmuggle_url(url)
1148 force_videoid = None
d6e6a422 1149 is_intentional = smuggled_data and smuggled_data.get('to_generic')
4d54ef20
PH
1150 if smuggled_data and 'force_videoid' in smuggled_data:
1151 force_videoid = smuggled_data['force_videoid']
1152 video_id = force_videoid
1153 else:
1ddb9456 1154 video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
a7130543 1155
79649588 1156 self.to_screen('%s: Requesting header' % video_id)
c1d1facd 1157
ebab4520 1158 head_req = HEADRequest(url)
23be51d8 1159 head_response = self._request_webpage(
ebab4520
PH
1160 head_req, video_id,
1161 note=False, errnote='Could not send HEAD request to %s' % url,
1162 fatal=False)
42393ce2 1163
23be51d8 1164 if head_response is not False:
42393ce2 1165 # Check for redirect
23be51d8 1166 new_url = head_response.geturl()
42393ce2
PH
1167 if url != new_url:
1168 self.report_following_redirect(new_url)
4d54ef20
PH
1169 if force_videoid:
1170 new_url = smuggle_url(
1171 new_url, {'force_videoid': force_videoid})
cecaaf3f 1172 return self.url_result(new_url)
42393ce2 1173
23be51d8
PH
1174 full_response = None
1175 if head_response is False:
58bde34a
S
1176 request = compat_urllib_request.Request(url)
1177 request.add_header('Accept-Encoding', '*')
1178 full_response = self._request_webpage(request, video_id)
23be51d8
PH
1179 head_response = full_response
1180
1181 # Check for direct link to a video
1182 content_type = head_response.headers.get('Content-Type', '')
1183 m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
1184 if m:
1185 upload_date = unified_strdate(
1186 head_response.headers.get('Last-Modified'))
1187 return {
1188 'id': video_id,
1ddb9456 1189 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
ccdd0ffb 1190 'direct': True,
23be51d8
PH
1191 'formats': [{
1192 'format_id': m.group('format_id'),
1193 'url': url,
1194 'vcodec': 'none' if m.group('type') == 'audio' else None
1195 }],
1196 'upload_date': upload_date,
1197 }
42393ce2 1198
d6e6a422 1199 if not self._downloader.params.get('test', False) and not is_intentional:
2fece970
S
1200 force = self._downloader.params.get('force_generic_extractor', False)
1201 self._downloader.report_warning(
1202 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
d6e6a422 1203
4e262a88 1204 if not full_response:
58bde34a
S
1205 request = compat_urllib_request.Request(url)
1206 # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
1207 # making it impossible to download only a chunk of the file (yet we need only the first
1208 # 512 bytes to test whether it's HTML or not). With youtube-dl's default Accept-Encoding
1209 # this would always result in downloading the whole file, which is not desirable.
1210 # Therefore, for the extraction pass we override Accept-Encoding to accept any encoding,
1211 # so that we receive raw bytes and are able to download only a chunk.
1212 # It would probably be better to solve this by checking Content-Type for application/octet-stream
1213 # after the HEAD request finishes, but it is not clear whether we can rely on this.
1214 request.add_header('Accept-Encoding', '*')
1215 full_response = self._request_webpage(request, video_id)
4e262a88
PH
1216
1217 # Maybe it's a direct link to a video?
1218 # Be careful not to download the whole thing!
1219 first_bytes = full_response.read(512)
61ca9a80 1220 if not is_html(first_bytes):
4e262a88
PH
1221 self._downloader.report_warning(
1222 'URL could be a direct video link, returning it as such.')
1223 upload_date = unified_strdate(
1224 head_response.headers.get('Last-Modified'))
1225 return {
1226 'id': video_id,
1ddb9456 1227 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
4e262a88
PH
1228 'direct': True,
1229 'url': url,
1230 'upload_date': upload_date,
1231 }
1232
1233 webpage = self._webpage_read_content(
1234 full_response, url, video_id, prefix=first_bytes)
1235
9b122384 1236 self.report_extraction(video_id)
887c6acd 1237
0791ac1b 1238 # Is it an RSS feed, a SMIL file or a XSPF playlist?
4fc946b5 1239 try:
bcf89ce6 1240 doc = parse_xml(webpage)
4fc946b5
PH
1241 if doc.tag == 'rss':
1242 return self._extract_rss(url, video_id, doc)
e5e8d20a
S
1243 elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1244 return self._parse_smil(doc, url, video_id)
729accb4
S
1245 elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1246 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
f7300c5c 1247 except compat_xml_parse_error:
4fc946b5
PH
1248 pass
1249
c8e9a235
PH
1250 # Is it a Camtasia project?
1251 camtasia_res = self._extract_camtasia(url, video_id, webpage)
1252 if camtasia_res is not None:
1253 return camtasia_res
1254
14390730
S
1255 # Sometimes embedded video player is hidden behind percent encoding
1256 # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1257 # Unescaping the whole page allows to handle those cases in a generic way
45eedbe5 1258 webpage = compat_urllib_parse_unquote(webpage)
1f7659db 1259
887c6acd
PH
1260 # it's tempting to parse this further, but you would
1261 # have to take into account all the variations like
1262 # Video Title - Site Name
1263 # Site Name | Video Title
1264 # Video Title - Tagline | Site Name
1265 # and so on and so forth; it's just not practical
ef4fd848 1266 video_title = self._html_search_regex(
79649588
PH
1267 r'(?s)<title>(.*?)</title>', webpage, 'video title',
1268 default='video')
ef4fd848 1269
4d805e06
PH
1270 # Try to detect age limit automatically
1271 age_limit = self._rta_search(webpage)
1272 # And then there are the jokers who advertise that they use RTA,
1273 # but actually don't.
1274 AGE_LIMIT_MARKERS = [
1275 r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1276 ]
1277 if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1278 age_limit = 18
1279
ef4fd848
PH
1280 # video uploader is domain name
1281 video_uploader = self._search_regex(
79649588 1282 r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
887c6acd 1283
ed2d6a19 1284 # Helper method
83992676 1285 def _playlist_from_matches(matches, getter=None, ie=None):
3b2f933b 1286 urlrs = orderedSet(
83992676 1287 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
3b2f933b 1288 for m in matches)
ed2d6a19
PH
1289 return self.playlist_result(
1290 urlrs, playlist_id=video_id, playlist_title=video_title)
1291
627a91a9 1292 # Look for BrightCove:
99877772
PH
1293 bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1294 if bc_urls:
79649588 1295 self.to_screen('Brightcove video detected.')
99877772
PH
1296 entries = [{
1297 '_type': 'url',
1298 'url': smuggle_url(bc_url, {'Referer': url}),
1299 'ie_key': 'Brightcove'
1300 } for bc_url in bc_urls]
1301
1302 return {
1303 '_type': 'playlist',
1304 'title': video_title,
1305 'id': video_id,
1306 'entries': entries,
1307 }
cfe50f04 1308
59b8ab58
PH
1309 # Look for embedded rtl.nl player
1310 matches = re.findall(
97b570a9 1311 r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
59b8ab58
PH
1312 webpage)
1313 if matches:
1314 return _playlist_from_matches(matches, ie='RtlNl')
1315
b407e173
YCH
1316 vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1317 if vimeo_url is not None:
1318 return self.url_result(vimeo_url)
7115ca84 1319
a1b85269
YCH
1320 vid_me_embed_url = self._search_regex(
1321 r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1322 webpage, 'vid.me embed', default=None)
1323 if vid_me_embed_url is not None:
1324 return self.url_result(vid_me_embed_url, 'Vidme')
1325
53c1d3ef 1326 # Look for embedded YouTube player
1f9da904 1327 matches = re.findall(r'''(?x)
2b88feed
PH
1328 (?:
1329 <iframe[^>]+?src=|
c71dfccc 1330 data-video-url=|
2b88feed 1331 <embed[^>]+?src=|
a7e97f6d
PH
1332 embedSWF\(?:\s*|
1333 new\s+SWFObject\(
2b88feed
PH
1334 )
1335 (["\'])
1bf5423e 1336 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
6b08cdf6 1337 (?:embed|v|p)/.+?)
1f9da904 1338 \1''', webpage)
887c6acd 1339 if matches:
ed2d6a19 1340 return _playlist_from_matches(
3b2f933b 1341 matches, lambda m: unescapeHTML(m[1]))
53c1d3ef 1342
65f3a228
PH
1343 # Look for lazyYT YouTube embed
1344 matches = re.findall(
1345 r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1346 if matches:
1347 return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1348
355e4fd0
PH
1349 # Look for embedded Dailymotion player
1350 matches = re.findall(
ef4fd848 1351 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
355e4fd0 1352 if matches:
ed2d6a19
PH
1353 return _playlist_from_matches(
1354 matches, lambda m: unescapeHTML(m[1]))
355e4fd0 1355
8489578d
NJ
1356 # Look for embedded Dailymotion playlist player (#3822)
1357 m = re.search(
1358 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1359 if m:
1360 playlists = re.findall(
1361 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1362 if playlists:
1363 return _playlist_from_matches(
1364 playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1365
ef4fd848
PH
1366 # Look for embedded Wistia player
1367 match = re.search(
281d3f1d 1368 r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
ef4fd848 1369 if match:
9471c444
NJ
1370 embed_url = self._proto_relative_url(
1371 unescapeHTML(match.group('url')))
ef4fd848
PH
1372 return {
1373 '_type': 'url_transparent',
9471c444 1374 'url': embed_url,
ef4fd848
PH
1375 'ie_key': 'Wistia',
1376 'uploader': video_uploader,
1377 'title': video_title,
1378 'id': video_id,
1379 }
5f6a1245 1380
9471c444 1381 match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
746c67d7
NJ
1382 if match:
1383 return {
1384 '_type': 'url_transparent',
1385 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1386 'ie_key': 'Wistia',
1387 'uploader': video_uploader,
1388 'title': video_title,
1389 'id': match.group('id')
1390 }
ef4fd848 1391
ee3e63e4 1392 # Look for embedded blip.tv player
0954cd8a
YCH
1393 bliptv_url = BlipTVIE._extract_url(webpage)
1394 if bliptv_url:
1395 return self.url_result(bliptv_url, 'BlipTV')
ee3e63e4 1396
bab19a8e
S
1397 # Look for SVT player
1398 svt_url = SVTIE._extract_url(webpage)
1399 if svt_url:
1400 return self.url_result(svt_url, 'SVT')
1401
fa35cdad
PH
1402 # Look for embedded condenast player
1403 matches = re.findall(
1404 r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1405 webpage)
1406 if matches:
1407 return {
1408 '_type': 'playlist',
1409 'entries': [{
1410 '_type': 'url',
1411 'ie_key': 'CondeNast',
1412 'url': ma,
1413 } for ma in matches],
1414 'title': video_title,
1415 'id': video_id,
1416 }
1417
c19f7764
JMF
1418 # Look for Bandcamp pages with custom domain
1419 mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1420 if mobj is not None:
1421 burl = unescapeHTML(mobj.group(1))
09804265
JMF
1422 # Don't set the extractor because it can be a track url or an album
1423 return self.url_result(burl)
c19f7764 1424
f25571ff
PH
1425 # Look for embedded Vevo player
1426 mobj = re.search(
1427 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1428 if mobj is not None:
1429 return self.url_result(mobj.group('url'))
796df3c6
S
1430
1431 # Look for embedded Viddler player
cb454b33
S
1432 mobj = re.search(
1433 r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1434 webpage)
796df3c6
S
1435 if mobj is not None:
1436 return self.url_result(mobj.group('url'))
f25571ff 1437
3378d67a
S
1438 # Look for NYTimes player
1439 mobj = re.search(
1440 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1441 webpage)
1442 if mobj is not None:
1443 return self.url_result(mobj.group('url'))
1444
cefdf970
S
1445 # Look for Libsyn player
1446 mobj = re.search(
1447 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1448 if mobj is not None:
1449 return self.url_result(mobj.group('url'))
1450
c0d0b01f 1451 # Look for Ooyala videos
8a37aa15 1452 mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
f076b638 1453 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
edfcf7ab
YCH
1454 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1455 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
c0d0b01f 1456 if mobj is not None:
750f9020 1457 return OoyalaIE._build_url_result(mobj.group('ec'))
c0d0b01f 1458
f076b638 1459 # Look for multiple Ooyala embeds on SBN network websites
1460 mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1461 if mobj is not None:
1462 embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1463 if embeds:
1464 return _playlist_from_matches(
1465 embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1466
aa94a6d3 1467 # Look for Aparat videos
48099643 1468 mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
aa94a6d3
PH
1469 if mobj is not None:
1470 return self.url_result(mobj.group(1), 'Aparat')
1471
c93c2ab1 1472 # Look for MPORA videos
c3f51436 1473 mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
c93c2ab1
PH
1474 if mobj is not None:
1475 return self.url_result(mobj.group(1), 'Mpora')
5f59ee79 1476
15c0e8e7 1477 # Look for embedded NovaMov-based player
8f89e687 1478 mobj = re.search(
8dfa187b 1479 r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
15c0e8e7
S
1480 (?P<url>http://(?:(?:embed|www)\.)?
1481 (?:novamov\.com|
1482 nowvideo\.(?:ch|sx|eu|at|ag|co)|
1483 videoweed\.(?:es|com)|
1484 movshare\.(?:net|sx|ag)|
1485 divxstage\.(?:eu|net|ch|co|at|ag))
1486 /embed\.php.+?)\1''', webpage)
8f89e687 1487 if mobj is not None:
15c0e8e7 1488 return self.url_result(mobj.group('url'))
50f56607 1489
9834872b
PH
1490 # Look for embedded Facebook player
1491 mobj = re.search(
db1f3888 1492 r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
9834872b
PH
1493 if mobj is not None:
1494 return self.url_result(mobj.group('url'), 'Facebook')
1495
ca97a56e
S
1496 # Look for embedded VK player
1497 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1498 if mobj is not None:
1499 return self.url_result(mobj.group('url'), 'VK')
1500
0364fa8b
S
1501 # Look for embedded ivi player
1502 mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1503 if mobj is not None:
1504 return self.url_result(mobj.group('url'), 'Ivi')
1505
db1f3888
PH
1506 # Look for embedded Huffington Post player
1507 mobj = re.search(
c3f51436 1508 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
db1f3888
PH
1509 if mobj is not None:
1510 return self.url_result(mobj.group('url'), 'HuffPost')
1511
1b86cc41 1512 # Look for embed.ly
1513 mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1514 if mobj is not None:
1515 return self.url_result(mobj.group('url'))
1516 mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1517 if mobj is not None:
f7e6f7fa 1518 return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1b86cc41 1519
60cc4dc4
PH
1520 # Look for funnyordie embed
1521 matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1522 if matches:
ed2d6a19
PH
1523 return _playlist_from_matches(
1524 matches, getter=unescapeHTML, ie='FunnyOrDie')
60cc4dc4 1525
db546cf8
S
1526 # Look for BBC iPlayer embed
1527 matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1528 if matches:
476eae0c 1529 return _playlist_from_matches(matches, ie='BBCCoUk')
db546cf8 1530
93d020dd
S
1531 # Look for embedded RUTV player
1532 rutv_url = RUTVIE._extract_url(webpage)
1533 if rutv_url:
1534 return self.url_result(rutv_url, 'RUTV')
1535
494f20cb 1536 # Look for embedded TVC player
b8599718
S
1537 tvc_url = TVCIE._extract_url(webpage)
1538 if tvc_url:
1539 return self.url_result(tvc_url, 'TVC')
494f20cb 1540
d40a3b5b
S
1541 # Look for embedded SportBox player
1542 sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1543 if sportbox_urls:
1544 return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1545
78e2b74b 1546 # Look for embedded PornHub player
65d161c4
S
1547 pornhub_url = PornHubIE._extract_url(webpage)
1548 if pornhub_url:
1549 return self.url_result(pornhub_url, 'PornHub')
1550
2bb5b6d0
S
1551 # Look for embedded XHamster player
1552 xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1553 if xhamster_urls:
1554 return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1555
9872d311
S
1556 # Look for embedded Tvigle player
1557 mobj = re.search(
1558 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1559 if mobj is not None:
1560 return self.url_result(mobj.group('url'), 'Tvigle')
1561
7e2ede98
JMF
1562 # Look for embedded TED player
1563 mobj = re.search(
d7cc31b6 1564 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
7e2ede98
JMF
1565 if mobj is not None:
1566 return self.url_result(mobj.group('url'), 'TED')
1567
5c386252 1568 # Look for embedded Ustream videos
1569 mobj = re.search(
1570 r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1571 if mobj is not None:
1572 return self.url_result(mobj.group('url'), 'Ustream')
1573
893f8832
PH
1574 # Look for embedded arte.tv player
1575 mobj = re.search(
1576 r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1577 webpage)
1578 if mobj is not None:
1579 return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1580
cbd55ade
S
1581 # Look for embedded francetv player
1582 mobj = re.search(
1583 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1584 webpage)
1585 if mobj is not None:
1586 return self.url_result(mobj.group('url'))
1587
cb3ac1c6
S
1588 # Look for embedded smotri.com player
1589 smotri_url = SmotriIE._extract_url(webpage)
1590 if smotri_url:
1591 return self.url_result(smotri_url, 'Smotri')
1592
e6c2d9ad 1593 # Look for embedded Myvi.ru player
6dd94d3a 1594 myvi_url = MyviIE._extract_url(webpage)
e6c2d9ad
S
1595 if myvi_url:
1596 return self.url_result(myvi_url)
1597
20991253
PH
1598 # Look for embedded soundcloud player
1599 mobj = re.search(
ac645ac7 1600 r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
20991253
PH
1601 webpage)
1602 if mobj is not None:
1603 url = unescapeHTML(mobj.group('url'))
1604 return self.url_result(url)
1605
826ec77f
PH
1606 # Look for embedded vulture.com player
1607 mobj = re.search(
1608 r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1609 webpage)
1610 if mobj is not None:
1611 url = unescapeHTML(mobj.group('url'))
1612 return self.url_result(url, ie='Vulture')
1613
c5cd249e 1614 # Look for embedded mtvservices player
46fde8a1
S
1615 mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1616 if mtvservices_url:
1617 return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
c5cd249e 1618
49807b4a
S
1619 # Look for embedded yahoo player
1620 mobj = re.search(
1621 r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1622 webpage)
1623 if mobj is not None:
1624 return self.url_result(mobj.group('url'), 'Yahoo')
1625
2ef6fcb5
PH
1626 # Look for embedded sbs.com.au player
1627 mobj = re.search(
e98b8e79
PH
1628 r'''(?x)
1629 (?:
1630 <meta\s+property="og:video"\s+content=|
1631 <iframe[^>]+?src=
1632 )
1633 (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2ef6fcb5
PH
1634 webpage)
1635 if mobj is not None:
1636 return self.url_result(mobj.group('url'), 'SBS')
1637
42bdd9d0
PH
1638 # Look for embedded Cinchcast player
1639 mobj = re.search(
1640 r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1641 webpage)
1642 if mobj is not None:
1643 return self.url_result(mobj.group('url'), 'Cinchcast')
1644
1a94ff68 1645 mobj = re.search(
5263cdfc 1646 r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1a94ff68 1647 webpage)
8001607e
YCH
1648 if not mobj:
1649 mobj = re.search(
1650 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1651 webpage)
1a94ff68
S
1652 if mobj is not None:
1653 return self.url_result(mobj.group('url'), 'MLB')
1654
1419fafd 1655 mobj = re.search(
dd467d33 1656 r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1419fafd
S
1657 webpage)
1658 if mobj is not None:
1659 return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1660
af63fed7
PH
1661 mobj = re.search(
1662 r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1663 webpage)
1664 if mobj is not None:
1665 return self.url_result(mobj.group('url'), 'Livestream')
1666
255fca5e
S
1667 # Look for Zapiks embed
1668 mobj = re.search(
1669 r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1670 if mobj is not None:
1671 return self.url_result(mobj.group('url'), 'Zapiks')
1672
e3216b82 1673 # Look for Kaltura embeds
66e568de
S
1674 mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
1675 re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
e3216b82
NJ
1676 if mobj is not None:
1677 return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1678
135c9c42
S
1679 # Look for Eagle.Platform embeds
1680 mobj = re.search(
1681 r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1682 if mobj is not None:
1683 return self.url_result(mobj.group('url'), 'EaglePlatform')
1684
d47ae7f6
S
1685 # Look for ClipYou (uses Eagle.Platform) embeds
1686 mobj = re.search(
1687 r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1688 if mobj is not None:
1689 return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1690
f8388757
S
1691 # Look for Pladform embeds
1692 mobj = re.search(
1693 r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1694 if mobj is not None:
1695 return self.url_result(mobj.group('url'), 'Pladform')
1696
2dcc114f
S
1697 # Look for Playwire embeds
1698 mobj = re.search(
1699 r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1700 if mobj is not None:
1701 return self.url_result(mobj.group('url'))
1702
ad320e9b
NJ
1703 # Look for 5min embeds
1704 mobj = re.search(
1705 r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1706 if mobj is not None:
1707 return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1708
18153f1b
S
1709 # Look for Crooks and Liars embeds
1710 mobj = re.search(
1711 r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1712 if mobj is not None:
1713 return self.url_result(mobj.group('url'))
1714
a2edf2e7
YCH
1715 # Look for NBC Sports VPlayer embeds
1716 nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1717 if nbc_sports_url:
1718 return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1719
418c5cc3
YCH
1720 # Look for UDN embeds
1721 mobj = re.search(
1722 r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1723 if mobj is not None:
1724 return self.url_result(
0a160363 1725 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
418c5cc3 1726
2fe1b5bd
YCH
1727 # Look for Senate ISVP iframe
1728 senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1729 if senate_isvp_url:
25c3a734 1730 return self.url_result(senate_isvp_url, 'SenateISVP')
2fe1b5bd 1731
756f574e
YCH
1732 # Look for Dailymotion Cloud videos
1733 dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1734 if dmcloud_url:
1735 return self.url_result(dmcloud_url, 'DailymotionCloud')
1736
1ac1c4c2
S
1737 # Look for OnionStudios embeds
1738 onionstudios_url = OnionStudiosIE._extract_url(webpage)
1739 if onionstudios_url:
1740 return self.url_result(onionstudios_url)
1741
eedd20ef
S
1742 # Look for SnagFilms embeds
1743 snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1744 if snagfilms_url:
1745 return self.url_result(snagfilms_url)
1746
8ca31a0e 1747 # Look for ScreenwaveMedia embeds
efd712c6 1748 mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
8ca31a0e 1749 if mobj is not None:
efd712c6 1750 return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
8ca31a0e 1751
a5158f38
YCH
1752 # Look for AdobeTVVideo embeds
1753 mobj = re.search(
1754 r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1755 webpage)
1756 if mobj is not None:
1757 return self.url_result(
1758 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1759 'AdobeTVVideo')
1760
ced659bb 1761 def check_video(vurl):
a0f71985
PH
1762 if YoutubeIE.suitable(vurl):
1763 return True
ced659bb
S
1764 vpath = compat_urlparse.urlparse(vurl).path
1765 vext = determine_ext(vpath)
1766 return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1767
def filter_video(urls):
    """Return a list of the entries of urls that pass check_video, in order."""
    return [candidate for candidate in urls if check_video(candidate)]
1770
9b122384 1771 # Start with something easy: JW Player in SWFObject
ced659bb 1772 found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
b30b8698 1773 if not found:
d981cef6 1774 # Look for gorilla-vid style embedding
ced659bb 1775 found = filter_video(re.findall(r'''(?sx)
c0292e8a
PH
1776 (?:
1777 jw_plugins|
1778 JWPlayerOptions|
1779 jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1780 )
a0f71985
PH
1781 .*?
1782 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
b30b8698 1783 if not found:
9b122384 1784 # Broaden the search a little bit
ced659bb 1785 found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
b30b8698
PH
1786 if not found:
1787 # Broaden the findall a little bit: JWPlayer JS loader
ced659bb 1788 found = filter_video(re.findall(
54a9328b 1789 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
4d805e06
PH
1790 if not found:
1791 # Flow player
ced659bb 1792 found = filter_video(re.findall(r'''(?xs)
4d805e06
PH
1793 flowplayer\("[^"]+",\s*
1794 \{[^}]+?\}\s*,
52585fd6 1795 \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
4d805e06 1796 ["']?url["']?\s*:\s*["']([^"']+)["']
ced659bb 1797 ''', webpage))
501f13fb
PH
1798 if not found:
1799 # Cinerama player
1800 found = re.findall(
1801 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
b30b8698 1802 if not found:
9b122384 1803 # Try to find twitter cards info
ced659bb
S
1804 found = filter_video(re.findall(
1805 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
b30b8698 1806 if not found:
9b122384
PH
1807 # We look for Open Graph info:
1808 # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
b30b8698 1809 m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
9b122384
PH
1810 # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1811 if m_video_type is not None:
ced659bb 1812 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
b30b8698 1813 if not found:
7fea7156 1814 # HTML5 video
12439dd5 1815 found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
b30b8698 1816 if not found:
ed9a25dd 1817 REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
a5a45015 1818 found = re.search(
89ef304b 1819 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
ed9a25dd 1820 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
89ef304b 1821 webpage)
84f81016
S
1822 if not found:
1823 # Look also in Refresh HTTP header
1824 refresh_header = head_response.headers.get('Refresh')
1825 if refresh_header:
6c91a5a7
S
1826 # In python 2 response HTTP headers are bytestrings
1827 if sys.version_info < (3, 0) and isinstance(refresh_header, str):
1828 refresh_header = refresh_header.decode('iso-8859-1')
ed9a25dd 1829 found = re.search(REDIRECT_REGEX, refresh_header)
b30b8698 1830 if found:
b37317d8 1831 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
89ef304b
PH
1832 self.report_following_redirect(new_url)
1833 return {
1834 '_type': 'url',
1835 'url': new_url,
1836 }
b30b8698 1837 if not found:
416c7fcb 1838 raise UnsupportedError(url)
9b122384 1839
b30b8698
PH
1840 entries = []
1841 for video_url in found:
1842 video_url = compat_urlparse.urljoin(url, video_url)
f7e6f7fa 1843 video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
9b122384 1844
b30b8698
PH
1845 # Sometimes, jwplayer extraction will result in a YouTube URL
1846 if YoutubeIE.suitable(video_url):
1847 entries.append(self.url_result(video_url, 'Youtube'))
1848 continue
9b122384 1849
b30b8698
PH
1850 # here's a fun little line of code for you:
1851 video_id = os.path.splitext(video_id)[0]
fc9713a1 1852
729accb4
S
1853 ext = determine_ext(video_url)
1854 if ext == 'smil':
d6fd958c
YCH
1855 entries.append({
1856 'id': video_id,
1857 'formats': self._extract_smil_formats(video_url, video_id),
1858 'uploader': video_uploader,
1859 'title': video_title,
1860 'age_limit': age_limit,
1861 })
729accb4
S
1862 elif ext == 'xspf':
1863 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
d6fd958c
YCH
1864 else:
1865 entries.append({
1866 'id': video_id,
1867 'url': video_url,
1868 'uploader': video_uploader,
1869 'title': video_title,
1870 'age_limit': age_limit,
1871 })
b30b8698
PH
1872
1873 if len(entries) == 1:
669f0e7c 1874 return entries[0]
b30b8698
PH
1875 else:
1876 for num, e in enumerate(entries, start=1):
13d8fbef
JMF
1877 # 'url' results don't have a title
1878 if e.get('title') is not None:
1879 e['title'] = '%s (%d)' % (e['title'], num)
b30b8698
PH
1880 return {
1881 '_type': 'playlist',
1882 'entries': entries,
1883 }