]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/generic.py
[extractor/generic] Add support for OnionStudios embeds (Closes #5841)
[yt-dlp.git] / youtube_dl / extractor / generic.py
CommitLineData
cfe50f04
JMF
1# encoding: utf-8
2
79649588
PH
3from __future__ import unicode_literals
4
9b122384
PH
5import os
6import re
7
8from .common import InfoExtractor
fc9713a1 9from .youtube import YoutubeIE
8c25f81b 10from ..compat import (
9b122384 11 compat_urllib_parse,
1ddb9456
S
12 compat_urllib_parse_unquote,
13 compat_urllib_request,
a5caba1e 14 compat_urlparse,
f7300c5c 15 compat_xml_parse_error,
8c25f81b
PH
16)
17from ..utils import (
b759a0d4 18 determine_ext,
9b122384 19 ExtractorError,
c8e9a235 20 float_or_none,
aa94a6d3 21 HEADRequest,
61ca9a80 22 is_html,
ed2d6a19 23 orderedSet,
bcf89ce6 24 parse_xml,
9d4660ca
PH
25 smuggle_url,
26 unescapeHTML,
42393ce2 27 unified_strdate,
4d54ef20 28 unsmuggle_url,
416c7fcb 29 UnsupportedError,
42393ce2 30 url_basename,
76c73715 31 xpath_text,
9b122384 32)
cfe50f04 33from .brightcove import BrightcoveIE
a2edf2e7 34from .nbc import NBCSportsVPlayerIE
c0d0b01f 35from .ooyala import OoyalaIE
93d020dd 36from .rutv import RUTVIE
954c1d05 37from .tvc import TVCIE
d40a3b5b 38from .sportbox import SportBoxEmbedIE
cb3ac1c6 39from .smotri import SmotriIE
1419fafd 40from .condenast import CondeNastIE
418c5cc3 41from .udn import UDNEmbedIE
2fe1b5bd 42from .senateisvp import SenateISVPIE
0954cd8a 43from .bliptv import BlipTVIE
bab19a8e 44from .svt import SVTIE
65d161c4 45from .pornhub import PornHubIE
2bb5b6d0 46from .xhamster import XHamsterEmbedIE
b407e173 47from .vimeo import VimeoIE
756f574e 48from .dailymotion import DailymotionCloudIE
1ac1c4c2 49from .onionstudios import OnionStudiosIE
9b122384 50
0838239e 51
9b122384 52class GenericIE(InfoExtractor):
79649588 53 IE_DESC = 'Generic downloader that works on some sites'
9b122384 54 _VALID_URL = r'.*'
79649588 55 IE_NAME = 'generic'
cfe50f04 56 _TESTS = [
c5fa81fe
S
57 # Direct link to a video
58 {
59 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
60 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
61 'info_dict': {
62 'id': 'trailer',
63 'ext': 'mp4',
64 'title': 'trailer',
65 'upload_date': '20100513',
66 }
67 },
c5138a7c 68 # Direct link to media delivered compressed (until Accept-Encoding is *)
c5fa81fe
S
69 {
70 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
71 'md5': '128c42e68b13950268b648275386fc74',
72 'info_dict': {
73 'id': 'FictionJunction-Parallel_Hearts',
74 'ext': 'flac',
75 'title': 'FictionJunction-Parallel_Hearts',
76 'upload_date': '20140522',
77 },
78 'expected_warnings': [
79 'URL could be a direct video link, returning it as such.'
80 ]
81 },
82 # Direct download with broken HEAD
83 {
84 'url': 'http://ai-radio.org:8000/radio.opus',
85 'info_dict': {
86 'id': 'radio',
87 'ext': 'opus',
88 'title': 'radio',
89 },
90 'params': {
91 'skip_download': True, # infinite live stream
92 },
93 'expected_warnings': [
94 r'501.*Not Implemented'
95 ],
96 },
97 # Direct link with incorrect MIME type
98 {
99 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
100 'md5': '4ccbebe5f36706d85221f204d7eb5913',
101 'info_dict': {
102 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
103 'id': '5_Lennart_Poettering_-_Systemd',
104 'ext': 'webm',
105 'title': '5_Lennart_Poettering_-_Systemd',
106 'upload_date': '20141120',
107 },
108 'expected_warnings': [
109 'URL could be a direct video link, returning it as such.'
110 ]
111 },
112 # RSS feed
113 {
114 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
115 'info_dict': {
116 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
117 'title': 'Zero Punctuation',
118 'description': 're:.*groundbreaking video review series.*'
119 },
120 'playlist_mincount': 11,
121 },
122 # RSS feed with enclosure
123 {
124 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
125 'info_dict': {
126 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
127 'ext': 'm4v',
128 'upload_date': '20150228',
129 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
130 }
131 },
132 # google redirect
133 {
134 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
135 'info_dict': {
136 'id': 'cmQHVoWB5FY',
137 'ext': 'mp4',
138 'upload_date': '20130224',
139 'uploader_id': 'TheVerge',
140 'description': 're:^Chris Ziegler takes a look at the\.*',
141 'uploader': 'The Verge',
142 'title': 'First Firefox OS phones side-by-side',
143 },
144 'params': {
145 'skip_download': False,
146 }
147 },
cfe50f04 148 {
79649588 149 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
d360a146 150 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
79649588 151 'info_dict': {
d360a146
S
152 'id': '13601338388002',
153 'ext': 'mp4',
79649588
PH
154 'uploader': 'www.hodiho.fr',
155 'title': 'R\u00e9gis plante sa Jeep',
cfe50f04
JMF
156 }
157 },
c19f7764
JMF
158 # bandcamp page with custom domain
159 {
79649588
PH
160 'add_ie': ['Bandcamp'],
161 'url': 'http://bronyrock.com/track/the-pony-mash',
79649588 162 'info_dict': {
fd50bf62
S
163 'id': '3235767654',
164 'ext': 'mp3',
79649588
PH
165 'title': 'The Pony Mash',
166 'uploader': 'M_Pallante',
c19f7764 167 },
79649588 168 'skip': 'There is a limit of 200 free downloads / month for the test song',
c19f7764 169 },
eeb165e6 170 # embedded brightcove video
dd5bcdc4
JMF
171 # it also tests brightcove videos that need to set the 'Referer' in the
172 # http requests
eeb165e6 173 {
79649588
PH
174 'add_ie': ['Brightcove'],
175 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
176 'info_dict': {
177 'id': '2765128793001',
178 'ext': 'mp4',
179 'title': 'Le cours de bourse : l’analyse technique',
180 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
181 'uploader': 'BFM BUSINESS',
eeb165e6 182 },
79649588
PH
183 'params': {
184 'skip_download': True,
eeb165e6
JMF
185 },
186 },
17ab4d3b
PH
187 {
188 # https://github.com/rg3/youtube-dl/issues/2253
189 'url': 'http://bcove.me/i6nfkrc3',
17ab4d3b
PH
190 'md5': '0ba9446db037002366bab3b3eb30c88c',
191 'info_dict': {
fd50bf62
S
192 'id': '3101154703001',
193 'ext': 'mp4',
17ab4d3b
PH
194 'title': 'Still no power',
195 'uploader': 'thestar.com',
196 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
197 },
198 'add_ie': ['Brightcove'],
199 },
0479c625
S
200 {
201 'url': 'http://www.championat.com/video/football/v/87/87499.html',
202 'md5': 'fb973ecf6e4a78a67453647444222983',
203 'info_dict': {
204 'id': '3414141473001',
205 'ext': 'mp4',
206 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
207 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
208 'uploader': 'Championat',
209 },
210 },
bdf97017 211 {
37aab278 212 # https://github.com/rg3/youtube-dl/issues/3541
bdf97017
NJ
213 'add_ie': ['Brightcove'],
214 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
215 'info_dict': {
216 'id': '3866516442001',
37aab278 217 'ext': 'mp4',
bdf97017
NJ
218 'title': 'Leer mij vrouwen kennen: Aflevering 1',
219 'description': 'Leer mij vrouwen kennen: Aflevering 1',
220 'uploader': 'SBS Broadcasting',
221 },
37aab278 222 'skip': 'Restricted to Netherlands',
bdf97017 223 'params': {
37aab278 224 'skip_download': True, # m3u8 download
bdf97017
NJ
225 },
226 },
c0d0b01f
JMF
227 # ooyala video
228 {
79649588 229 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
87830900 230 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
79649588
PH
231 'info_dict': {
232 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
233 'ext': 'mp4',
3486df38 234 'title': '2cc213299525360.mov', # that's what we get
c0d0b01f 235 },
87830900 236 'add_ie': ['Ooyala'],
c0d0b01f 237 },
f076b638 238 # multiple ooyala embeds on SBN network websites
239 {
240 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
241 'info_dict': {
242 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
243 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
244 },
245 'playlist_mincount': 3,
246 'params': {
247 'skip_download': True,
248 },
249 'add_ie': ['Ooyala'],
250 },
1b86cc41 251 # embed.ly video
252 {
253 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
254 'info_dict': {
255 'id': '9ODmcdjQcHQ',
256 'ext': 'mp4',
0a5bce56
PH
257 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
258 'upload_date': '20140225',
259 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
260 'uploader': 'Tested',
261 'uploader_id': 'testedcom',
1b86cc41 262 },
263 # No need to test YoutubeIE here
264 'params': {
265 'skip_download': True,
266 },
267 },
60cc4dc4
PH
268 # funnyordie embed
269 {
270 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
60cc4dc4
PH
271 'info_dict': {
272 'id': '18e820ec3f',
273 'ext': 'mp4',
274 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
275 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
93d020dd 276 },
60cc4dc4 277 },
faa4ea68
S
278 # BBC iPlayer embeds
279 {
280 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
281 'info_dict': {
282 'title': 'BBC - Blogs - Adam Curtis - BUGGER',
283 },
284 'playlist_mincount': 18,
285 },
93d020dd
S
286 # RUTV embed
287 {
288 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
289 'info_dict': {
290 'id': '776940',
291 'ext': 'mp4',
292 'title': 'Охотское море стало целиком российским',
293 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
294 },
295 'params': {
296 # m3u8 download
297 'skip_download': True,
298 },
aab74fa1 299 },
f37bdbe5
S
300 # TVC embed
301 {
302 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
303 'info_dict': {
304 'id': '55304',
305 'ext': 'mp4',
306 'title': 'Дошкольное воспитание',
307 },
308 },
b827a601
S
309 # SportBox embed
310 {
311 'url': 'http://www.vestifinance.ru/articles/25753',
312 'info_dict': {
313 'id': '25753',
314 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
315 },
316 'playlist': [{
317 'info_dict': {
318 'id': '370908',
319 'title': 'Госзаказ. День 3',
320 'ext': 'mp4',
321 }
322 }, {
323 'info_dict': {
324 'id': '370905',
325 'title': 'Госзаказ. День 2',
326 'ext': 'mp4',
327 }
328 }, {
329 'info_dict': {
330 'id': '370902',
331 'title': 'Госзаказ. День 1',
332 'ext': 'mp4',
333 }
334 }],
335 'params': {
336 # m3u8 download
337 'skip_download': True,
338 },
339 },
c76799c5
S
340 # XHamster embed
341 {
342 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
343 'info_dict': {
344 'id': 'showthread',
345 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
346 },
347 'playlist_mincount': 7,
348 },
aab74fa1
PH
349 # Embedded TED video
350 {
351 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
a8eb5a8e 352 'md5': '65fdff94098e4a607385a60c5177c638',
aab74fa1 353 'info_dict': {
a8eb5a8e 354 'id': '1969',
aab74fa1 355 'ext': 'mp4',
a8eb5a8e
PH
356 'title': 'Hidden miracles of the natural world',
357 'uploader': 'Louie Schwartzberg',
358 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
aab74fa1 359 }
60cc4dc4 360 },
5c386252 361 # Embedded Ustream video
362 {
363 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
364 'md5': '27b99cdb639c9b12a79bca876a073417',
365 'info_dict': {
ca6aada4 366 'id': '45734260',
367 'ext': 'flv',
368 'uploader': 'AU SPA: The NSA and Privacy',
5c386252 369 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
370 }
371 },
d95e35d6
S
372 # nowvideo embed hidden behind percent encoding
373 {
374 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
375 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
376 'info_dict': {
377 'id': '06e53103ca9aa',
378 'ext': 'flv',
379 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
380 'description': 'No description',
381 },
0f2a2ba1 382 },
893f8832
PH
383 # arte embed
384 {
385 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
386 'md5': '7653032cbb25bf6c80d80f217055fa43',
387 'info_dict': {
388 'id': '048195-004_PLUS7-F',
389 'ext': 'flv',
390 'title': 'X:enius',
391 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
392 'upload_date': '20140320',
393 },
394 'params': {
395 'skip_download': 'Requires rtmpdump'
396 }
397 },
fa35cdad
PH
398 # Condé Nast embed
399 {
400 'url': 'http://www.wired.com/2014/04/honda-asimo/',
401 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
402 'info_dict': {
403 'id': '53501be369702d3275860000',
404 'ext': 'mp4',
405 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
406 }
ebd3c7b3
PH
407 },
408 # Dailymotion embed
409 {
410 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
411 'md5': '441aeeb82eb72c422c7f14ec533999cd',
412 'info_dict': {
413 'id': 'k2mm4bCdJ6CQ2i7c8o2',
414 'ext': 'mp4',
415 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
416 'uploader': 'Spi0n',
417 },
418 'add_ie': ['Dailymotion'],
2b88feed
PH
419 },
420 # YouTube embed
421 {
422 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
423 'info_dict': {
424 'id': 'FXRb4ykk4S0',
425 'ext': 'mp4',
426 'title': 'The NBL Auction 2014',
427 'uploader': 'BADMINTON England',
428 'uploader_id': 'BADMINTONEvents',
429 'upload_date': '20140603',
430 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
431 },
432 'add_ie': ['Youtube'],
433 'params': {
434 'skip_download': True,
435 }
436 },
c5cd249e
JMF
437 # MTVServices embed
438 {
439 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
440 'md5': '35727f82f58c76d996fc188f9755b0d5',
441 'info_dict': {
442 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
443 'ext': 'mp4',
444 'title': 'Review',
445 'description': 'Mario\'s life in the fast lane has never looked so good.',
446 },
447 },
61013473 448 # YouTube embed via <data-embed-url="">
449 {
450 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
61013473 451 'info_dict': {
a8eb5a8e 452 'id': '4vAffPZIT44',
61013473 453 'ext': 'mp4',
a8eb5a8e 454 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
ed2d6a19
PH
455 'uploader': 'Gameloft',
456 'uploader_id': 'gameloft',
a8eb5a8e
PH
457 'upload_date': '20140828',
458 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
ed2d6a19
PH
459 },
460 'params': {
461 'skip_download': True,
61013473 462 }
c8e9a235
PH
463 },
464 # Camtasia studio
465 {
466 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
467 'playlist': [{
468 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
469 'info_dict': {
470 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
471 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
472 'ext': 'flv',
473 'duration': 2235.90,
474 }
475 }, {
476 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
477 'info_dict': {
478 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
479 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
480 'ext': 'flv',
481 'duration': 2235.93,
482 }
483 }],
484 'info_dict': {
485 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
486 }
4d805e06
PH
487 },
488 # Flowplayer
489 {
490 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
491 'md5': '9d65602bf31c6e20014319c7d07fba27',
492 'info_dict': {
493 'id': '5123ea6d5e5a7',
494 'ext': 'mp4',
495 'age_limit': 18,
496 'uploader': 'www.handjobhub.com',
d6d9186f 497 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
4d805e06 498 }
0990305d 499 },
22a6f150
PH
500 # Multiple brightcove videos
501 # https://github.com/rg3/youtube-dl/issues/2283
502 {
503 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
504 'info_dict': {
505 'id': 'always-never',
506 'title': 'Always / Never - The New Yorker',
507 },
508 'playlist_count': 3,
509 'params': {
510 'extract_flat': False,
511 'skip_download': True,
512 }
1a94ff68
S
513 },
514 # MLB embed
515 {
516 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
517 'md5': '96f09a37e44da40dd083e12d9a683327',
518 'info_dict': {
519 'id': '33322633',
520 'ext': 'mp4',
521 'title': 'Ump changes call to ball',
522 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
523 'duration': 48,
524 'timestamp': 1401537900,
525 'upload_date': '20140531',
526 'thumbnail': 're:^https?://.*\.jpg$',
527 },
528 },
746c67d7
NJ
529 # Wistia embed
530 {
531 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
532 'md5': '8788b683c777a5cf25621eaf286d0c23',
533 'info_dict': {
534 'id': '1cfaf6b7ea',
535 'ext': 'mov',
536 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
537 'duration': 643.0,
538 'filesize': 182808282,
539 'uploader': 'education-portal.com',
540 },
541 },
52cffcb1 542 {
543 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
544 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
545 'info_dict': {
546 'id': 'uxjb0lwrcz',
547 'ext': 'mp4',
85d7b765 548 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
52cffcb1 549 'duration': 1715.0,
85d7b765 550 'uploader': 'thoughtworks.wistia.com',
70b7e3fb 551 },
52cffcb1 552 },
ac645ac7
PH
553 # Soundcloud embed
554 {
555 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
556 'info_dict': {
557 'id': '174391317',
558 'ext': 'mp3',
559 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
560 'uploader': 'Sophos Security',
561 'title': 'Chet Chat 171 - Oct 29, 2014',
562 'upload_date': '20141029',
563 }
af63fed7
PH
564 },
565 # Livestream embed
566 {
567 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
568 'info_dict': {
569 'id': '67864563',
570 'ext': 'flv',
571 'upload_date': '20141112',
572 'title': 'Rosetta #CometLanding webcast HL 10',
573 }
574 },
65f3a228
PH
575 # LazyYT
576 {
577 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
578 'info_dict': {
11e611a7 579 'id': '1986',
65f3a228
PH
580 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
581 },
582 'playlist_mincount': 2,
4e262a88 583 },
42bdd9d0
PH
584 # Cinchcast embed
585 {
586 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
587 'info_dict': {
588 'id': '7141703',
589 'ext': 'mp3',
590 'upload_date': '20141126',
591 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
592 }
593 },
501f13fb
PH
594 # Cinerama player
595 {
596 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
597 'info_dict': {
598 'id': '730m_DandD_1901_512k',
599 'ext': 'mp4',
600 'uploader': 'www.abc.net.au',
601 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
602 }
796df3c6
S
603 },
604 # embedded viddler video
605 {
606 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
607 'info_dict': {
608 'id': '4d03aad9',
609 'ext': 'mp4',
610 'uploader': 'deadspin',
611 'title': 'WALL-TO-GORTAT',
612 'timestamp': 1422285291,
613 'upload_date': '20150126',
614 },
615 'add_ie': ['Viddler'],
a0f71985 616 },
2051acde
S
617 # Libsyn embed
618 {
619 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
620 'info_dict': {
621 'id': '3377616',
622 'ext': 'mp3',
623 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
624 'description': 'md5:601cb790edd05908957dae8aaa866465',
625 'upload_date': '20150220',
626 },
627 },
a0f71985
PH
628 # jwplayer YouTube
629 {
630 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
631 'info_dict': {
632 'id': 'Mrj4DVp2zeA',
633 'ext': 'mp4',
f37e3f99 634 'upload_date': '20150212',
a0f71985
PH
635 'uploader': 'The National Archives UK',
636 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
637 'uploader_id': 'NationalArchives08',
638 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
639 },
59b8ab58
PH
640 },
641 # rtl.nl embed
642 {
643 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
644 'playlist_mincount': 5,
645 'info_dict': {
646 'id': 'aanslagen-kopenhagen',
647 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
648 }
255fca5e
S
649 },
650 # Zapiks embed
651 {
652 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
653 'info_dict': {
654 'id': '118046',
655 'ext': 'mp4',
656 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
657 }
658 },
e3216b82
NJ
659 # Kaltura embed
660 {
661 'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
662 'info_dict': {
663 'id': '1_eergr3h1',
664 'ext': 'mp4',
665 'upload_date': '20150226',
666 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
667 'timestamp': int,
668 'title': 'John Carlson Postgame 2/25/15',
669 },
670 },
135c9c42
S
671 # Eagle.Platform embed (generic URL)
672 {
673 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
674 'info_dict': {
675 'id': '227304',
676 'ext': 'mp4',
677 'title': 'Навальный вышел на свободу',
678 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
679 'thumbnail': 're:^https?://.*\.jpg$',
680 'duration': 87,
681 'view_count': int,
682 'age_limit': 0,
683 },
684 },
d47ae7f6
S
685 # ClipYou (Eagle.Platform) embed (custom URL)
686 {
687 'url': 'http://muz-tv.ru/play/7129/',
688 'info_dict': {
689 'id': '12820',
690 'ext': 'mp4',
691 'title': "'O Sole Mio",
692 'thumbnail': 're:^https?://.*\.jpg$',
693 'duration': 216,
694 'view_count': int,
695 },
696 },
f8388757
S
697 # Pladform embed
698 {
699 'url': 'http://muz-tv.ru/kinozal/view/7400/',
700 'info_dict': {
701 'id': '100183293',
702 'ext': 'mp4',
62259846 703 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
f8388757
S
704 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
705 'thumbnail': 're:^https?://.*\.jpg$',
706 'duration': 694,
707 'age_limit': 0,
708 },
709 },
c798f15b
S
710 # Playwire embed
711 {
712 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
713 'info_dict': {
714 'id': '3519514',
715 'ext': 'mp4',
716 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
717 'thumbnail': 're:^https?://.*\.png$',
718 'duration': 45.115,
719 },
720 },
ad320e9b
NJ
721 # 5min embed
722 {
723 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
724 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
725 'info_dict': {
726 'id': '518726732',
727 'ext': 'mp4',
728 'title': 'Facebook Creates "On This Day" | Crunch Report',
729 },
730 },
dc455a5f
S
731 # SVT embed
732 {
733 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
734 'info_dict': {
735 'id': '2900353',
736 'ext': 'flv',
737 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
738 'duration': 27,
739 'age_limit': 0,
740 },
741 },
a4257017
S
742 # Crooks and Liars embed
743 {
744 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
745 'info_dict': {
746 'id': '8RUoRhRi',
747 'ext': 'mp4',
748 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
749 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
750 'timestamp': 1428207000,
751 'upload_date': '20150405',
752 'uploader': 'Heather',
753 },
754 },
755 # Crooks and Liars external embed
756 {
757 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
758 'info_dict': {
759 'id': 'MTE3MjUtMzQ2MzA',
760 'ext': 'mp4',
761 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
762 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
763 'timestamp': 1265032391,
764 'upload_date': '20100201',
765 'uploader': 'Heather',
766 },
767 },
facecb84 768 # NBC Sports vplayer embed
a2edf2e7 769 {
facecb84 770 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
a2edf2e7 771 'info_dict': {
facecb84
S
772 'id': 'ln7x1qSThw4k',
773 'ext': 'flv',
774 'title': "PFT Live: New leader in the 'new-look' defense",
775 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
a2edf2e7 776 },
418c5cc3
YCH
777 },
778 # UDN embed
779 {
780 'url': 'http://www.udn.com/news/story/7314/822787',
01c58f84 781 'md5': 'fd2060e988c326991037b9aff9df21a6',
418c5cc3 782 'info_dict': {
01c58f84 783 'id': '300346',
418c5cc3 784 'ext': 'mp4',
01c58f84 785 'title': '中一中男師變性 全校師生力挺',
418c5cc3
YCH
786 'thumbnail': 're:^https?://.*\.jpg$',
787 }
edfcf7ab
YCH
788 },
789 # Ooyala embed
790 {
791 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
792 'info_dict': {
793 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
794 'ext': 'mp4',
795 'description': 'VIDEO: Index/Match versus VLOOKUP.',
796 'title': 'This is what separates the Excel masters from the wannabes',
797 },
798 'params': {
799 # m3u8 downloads
800 'skip_download': True,
801 }
d6fd958c
YCH
802 },
803 # Contains a SMIL manifest
804 {
805 'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
806 'info_dict': {
807 'id': 'file',
808 'ext': 'flv',
809 'title': '+ Football: Lottery Champions League Europe',
810 'uploader': 'www.telewebion.com',
811 },
812 'params': {
813 # rtmpe downloads
814 'skip_download': True,
815 }
b26733ba
YCH
816 },
817 # Brightcove URL in single quotes
818 {
819 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
820 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
821 'info_dict': {
822 'id': '4255764656001',
823 'ext': 'mp4',
824 'title': 'SN Presents: Russell Martin, World Citizen',
825 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
826 'uploader': 'Rogers Sportsnet',
827 },
756f574e
YCH
828 },
829 # Dailymotion Cloud video
830 {
831 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
832 'md5': '49444254273501a64675a7e68c502681',
833 'info_dict': {
834 'id': '5585de919473990de4bee11b',
835 'ext': 'mp4',
836 'title': 'Le débat',
837 'thumbnail': 're:^https?://.*\.jpe?g$',
838 }
a5158f38
YCH
839 },
840 # AdobeTVVideo embed
841 {
842 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
843 'md5': '43662b577c018ad707a63766462b1e87',
844 'info_dict': {
845 'id': '2456',
846 'ext': 'mp4',
847 'title': 'New experience with Acrobat DC',
848 'description': 'New experience with Acrobat DC',
849 'duration': 248.667,
850 },
76c73715 851 }
cfe50f04 852 ]
9b122384 853
9b122384
PH
def report_following_redirect(self, new_url):
    """Print a notice that the redirect to new_url is being followed."""
    message = '[redirect] Following redirect to %s' % new_url
    self._downloader.to_screen(message)
9b122384 857
4fc946b5
PH
858 def _extract_rss(self, url, video_id, doc):
859 playlist_title = doc.find('./channel/title').text
860 playlist_desc_el = doc.find('./channel/description')
861 playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
862
76c73715
PH
863 entries = []
864 for it in doc.findall('./channel/item'):
865 next_url = xpath_text(it, 'link', fatal=False)
866 if not next_url:
867 enclosure_nodes = it.findall('./enclosure')
868 for e in enclosure_nodes:
869 next_url = e.attrib.get('url')
870 if next_url:
871 break
872
873 if not next_url:
874 continue
875
876 entries.append({
877 '_type': 'url',
878 'url': next_url,
879 'title': it.find('title').text,
880 })
4fc946b5
PH
881
882 return {
883 '_type': 'playlist',
884 'id': url,
885 'title': playlist_title,
886 'description': playlist_desc,
887 'entries': entries,
888 }
889
c8e9a235
PH
def _extract_camtasia(self, url, video_id, webpage):
    """Extract a Camtasia playlist referenced from webpage.

    Searches webpage for an fo.addVariable("csConfigFile", ...) call,
    downloads the XML configuration it names (resolved relative to url)
    and creates one entry per child of ./playlist/array/fileset that has
    a <uri> element.

    Returns a playlist dict, or None if no camtasia video can be found
    (the page has no configuration reference, or the configuration has no
    fileset node).
    """

    camtasia_cfg = self._search_regex(
        r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
        webpage, 'camtasia configuration file', default=None)
    if camtasia_cfg is None:
        return None

    title = self._html_search_meta('DC.title', webpage, fatal=True)

    camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
    camtasia_cfg = self._download_xml(
        camtasia_url, video_id,
        note='Downloading camtasia configuration',
        errnote='Failed to download camtasia configuration')
    fileset_node = camtasia_cfg.find('./playlist/array/fileset')
    if fileset_node is None:
        # Honour the documented contract instead of failing on None below.
        return None

    entries = []
    # Iterate the element itself: Element.getchildren() no longer exists
    # on Python 3.9+.
    for n in fileset_node:
        url_n = n.find('./uri')
        if url_n is None:
            continue

        duration_n = n.find('./duration')
        entries.append({
            # File name of the URI without directory and extension.
            'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
            'title': '%s - %s' % (title, n.tag),
            'url': compat_urlparse.urljoin(url, url_n.text),
            'duration': (
                None if duration_n is None
                else float_or_none(duration_n.text)),
        })

    return {
        '_type': 'playlist',
        'entries': entries,
        'title': title,
    }
926
def _real_extract(self, url):
    """Extract media information from an arbitrary URL.

    The steps are tried in order and the first one that succeeds returns:

    1. Protocol-relative URLs (``//host/...``) get the default scheme.
    2. Inputs without a scheme are handled according to the
       ``default_search`` downloader parameter (http fixup, search
       prefix, or an error).
    3. A HEAD request (falling back to a full request) is used to follow
       redirects and to detect direct audio/video links by Content-Type.
    4. The first bytes of the body are sniffed; non-HTML content is
       returned as a direct link.
    5. The page is checked for RSS, Camtasia, and a long list of known
       third-party embeds, each of which delegates to another extractor.
    6. Generic player patterns (JW Player, Flowplayer, twitter cards,
       Open Graph, HTML5 <video>, meta/HTTP refresh) are tried last.

    Returns an info dict: a single video, a ``url``/``url_transparent``
    redirection, or a ``playlist``.

    Raises ExtractorError for an unusable scheme-less input and
    UnsupportedError when nothing could be found in the page.
    """
    if url.startswith('//'):
        return {
            '_type': 'url',
            'url': self.http_scheme() + url,
        }

    parsed_url = compat_urlparse.urlparse(url)
    if not parsed_url.scheme:
        default_search = self._downloader.params.get('default_search')
        if default_search is None:
            default_search = 'fixup_error'

        if default_search in ('auto', 'auto_warning', 'fixup_error'):
            if '/' in url:
                self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
                return self.url_result('http://' + url)
            elif default_search != 'fixup_error':
                if default_search == 'auto_warning':
                    if re.match(r'^(?:url|URL)$', url):
                        raise ExtractorError(
                            'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
                            expected=True)
                    else:
                        self._downloader.report_warning(
                            'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
                return self.url_result('ytsearch:' + url)

        if default_search in ('error', 'fixup_error'):
            raise ExtractorError(
                '%r is not a valid URL. '
                'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
                % (url, url), expected=True)
        else:
            # Any other value is treated as a search prefix such as "ytsearch"
            if ':' not in default_search:
                default_search += ':'
            return self.url_result(default_search + url)

    url, smuggled_data = unsmuggle_url(url)
    force_videoid = None
    # Set when another extractor deliberately delegated to the generic one;
    # suppresses the "Falling back" warning below.
    is_intentional = smuggled_data and smuggled_data.get('to_generic')
    if smuggled_data and 'force_videoid' in smuggled_data:
        force_videoid = smuggled_data['force_videoid']
        video_id = force_videoid
    else:
        # Last path component without its extension
        video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])

    self.to_screen('%s: Requesting header' % video_id)

    head_req = HEADRequest(url)
    head_response = self._request_webpage(
        head_req, video_id,
        note=False, errnote='Could not send HEAD request to %s' % url,
        fatal=False)

    if head_response is not False:
        # Check for redirect
        new_url = head_response.geturl()
        if url != new_url:
            self.report_following_redirect(new_url)
            if force_videoid:
                new_url = smuggle_url(
                    new_url, {'force_videoid': force_videoid})
            return self.url_result(new_url)

    full_response = None
    if head_response is False:
        # HEAD failed: issue a regular request and use its headers instead
        request = compat_urllib_request.Request(url)
        request.add_header('Accept-Encoding', '*')
        full_response = self._request_webpage(request, video_id)
        head_response = full_response

    # Check for direct link to a video
    content_type = head_response.headers.get('Content-Type', '')
    m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
    if m:
        upload_date = unified_strdate(
            head_response.headers.get('Last-Modified'))
        return {
            'id': video_id,
            'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
            'direct': True,
            'formats': [{
                'format_id': m.group('format_id'),
                'url': url,
                'vcodec': 'none' if m.group('type') == 'audio' else None
            }],
            'upload_date': upload_date,
        }

    if not self._downloader.params.get('test', False) and not is_intentional:
        force = self._downloader.params.get('force_generic_extractor', False)
        self._downloader.report_warning(
            '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))

    if not full_response:
        request = compat_urllib_request.Request(url)
        # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
        # making it impossible to download only chunk of the file (yet we need only 512 bytes to
        # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
        # that will always result in downloading the whole file that is not desirable.
        # Therefore for extraction pass we have to override Accept-Encoding to any in order
        # to accept raw bytes and being able to download only a chunk.
        # It may probably better to solve this by checking Content-Type for application/octet-stream
        # after HEAD request finishes, but not sure if we can rely on this.
        request.add_header('Accept-Encoding', '*')
        full_response = self._request_webpage(request, video_id)

    # Maybe it's a direct link to a video?
    # Be careful not to download the whole thing!
    first_bytes = full_response.read(512)
    if not is_html(first_bytes):
        self._downloader.report_warning(
            'URL could be a direct video link, returning it as such.')
        upload_date = unified_strdate(
            head_response.headers.get('Last-Modified'))
        return {
            'id': video_id,
            'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
            'direct': True,
            'url': url,
            'upload_date': upload_date,
        }

    # The already consumed bytes are handed back via ``prefix``
    webpage = self._webpage_read_content(
        full_response, url, video_id, prefix=first_bytes)

    self.report_extraction(video_id)

    # Is it an RSS feed?
    try:
        doc = parse_xml(webpage)
        if doc.tag == 'rss':
            return self._extract_rss(url, video_id, doc)
    except compat_xml_parse_error:
        pass

    # Is it a Camtasia project?
    camtasia_res = self._extract_camtasia(url, video_id, webpage)
    if camtasia_res is not None:
        return camtasia_res

    # Sometimes embedded video player is hidden behind percent encoding
    # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
    # Unescaping the whole page allows to handle those cases in a generic way
    webpage = compat_urllib_parse.unquote(webpage)

    # it's tempting to parse this further, but you would
    # have to take into account all the variations like
    #   Video Title - Site Name
    #   Site Name | Video Title
    #   Video Title - Tagline | Site Name
    # and so on and so forth; it's just not practical
    video_title = self._html_search_regex(
        r'(?s)<title>(.*?)</title>', webpage, 'video title',
        default='video')

    # Try to detect age limit automatically
    age_limit = self._rta_search(webpage)
    # And then there are the jokers who advertise that they use RTA,
    # but actually don't.
    AGE_LIMIT_MARKERS = [
        r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
    ]
    if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
        age_limit = 18

    # video uploader is domain name
    video_uploader = self._search_regex(
        r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')

    # Helper method
    def _playlist_from_matches(matches, getter=None, ie=None):
        # Build a de-duplicated playlist of url results; ``getter`` maps a
        # raw match to its URL, ``ie`` names the extractor to delegate to.
        urlrs = orderedSet(
            self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
            for m in matches)
        return self.playlist_result(
            urlrs, playlist_id=video_id, playlist_title=video_title)

    # Look for BrightCove:
    bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
    if bc_urls:
        self.to_screen('Brightcove video detected.')
        entries = [{
            '_type': 'url',
            'url': smuggle_url(bc_url, {'Referer': url}),
            'ie_key': 'Brightcove'
        } for bc_url in bc_urls]

        return {
            '_type': 'playlist',
            'title': video_title,
            'id': video_id,
            'entries': entries,
        }

    # Look for embedded rtl.nl player
    matches = re.findall(
        r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
        webpage)
    if matches:
        return _playlist_from_matches(matches, ie='RtlNl')

    vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
    if vimeo_url is not None:
        return self.url_result(vimeo_url)

    # Look for embedded YouTube player
    # The embedSWF alternative previously required a literal ':' after the
    # optional '(' and so never matched a real swfobject.embedSWF("...")
    # call; the colon is optional now (strict superset of the old pattern).
    matches = re.findall(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:?\s*|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/.+?)
        \1''', webpage)
    if matches:
        return _playlist_from_matches(
            matches, lambda m: unescapeHTML(m[1]))

    # Look for lazyYT YouTube embed
    matches = re.findall(
        r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    if matches:
        return _playlist_from_matches(matches, lambda m: unescapeHTML(m))

    # Look for embedded Dailymotion player
    matches = re.findall(
        r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
    if matches:
        return _playlist_from_matches(
            matches, lambda m: unescapeHTML(m[1]))

    # Look for embedded Dailymotion playlist player (#3822)
    m = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
    if m:
        playlists = re.findall(
            r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
        if playlists:
            return _playlist_from_matches(
                playlists, lambda p: '//dailymotion.com/playlist/%s' % p)

    # Look for embedded Wistia player
    match = re.search(
        r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
    if match:
        embed_url = self._proto_relative_url(
            unescapeHTML(match.group('url')))
        return {
            '_type': 'url_transparent',
            'url': embed_url,
            'ie_key': 'Wistia',
            'uploader': video_uploader,
            'title': video_title,
            'id': video_id,
        }

    match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
    if match:
        return {
            '_type': 'url_transparent',
            'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
            'ie_key': 'Wistia',
            'uploader': video_uploader,
            'title': video_title,
            'id': match.group('id')
        }

    # Look for embedded blip.tv player
    bliptv_url = BlipTVIE._extract_url(webpage)
    if bliptv_url:
        return self.url_result(bliptv_url, 'BlipTV')

    # Look for SVT player
    svt_url = SVTIE._extract_url(webpage)
    if svt_url:
        return self.url_result(svt_url, 'SVT')

    # Look for embedded condenast player
    # The closing quote must stay outside the capture group, otherwise
    # every extracted URL ends with a stray '"'.
    matches = re.findall(
        r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+)"',
        webpage)
    if matches:
        return {
            '_type': 'playlist',
            'entries': [{
                '_type': 'url',
                'ie_key': 'CondeNast',
                'url': ma,
            } for ma in matches],
            'title': video_title,
            'id': video_id,
        }

    # Look for Bandcamp pages with custom domain
    mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
    if mobj is not None:
        burl = unescapeHTML(mobj.group(1))
        # Don't set the extractor because it can be a track url or an album
        return self.url_result(burl)

    # Look for embedded Vevo player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for embedded Viddler player
    mobj = re.search(
        r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for NYTimes player
    mobj = re.search(
        r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for Libsyn player
    mobj = re.search(
        r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for Ooyala videos
    mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
            re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
            re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
            re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
    if mobj is not None:
        return OoyalaIE._build_url_result(mobj.group('ec'))

    # Look for multiple Ooyala embeds on SBN network websites
    mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
    if mobj is not None:
        embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
        if embeds:
            return _playlist_from_matches(
                embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')

    # Look for Aparat videos
    mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
    if mobj is not None:
        return self.url_result(mobj.group(1), 'Aparat')

    # Look for MPORA videos
    mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
    if mobj is not None:
        return self.url_result(mobj.group(1), 'Mpora')

    # Look for embedded NovaMov-based player
    mobj = re.search(
        r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
                (?P<url>http://(?:(?:embed|www)\.)?
                    (?:novamov\.com|
                       nowvideo\.(?:ch|sx|eu|at|ag|co)|
                       videoweed\.(?:es|com)|
                       movshare\.(?:net|sx|ag)|
                       divxstage\.(?:eu|net|ch|co|at|ag))
                    /embed\.php.+?)\1''', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for embedded Facebook player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Facebook')

    # Look for embedded VK player
    mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'VK')

    # Look for embedded ivi player
    mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Ivi')

    # Look for embedded Huffington Post player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'HuffPost')

    # Look for embed.ly
    # ``[^>]*`` (instead of a single ``[^>]``) tolerates additional
    # attributes or whitespace between ``class`` and ``href``/``src``.
    mobj = re.search(r'class=["\']embedly-card["\'][^>]*href=["\'](?P<url>[^"\']+)', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))
    mobj = re.search(r'class=["\']embedly-embed["\'][^>]*src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
    if mobj is not None:
        return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))

    # Look for funnyordie embed
    matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
    if matches:
        return _playlist_from_matches(
            matches, getter=unescapeHTML, ie='FunnyOrDie')

    # Look for BBC iPlayer embed
    matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
    if matches:
        return _playlist_from_matches(matches, ie='BBCCoUk')

    # Look for embedded RUTV player
    rutv_url = RUTVIE._extract_url(webpage)
    if rutv_url:
        return self.url_result(rutv_url, 'RUTV')

    # Look for embedded TVC player
    tvc_url = TVCIE._extract_url(webpage)
    if tvc_url:
        return self.url_result(tvc_url, 'TVC')

    # Look for embedded SportBox player
    sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
    if sportbox_urls:
        return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')

    # Look for embedded PornHub player
    pornhub_url = PornHubIE._extract_url(webpage)
    if pornhub_url:
        return self.url_result(pornhub_url, 'PornHub')

    # Look for embedded XHamster player
    xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
    if xhamster_urls:
        return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')

    # Look for embedded Tvigle player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Tvigle')

    # Look for embedded TED player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'TED')

    # Look for embedded Ustream videos
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Ustream')

    # Look for embedded arte.tv player
    mobj = re.search(
        r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'ArteTVEmbed')

    # Look for embedded smotri.com player
    smotri_url = SmotriIE._extract_url(webpage)
    if smotri_url:
        return self.url_result(smotri_url, 'Smotri')

    # Look for embedded soundcloud player
    mobj = re.search(
        r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
        webpage)
    if mobj is not None:
        url = unescapeHTML(mobj.group('url'))
        return self.url_result(url)

    # Look for embedded vulture.com player
    mobj = re.search(
        r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
        webpage)
    if mobj is not None:
        url = unescapeHTML(mobj.group('url'))
        return self.url_result(url, ie='Vulture')

    # Look for embedded mtvservices player
    mobj = re.search(
        r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
        webpage)
    if mobj is not None:
        url = unescapeHTML(mobj.group('url'))
        return self.url_result(url, ie='MTVServicesEmbedded')

    # Look for embedded yahoo player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Yahoo')

    # Look for embedded sbs.com.au player
    mobj = re.search(
        r'''(?x)
        (?:
            <meta\s+property="og:video"\s+content=|
            <iframe[^>]+?src=
        )
        (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'SBS')

    # Look for embedded Cinchcast player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Cinchcast')

    # Look for embedded MLB player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
        webpage)
    if not mobj:
        # Dots are escaped so that only the literal host m.mlb.com matches
        mobj = re.search(
            r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
            webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'MLB')

    # Look for Conde Nast embeds matching the extractor's own EMBED_URL
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
        webpage)
    if mobj is not None:
        return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')

    # Look for embedded Livestream player
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Livestream')

    # Look for Zapiks embed
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Zapiks')

    # Look for Kaltura embeds
    mobj = re.search(
        r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
    if mobj is not None:
        return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')

    # Look for Eagle.Platform embeds
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'EaglePlatform')

    # Look for ClipYou (uses Eagle.Platform) embeds
    mobj = re.search(
        r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
    if mobj is not None:
        return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')

    # Look for Pladform embeds
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Pladform')

    # Look for Playwire embeds
    mobj = re.search(
        r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for 5min embeds
    mobj = re.search(
        r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
    if mobj is not None:
        return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')

    # Look for Crooks and Liars embeds
    mobj = re.search(
        r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for NBC Sports VPlayer embeds
    nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
    if nbc_sports_url:
        return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')

    # Look for UDN embeds
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
    if mobj is not None:
        return self.url_result(
            compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')

    # Look for Senate ISVP iframe
    senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
    if senate_isvp_url:
        return self.url_result(senate_isvp_url, 'SenateISVP')

    # Look for Dailymotion Cloud videos
    dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
    if dmcloud_url:
        return self.url_result(dmcloud_url, 'DailymotionCloud')

    # Look for OnionStudios embeds
    onionstudios_url = OnionStudiosIE._extract_url(webpage)
    if onionstudios_url:
        return self.url_result(onionstudios_url)

    # Look for AdobeTVVideo embeds
    mobj = re.search(
        r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
        webpage)
    if mobj is not None:
        return self.url_result(
            self._proto_relative_url(unescapeHTML(mobj.group(1))),
            'AdobeTVVideo')

    def check_video(vurl):
        # Accept YouTube URLs, and any URL whose path has an extension that
        # is not a known player/image/subtitle type.
        if YoutubeIE.suitable(vurl):
            return True
        vpath = compat_urlparse.urlparse(vurl).path
        vext = determine_ext(vpath)
        return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')

    def filter_video(urls):
        return list(filter(check_video, urls))

    # Start with something easy: JW Player in SWFObject
    found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
    if not found:
        # Look for gorilla-vid style embedding
        found = filter_video(re.findall(r'''(?sx)
            (?:
                jw_plugins|
                JWPlayerOptions|
                jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
            )
            .*?
            ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
    if not found:
        # Broaden the search a little bit
        found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
    if not found:
        # Broaden the findall a little bit: JWPlayer JS loader
        found = filter_video(re.findall(
            r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
    if not found:
        # Flow player
        found = filter_video(re.findall(r'''(?xs)
            flowplayer\("[^"]+",\s*
                \{[^}]+?\}\s*,
                \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
                    ["']?url["']?\s*:\s*["']([^"']+)["']
        ''', webpage))
    if not found:
        # Cinerama player
        found = re.findall(
            r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
    if not found:
        # Try to find twitter cards info
        found = filter_video(re.findall(
            r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
    if not found:
        # We look for Open Graph info:
        # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
        m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
        # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
        # NOTE(review): re.findall always returns a list, so this condition is
        # always true and og:video is consulted regardless of og:video:type
        # (Flash players are still dropped by filter_video). Kept as is to
        # avoid changing which pages are extractable.
        if m_video_type is not None:
            found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
    if not found:
        # HTML5 video
        found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
    if not found:
        # Last resort: follow a <meta http-equiv="refresh"> or Refresh header
        REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
        found = re.search(
            r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
            r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
            webpage)
        if not found:
            # Look also in Refresh HTTP header
            refresh_header = head_response.headers.get('Refresh')
            if refresh_header:
                found = re.search(REDIRECT_REGEX, refresh_header)
        if found:
            new_url = compat_urlparse.urljoin(url, found.group(1))
            self.report_following_redirect(new_url)
            return {
                '_type': 'url',
                'url': new_url,
            }
    if not found:
        raise UnsupportedError(url)

    entries = []
    for video_url in found:
        video_url = compat_urlparse.urljoin(url, video_url)
        video_id = compat_urllib_parse.unquote(os.path.basename(video_url))

        # Sometimes, jwplayer extraction will result in a YouTube URL
        if YoutubeIE.suitable(video_url):
            entries.append(self.url_result(video_url, 'Youtube'))
            continue

        # here's a fun little line of code for you:
        video_id = os.path.splitext(video_id)[0]

        if determine_ext(video_url) == 'smil':
            entries.append({
                'id': video_id,
                'formats': self._extract_smil_formats(video_url, video_id),
                'uploader': video_uploader,
                'title': video_title,
                'age_limit': age_limit,
            })
        else:
            entries.append({
                'id': video_id,
                'url': video_url,
                'uploader': video_uploader,
                'title': video_title,
                'age_limit': age_limit,
            })

    if len(entries) == 1:
        return entries[0]
    else:
        for num, e in enumerate(entries, start=1):
            # 'url' results don't have a title
            if e.get('title') is not None:
                e['title'] = '%s (%d)' % (e['title'], num)
        return {
            '_type': 'playlist',
            'entries': entries,
        }