]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/generic.py
[generic] Add support for xhamster embeds
[yt-dlp.git] / youtube_dl / extractor / generic.py
CommitLineData
cfe50f04
JMF
1# encoding: utf-8
2
79649588
PH
3from __future__ import unicode_literals
4
9b122384
PH
5import os
6import re
7
8from .common import InfoExtractor
fc9713a1 9from .youtube import YoutubeIE
8c25f81b 10from ..compat import (
9b122384 11 compat_urllib_parse,
1ddb9456
S
12 compat_urllib_parse_unquote,
13 compat_urllib_request,
a5caba1e 14 compat_urlparse,
f7300c5c 15 compat_xml_parse_error,
8c25f81b
PH
16)
17from ..utils import (
b759a0d4 18 determine_ext,
9b122384 19 ExtractorError,
c8e9a235 20 float_or_none,
aa94a6d3 21 HEADRequest,
61ca9a80 22 is_html,
ed2d6a19 23 orderedSet,
bcf89ce6 24 parse_xml,
9d4660ca
PH
25 smuggle_url,
26 unescapeHTML,
42393ce2 27 unified_strdate,
4d54ef20 28 unsmuggle_url,
416c7fcb 29 UnsupportedError,
42393ce2 30 url_basename,
76c73715 31 xpath_text,
9b122384 32)
cfe50f04 33from .brightcove import BrightcoveIE
a2edf2e7 34from .nbc import NBCSportsVPlayerIE
c0d0b01f 35from .ooyala import OoyalaIE
93d020dd 36from .rutv import RUTVIE
954c1d05 37from .tvc import TVCIE
d40a3b5b 38from .sportbox import SportBoxEmbedIE
cb3ac1c6 39from .smotri import SmotriIE
1419fafd 40from .condenast import CondeNastIE
418c5cc3 41from .udn import UDNEmbedIE
2fe1b5bd 42from .senateisvp import SenateISVPIE
0954cd8a 43from .bliptv import BlipTVIE
bab19a8e 44from .svt import SVTIE
65d161c4 45from .pornhub import PornHubIE
2bb5b6d0 46from .xhamster import XHamsterEmbedIE
b407e173 47from .vimeo import VimeoIE
756f574e 48from .dailymotion import DailymotionCloudIE
9b122384 49
0838239e 50
9b122384 51class GenericIE(InfoExtractor):
79649588 52 IE_DESC = 'Generic downloader that works on some sites'
9b122384 53 _VALID_URL = r'.*'
79649588 54 IE_NAME = 'generic'
cfe50f04 55 _TESTS = [
c5fa81fe
S
56 # Direct link to a video
57 {
58 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
59 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
60 'info_dict': {
61 'id': 'trailer',
62 'ext': 'mp4',
63 'title': 'trailer',
64 'upload_date': '20100513',
65 }
66 },
c5138a7c 67 # Direct link to media delivered compressed (until Accept-Encoding is *)
c5fa81fe
S
68 {
69 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
70 'md5': '128c42e68b13950268b648275386fc74',
71 'info_dict': {
72 'id': 'FictionJunction-Parallel_Hearts',
73 'ext': 'flac',
74 'title': 'FictionJunction-Parallel_Hearts',
75 'upload_date': '20140522',
76 },
77 'expected_warnings': [
78 'URL could be a direct video link, returning it as such.'
79 ]
80 },
81 # Direct download with broken HEAD
82 {
83 'url': 'http://ai-radio.org:8000/radio.opus',
84 'info_dict': {
85 'id': 'radio',
86 'ext': 'opus',
87 'title': 'radio',
88 },
89 'params': {
90 'skip_download': True, # infinite live stream
91 },
92 'expected_warnings': [
93 r'501.*Not Implemented'
94 ],
95 },
96 # Direct link with incorrect MIME type
97 {
98 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
99 'md5': '4ccbebe5f36706d85221f204d7eb5913',
100 'info_dict': {
101 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
102 'id': '5_Lennart_Poettering_-_Systemd',
103 'ext': 'webm',
104 'title': '5_Lennart_Poettering_-_Systemd',
105 'upload_date': '20141120',
106 },
107 'expected_warnings': [
108 'URL could be a direct video link, returning it as such.'
109 ]
110 },
111 # RSS feed
112 {
113 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
114 'info_dict': {
115 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
116 'title': 'Zero Punctuation',
117 'description': 're:.*groundbreaking video review series.*'
118 },
119 'playlist_mincount': 11,
120 },
121 # RSS feed with enclosure
122 {
123 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
124 'info_dict': {
125 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
126 'ext': 'm4v',
127 'upload_date': '20150228',
128 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
129 }
130 },
131 # google redirect
132 {
133 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
134 'info_dict': {
135 'id': 'cmQHVoWB5FY',
136 'ext': 'mp4',
137 'upload_date': '20130224',
138 'uploader_id': 'TheVerge',
139 'description': 're:^Chris Ziegler takes a look at the\.*',
140 'uploader': 'The Verge',
141 'title': 'First Firefox OS phones side-by-side',
142 },
143 'params': {
144 'skip_download': False,
145 }
146 },
cfe50f04 147 {
79649588 148 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
d360a146 149 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
79649588 150 'info_dict': {
d360a146
S
151 'id': '13601338388002',
152 'ext': 'mp4',
79649588
PH
153 'uploader': 'www.hodiho.fr',
154 'title': 'R\u00e9gis plante sa Jeep',
cfe50f04
JMF
155 }
156 },
c19f7764
JMF
157 # bandcamp page with custom domain
158 {
79649588
PH
159 'add_ie': ['Bandcamp'],
160 'url': 'http://bronyrock.com/track/the-pony-mash',
79649588 161 'info_dict': {
fd50bf62
S
162 'id': '3235767654',
163 'ext': 'mp3',
79649588
PH
164 'title': 'The Pony Mash',
165 'uploader': 'M_Pallante',
c19f7764 166 },
79649588 167 'skip': 'There is a limit of 200 free downloads / month for the test song',
c19f7764 168 },
eeb165e6 169 # embedded brightcove video
dd5bcdc4
JMF
170 # it also tests brightcove videos that need to set the 'Referer' in the
171 # http requests
eeb165e6 172 {
79649588
PH
173 'add_ie': ['Brightcove'],
174 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
175 'info_dict': {
176 'id': '2765128793001',
177 'ext': 'mp4',
178 'title': 'Le cours de bourse : l’analyse technique',
179 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
180 'uploader': 'BFM BUSINESS',
eeb165e6 181 },
79649588
PH
182 'params': {
183 'skip_download': True,
eeb165e6
JMF
184 },
185 },
17ab4d3b
PH
186 {
187 # https://github.com/rg3/youtube-dl/issues/2253
188 'url': 'http://bcove.me/i6nfkrc3',
17ab4d3b
PH
189 'md5': '0ba9446db037002366bab3b3eb30c88c',
190 'info_dict': {
fd50bf62
S
191 'id': '3101154703001',
192 'ext': 'mp4',
17ab4d3b
PH
193 'title': 'Still no power',
194 'uploader': 'thestar.com',
195 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
196 },
197 'add_ie': ['Brightcove'],
198 },
0479c625
S
199 {
200 'url': 'http://www.championat.com/video/football/v/87/87499.html',
201 'md5': 'fb973ecf6e4a78a67453647444222983',
202 'info_dict': {
203 'id': '3414141473001',
204 'ext': 'mp4',
205 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
206 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
207 'uploader': 'Championat',
208 },
209 },
bdf97017 210 {
37aab278 211 # https://github.com/rg3/youtube-dl/issues/3541
bdf97017
NJ
212 'add_ie': ['Brightcove'],
213 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
214 'info_dict': {
215 'id': '3866516442001',
37aab278 216 'ext': 'mp4',
bdf97017
NJ
217 'title': 'Leer mij vrouwen kennen: Aflevering 1',
218 'description': 'Leer mij vrouwen kennen: Aflevering 1',
219 'uploader': 'SBS Broadcasting',
220 },
37aab278 221 'skip': 'Restricted to Netherlands',
bdf97017 222 'params': {
37aab278 223 'skip_download': True, # m3u8 download
bdf97017
NJ
224 },
225 },
c0d0b01f
JMF
226 # ooyala video
227 {
79649588 228 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
87830900 229 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
79649588
PH
230 'info_dict': {
231 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
232 'ext': 'mp4',
3486df38 233 'title': '2cc213299525360.mov', # that's what we get
c0d0b01f 234 },
87830900 235 'add_ie': ['Ooyala'],
c0d0b01f 236 },
f076b638 237 # multiple ooyala embeds on SBN network websites
238 {
239 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
240 'info_dict': {
241 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
242 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
243 },
244 'playlist_mincount': 3,
245 'params': {
246 'skip_download': True,
247 },
248 'add_ie': ['Ooyala'],
249 },
1b86cc41 250 # embed.ly video
251 {
252 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
253 'info_dict': {
254 'id': '9ODmcdjQcHQ',
255 'ext': 'mp4',
0a5bce56
PH
256 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
257 'upload_date': '20140225',
258 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
259 'uploader': 'Tested',
260 'uploader_id': 'testedcom',
1b86cc41 261 },
262 # No need to test YoutubeIE here
263 'params': {
264 'skip_download': True,
265 },
266 },
60cc4dc4
PH
267 # funnyordie embed
268 {
269 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
60cc4dc4
PH
270 'info_dict': {
271 'id': '18e820ec3f',
272 'ext': 'mp4',
273 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
274 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
93d020dd 275 },
60cc4dc4 276 },
faa4ea68
S
277 # BBC iPlayer embeds
278 {
279 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
280 'info_dict': {
281 'title': 'BBC - Blogs - Adam Curtis - BUGGER',
282 },
283 'playlist_mincount': 18,
284 },
93d020dd
S
285 # RUTV embed
286 {
287 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
288 'info_dict': {
289 'id': '776940',
290 'ext': 'mp4',
291 'title': 'Охотское море стало целиком российским',
292 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
293 },
294 'params': {
295 # m3u8 download
296 'skip_download': True,
297 },
aab74fa1 298 },
f37bdbe5
S
299 # TVC embed
300 {
301 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
302 'info_dict': {
303 'id': '55304',
304 'ext': 'mp4',
305 'title': 'Дошкольное воспитание',
306 },
307 },
b827a601
S
308 # SportBox embed
309 {
310 'url': 'http://www.vestifinance.ru/articles/25753',
311 'info_dict': {
312 'id': '25753',
313 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
314 },
315 'playlist': [{
316 'info_dict': {
317 'id': '370908',
318 'title': 'Госзаказ. День 3',
319 'ext': 'mp4',
320 }
321 }, {
322 'info_dict': {
323 'id': '370905',
324 'title': 'Госзаказ. День 2',
325 'ext': 'mp4',
326 }
327 }, {
328 'info_dict': {
329 'id': '370902',
330 'title': 'Госзаказ. День 1',
331 'ext': 'mp4',
332 }
333 }],
334 'params': {
335 # m3u8 download
336 'skip_download': True,
337 },
338 },
aab74fa1
PH
339 # Embedded TED video
340 {
341 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
a8eb5a8e 342 'md5': '65fdff94098e4a607385a60c5177c638',
aab74fa1 343 'info_dict': {
a8eb5a8e 344 'id': '1969',
aab74fa1 345 'ext': 'mp4',
a8eb5a8e
PH
346 'title': 'Hidden miracles of the natural world',
347 'uploader': 'Louie Schwartzberg',
348 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
aab74fa1 349 }
60cc4dc4 350 },
5c386252 351 # Embedded Ustream video
352 {
353 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
354 'md5': '27b99cdb639c9b12a79bca876a073417',
355 'info_dict': {
ca6aada4 356 'id': '45734260',
357 'ext': 'flv',
358 'uploader': 'AU SPA: The NSA and Privacy',
5c386252 359 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
360 }
361 },
d95e35d6
S
362 # nowvideo embed hidden behind percent encoding
363 {
364 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
365 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
366 'info_dict': {
367 'id': '06e53103ca9aa',
368 'ext': 'flv',
369 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
370 'description': 'No description',
371 },
0f2a2ba1 372 },
893f8832
PH
373 # arte embed
374 {
375 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
376 'md5': '7653032cbb25bf6c80d80f217055fa43',
377 'info_dict': {
378 'id': '048195-004_PLUS7-F',
379 'ext': 'flv',
380 'title': 'X:enius',
381 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
382 'upload_date': '20140320',
383 },
384 'params': {
385 'skip_download': 'Requires rtmpdump'
386 }
387 },
fa35cdad
PH
388 # Condé Nast embed
389 {
390 'url': 'http://www.wired.com/2014/04/honda-asimo/',
391 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
392 'info_dict': {
393 'id': '53501be369702d3275860000',
394 'ext': 'mp4',
395 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
396 }
ebd3c7b3
PH
397 },
398 # Dailymotion embed
399 {
400 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
401 'md5': '441aeeb82eb72c422c7f14ec533999cd',
402 'info_dict': {
403 'id': 'k2mm4bCdJ6CQ2i7c8o2',
404 'ext': 'mp4',
405 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
406 'uploader': 'Spi0n',
407 },
408 'add_ie': ['Dailymotion'],
2b88feed
PH
409 },
410 # YouTube embed
411 {
412 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
413 'info_dict': {
414 'id': 'FXRb4ykk4S0',
415 'ext': 'mp4',
416 'title': 'The NBL Auction 2014',
417 'uploader': 'BADMINTON England',
418 'uploader_id': 'BADMINTONEvents',
419 'upload_date': '20140603',
420 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
421 },
422 'add_ie': ['Youtube'],
423 'params': {
424 'skip_download': True,
425 }
426 },
c5cd249e
JMF
427 # MTVServices embed
428 {
429 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
430 'md5': '35727f82f58c76d996fc188f9755b0d5',
431 'info_dict': {
432 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
433 'ext': 'mp4',
434 'title': 'Review',
435 'description': 'Mario\'s life in the fast lane has never looked so good.',
436 },
437 },
61013473 438 # YouTube embed via <data-embed-url="">
439 {
440 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
61013473 441 'info_dict': {
a8eb5a8e 442 'id': '4vAffPZIT44',
61013473 443 'ext': 'mp4',
a8eb5a8e 444 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
ed2d6a19
PH
445 'uploader': 'Gameloft',
446 'uploader_id': 'gameloft',
a8eb5a8e
PH
447 'upload_date': '20140828',
448 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
ed2d6a19
PH
449 },
450 'params': {
451 'skip_download': True,
61013473 452 }
c8e9a235
PH
453 },
454 # Camtasia studio
455 {
456 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
457 'playlist': [{
458 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
459 'info_dict': {
460 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
461 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
462 'ext': 'flv',
463 'duration': 2235.90,
464 }
465 }, {
466 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
467 'info_dict': {
468 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
469 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
470 'ext': 'flv',
471 'duration': 2235.93,
472 }
473 }],
474 'info_dict': {
475 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
476 }
4d805e06
PH
477 },
478 # Flowplayer
479 {
480 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
481 'md5': '9d65602bf31c6e20014319c7d07fba27',
482 'info_dict': {
483 'id': '5123ea6d5e5a7',
484 'ext': 'mp4',
485 'age_limit': 18,
486 'uploader': 'www.handjobhub.com',
d6d9186f 487 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
4d805e06 488 }
0990305d 489 },
22a6f150
PH
490 # Multiple brightcove videos
491 # https://github.com/rg3/youtube-dl/issues/2283
492 {
493 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
494 'info_dict': {
495 'id': 'always-never',
496 'title': 'Always / Never - The New Yorker',
497 },
498 'playlist_count': 3,
499 'params': {
500 'extract_flat': False,
501 'skip_download': True,
502 }
1a94ff68
S
503 },
504 # MLB embed
505 {
506 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
507 'md5': '96f09a37e44da40dd083e12d9a683327',
508 'info_dict': {
509 'id': '33322633',
510 'ext': 'mp4',
511 'title': 'Ump changes call to ball',
512 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
513 'duration': 48,
514 'timestamp': 1401537900,
515 'upload_date': '20140531',
516 'thumbnail': 're:^https?://.*\.jpg$',
517 },
518 },
746c67d7
NJ
519 # Wistia embed
520 {
521 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
522 'md5': '8788b683c777a5cf25621eaf286d0c23',
523 'info_dict': {
524 'id': '1cfaf6b7ea',
525 'ext': 'mov',
526 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
527 'duration': 643.0,
528 'filesize': 182808282,
529 'uploader': 'education-portal.com',
530 },
531 },
52cffcb1 532 {
533 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
534 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
535 'info_dict': {
536 'id': 'uxjb0lwrcz',
537 'ext': 'mp4',
85d7b765 538 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
52cffcb1 539 'duration': 1715.0,
85d7b765 540 'uploader': 'thoughtworks.wistia.com',
70b7e3fb 541 },
52cffcb1 542 },
ac645ac7
PH
543 # Soundcloud embed
544 {
545 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
546 'info_dict': {
547 'id': '174391317',
548 'ext': 'mp3',
549 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
550 'uploader': 'Sophos Security',
551 'title': 'Chet Chat 171 - Oct 29, 2014',
552 'upload_date': '20141029',
553 }
af63fed7
PH
554 },
555 # Livestream embed
556 {
557 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
558 'info_dict': {
559 'id': '67864563',
560 'ext': 'flv',
561 'upload_date': '20141112',
562 'title': 'Rosetta #CometLanding webcast HL 10',
563 }
564 },
65f3a228
PH
565 # LazyYT
566 {
567 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
568 'info_dict': {
11e611a7 569 'id': '1986',
65f3a228
PH
570 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
571 },
572 'playlist_mincount': 2,
4e262a88 573 },
42bdd9d0
PH
574 # Cinchcast embed
575 {
576 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
577 'info_dict': {
578 'id': '7141703',
579 'ext': 'mp3',
580 'upload_date': '20141126',
581 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
582 }
583 },
501f13fb
PH
584 # Cinerama player
585 {
586 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
587 'info_dict': {
588 'id': '730m_DandD_1901_512k',
589 'ext': 'mp4',
590 'uploader': 'www.abc.net.au',
591 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
592 }
796df3c6
S
593 },
594 # embedded viddler video
595 {
596 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
597 'info_dict': {
598 'id': '4d03aad9',
599 'ext': 'mp4',
600 'uploader': 'deadspin',
601 'title': 'WALL-TO-GORTAT',
602 'timestamp': 1422285291,
603 'upload_date': '20150126',
604 },
605 'add_ie': ['Viddler'],
a0f71985 606 },
2051acde
S
607 # Libsyn embed
608 {
609 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
610 'info_dict': {
611 'id': '3377616',
612 'ext': 'mp3',
613 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
614 'description': 'md5:601cb790edd05908957dae8aaa866465',
615 'upload_date': '20150220',
616 },
617 },
a0f71985
PH
618 # jwplayer YouTube
619 {
620 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
621 'info_dict': {
622 'id': 'Mrj4DVp2zeA',
623 'ext': 'mp4',
f37e3f99 624 'upload_date': '20150212',
a0f71985
PH
625 'uploader': 'The National Archives UK',
626 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
627 'uploader_id': 'NationalArchives08',
628 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
629 },
59b8ab58
PH
630 },
631 # rtl.nl embed
632 {
633 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
634 'playlist_mincount': 5,
635 'info_dict': {
636 'id': 'aanslagen-kopenhagen',
637 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
638 }
255fca5e
S
639 },
640 # Zapiks embed
641 {
642 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
643 'info_dict': {
644 'id': '118046',
645 'ext': 'mp4',
646 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
647 }
648 },
e3216b82
NJ
649 # Kaltura embed
650 {
651 'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
652 'info_dict': {
653 'id': '1_eergr3h1',
654 'ext': 'mp4',
655 'upload_date': '20150226',
656 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
657 'timestamp': int,
658 'title': 'John Carlson Postgame 2/25/15',
659 },
660 },
135c9c42
S
661 # Eagle.Platform embed (generic URL)
662 {
663 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
664 'info_dict': {
665 'id': '227304',
666 'ext': 'mp4',
667 'title': 'Навальный вышел на свободу',
668 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
669 'thumbnail': 're:^https?://.*\.jpg$',
670 'duration': 87,
671 'view_count': int,
672 'age_limit': 0,
673 },
674 },
d47ae7f6
S
675 # ClipYou (Eagle.Platform) embed (custom URL)
676 {
677 'url': 'http://muz-tv.ru/play/7129/',
678 'info_dict': {
679 'id': '12820',
680 'ext': 'mp4',
681 'title': "'O Sole Mio",
682 'thumbnail': 're:^https?://.*\.jpg$',
683 'duration': 216,
684 'view_count': int,
685 },
686 },
f8388757
S
687 # Pladform embed
688 {
689 'url': 'http://muz-tv.ru/kinozal/view/7400/',
690 'info_dict': {
691 'id': '100183293',
692 'ext': 'mp4',
62259846 693 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
f8388757
S
694 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
695 'thumbnail': 're:^https?://.*\.jpg$',
696 'duration': 694,
697 'age_limit': 0,
698 },
699 },
c798f15b
S
700 # Playwire embed
701 {
702 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
703 'info_dict': {
704 'id': '3519514',
705 'ext': 'mp4',
706 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
707 'thumbnail': 're:^https?://.*\.png$',
708 'duration': 45.115,
709 },
710 },
ad320e9b
NJ
711 # 5min embed
712 {
713 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
714 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
715 'info_dict': {
716 'id': '518726732',
717 'ext': 'mp4',
718 'title': 'Facebook Creates "On This Day" | Crunch Report',
719 },
720 },
dc455a5f
S
721 # SVT embed
722 {
723 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
724 'info_dict': {
725 'id': '2900353',
726 'ext': 'flv',
727 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
728 'duration': 27,
729 'age_limit': 0,
730 },
731 },
a4257017
S
732 # Crooks and Liars embed
733 {
734 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
735 'info_dict': {
736 'id': '8RUoRhRi',
737 'ext': 'mp4',
738 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
739 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
740 'timestamp': 1428207000,
741 'upload_date': '20150405',
742 'uploader': 'Heather',
743 },
744 },
745 # Crooks and Liars external embed
746 {
747 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
748 'info_dict': {
749 'id': 'MTE3MjUtMzQ2MzA',
750 'ext': 'mp4',
751 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
752 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
753 'timestamp': 1265032391,
754 'upload_date': '20100201',
755 'uploader': 'Heather',
756 },
757 },
facecb84 758 # NBC Sports vplayer embed
a2edf2e7 759 {
facecb84 760 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
a2edf2e7 761 'info_dict': {
facecb84
S
762 'id': 'ln7x1qSThw4k',
763 'ext': 'flv',
764 'title': "PFT Live: New leader in the 'new-look' defense",
765 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
a2edf2e7 766 },
418c5cc3
YCH
767 },
768 # UDN embed
769 {
770 'url': 'http://www.udn.com/news/story/7314/822787',
01c58f84 771 'md5': 'fd2060e988c326991037b9aff9df21a6',
418c5cc3 772 'info_dict': {
01c58f84 773 'id': '300346',
418c5cc3 774 'ext': 'mp4',
01c58f84 775 'title': '中一中男師變性 全校師生力挺',
418c5cc3
YCH
776 'thumbnail': 're:^https?://.*\.jpg$',
777 }
edfcf7ab
YCH
778 },
779 # Ooyala embed
780 {
781 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
782 'info_dict': {
783 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
784 'ext': 'mp4',
785 'description': 'VIDEO: Index/Match versus VLOOKUP.',
786 'title': 'This is what separates the Excel masters from the wannabes',
787 },
788 'params': {
789 # m3u8 downloads
790 'skip_download': True,
791 }
d6fd958c
YCH
792 },
793 # Contains a SMIL manifest
794 {
795 'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
796 'info_dict': {
797 'id': 'file',
798 'ext': 'flv',
799 'title': '+ Football: Lottery Champions League Europe',
800 'uploader': 'www.telewebion.com',
801 },
802 'params': {
803 # rtmpe downloads
804 'skip_download': True,
805 }
b26733ba
YCH
806 },
807 # Brightcove URL in single quotes
808 {
809 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
810 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
811 'info_dict': {
812 'id': '4255764656001',
813 'ext': 'mp4',
814 'title': 'SN Presents: Russell Martin, World Citizen',
815 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
816 'uploader': 'Rogers Sportsnet',
817 },
756f574e
YCH
818 },
819 # Dailymotion Cloud video
820 {
821 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
822 'md5': '49444254273501a64675a7e68c502681',
823 'info_dict': {
824 'id': '5585de919473990de4bee11b',
825 'ext': 'mp4',
826 'title': 'Le débat',
827 'thumbnail': 're:^https?://.*\.jpe?g$',
828 }
76c73715 829 }
cfe50f04 830 ]
9b122384 831
9b122384
PH
def report_following_redirect(self, new_url):
    """Tell the user that the request is being redirected to *new_url*.

    The notice is written through the downloader's ``to_screen``.
    """
    notice = '[redirect] Following redirect to %s' % new_url
    self._downloader.to_screen(notice)
9b122384 835
4fc946b5
PH
def _extract_rss(self, url, video_id, doc):
    """Build a playlist result from a parsed RSS document.

    ``doc`` is the parsed feed; its ``./channel/item`` elements are
    walked in document order.  For each item the target URL is whatever
    ``xpath_text`` yields for ``link``; when that is empty, the ``url``
    attribute of the first ``<enclosure>`` carrying one is used instead.
    Items that end up without a URL are skipped.

    ``url`` (the feed address) is used as the playlist id.  ``video_id``
    is accepted for signature compatibility but not used here.

    Channel title, channel description and item titles are all optional:
    a missing element yields ``None`` rather than raising
    ``AttributeError``.

    Returns a ``playlist`` info dict whose entries are ``url`` results.
    """
    def text_or_none(element):
        # Element.find() returns None for a missing child, so ``.text``
        # must not be dereferenced unconditionally.
        return None if element is None else element.text

    playlist_title = text_or_none(doc.find('./channel/title'))
    playlist_desc = text_or_none(doc.find('./channel/description'))

    entries = []
    for it in doc.findall('./channel/item'):
        next_url = xpath_text(it, 'link', fatal=False)
        if not next_url:
            # No <link>: fall back to the first enclosure that has a URL.
            for e in it.findall('./enclosure'):
                next_url = e.attrib.get('url')
                if next_url:
                    break

        if not next_url:
            continue

        entries.append({
            '_type': 'url',
            'url': next_url,
            # <title> is optional on an RSS item.
            'title': text_or_none(it.find('title')),
        })

    return {
        '_type': 'playlist',
        'id': url,
        'title': playlist_title,
        'description': playlist_desc,
        'entries': entries,
    }
867
c8e9a235
PH
868 def _extract_camtasia(self, url, video_id, webpage):
869 """ Returns None if no camtasia video can be found. """
870
871 camtasia_cfg = self._search_regex(
872 r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
873 webpage, 'camtasia configuration file', default=None)
874 if camtasia_cfg is None:
875 return None
876
877 title = self._html_search_meta('DC.title', webpage, fatal=True)
878
879 camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
880 camtasia_cfg = self._download_xml(
881 camtasia_url, video_id,
882 note='Downloading camtasia configuration',
883 errnote='Failed to download camtasia configuration')
884 fileset_node = camtasia_cfg.find('./playlist/array/fileset')
885
886 entries = []
887 for n in fileset_node.getchildren():
888 url_n = n.find('./uri')
889 if url_n is None:
890 continue
891
892 entries.append({
893 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
894 'title': '%s - %s' % (title, n.tag),
895 'url': compat_urlparse.urljoin(url, url_n.text),
896 'duration': float_or_none(n.find('./duration').text),
897 })
898
899 return {
900 '_type': 'playlist',
901 'entries': entries,
902 'title': title,
903 }
904
9b122384 905 def _real_extract(self, url):
ebd3c7b3
PH
906 if url.startswith('//'):
907 return {
908 '_type': 'url',
20991253 909 'url': self.http_scheme() + url,
ebd3c7b3
PH
910 }
911
a7130543
JMF
912 parsed_url = compat_urlparse.urlparse(url)
913 if not parsed_url.scheme:
04b4d394
PH
914 default_search = self._downloader.params.get('default_search')
915 if default_search is None:
1f7ccb90 916 default_search = 'fixup_error'
04b4d394 917
1f7ccb90 918 if default_search in ('auto', 'auto_warning', 'fixup_error'):
04b4d394
PH
919 if '/' in url:
920 self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
921 return self.url_result('http://' + url)
1f7ccb90 922 elif default_search != 'fixup_error':
9c1fc022 923 if default_search == 'auto_warning':
0e67ab0d
PH
924 if re.match(r'^(?:url|URL)$', url):
925 raise ExtractorError(
926 'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
927 expected=True)
928 else:
929 self._downloader.report_warning(
7571c02c 930 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
04b4d394 931 return self.url_result('ytsearch:' + url)
1f7ccb90
PH
932
933 if default_search in ('error', 'fixup_error'):
7571c02c 934 raise ExtractorError(
b74e86f4
PH
935 '%r is not a valid URL. '
936 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
937 % (url, url), expected=True)
04b4d394 938 else:
f2f2c0c2
PH
939 if ':' not in default_search:
940 default_search += ':'
04b4d394 941 return self.url_result(default_search + url)
4d54ef20
PH
942
943 url, smuggled_data = unsmuggle_url(url)
944 force_videoid = None
d6e6a422 945 is_intentional = smuggled_data and smuggled_data.get('to_generic')
4d54ef20
PH
946 if smuggled_data and 'force_videoid' in smuggled_data:
947 force_videoid = smuggled_data['force_videoid']
948 video_id = force_videoid
949 else:
1ddb9456 950 video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
a7130543 951
79649588 952 self.to_screen('%s: Requesting header' % video_id)
c1d1facd 953
ebab4520 954 head_req = HEADRequest(url)
23be51d8 955 head_response = self._request_webpage(
ebab4520
PH
956 head_req, video_id,
957 note=False, errnote='Could not send HEAD request to %s' % url,
958 fatal=False)
42393ce2 959
23be51d8 960 if head_response is not False:
42393ce2 961 # Check for redirect
23be51d8 962 new_url = head_response.geturl()
42393ce2
PH
963 if url != new_url:
964 self.report_following_redirect(new_url)
4d54ef20
PH
965 if force_videoid:
966 new_url = smuggle_url(
967 new_url, {'force_videoid': force_videoid})
cecaaf3f 968 return self.url_result(new_url)
42393ce2 969
23be51d8
PH
970 full_response = None
971 if head_response is False:
58bde34a
S
972 request = compat_urllib_request.Request(url)
973 request.add_header('Accept-Encoding', '*')
974 full_response = self._request_webpage(request, video_id)
23be51d8
PH
975 head_response = full_response
976
977 # Check for direct link to a video
978 content_type = head_response.headers.get('Content-Type', '')
979 m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
980 if m:
981 upload_date = unified_strdate(
982 head_response.headers.get('Last-Modified'))
983 return {
984 'id': video_id,
1ddb9456 985 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
ccdd0ffb 986 'direct': True,
23be51d8
PH
987 'formats': [{
988 'format_id': m.group('format_id'),
989 'url': url,
990 'vcodec': 'none' if m.group('type') == 'audio' else None
991 }],
992 'upload_date': upload_date,
993 }
42393ce2 994
d6e6a422
PH
995 if not self._downloader.params.get('test', False) and not is_intentional:
996 self._downloader.report_warning('Falling back on generic information extractor.')
997
4e262a88 998 if not full_response:
58bde34a
S
999 request = compat_urllib_request.Request(url)
1000 # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1001 # making it impossible to download only chunk of the file (yet we need only 512kB to
1002 # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1003 # that will always result in downloading the whole file that is not desirable.
1004 # Therefore for extraction pass we have to override Accept-Encoding to any in order
1005 # to accept raw bytes and being able to download only a chunk.
1006 # It would probably be better to solve this by checking Content-Type for application/octet-stream
1007 # after HEAD request finishes, but not sure if we can rely on this.
1008 request.add_header('Accept-Encoding', '*')
1009 full_response = self._request_webpage(request, video_id)
4e262a88
PH
1010
1011 # Maybe it's a direct link to a video?
1012 # Be careful not to download the whole thing!
1013 first_bytes = full_response.read(512)
61ca9a80 1014 if not is_html(first_bytes):
4e262a88
PH
1015 self._downloader.report_warning(
1016 'URL could be a direct video link, returning it as such.')
1017 upload_date = unified_strdate(
1018 head_response.headers.get('Last-Modified'))
1019 return {
1020 'id': video_id,
1ddb9456 1021 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
4e262a88
PH
1022 'direct': True,
1023 'url': url,
1024 'upload_date': upload_date,
1025 }
1026
1027 webpage = self._webpage_read_content(
1028 full_response, url, video_id, prefix=first_bytes)
1029
9b122384 1030 self.report_extraction(video_id)
887c6acd 1031
4fc946b5
PH
1032 # Is it an RSS feed?
1033 try:
bcf89ce6 1034 doc = parse_xml(webpage)
4fc946b5
PH
1035 if doc.tag == 'rss':
1036 return self._extract_rss(url, video_id, doc)
f7300c5c 1037 except compat_xml_parse_error:
4fc946b5
PH
1038 pass
1039
c8e9a235
PH
1040 # Is it a Camtasia project?
1041 camtasia_res = self._extract_camtasia(url, video_id, webpage)
1042 if camtasia_res is not None:
1043 return camtasia_res
1044
14390730
S
1045 # Sometimes embedded video player is hidden behind percent encoding
1046 # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1047 # Unescaping the whole page allows to handle those cases in a generic way
1f7659db
S
1048 webpage = compat_urllib_parse.unquote(webpage)
1049
887c6acd
PH
1050 # it's tempting to parse this further, but you would
1051 # have to take into account all the variations like
1052 # Video Title - Site Name
1053 # Site Name | Video Title
1054 # Video Title - Tagline | Site Name
1055 # and so on and so forth; it's just not practical
ef4fd848 1056 video_title = self._html_search_regex(
79649588
PH
1057 r'(?s)<title>(.*?)</title>', webpage, 'video title',
1058 default='video')
ef4fd848 1059
4d805e06
PH
1060 # Try to detect age limit automatically
1061 age_limit = self._rta_search(webpage)
1062 # And then there are the jokers who advertise that they use RTA,
1063 # but actually don't.
1064 AGE_LIMIT_MARKERS = [
1065 r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1066 ]
1067 if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1068 age_limit = 18
1069
ef4fd848
PH
1070 # video uploader is domain name
1071 video_uploader = self._search_regex(
79649588 1072 r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
887c6acd 1073
ed2d6a19 1074 # Helper method
def _playlist_from_matches(matches, getter=None, ie=None):
    """Turn a sequence of matches into a playlist result.

    ``matches`` is iterated once. Each item is passed through ``getter``
    when one is given (otherwise used as-is), then through
    ``self._proto_relative_url``, and finally wrapped by
    ``self.url_result`` with ``ie`` as the extractor key.

    The playlist id and title come from the enclosing scope
    (``video_id`` and ``video_title``).
    """
    def to_url_result(match):
        target = getter(match) if getter else match
        return self.url_result(self._proto_relative_url(target), ie)

    # orderedSet presumably drops duplicates while keeping first-seen
    # order -- confirm against ..utils.
    unique_results = orderedSet(to_url_result(match) for match in matches)
    return self.playlist_result(
        unique_results, playlist_id=video_id, playlist_title=video_title)
1081
627a91a9 1082 # Look for BrightCove:
99877772
PH
1083 bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1084 if bc_urls:
79649588 1085 self.to_screen('Brightcove video detected.')
99877772
PH
1086 entries = [{
1087 '_type': 'url',
1088 'url': smuggle_url(bc_url, {'Referer': url}),
1089 'ie_key': 'Brightcove'
1090 } for bc_url in bc_urls]
1091
1092 return {
1093 '_type': 'playlist',
1094 'title': video_title,
1095 'id': video_id,
1096 'entries': entries,
1097 }
cfe50f04 1098
59b8ab58
PH
1099 # Look for embedded rtl.nl player
1100 matches = re.findall(
97b570a9 1101 r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
59b8ab58
PH
1102 webpage)
1103 if matches:
1104 return _playlist_from_matches(matches, ie='RtlNl')
1105
b407e173
YCH
1106 vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1107 if vimeo_url is not None:
1108 return self.url_result(vimeo_url)
7115ca84 1109
53c1d3ef 1110 # Look for embedded YouTube player
1f9da904 1111 matches = re.findall(r'''(?x)
2b88feed
PH
1112 (?:
1113 <iframe[^>]+?src=|
c71dfccc 1114 data-video-url=|
2b88feed 1115 <embed[^>]+?src=|
a7e97f6d
PH
1116 embedSWF\(?:\s*|
1117 new\s+SWFObject\(
2b88feed
PH
1118 )
1119 (["\'])
1bf5423e 1120 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
6b08cdf6 1121 (?:embed|v|p)/.+?)
1f9da904 1122 \1''', webpage)
887c6acd 1123 if matches:
ed2d6a19 1124 return _playlist_from_matches(
3b2f933b 1125 matches, lambda m: unescapeHTML(m[1]))
53c1d3ef 1126
65f3a228
PH
1127 # Look for lazyYT YouTube embed
1128 matches = re.findall(
1129 r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1130 if matches:
1131 return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1132
355e4fd0
PH
1133 # Look for embedded Dailymotion player
1134 matches = re.findall(
ef4fd848 1135 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
355e4fd0 1136 if matches:
ed2d6a19
PH
1137 return _playlist_from_matches(
1138 matches, lambda m: unescapeHTML(m[1]))
355e4fd0 1139
8489578d
NJ
1140 # Look for embedded Dailymotion playlist player (#3822)
1141 m = re.search(
1142 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1143 if m:
1144 playlists = re.findall(
1145 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1146 if playlists:
1147 return _playlist_from_matches(
1148 playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1149
ef4fd848
PH
1150 # Look for embedded Wistia player
1151 match = re.search(
281d3f1d 1152 r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
ef4fd848 1153 if match:
9471c444
NJ
1154 embed_url = self._proto_relative_url(
1155 unescapeHTML(match.group('url')))
ef4fd848
PH
1156 return {
1157 '_type': 'url_transparent',
9471c444 1158 'url': embed_url,
ef4fd848
PH
1159 'ie_key': 'Wistia',
1160 'uploader': video_uploader,
1161 'title': video_title,
1162 'id': video_id,
1163 }
5f6a1245 1164
9471c444 1165 match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
746c67d7
NJ
1166 if match:
1167 return {
1168 '_type': 'url_transparent',
1169 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1170 'ie_key': 'Wistia',
1171 'uploader': video_uploader,
1172 'title': video_title,
1173 'id': match.group('id')
1174 }
ef4fd848 1175
ee3e63e4 1176 # Look for embedded blip.tv player
0954cd8a
YCH
1177 bliptv_url = BlipTVIE._extract_url(webpage)
1178 if bliptv_url:
1179 return self.url_result(bliptv_url, 'BlipTV')
ee3e63e4 1180
bab19a8e
S
1181 # Look for SVT player
1182 svt_url = SVTIE._extract_url(webpage)
1183 if svt_url:
1184 return self.url_result(svt_url, 'SVT')
1185
fa35cdad
PH
1186 # Look for embedded condenast player
1187 matches = re.findall(
1188 r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1189 webpage)
1190 if matches:
1191 return {
1192 '_type': 'playlist',
1193 'entries': [{
1194 '_type': 'url',
1195 'ie_key': 'CondeNast',
1196 'url': ma,
1197 } for ma in matches],
1198 'title': video_title,
1199 'id': video_id,
1200 }
1201
c19f7764
JMF
1202 # Look for Bandcamp pages with custom domain
1203 mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1204 if mobj is not None:
1205 burl = unescapeHTML(mobj.group(1))
09804265
JMF
1206 # Don't set the extractor because it can be a track url or an album
1207 return self.url_result(burl)
c19f7764 1208
f25571ff
PH
1209 # Look for embedded Vevo player
1210 mobj = re.search(
1211 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1212 if mobj is not None:
1213 return self.url_result(mobj.group('url'))
796df3c6
S
1214
1215 # Look for embedded Viddler player
cb454b33
S
1216 mobj = re.search(
1217 r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1218 webpage)
796df3c6
S
1219 if mobj is not None:
1220 return self.url_result(mobj.group('url'))
f25571ff 1221
3378d67a
S
1222 # Look for NYTimes player
1223 mobj = re.search(
1224 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1225 webpage)
1226 if mobj is not None:
1227 return self.url_result(mobj.group('url'))
1228
cefdf970
S
1229 # Look for Libsyn player
1230 mobj = re.search(
1231 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1232 if mobj is not None:
1233 return self.url_result(mobj.group('url'))
1234
c0d0b01f 1235 # Look for Ooyala videos
cb454b33 1236 mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
f076b638 1237 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
edfcf7ab
YCH
1238 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1239 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
c0d0b01f 1240 if mobj is not None:
750f9020 1241 return OoyalaIE._build_url_result(mobj.group('ec'))
c0d0b01f 1242
f076b638 1243 # Look for multiple Ooyala embeds on SBN network websites
1244 mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1245 if mobj is not None:
1246 embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1247 if embeds:
1248 return _playlist_from_matches(
1249 embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1250
aa94a6d3 1251 # Look for Aparat videos
48099643 1252 mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
aa94a6d3
PH
1253 if mobj is not None:
1254 return self.url_result(mobj.group(1), 'Aparat')
1255
c93c2ab1 1256 # Look for MPORA videos
c3f51436 1257 mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
c93c2ab1
PH
1258 if mobj is not None:
1259 return self.url_result(mobj.group(1), 'Mpora')
5f59ee79 1260
15c0e8e7 1261 # Look for embedded NovaMov-based player
8f89e687 1262 mobj = re.search(
8dfa187b 1263 r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
15c0e8e7
S
1264 (?P<url>http://(?:(?:embed|www)\.)?
1265 (?:novamov\.com|
1266 nowvideo\.(?:ch|sx|eu|at|ag|co)|
1267 videoweed\.(?:es|com)|
1268 movshare\.(?:net|sx|ag)|
1269 divxstage\.(?:eu|net|ch|co|at|ag))
1270 /embed\.php.+?)\1''', webpage)
8f89e687 1271 if mobj is not None:
15c0e8e7 1272 return self.url_result(mobj.group('url'))
50f56607 1273
9834872b
PH
1274 # Look for embedded Facebook player
1275 mobj = re.search(
db1f3888 1276 r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
9834872b
PH
1277 if mobj is not None:
1278 return self.url_result(mobj.group('url'), 'Facebook')
1279
ca97a56e
S
1280 # Look for embedded VK player
1281 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1282 if mobj is not None:
1283 return self.url_result(mobj.group('url'), 'VK')
1284
0364fa8b
S
1285 # Look for embedded ivi player
1286 mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1287 if mobj is not None:
1288 return self.url_result(mobj.group('url'), 'Ivi')
1289
db1f3888
PH
1290 # Look for embedded Huffington Post player
1291 mobj = re.search(
c3f51436 1292 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
db1f3888
PH
1293 if mobj is not None:
1294 return self.url_result(mobj.group('url'), 'HuffPost')
1295
1b86cc41 1296 # Look for embed.ly
1297 mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1298 if mobj is not None:
1299 return self.url_result(mobj.group('url'))
1300 mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1301 if mobj is not None:
1302 return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1303
60cc4dc4
PH
1304 # Look for funnyordie embed
1305 matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1306 if matches:
ed2d6a19
PH
1307 return _playlist_from_matches(
1308 matches, getter=unescapeHTML, ie='FunnyOrDie')
60cc4dc4 1309
db546cf8
S
1310 # Look for BBC iPlayer embed
1311 matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1312 if matches:
476eae0c 1313 return _playlist_from_matches(matches, ie='BBCCoUk')
db546cf8 1314
93d020dd
S
1315 # Look for embedded RUTV player
1316 rutv_url = RUTVIE._extract_url(webpage)
1317 if rutv_url:
1318 return self.url_result(rutv_url, 'RUTV')
1319
494f20cb 1320 # Look for embedded TVC player
b8599718
S
1321 tvc_url = TVCIE._extract_url(webpage)
1322 if tvc_url:
1323 return self.url_result(tvc_url, 'TVC')
494f20cb 1324
d40a3b5b
S
1325 # Look for embedded SportBox player
1326 sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1327 if sportbox_urls:
1328 return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1329
78e2b74b 1330 # Look for embedded PornHub player
65d161c4
S
1331 pornhub_url = PornHubIE._extract_url(webpage)
1332 if pornhub_url:
1333 return self.url_result(pornhub_url, 'PornHub')
1334
2bb5b6d0
S
1335 # Look for embedded XHamster player
1336 xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1337 if xhamster_urls:
1338 return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1339
9872d311
S
1340 # Look for embedded Tvigle player
1341 mobj = re.search(
1342 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1343 if mobj is not None:
1344 return self.url_result(mobj.group('url'), 'Tvigle')
1345
7e2ede98
JMF
1346 # Look for embedded TED player
1347 mobj = re.search(
d7cc31b6 1348 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
7e2ede98
JMF
1349 if mobj is not None:
1350 return self.url_result(mobj.group('url'), 'TED')
1351
5c386252 1352 # Look for embedded Ustream videos
1353 mobj = re.search(
1354 r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1355 if mobj is not None:
1356 return self.url_result(mobj.group('url'), 'Ustream')
1357
893f8832
PH
1358 # Look for embedded arte.tv player
1359 mobj = re.search(
1360 r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1361 webpage)
1362 if mobj is not None:
1363 return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1364
cb3ac1c6
S
1365 # Look for embedded smotri.com player
1366 smotri_url = SmotriIE._extract_url(webpage)
1367 if smotri_url:
1368 return self.url_result(smotri_url, 'Smotri')
1369
20991253
PH
1370 # Look for embedded soundcloud player
1371 mobj = re.search(
ac645ac7 1372 r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
20991253
PH
1373 webpage)
1374 if mobj is not None:
1375 url = unescapeHTML(mobj.group('url'))
1376 return self.url_result(url)
1377
826ec77f
PH
1378 # Look for embedded vulture.com player
1379 mobj = re.search(
1380 r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1381 webpage)
1382 if mobj is not None:
1383 url = unescapeHTML(mobj.group('url'))
1384 return self.url_result(url, ie='Vulture')
1385
c5cd249e
JMF
1386 # Look for embedded mtvservices player
1387 mobj = re.search(
1388 r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1389 webpage)
1390 if mobj is not None:
1391 url = unescapeHTML(mobj.group('url'))
1392 return self.url_result(url, ie='MTVServicesEmbedded')
1393
49807b4a
S
1394 # Look for embedded yahoo player
1395 mobj = re.search(
1396 r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1397 webpage)
1398 if mobj is not None:
1399 return self.url_result(mobj.group('url'), 'Yahoo')
1400
2ef6fcb5
PH
1401 # Look for embedded sbs.com.au player
1402 mobj = re.search(
e98b8e79
PH
1403 r'''(?x)
1404 (?:
1405 <meta\s+property="og:video"\s+content=|
1406 <iframe[^>]+?src=
1407 )
1408 (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2ef6fcb5
PH
1409 webpage)
1410 if mobj is not None:
1411 return self.url_result(mobj.group('url'), 'SBS')
1412
42bdd9d0
PH
1413 # Look for embedded Cinchcast player
1414 mobj = re.search(
1415 r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1416 webpage)
1417 if mobj is not None:
1418 return self.url_result(mobj.group('url'), 'Cinchcast')
1419
1a94ff68 1420 mobj = re.search(
5263cdfc 1421 r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1a94ff68 1422 webpage)
8001607e
YCH
1423 if not mobj:
1424 mobj = re.search(
1425 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1426 webpage)
1a94ff68
S
1427 if mobj is not None:
1428 return self.url_result(mobj.group('url'), 'MLB')
1429
1419fafd
S
1430 mobj = re.search(
1431 r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1432 webpage)
1433 if mobj is not None:
1434 return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1435
af63fed7
PH
1436 mobj = re.search(
1437 r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1438 webpage)
1439 if mobj is not None:
1440 return self.url_result(mobj.group('url'), 'Livestream')
1441
255fca5e
S
1442 # Look for Zapiks embed
1443 mobj = re.search(
1444 r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1445 if mobj is not None:
1446 return self.url_result(mobj.group('url'), 'Zapiks')
1447
e3216b82
NJ
1448 # Look for Kaltura embeds
1449 mobj = re.search(
1450 r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1451 if mobj is not None:
1452 return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1453
135c9c42
S
1454 # Look for Eagle.Platform embeds
1455 mobj = re.search(
1456 r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1457 if mobj is not None:
1458 return self.url_result(mobj.group('url'), 'EaglePlatform')
1459
d47ae7f6
S
1460 # Look for ClipYou (uses Eagle.Platform) embeds
1461 mobj = re.search(
1462 r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1463 if mobj is not None:
1464 return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1465
f8388757
S
1466 # Look for Pladform embeds
1467 mobj = re.search(
1468 r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1469 if mobj is not None:
1470 return self.url_result(mobj.group('url'), 'Pladform')
1471
2dcc114f
S
1472 # Look for Playwire embeds
1473 mobj = re.search(
1474 r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1475 if mobj is not None:
1476 return self.url_result(mobj.group('url'))
1477
ad320e9b
NJ
1478 # Look for 5min embeds
1479 mobj = re.search(
1480 r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1481 if mobj is not None:
1482 return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1483
18153f1b
S
1484 # Look for Crooks and Liars embeds
1485 mobj = re.search(
1486 r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1487 if mobj is not None:
1488 return self.url_result(mobj.group('url'))
1489
a2edf2e7
YCH
1490 # Look for NBC Sports VPlayer embeds
1491 nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1492 if nbc_sports_url:
1493 return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1494
418c5cc3
YCH
1495 # Look for UDN embeds
1496 mobj = re.search(
1497 r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1498 if mobj is not None:
1499 return self.url_result(
0a160363 1500 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
418c5cc3 1501
2fe1b5bd
YCH
1502 # Look for Senate ISVP iframe
1503 senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1504 if senate_isvp_url:
25c3a734 1505 return self.url_result(senate_isvp_url, 'SenateISVP')
2fe1b5bd 1506
756f574e
YCH
1507 # Look for Dailymotion Cloud videos
1508 dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1509 if dmcloud_url:
1510 return self.url_result(dmcloud_url, 'DailymotionCloud')
1511
def check_video(vurl):
    """Return True when ``vurl`` should be kept as a video candidate.

    Anything ``YoutubeIE.suitable`` accepts is kept unconditionally.
    Otherwise the path component of the URL must contain a dot and the
    extension reported by ``determine_ext`` must not be in the rejected
    list below.
    """
    if YoutubeIE.suitable(vurl):
        return True

    # Extensions that are never treated as a video by this check.
    rejected_extensions = (
        'swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')

    path = compat_urlparse.urlparse(vurl).path
    extension = determine_ext(path)
    if '.' not in path:
        return False
    return extension not in rejected_extensions
1518
def filter_video(urls):
    """Return a list of the items of ``urls`` that ``check_video`` accepts, in order."""
    return [candidate for candidate in urls if check_video(candidate)]
1521
9b122384 1522 # Start with something easy: JW Player in SWFObject
ced659bb 1523 found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
b30b8698 1524 if not found:
d981cef6 1525 # Look for gorilla-vid style embedding
ced659bb 1526 found = filter_video(re.findall(r'''(?sx)
c0292e8a
PH
1527 (?:
1528 jw_plugins|
1529 JWPlayerOptions|
1530 jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1531 )
a0f71985
PH
1532 .*?
1533 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
b30b8698 1534 if not found:
9b122384 1535 # Broaden the search a little bit
ced659bb 1536 found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
b30b8698
PH
1537 if not found:
1538 # Broaden the findall a little bit: JWPlayer JS loader
ced659bb
S
1539 found = filter_video(re.findall(
1540 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
4d805e06
PH
1541 if not found:
1542 # Flow player
ced659bb 1543 found = filter_video(re.findall(r'''(?xs)
4d805e06
PH
1544 flowplayer\("[^"]+",\s*
1545 \{[^}]+?\}\s*,
52585fd6 1546 \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
4d805e06 1547 ["']?url["']?\s*:\s*["']([^"']+)["']
ced659bb 1548 ''', webpage))
501f13fb
PH
1549 if not found:
1550 # Cinerama player
1551 found = re.findall(
1552 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
b30b8698 1553 if not found:
9b122384 1554 # Try to find twitter cards info
ced659bb
S
1555 found = filter_video(re.findall(
1556 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
b30b8698 1557 if not found:
9b122384
PH
1558 # We look for Open Graph info:
1559 # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
b30b8698 1560 m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
9b122384
PH
1561 # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1562 if m_video_type is not None:
ced659bb 1563 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
b30b8698 1564 if not found:
7fea7156 1565 # HTML5 video
9b32eca3 1566 found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
b30b8698 1567 if not found:
ed9a25dd 1568 REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
a5a45015 1569 found = re.search(
89ef304b 1570 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
ed9a25dd 1571 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
89ef304b 1572 webpage)
84f81016
S
1573 if not found:
1574 # Look also in Refresh HTTP header
1575 refresh_header = head_response.headers.get('Refresh')
1576 if refresh_header:
ed9a25dd 1577 found = re.search(REDIRECT_REGEX, refresh_header)
b30b8698 1578 if found:
406224be 1579 new_url = compat_urlparse.urljoin(url, found.group(1))
89ef304b
PH
1580 self.report_following_redirect(new_url)
1581 return {
1582 '_type': 'url',
1583 'url': new_url,
1584 }
b30b8698 1585 if not found:
416c7fcb 1586 raise UnsupportedError(url)
9b122384 1587
b30b8698
PH
1588 entries = []
1589 for video_url in found:
1590 video_url = compat_urlparse.urljoin(url, video_url)
1591 video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
9b122384 1592
b30b8698
PH
1593 # Sometimes, jwplayer extraction will result in a YouTube URL
1594 if YoutubeIE.suitable(video_url):
1595 entries.append(self.url_result(video_url, 'Youtube'))
1596 continue
9b122384 1597
b30b8698
PH
1598 # here's a fun little line of code for you:
1599 video_id = os.path.splitext(video_id)[0]
fc9713a1 1600
d6fd958c
YCH
1601 if determine_ext(video_url) == 'smil':
1602 entries.append({
1603 'id': video_id,
1604 'formats': self._extract_smil_formats(video_url, video_id),
1605 'uploader': video_uploader,
1606 'title': video_title,
1607 'age_limit': age_limit,
1608 })
1609 else:
1610 entries.append({
1611 'id': video_id,
1612 'url': video_url,
1613 'uploader': video_uploader,
1614 'title': video_title,
1615 'age_limit': age_limit,
1616 })
b30b8698
PH
1617
1618 if len(entries) == 1:
669f0e7c 1619 return entries[0]
b30b8698
PH
1620 else:
1621 for num, e in enumerate(entries, start=1):
13d8fbef
JMF
1622 # 'url' results don't have a title
1623 if e.get('title') is not None:
1624 e['title'] = '%s (%d)' % (e['title'], num)
b30b8698
PH
1625 return {
1626 '_type': 'playlist',
1627 'entries': entries,
1628 }