]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/generic.py
[npo] Clarify token decryption algorithm source
[yt-dlp.git] / youtube_dl / extractor / generic.py
CommitLineData
cfe50f04
JMF
1# encoding: utf-8
2
79649588
PH
3from __future__ import unicode_literals
4
9b122384
PH
5import os
6import re
7
8from .common import InfoExtractor
fc9713a1 9from .youtube import YoutubeIE
8c25f81b 10from ..compat import (
9b122384 11 compat_urllib_parse,
1ddb9456
S
12 compat_urllib_parse_unquote,
13 compat_urllib_request,
a5caba1e 14 compat_urlparse,
f7300c5c 15 compat_xml_parse_error,
8c25f81b
PH
16)
17from ..utils import (
b759a0d4 18 determine_ext,
9b122384 19 ExtractorError,
c8e9a235 20 float_or_none,
aa94a6d3 21 HEADRequest,
61ca9a80 22 is_html,
ed2d6a19 23 orderedSet,
bcf89ce6 24 parse_xml,
9d4660ca
PH
25 smuggle_url,
26 unescapeHTML,
42393ce2 27 unified_strdate,
4d54ef20 28 unsmuggle_url,
416c7fcb 29 UnsupportedError,
42393ce2 30 url_basename,
76c73715 31 xpath_text,
9b122384 32)
cfe50f04 33from .brightcove import BrightcoveIE
a2edf2e7 34from .nbc import NBCSportsVPlayerIE
c0d0b01f 35from .ooyala import OoyalaIE
93d020dd 36from .rutv import RUTVIE
954c1d05 37from .tvc import TVCIE
d40a3b5b 38from .sportbox import SportBoxEmbedIE
cb3ac1c6 39from .smotri import SmotriIE
1419fafd 40from .condenast import CondeNastIE
418c5cc3 41from .udn import UDNEmbedIE
2fe1b5bd 42from .senateisvp import SenateISVPIE
0954cd8a 43from .bliptv import BlipTVIE
bab19a8e 44from .svt import SVTIE
65d161c4 45from .pornhub import PornHubIE
2bb5b6d0 46from .xhamster import XHamsterEmbedIE
b407e173 47from .vimeo import VimeoIE
756f574e 48from .dailymotion import DailymotionCloudIE
1ac1c4c2 49from .onionstudios import OnionStudiosIE
eedd20ef 50from .snagfilms import SnagFilmsEmbedIE
9b122384 51
0838239e 52
9b122384 53class GenericIE(InfoExtractor):
79649588 54 IE_DESC = 'Generic downloader that works on some sites'
9b122384 55 _VALID_URL = r'.*'
79649588 56 IE_NAME = 'generic'
cfe50f04 57 _TESTS = [
c5fa81fe
S
58 # Direct link to a video
59 {
60 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
61 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
62 'info_dict': {
63 'id': 'trailer',
64 'ext': 'mp4',
65 'title': 'trailer',
66 'upload_date': '20100513',
67 }
68 },
c5138a7c 69 # Direct link to media delivered compressed (until Accept-Encoding is *)
c5fa81fe
S
70 {
71 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
72 'md5': '128c42e68b13950268b648275386fc74',
73 'info_dict': {
74 'id': 'FictionJunction-Parallel_Hearts',
75 'ext': 'flac',
76 'title': 'FictionJunction-Parallel_Hearts',
77 'upload_date': '20140522',
78 },
79 'expected_warnings': [
80 'URL could be a direct video link, returning it as such.'
81 ]
82 },
83 # Direct download with broken HEAD
84 {
85 'url': 'http://ai-radio.org:8000/radio.opus',
86 'info_dict': {
87 'id': 'radio',
88 'ext': 'opus',
89 'title': 'radio',
90 },
91 'params': {
92 'skip_download': True, # infinite live stream
93 },
94 'expected_warnings': [
95 r'501.*Not Implemented'
96 ],
97 },
98 # Direct link with incorrect MIME type
99 {
100 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
101 'md5': '4ccbebe5f36706d85221f204d7eb5913',
102 'info_dict': {
103 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
104 'id': '5_Lennart_Poettering_-_Systemd',
105 'ext': 'webm',
106 'title': '5_Lennart_Poettering_-_Systemd',
107 'upload_date': '20141120',
108 },
109 'expected_warnings': [
110 'URL could be a direct video link, returning it as such.'
111 ]
112 },
113 # RSS feed
114 {
115 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
116 'info_dict': {
117 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
118 'title': 'Zero Punctuation',
119 'description': 're:.*groundbreaking video review series.*'
120 },
121 'playlist_mincount': 11,
122 },
123 # RSS feed with enclosure
124 {
125 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
126 'info_dict': {
127 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
128 'ext': 'm4v',
129 'upload_date': '20150228',
130 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
131 }
132 },
133 # google redirect
134 {
135 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
136 'info_dict': {
137 'id': 'cmQHVoWB5FY',
138 'ext': 'mp4',
139 'upload_date': '20130224',
140 'uploader_id': 'TheVerge',
141 'description': 're:^Chris Ziegler takes a look at the\.*',
142 'uploader': 'The Verge',
143 'title': 'First Firefox OS phones side-by-side',
144 },
145 'params': {
146 'skip_download': False,
147 }
148 },
cfe50f04 149 {
79649588 150 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
d360a146 151 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
79649588 152 'info_dict': {
d360a146
S
153 'id': '13601338388002',
154 'ext': 'mp4',
79649588
PH
155 'uploader': 'www.hodiho.fr',
156 'title': 'R\u00e9gis plante sa Jeep',
cfe50f04
JMF
157 }
158 },
c19f7764
JMF
159 # bandcamp page with custom domain
160 {
79649588
PH
161 'add_ie': ['Bandcamp'],
162 'url': 'http://bronyrock.com/track/the-pony-mash',
79649588 163 'info_dict': {
fd50bf62
S
164 'id': '3235767654',
165 'ext': 'mp3',
79649588
PH
166 'title': 'The Pony Mash',
167 'uploader': 'M_Pallante',
c19f7764 168 },
79649588 169 'skip': 'There is a limit of 200 free downloads / month for the test song',
c19f7764 170 },
eeb165e6 171 # embedded brightcove video
dd5bcdc4
JMF
172 # it also tests brightcove videos that need to set the 'Referer' in the
173 # http requests
eeb165e6 174 {
79649588
PH
175 'add_ie': ['Brightcove'],
176 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
177 'info_dict': {
178 'id': '2765128793001',
179 'ext': 'mp4',
180 'title': 'Le cours de bourse : l’analyse technique',
181 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
182 'uploader': 'BFM BUSINESS',
eeb165e6 183 },
79649588
PH
184 'params': {
185 'skip_download': True,
eeb165e6
JMF
186 },
187 },
17ab4d3b
PH
188 {
189 # https://github.com/rg3/youtube-dl/issues/2253
190 'url': 'http://bcove.me/i6nfkrc3',
17ab4d3b
PH
191 'md5': '0ba9446db037002366bab3b3eb30c88c',
192 'info_dict': {
fd50bf62
S
193 'id': '3101154703001',
194 'ext': 'mp4',
17ab4d3b
PH
195 'title': 'Still no power',
196 'uploader': 'thestar.com',
197 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
198 },
199 'add_ie': ['Brightcove'],
200 },
0479c625
S
201 {
202 'url': 'http://www.championat.com/video/football/v/87/87499.html',
203 'md5': 'fb973ecf6e4a78a67453647444222983',
204 'info_dict': {
205 'id': '3414141473001',
206 'ext': 'mp4',
207 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
208 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
209 'uploader': 'Championat',
210 },
211 },
bdf97017 212 {
37aab278 213 # https://github.com/rg3/youtube-dl/issues/3541
bdf97017
NJ
214 'add_ie': ['Brightcove'],
215 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
216 'info_dict': {
217 'id': '3866516442001',
37aab278 218 'ext': 'mp4',
bdf97017
NJ
219 'title': 'Leer mij vrouwen kennen: Aflevering 1',
220 'description': 'Leer mij vrouwen kennen: Aflevering 1',
221 'uploader': 'SBS Broadcasting',
222 },
37aab278 223 'skip': 'Restricted to Netherlands',
bdf97017 224 'params': {
37aab278 225 'skip_download': True, # m3u8 download
bdf97017
NJ
226 },
227 },
c0d0b01f
JMF
228 # ooyala video
229 {
79649588 230 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
87830900 231 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
79649588
PH
232 'info_dict': {
233 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
234 'ext': 'mp4',
3486df38 235 'title': '2cc213299525360.mov', # that's what we get
c0d0b01f 236 },
87830900 237 'add_ie': ['Ooyala'],
c0d0b01f 238 },
f076b638 239 # multiple ooyala embeds on SBN network websites
240 {
241 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
242 'info_dict': {
243 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
244 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
245 },
246 'playlist_mincount': 3,
247 'params': {
248 'skip_download': True,
249 },
250 'add_ie': ['Ooyala'],
251 },
1b86cc41 252 # embed.ly video
253 {
254 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
255 'info_dict': {
256 'id': '9ODmcdjQcHQ',
257 'ext': 'mp4',
0a5bce56
PH
258 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
259 'upload_date': '20140225',
260 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
261 'uploader': 'Tested',
262 'uploader_id': 'testedcom',
1b86cc41 263 },
264 # No need to test YoutubeIE here
265 'params': {
266 'skip_download': True,
267 },
268 },
60cc4dc4
PH
269 # funnyordie embed
270 {
271 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
60cc4dc4
PH
272 'info_dict': {
273 'id': '18e820ec3f',
274 'ext': 'mp4',
275 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
276 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
93d020dd 277 },
60cc4dc4 278 },
faa4ea68
S
279 # BBC iPlayer embeds
280 {
281 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
282 'info_dict': {
283 'title': 'BBC - Blogs - Adam Curtis - BUGGER',
284 },
285 'playlist_mincount': 18,
286 },
93d020dd
S
287 # RUTV embed
288 {
289 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
290 'info_dict': {
291 'id': '776940',
292 'ext': 'mp4',
293 'title': 'Охотское море стало целиком российским',
294 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
295 },
296 'params': {
297 # m3u8 download
298 'skip_download': True,
299 },
aab74fa1 300 },
f37bdbe5
S
301 # TVC embed
302 {
303 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
304 'info_dict': {
305 'id': '55304',
306 'ext': 'mp4',
307 'title': 'Дошкольное воспитание',
308 },
309 },
b827a601
S
310 # SportBox embed
311 {
312 'url': 'http://www.vestifinance.ru/articles/25753',
313 'info_dict': {
314 'id': '25753',
315 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
316 },
317 'playlist': [{
318 'info_dict': {
319 'id': '370908',
320 'title': 'Госзаказ. День 3',
321 'ext': 'mp4',
322 }
323 }, {
324 'info_dict': {
325 'id': '370905',
326 'title': 'Госзаказ. День 2',
327 'ext': 'mp4',
328 }
329 }, {
330 'info_dict': {
331 'id': '370902',
332 'title': 'Госзаказ. День 1',
333 'ext': 'mp4',
334 }
335 }],
336 'params': {
337 # m3u8 download
338 'skip_download': True,
339 },
340 },
c76799c5
S
341 # XHamster embed
342 {
343 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
344 'info_dict': {
345 'id': 'showthread',
346 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
347 },
348 'playlist_mincount': 7,
349 },
aab74fa1
PH
350 # Embedded TED video
351 {
352 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
a8eb5a8e 353 'md5': '65fdff94098e4a607385a60c5177c638',
aab74fa1 354 'info_dict': {
a8eb5a8e 355 'id': '1969',
aab74fa1 356 'ext': 'mp4',
a8eb5a8e
PH
357 'title': 'Hidden miracles of the natural world',
358 'uploader': 'Louie Schwartzberg',
359 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
aab74fa1 360 }
60cc4dc4 361 },
5c386252 362 # Embedded Ustream video
363 {
364 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
365 'md5': '27b99cdb639c9b12a79bca876a073417',
366 'info_dict': {
ca6aada4 367 'id': '45734260',
368 'ext': 'flv',
369 'uploader': 'AU SPA: The NSA and Privacy',
5c386252 370 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
371 }
372 },
d95e35d6
S
373 # nowvideo embed hidden behind percent encoding
374 {
375 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
376 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
377 'info_dict': {
378 'id': '06e53103ca9aa',
379 'ext': 'flv',
380 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
381 'description': 'No description',
382 },
0f2a2ba1 383 },
893f8832
PH
384 # arte embed
385 {
386 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
387 'md5': '7653032cbb25bf6c80d80f217055fa43',
388 'info_dict': {
389 'id': '048195-004_PLUS7-F',
390 'ext': 'flv',
391 'title': 'X:enius',
392 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
393 'upload_date': '20140320',
394 },
395 'params': {
396 'skip_download': 'Requires rtmpdump'
397 }
398 },
fa35cdad
PH
399 # Condé Nast embed
400 {
401 'url': 'http://www.wired.com/2014/04/honda-asimo/',
402 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
403 'info_dict': {
404 'id': '53501be369702d3275860000',
405 'ext': 'mp4',
406 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
407 }
ebd3c7b3
PH
408 },
409 # Dailymotion embed
410 {
411 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
412 'md5': '441aeeb82eb72c422c7f14ec533999cd',
413 'info_dict': {
414 'id': 'k2mm4bCdJ6CQ2i7c8o2',
415 'ext': 'mp4',
416 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
417 'uploader': 'Spi0n',
418 },
419 'add_ie': ['Dailymotion'],
2b88feed
PH
420 },
421 # YouTube embed
422 {
423 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
424 'info_dict': {
425 'id': 'FXRb4ykk4S0',
426 'ext': 'mp4',
427 'title': 'The NBL Auction 2014',
428 'uploader': 'BADMINTON England',
429 'uploader_id': 'BADMINTONEvents',
430 'upload_date': '20140603',
431 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
432 },
433 'add_ie': ['Youtube'],
434 'params': {
435 'skip_download': True,
436 }
437 },
c5cd249e
JMF
438 # MTVServices embed
439 {
440 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
441 'md5': '35727f82f58c76d996fc188f9755b0d5',
442 'info_dict': {
443 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
444 'ext': 'mp4',
445 'title': 'Review',
446 'description': 'Mario\'s life in the fast lane has never looked so good.',
447 },
448 },
61013473 449 # YouTube embed via <data-embed-url="">
450 {
451 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
61013473 452 'info_dict': {
a8eb5a8e 453 'id': '4vAffPZIT44',
61013473 454 'ext': 'mp4',
a8eb5a8e 455 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
ed2d6a19
PH
456 'uploader': 'Gameloft',
457 'uploader_id': 'gameloft',
a8eb5a8e
PH
458 'upload_date': '20140828',
459 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
ed2d6a19
PH
460 },
461 'params': {
462 'skip_download': True,
61013473 463 }
c8e9a235
PH
464 },
465 # Camtasia studio
466 {
467 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
468 'playlist': [{
469 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
470 'info_dict': {
471 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
472 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
473 'ext': 'flv',
474 'duration': 2235.90,
475 }
476 }, {
477 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
478 'info_dict': {
479 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
480 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
481 'ext': 'flv',
482 'duration': 2235.93,
483 }
484 }],
485 'info_dict': {
486 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
487 }
4d805e06
PH
488 },
489 # Flowplayer
490 {
491 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
492 'md5': '9d65602bf31c6e20014319c7d07fba27',
493 'info_dict': {
494 'id': '5123ea6d5e5a7',
495 'ext': 'mp4',
496 'age_limit': 18,
497 'uploader': 'www.handjobhub.com',
d6d9186f 498 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
4d805e06 499 }
0990305d 500 },
22a6f150
PH
501 # Multiple brightcove videos
502 # https://github.com/rg3/youtube-dl/issues/2283
503 {
504 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
505 'info_dict': {
506 'id': 'always-never',
507 'title': 'Always / Never - The New Yorker',
508 },
509 'playlist_count': 3,
510 'params': {
511 'extract_flat': False,
512 'skip_download': True,
513 }
1a94ff68
S
514 },
515 # MLB embed
516 {
517 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
518 'md5': '96f09a37e44da40dd083e12d9a683327',
519 'info_dict': {
520 'id': '33322633',
521 'ext': 'mp4',
522 'title': 'Ump changes call to ball',
523 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
524 'duration': 48,
525 'timestamp': 1401537900,
526 'upload_date': '20140531',
527 'thumbnail': 're:^https?://.*\.jpg$',
528 },
529 },
746c67d7
NJ
530 # Wistia embed
531 {
532 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
533 'md5': '8788b683c777a5cf25621eaf286d0c23',
534 'info_dict': {
535 'id': '1cfaf6b7ea',
536 'ext': 'mov',
537 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
538 'duration': 643.0,
539 'filesize': 182808282,
540 'uploader': 'education-portal.com',
541 },
542 },
52cffcb1 543 {
544 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
545 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
546 'info_dict': {
547 'id': 'uxjb0lwrcz',
548 'ext': 'mp4',
85d7b765 549 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
52cffcb1 550 'duration': 1715.0,
85d7b765 551 'uploader': 'thoughtworks.wistia.com',
70b7e3fb 552 },
52cffcb1 553 },
ac645ac7
PH
554 # Soundcloud embed
555 {
556 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
557 'info_dict': {
558 'id': '174391317',
559 'ext': 'mp3',
560 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
561 'uploader': 'Sophos Security',
562 'title': 'Chet Chat 171 - Oct 29, 2014',
563 'upload_date': '20141029',
564 }
af63fed7
PH
565 },
566 # Livestream embed
567 {
568 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
569 'info_dict': {
570 'id': '67864563',
571 'ext': 'flv',
572 'upload_date': '20141112',
573 'title': 'Rosetta #CometLanding webcast HL 10',
574 }
575 },
65f3a228
PH
576 # LazyYT
577 {
578 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
579 'info_dict': {
11e611a7 580 'id': '1986',
65f3a228
PH
581 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
582 },
583 'playlist_mincount': 2,
4e262a88 584 },
42bdd9d0
PH
585 # Cinchcast embed
586 {
587 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
588 'info_dict': {
589 'id': '7141703',
590 'ext': 'mp3',
591 'upload_date': '20141126',
592 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
593 }
594 },
501f13fb
PH
595 # Cinerama player
596 {
597 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
598 'info_dict': {
599 'id': '730m_DandD_1901_512k',
600 'ext': 'mp4',
601 'uploader': 'www.abc.net.au',
602 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
603 }
796df3c6
S
604 },
605 # embedded viddler video
606 {
607 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
608 'info_dict': {
609 'id': '4d03aad9',
610 'ext': 'mp4',
611 'uploader': 'deadspin',
612 'title': 'WALL-TO-GORTAT',
613 'timestamp': 1422285291,
614 'upload_date': '20150126',
615 },
616 'add_ie': ['Viddler'],
a0f71985 617 },
2051acde
S
618 # Libsyn embed
619 {
620 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
621 'info_dict': {
622 'id': '3377616',
623 'ext': 'mp3',
624 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
625 'description': 'md5:601cb790edd05908957dae8aaa866465',
626 'upload_date': '20150220',
627 },
628 },
a0f71985
PH
629 # jwplayer YouTube
630 {
631 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
632 'info_dict': {
633 'id': 'Mrj4DVp2zeA',
634 'ext': 'mp4',
f37e3f99 635 'upload_date': '20150212',
a0f71985
PH
636 'uploader': 'The National Archives UK',
637 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
638 'uploader_id': 'NationalArchives08',
639 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
640 },
59b8ab58
PH
641 },
642 # rtl.nl embed
643 {
644 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
645 'playlist_mincount': 5,
646 'info_dict': {
647 'id': 'aanslagen-kopenhagen',
648 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
649 }
255fca5e
S
650 },
651 # Zapiks embed
652 {
653 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
654 'info_dict': {
655 'id': '118046',
656 'ext': 'mp4',
657 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
658 }
659 },
e3216b82
NJ
660 # Kaltura embed
661 {
662 'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
663 'info_dict': {
664 'id': '1_eergr3h1',
665 'ext': 'mp4',
666 'upload_date': '20150226',
667 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
668 'timestamp': int,
669 'title': 'John Carlson Postgame 2/25/15',
670 },
671 },
135c9c42
S
672 # Eagle.Platform embed (generic URL)
673 {
674 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
675 'info_dict': {
676 'id': '227304',
677 'ext': 'mp4',
678 'title': 'Навальный вышел на свободу',
679 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
680 'thumbnail': 're:^https?://.*\.jpg$',
681 'duration': 87,
682 'view_count': int,
683 'age_limit': 0,
684 },
685 },
d47ae7f6
S
686 # ClipYou (Eagle.Platform) embed (custom URL)
687 {
688 'url': 'http://muz-tv.ru/play/7129/',
689 'info_dict': {
690 'id': '12820',
691 'ext': 'mp4',
692 'title': "'O Sole Mio",
693 'thumbnail': 're:^https?://.*\.jpg$',
694 'duration': 216,
695 'view_count': int,
696 },
697 },
f8388757
S
698 # Pladform embed
699 {
700 'url': 'http://muz-tv.ru/kinozal/view/7400/',
701 'info_dict': {
702 'id': '100183293',
703 'ext': 'mp4',
62259846 704 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
f8388757
S
705 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
706 'thumbnail': 're:^https?://.*\.jpg$',
707 'duration': 694,
708 'age_limit': 0,
709 },
710 },
c798f15b
S
711 # Playwire embed
712 {
713 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
714 'info_dict': {
715 'id': '3519514',
716 'ext': 'mp4',
717 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
718 'thumbnail': 're:^https?://.*\.png$',
719 'duration': 45.115,
720 },
721 },
ad320e9b
NJ
722 # 5min embed
723 {
724 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
725 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
726 'info_dict': {
727 'id': '518726732',
728 'ext': 'mp4',
729 'title': 'Facebook Creates "On This Day" | Crunch Report',
730 },
731 },
dc455a5f
S
732 # SVT embed
733 {
734 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
735 'info_dict': {
736 'id': '2900353',
737 'ext': 'flv',
738 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
739 'duration': 27,
740 'age_limit': 0,
741 },
742 },
a4257017
S
743 # Crooks and Liars embed
744 {
745 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
746 'info_dict': {
747 'id': '8RUoRhRi',
748 'ext': 'mp4',
749 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
750 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
751 'timestamp': 1428207000,
752 'upload_date': '20150405',
753 'uploader': 'Heather',
754 },
755 },
756 # Crooks and Liars external embed
757 {
758 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
759 'info_dict': {
760 'id': 'MTE3MjUtMzQ2MzA',
761 'ext': 'mp4',
762 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
763 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
764 'timestamp': 1265032391,
765 'upload_date': '20100201',
766 'uploader': 'Heather',
767 },
768 },
facecb84 769 # NBC Sports vplayer embed
a2edf2e7 770 {
facecb84 771 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
a2edf2e7 772 'info_dict': {
facecb84
S
773 'id': 'ln7x1qSThw4k',
774 'ext': 'flv',
775 'title': "PFT Live: New leader in the 'new-look' defense",
776 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
a2edf2e7 777 },
418c5cc3
YCH
778 },
779 # UDN embed
780 {
781 'url': 'http://www.udn.com/news/story/7314/822787',
01c58f84 782 'md5': 'fd2060e988c326991037b9aff9df21a6',
418c5cc3 783 'info_dict': {
01c58f84 784 'id': '300346',
418c5cc3 785 'ext': 'mp4',
01c58f84 786 'title': '中一中男師變性 全校師生力挺',
418c5cc3
YCH
787 'thumbnail': 're:^https?://.*\.jpg$',
788 }
edfcf7ab
YCH
789 },
790 # Ooyala embed
791 {
792 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
793 'info_dict': {
794 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
795 'ext': 'mp4',
796 'description': 'VIDEO: Index/Match versus VLOOKUP.',
797 'title': 'This is what separates the Excel masters from the wannabes',
798 },
799 'params': {
800 # m3u8 downloads
801 'skip_download': True,
802 }
d6fd958c
YCH
803 },
804 # Contains a SMIL manifest
805 {
806 'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
807 'info_dict': {
808 'id': 'file',
809 'ext': 'flv',
810 'title': '+ Football: Lottery Champions League Europe',
811 'uploader': 'www.telewebion.com',
812 },
813 'params': {
814 # rtmpe downloads
815 'skip_download': True,
816 }
b26733ba
YCH
817 },
818 # Brightcove URL in single quotes
819 {
820 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
821 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
822 'info_dict': {
823 'id': '4255764656001',
824 'ext': 'mp4',
825 'title': 'SN Presents: Russell Martin, World Citizen',
826 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
827 'uploader': 'Rogers Sportsnet',
828 },
756f574e
YCH
829 },
830 # Dailymotion Cloud video
831 {
832 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
833 'md5': '49444254273501a64675a7e68c502681',
834 'info_dict': {
835 'id': '5585de919473990de4bee11b',
836 'ext': 'mp4',
837 'title': 'Le débat',
838 'thumbnail': 're:^https?://.*\.jpe?g$',
839 }
a5158f38 840 },
8084be78
S
841 # OnionStudios embed
842 {
843 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
844 'info_dict': {
845 'id': '2855',
846 'ext': 'mp4',
847 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
848 'thumbnail': 're:^https?://.*\.jpe?g$',
849 'uploader': 'ClickHole',
850 'uploader_id': 'clickhole',
851 }
852 },
b8c1cc1a
S
853 # SnagFilms embed
854 {
855 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
856 'info_dict': {
857 'id': '74849a00-85a9-11e1-9660-123139220831',
858 'ext': 'mp4',
859 'title': '#whilewewatch',
860 }
861 },
a5158f38
YCH
862 # AdobeTVVideo embed
863 {
864 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
865 'md5': '43662b577c018ad707a63766462b1e87',
866 'info_dict': {
867 'id': '2456',
868 'ext': 'mp4',
869 'title': 'New experience with Acrobat DC',
870 'description': 'New experience with Acrobat DC',
871 'duration': 248.667,
872 },
76c73715 873 }
cfe50f04 874 ]
9b122384 875
9b122384
PH
def report_following_redirect(self, new_url):
    """Announce on screen that a redirect to new_url is being followed."""
    notice = '[redirect] Following redirect to %s' % new_url
    self._downloader.to_screen(notice)
9b122384 879
4fc946b5
PH
880 def _extract_rss(self, url, video_id, doc):
881 playlist_title = doc.find('./channel/title').text
882 playlist_desc_el = doc.find('./channel/description')
883 playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
884
76c73715
PH
885 entries = []
886 for it in doc.findall('./channel/item'):
887 next_url = xpath_text(it, 'link', fatal=False)
888 if not next_url:
889 enclosure_nodes = it.findall('./enclosure')
890 for e in enclosure_nodes:
891 next_url = e.attrib.get('url')
892 if next_url:
893 break
894
895 if not next_url:
896 continue
897
898 entries.append({
899 '_type': 'url',
900 'url': next_url,
901 'title': it.find('title').text,
902 })
4fc946b5
PH
903
904 return {
905 '_type': 'playlist',
906 'id': url,
907 'title': playlist_title,
908 'description': playlist_desc,
909 'entries': entries,
910 }
911
c8e9a235
PH
def _extract_camtasia(self, url, video_id, webpage):
    """Build a playlist from a Camtasia configuration referenced by webpage.

    The page is searched for an fo.addVariable("csConfigFile", "...") call.
    The referenced XML file is resolved relative to url and downloaded, and
    each child of its playlist/array/fileset element that has a <uri>
    becomes one entry. The entry id is the base name of that URI without its
    extension; the entry title combines the page's DC.title meta value with
    the child's tag name.

    Returns None if no camtasia video can be found (no configuration
    reference in the page, or no fileset element in the configuration);
    otherwise a dict with '_type': 'playlist', 'entries' and 'title'.
    """

    camtasia_cfg = self._search_regex(
        r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
        webpage, 'camtasia configuration file', default=None)
    if camtasia_cfg is None:
        return None

    title = self._html_search_meta('DC.title', webpage, fatal=True)

    camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
    camtasia_cfg = self._download_xml(
        camtasia_url, video_id,
        note='Downloading camtasia configuration',
        errnote='Failed to download camtasia configuration')
    fileset_node = camtasia_cfg.find('./playlist/array/fileset')
    if fileset_node is None:
        # Configuration without a fileset: nothing to extract.
        return None

    entries = []
    # Iterate the element itself; Element.getchildren() was removed in
    # Python 3.9 and direct iteration yields the same child elements.
    for n in fileset_node:
        url_n = n.find('./uri')
        if url_n is None:
            continue

        # A child may lack <duration>; report an unknown duration then.
        duration_n = n.find('./duration')
        duration = None if duration_n is None else float_or_none(duration_n.text)

        entries.append({
            'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
            'title': '%s - %s' % (title, n.tag),
            'url': compat_urlparse.urljoin(url, url_n.text),
            'duration': duration,
        })

    return {
        '_type': 'playlist',
        'entries': entries,
        'title': title,
    }
948
def _real_extract(self, url):
    """Extract media information from an arbitrary URL.

    The steps, in order:

    1. Protocol-relative URLs (``//host/...``) are re-dispatched with the
       scheme returned by ``self.http_scheme()``.
    2. Scheme-less input is handled according to the ``default_search``
       downloader parameter (retry with ``http://``, delegate to a search
       extractor, or raise).
    3. A HEAD request (falling back to a full request) is used to follow
       redirects and to detect direct audio/video links from the
       ``Content-Type`` header or from the first bytes of the body.
    4. The page is checked for an RSS feed and a Camtasia project, then
       for a long list of known third-party embeds.
    5. As a last resort generic player patterns (JW Player, Flowplayer,
       Twitter cards, Open Graph, HTML5 ``<video>``, meta/HTTP refresh)
       are tried.

    Returns an info dict: a single video, a ``url``/``url_transparent``
    reference to another extractor, or a ``playlist``.

    Raises ExtractorError for invalid scheme-less input and
    UnsupportedError when nothing usable is found on the page.
    """
    if url.startswith('//'):
        return {
            '_type': 'url',
            'url': self.http_scheme() + url,
        }

    parsed_url = compat_urlparse.urlparse(url)
    if not parsed_url.scheme:
        # Every path through this branch returns or raises, so the code
        # below only ever sees URLs that carry a scheme.
        default_search = self._downloader.params.get('default_search')
        if default_search is None:
            default_search = 'fixup_error'

        if default_search in ('auto', 'auto_warning', 'fixup_error'):
            if '/' in url:
                self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
                return self.url_result('http://' + url)
            elif default_search != 'fixup_error':
                if default_search == 'auto_warning':
                    if re.match(r'^(?:url|URL)$', url):
                        raise ExtractorError(
                            'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
                            expected=True)
                    else:
                        self._downloader.report_warning(
                            'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
                return self.url_result('ytsearch:' + url)

        if default_search in ('error', 'fixup_error'):
            raise ExtractorError(
                '%r is not a valid URL. '
                'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
                % (url, url), expected=True)
        else:
            # Treat the setting as an extractor prefix, e.g. "ytsearch" -> "ytsearch:"
            if ':' not in default_search:
                default_search += ':'
            return self.url_result(default_search + url)

    url, smuggled_data = unsmuggle_url(url)
    force_videoid = None
    # 'to_generic' marks URLs deliberately routed here; it suppresses the
    # fallback warning further down.
    is_intentional = smuggled_data and smuggled_data.get('to_generic')
    if smuggled_data and 'force_videoid' in smuggled_data:
        force_videoid = smuggled_data['force_videoid']
        video_id = force_videoid
    else:
        # Last path component without its extension
        video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])

    self.to_screen('%s: Requesting header' % video_id)

    head_req = HEADRequest(url)
    head_response = self._request_webpage(
        head_req, video_id,
        note=False, errnote='Could not send HEAD request to %s' % url,
        fatal=False)

    if head_response is not False:
        # Check for redirect
        new_url = head_response.geturl()
        if url != new_url:
            self.report_following_redirect(new_url)
            if force_videoid:
                # Keep the forced id across the redirect
                new_url = smuggle_url(
                    new_url, {'force_videoid': force_videoid})
            return self.url_result(new_url)

    full_response = None
    if head_response is False:
        # HEAD failed: issue a regular request and use its headers instead
        request = compat_urllib_request.Request(url)
        request.add_header('Accept-Encoding', '*')
        full_response = self._request_webpage(request, video_id)
        head_response = full_response

    # Check for direct link to a video
    content_type = head_response.headers.get('Content-Type', '')
    m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
    if m:
        upload_date = unified_strdate(
            head_response.headers.get('Last-Modified'))
        return {
            'id': video_id,
            'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
            'direct': True,
            'formats': [{
                'format_id': m.group('format_id'),
                'url': url,
                'vcodec': 'none' if m.group('type') == 'audio' else None
            }],
            'upload_date': upload_date,
        }

    if not self._downloader.params.get('test', False) and not is_intentional:
        force = self._downloader.params.get('force_generic_extractor', False)
        self._downloader.report_warning(
            '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))

    if not full_response:
        request = compat_urllib_request.Request(url)
        # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
        # making it impossible to download only chunk of the file (yet we need only 512kB to
        # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
        # that will always result in downloading the whole file that is not desirable.
        # Therefore for extraction pass we have to override Accept-Encoding to any in order
        # to accept raw bytes and being able to download only a chunk.
        # It may probably better to solve this by checking Content-Type for application/octet-stream
        # after HEAD request finishes, but not sure if we can rely on this.
        request.add_header('Accept-Encoding', '*')
        full_response = self._request_webpage(request, video_id)

    # Maybe it's a direct link to a video?
    # Be careful not to download the whole thing!
    first_bytes = full_response.read(512)
    if not is_html(first_bytes):
        self._downloader.report_warning(
            'URL could be a direct video link, returning it as such.')
        upload_date = unified_strdate(
            head_response.headers.get('Last-Modified'))
        return {
            'id': video_id,
            'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
            'direct': True,
            'url': url,
            'upload_date': upload_date,
        }

    # The bytes already consumed are handed back as a prefix
    webpage = self._webpage_read_content(
        full_response, url, video_id, prefix=first_bytes)

    self.report_extraction(video_id)

    # Is it an RSS feed?
    try:
        doc = parse_xml(webpage)
        if doc.tag == 'rss':
            return self._extract_rss(url, video_id, doc)
    except compat_xml_parse_error:
        # Not well-formed XML, so not a feed; carry on with HTML heuristics
        pass

    # Is it a Camtasia project?
    camtasia_res = self._extract_camtasia(url, video_id, webpage)
    if camtasia_res is not None:
        return camtasia_res

    # Sometimes embedded video player is hidden behind percent encoding
    # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
    # Unescaping the whole page allows to handle those cases in a generic way
    webpage = compat_urllib_parse.unquote(webpage)

    # it's tempting to parse this further, but you would
    # have to take into account all the variations like
    #   Video Title - Site Name
    #   Site Name | Video Title
    #   Video Title - Tagline | Site Name
    # and so on and so forth; it's just not practical
    video_title = self._html_search_regex(
        r'(?s)<title>(.*?)</title>', webpage, 'video title',
        default='video')

    # Try to detect age limit automatically
    age_limit = self._rta_search(webpage)
    # And then there are the jokers who advertise that they use RTA,
    # but actually don't.
    AGE_LIMIT_MARKERS = [
        r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
    ]
    if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
        age_limit = 18

    # video uploader is domain name
    video_uploader = self._search_regex(
        r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')

    # Helper method: turn a list of regex matches into a de-duplicated
    # playlist of url results. `getter` maps a match to its URL and `ie`
    # names the target extractor.
    def _playlist_from_matches(matches, getter=None, ie=None):
        urlrs = orderedSet(
            self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
            for m in matches)
        return self.playlist_result(
            urlrs, playlist_id=video_id, playlist_title=video_title)

    # Look for BrightCove:
    bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
    if bc_urls:
        self.to_screen('Brightcove video detected.')
        entries = [{
            '_type': 'url',
            'url': smuggle_url(bc_url, {'Referer': url}),
            'ie_key': 'Brightcove'
        } for bc_url in bc_urls]

        return {
            '_type': 'playlist',
            'title': video_title,
            'id': video_id,
            'entries': entries,
        }

    # Look for embedded rtl.nl player
    matches = re.findall(
        r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
        webpage)
    if matches:
        return _playlist_from_matches(matches, ie='RtlNl')

    vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
    if vimeo_url is not None:
        return self.url_result(vimeo_url)

    # Look for embedded YouTube player
    # (group 1 is the opening quote, group 2 the URL)
    matches = re.findall(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/.+?)
        \1''', webpage)
    if matches:
        return _playlist_from_matches(
            matches, lambda m: unescapeHTML(m[1]))

    # Look for lazyYT YouTube embed
    matches = re.findall(
        r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    if matches:
        return _playlist_from_matches(matches, unescapeHTML)

    # Look for embedded Dailymotion player
    matches = re.findall(
        r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
    if matches:
        return _playlist_from_matches(
            matches, lambda m: unescapeHTML(m[1]))

    # Look for embedded Dailymotion playlist player (#3822)
    m = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
    if m:
        playlists = re.findall(
            r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
        if playlists:
            return _playlist_from_matches(
                playlists, lambda p: '//dailymotion.com/playlist/%s' % p)

    # Look for embedded Wistia player
    match = re.search(
        r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
    if match:
        embed_url = self._proto_relative_url(
            unescapeHTML(match.group('url')))
        return {
            '_type': 'url_transparent',
            'url': embed_url,
            'ie_key': 'Wistia',
            'uploader': video_uploader,
            'title': video_title,
            'id': video_id,
        }

    match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
    if match:
        return {
            '_type': 'url_transparent',
            'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
            'ie_key': 'Wistia',
            'uploader': video_uploader,
            'title': video_title,
            'id': match.group('id')
        }

    # Look for embedded blip.tv player
    bliptv_url = BlipTVIE._extract_url(webpage)
    if bliptv_url:
        return self.url_result(bliptv_url, 'BlipTV')

    # Look for SVT player
    svt_url = SVTIE._extract_url(webpage)
    if svt_url:
        return self.url_result(svt_url, 'SVT')

    # Look for embedded condenast player
    # (the closing quote is kept outside the group so that it does not
    # end up in the extracted URL)
    matches = re.findall(
        r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+)"',
        webpage)
    if matches:
        return {
            '_type': 'playlist',
            'entries': [{
                '_type': 'url',
                'ie_key': 'CondeNast',
                'url': ma,
            } for ma in matches],
            'title': video_title,
            'id': video_id,
        }

    # Look for Bandcamp pages with custom domain
    mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
    if mobj is not None:
        burl = unescapeHTML(mobj.group(1))
        # Don't set the extractor because it can be a track url or an album
        return self.url_result(burl)

    # Look for embedded Vevo player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for embedded Viddler player
    mobj = re.search(
        r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for NYTimes player
    mobj = re.search(
        r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for Libsyn player
    mobj = re.search(
        r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for Ooyala videos
    mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
            re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
            re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
            re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
    if mobj is not None:
        return OoyalaIE._build_url_result(mobj.group('ec'))

    # Look for multiple Ooyala embeds on SBN network websites
    mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
    if mobj is not None:
        embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
        if embeds:
            return _playlist_from_matches(
                embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')

    # Look for Aparat videos
    mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
    if mobj is not None:
        return self.url_result(mobj.group(1), 'Aparat')

    # Look for MPORA videos
    mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
    if mobj is not None:
        return self.url_result(mobj.group(1), 'Mpora')

    # Look for embedded NovaMov-based player
    mobj = re.search(
        r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
                (?P<url>http://(?:(?:embed|www)\.)?
                    (?:novamov\.com|
                       nowvideo\.(?:ch|sx|eu|at|ag|co)|
                       videoweed\.(?:es|com)|
                       movshare\.(?:net|sx|ag)|
                       divxstage\.(?:eu|net|ch|co|at|ag))
                    /embed\.php.+?)\1''', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for embedded Facebook player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Facebook')

    # Look for embedded VK player
    mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'VK')

    # Look for embedded ivi player
    mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Ivi')

    # Look for embedded Huffington Post player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'HuffPost')

    # Look for embed.ly
    # ([^>]* lets other attributes sit between class= and href=/src=)
    mobj = re.search(r'class=["\']embedly-card["\'][^>]*href=["\'](?P<url>[^"\']+)', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))
    mobj = re.search(r'class=["\']embedly-embed["\'][^>]*src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
    if mobj is not None:
        return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))

    # Look for funnyordie embed
    matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
    if matches:
        return _playlist_from_matches(
            matches, getter=unescapeHTML, ie='FunnyOrDie')

    # Look for BBC iPlayer embed
    matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
    if matches:
        return _playlist_from_matches(matches, ie='BBCCoUk')

    # Look for embedded RUTV player
    rutv_url = RUTVIE._extract_url(webpage)
    if rutv_url:
        return self.url_result(rutv_url, 'RUTV')

    # Look for embedded TVC player
    tvc_url = TVCIE._extract_url(webpage)
    if tvc_url:
        return self.url_result(tvc_url, 'TVC')

    # Look for embedded SportBox player
    sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
    if sportbox_urls:
        return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')

    # Look for embedded PornHub player
    pornhub_url = PornHubIE._extract_url(webpage)
    if pornhub_url:
        return self.url_result(pornhub_url, 'PornHub')

    # Look for embedded XHamster player
    xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
    if xhamster_urls:
        return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')

    # Look for embedded Tvigle player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Tvigle')

    # Look for embedded TED player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'TED')

    # Look for embedded Ustream videos
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Ustream')

    # Look for embedded arte.tv player
    mobj = re.search(
        r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'ArteTVEmbed')

    # Look for embedded smotri.com player
    smotri_url = SmotriIE._extract_url(webpage)
    if smotri_url:
        return self.url_result(smotri_url, 'Smotri')

    # Look for embedded soundcloud player
    mobj = re.search(
        r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
        webpage)
    if mobj is not None:
        url = unescapeHTML(mobj.group('url'))
        return self.url_result(url)

    # Look for embedded vulture.com player
    mobj = re.search(
        r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
        webpage)
    if mobj is not None:
        url = unescapeHTML(mobj.group('url'))
        return self.url_result(url, ie='Vulture')

    # Look for embedded mtvservices player
    mobj = re.search(
        r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
        webpage)
    if mobj is not None:
        url = unescapeHTML(mobj.group('url'))
        return self.url_result(url, ie='MTVServicesEmbedded')

    # Look for embedded yahoo player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Yahoo')

    # Look for embedded sbs.com.au player
    mobj = re.search(
        r'''(?x)
        (?:
            <meta\s+property="og:video"\s+content=|
            <iframe[^>]+?src=
        )
        (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'SBS')

    # Look for embedded Cinchcast player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Cinchcast')

    # Look for embedded MLB player (iframe first, then data-video-link)
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
        webpage)
    if not mobj:
        mobj = re.search(
            r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
            webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'MLB')

    # Look for a single Conde Nast embed using the extractor's own pattern
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
        webpage)
    if mobj is not None:
        return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')

    # Look for embedded new.livestream.com player
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Livestream')

    # Look for Zapiks embed
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Zapiks')

    # Look for Kaltura embeds
    mobj = re.search(
        r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
    if mobj is not None:
        return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')

    # Look for Eagle.Platform embeds
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'EaglePlatform')

    # Look for ClipYou (uses Eagle.Platform) embeds
    mobj = re.search(
        r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
    if mobj is not None:
        return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')

    # Look for Pladform embeds
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Pladform')

    # Look for Playwire embeds
    mobj = re.search(
        r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for 5min embeds
    mobj = re.search(
        r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
    if mobj is not None:
        return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')

    # Look for Crooks and Liars embeds
    mobj = re.search(
        r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for NBC Sports VPlayer embeds
    nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
    if nbc_sports_url:
        return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')

    # Look for UDN embeds
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
    if mobj is not None:
        return self.url_result(
            compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')

    # Look for Senate ISVP iframe
    senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
    if senate_isvp_url:
        return self.url_result(senate_isvp_url, 'SenateISVP')

    # Look for Dailymotion Cloud videos
    dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
    if dmcloud_url:
        return self.url_result(dmcloud_url, 'DailymotionCloud')

    # Look for OnionStudios embeds
    onionstudios_url = OnionStudiosIE._extract_url(webpage)
    if onionstudios_url:
        return self.url_result(onionstudios_url)

    # Look for SnagFilms embeds
    snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
    if snagfilms_url:
        return self.url_result(snagfilms_url)

    # Look for AdobeTVVideo embeds
    mobj = re.search(
        r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
        webpage)
    if mobj is not None:
        return self.url_result(
            self._proto_relative_url(unescapeHTML(mobj.group(1))),
            'AdobeTVVideo')

    # From here on only generic player heuristics are tried. Candidates are
    # accepted when they are YouTube URLs or when their path has an
    # extension that is not a known non-video one (flash, image, subtitle).
    def check_video(vurl):
        if YoutubeIE.suitable(vurl):
            return True
        vpath = compat_urlparse.urlparse(vurl).path
        vext = determine_ext(vpath)
        return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')

    def filter_video(urls):
        return list(filter(check_video, urls))

    # Start with something easy: JW Player in SWFObject
    found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
    if not found:
        # Look for gorilla-vid style embedding
        found = filter_video(re.findall(r'''(?sx)
            (?:
                jw_plugins|
                JWPlayerOptions|
                jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
            )
            .*?
            ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
    if not found:
        # Broaden the search a little bit
        found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
    if not found:
        # Broaden the findall a little bit: JWPlayer JS loader
        found = filter_video(re.findall(
            r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
    if not found:
        # Flow player
        found = filter_video(re.findall(r'''(?xs)
            flowplayer\("[^"]+",\s*
                \{[^}]+?\}\s*,
                \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
                    ["']?url["']?\s*:\s*["']([^"']+)["']
        ''', webpage))
    if not found:
        # Cinerama player
        found = re.findall(
            r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
    if not found:
        # Try to find twitter cards info
        found = filter_video(re.findall(
            r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
    if not found:
        # We look for Open Graph info:
        # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
        m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
        # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
        # NOTE(review): re.findall never returns None, so this condition is
        # always true and og:video is consulted even without a video MIME
        # type (filter_video still rejects .swf players). Left unchanged on
        # purpose: tightening it could stop extraction on pages that work
        # today.
        if m_video_type is not None:
            found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
    if not found:
        # HTML5 video
        found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
    if not found:
        # Last resort: follow a <meta http-equiv="refresh"> redirect
        REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
        found = re.search(
            r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
            r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
            webpage)
        if not found:
            # Look also in Refresh HTTP header
            refresh_header = head_response.headers.get('Refresh')
            if refresh_header:
                found = re.search(REDIRECT_REGEX, refresh_header)
        if found:
            new_url = compat_urlparse.urljoin(url, found.group(1))
            self.report_following_redirect(new_url)
            return {
                '_type': 'url',
                'url': new_url,
            }
    if not found:
        raise UnsupportedError(url)

    entries = []
    for video_url in found:
        # Candidates may be relative to the page URL
        video_url = compat_urlparse.urljoin(url, video_url)
        video_id = compat_urllib_parse.unquote(os.path.basename(video_url))

        # Sometimes, jwplayer extraction will result in a YouTube URL
        if YoutubeIE.suitable(video_url):
            entries.append(self.url_result(video_url, 'Youtube'))
            continue

        # here's a fun little line of code for you:
        video_id = os.path.splitext(video_id)[0]

        if determine_ext(video_url) == 'smil':
            # SMIL manifests describe the formats instead of being media
            entries.append({
                'id': video_id,
                'formats': self._extract_smil_formats(video_url, video_id),
                'uploader': video_uploader,
                'title': video_title,
                'age_limit': age_limit,
            })
        else:
            entries.append({
                'id': video_id,
                'url': video_url,
                'uploader': video_uploader,
                'title': video_title,
                'age_limit': age_limit,
            })

    if len(entries) == 1:
        return entries[0]
    else:
        # Number the titles so that entries sharing the page title stay
        # distinguishable
        for num, e in enumerate(entries, start=1):
            # 'url' results don't have a title
            if e.get('title') is not None:
                e['title'] = '%s (%d)' % (e['title'], num)
        return {
            '_type': 'playlist',
            'entries': entries,
        }