jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/generic.py
[ustream] Add UstreamIE._extract_url()
[yt-dlp.git] / youtube_dl / extractor / generic.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7 import sys
8
9 from .common import InfoExtractor
10 from .youtube import YoutubeIE
11 from ..compat import (
12 compat_etree_fromstring,
13 compat_urllib_parse_unquote,
14 compat_urlparse,
15 compat_xml_parse_error,
16 )
17 from ..utils import (
18 determine_ext,
19 ExtractorError,
20 float_or_none,
21 HEADRequest,
22 is_html,
23 orderedSet,
24 sanitized_Request,
25 smuggle_url,
26 unescapeHTML,
27 unified_strdate,
28 unsmuggle_url,
29 UnsupportedError,
30 xpath_text,
31 )
32 from .brightcove import (
33 BrightcoveLegacyIE,
34 BrightcoveNewIE,
35 )
36 from .nbc import NBCSportsVPlayerIE
37 from .ooyala import OoyalaIE
38 from .rutv import RUTVIE
39 from .tvc import TVCIE
40 from .sportbox import SportBoxEmbedIE
41 from .smotri import SmotriIE
42 from .myvi import MyviIE
43 from .condenast import CondeNastIE
44 from .udn import UDNEmbedIE
45 from .senateisvp import SenateISVPIE
46 from .svt import SVTIE
47 from .pornhub import PornHubIE
48 from .xhamster import XHamsterEmbedIE
49 from .tnaflix import TNAFlixNetworkEmbedIE
50 from .drtuber import DrTuberIE
51 from .redtube import RedTubeIE
52 from .vimeo import VimeoIE
53 from .dailymotion import (
54 DailymotionIE,
55 DailymotionCloudIE,
56 )
57 from .onionstudios import OnionStudiosIE
58 from .viewlift import ViewLiftEmbedIE
59 from .mtv import MTVServicesEmbeddedIE
60 from .pladform import PladformIE
61 from .videomore import VideomoreIE
62 from .webcaster import WebcasterFeedIE
63 from .googledrive import GoogleDriveIE
64 from .jwplatform import JWPlatformIE
65 from .digiteka import DigitekaIE
66 from .arkena import ArkenaIE
67 from .instagram import InstagramIE
68 from .liveleak import LiveLeakIE
69 from .threeqsdn import ThreeQSDNIE
70 from .theplatform import ThePlatformIE
71 from .vessel import VesselIE
72 from .kaltura import KalturaIE
73 from .eagleplatform import EaglePlatformIE
74 from .facebook import FacebookIE
75 from .soundcloud import SoundcloudIE
76 from .tunein import TuneInBaseIE
77 from .vbox7 import Vbox7IE
78 from .dbtv import DBTVIE
79 from .piksel import PikselIE
80 from .videa import VideaIE
81 from .twentymin import TwentyMinutenIE
82 from .ustream import UstreamIE
83
84
85 class GenericIE(InfoExtractor):
86 IE_DESC = 'Generic downloader that works on some sites'
87 _VALID_URL = r'.*'
88 IE_NAME = 'generic'
89 _TESTS = [
90 # Direct link to a video
91 {
92 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
93 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
94 'info_dict': {
95 'id': 'trailer',
96 'ext': 'mp4',
97 'title': 'trailer',
98 'upload_date': '20100513',
99 }
100 },
101 # Direct link to media delivered compressed (until Accept-Encoding is *)
102 {
103 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
104 'md5': '128c42e68b13950268b648275386fc74',
105 'info_dict': {
106 'id': 'FictionJunction-Parallel_Hearts',
107 'ext': 'flac',
108 'title': 'FictionJunction-Parallel_Hearts',
109 'upload_date': '20140522',
110 },
111 'expected_warnings': [
112 'URL could be a direct video link, returning it as such.'
113 ],
114 'skip': 'URL invalid',
115 },
116 # Direct download with broken HEAD
117 {
118 'url': 'http://ai-radio.org:8000/radio.opus',
119 'info_dict': {
120 'id': 'radio',
121 'ext': 'opus',
122 'title': 'radio',
123 },
124 'params': {
125 'skip_download': True, # infinite live stream
126 },
127 'expected_warnings': [
128 r'501.*Not Implemented',
129 r'400.*Bad Request',
130 ],
131 },
132 # Direct link with incorrect MIME type
133 {
134 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
135 'md5': '4ccbebe5f36706d85221f204d7eb5913',
136 'info_dict': {
137 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
138 'id': '5_Lennart_Poettering_-_Systemd',
139 'ext': 'webm',
140 'title': '5_Lennart_Poettering_-_Systemd',
141 'upload_date': '20141120',
142 },
143 'expected_warnings': [
144 'URL could be a direct video link, returning it as such.'
145 ]
146 },
147 # RSS feed
148 {
149 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
150 'info_dict': {
151 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
152 'title': 'Zero Punctuation',
153 'description': 're:.*groundbreaking video review series.*'
154 },
155 'playlist_mincount': 11,
156 },
157 # RSS feed with enclosure
158 {
159 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
160 'info_dict': {
161 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
162 'ext': 'm4v',
163 'upload_date': '20150228',
164 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
165 }
166 },
167 # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
168 {
169 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
170 'info_dict': {
171 'id': 'smil',
172 'ext': 'mp4',
173 'title': 'Automatics, robotics and biocybernetics',
174 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
175 'upload_date': '20130627',
176 'formats': 'mincount:16',
177 'subtitles': 'mincount:1',
178 },
179 'params': {
180 'force_generic_extractor': True,
181 'skip_download': True,
182 },
183 },
184 # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
185 {
186 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
187 'info_dict': {
188 'id': 'hds',
189 'ext': 'flv',
190 'title': 'hds',
191 'formats': 'mincount:1',
192 },
193 'params': {
194 'skip_download': True,
195 },
196 },
197 # SMIL from https://www.restudy.dk/video/play/id/1637
198 {
199 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
200 'info_dict': {
201 'id': 'video_1637',
202 'ext': 'flv',
203 'title': 'video_1637',
204 'formats': 'mincount:3',
205 },
206 'params': {
207 'skip_download': True,
208 },
209 },
210 # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
211 {
212 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
213 'info_dict': {
214 'id': 'smil-service',
215 'ext': 'flv',
216 'title': 'smil-service',
217 'formats': 'mincount:1',
218 },
219 'params': {
220 'skip_download': True,
221 },
222 },
223 # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
224 {
225 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
226 'info_dict': {
227 'id': '4719370',
228 'ext': 'mp4',
229 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
230 'formats': 'mincount:3',
231 },
232 'params': {
233 'skip_download': True,
234 },
235 },
236 # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
237 {
238 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
239 'info_dict': {
240 'id': 'mZlp2ctYIUEB',
241 'ext': 'mp4',
242 'title': 'Tikibad ontruimd wegens brand',
243 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
244 'thumbnail': r're:^https?://.*\.jpg$',
245 'duration': 33,
246 },
247 'params': {
248 'skip_download': True,
249 },
250 },
251 # MPD from http://dash-mse-test.appspot.com/media.html
252 {
253 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
254 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
255 'info_dict': {
256 'id': 'car-20120827-manifest',
257 'ext': 'mp4',
258 'title': 'car-20120827-manifest',
259 'formats': 'mincount:9',
260 'upload_date': '20130904',
261 },
262 'params': {
263 'format': 'bestvideo',
264 },
265 },
266 # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
267 {
268 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
269 'info_dict': {
270 'id': 'content',
271 'ext': 'mp4',
272 'title': 'content',
273 'formats': 'mincount:8',
274 },
275 'params': {
276 # m3u8 downloads
277 'skip_download': True,
278 },
279 'skip': 'video gone',
280 },
281 # m3u8 served with Content-Type: text/plain
282 {
283 'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
284 'info_dict': {
285 'id': 'index',
286 'ext': 'mp4',
287 'title': 'index',
288 'upload_date': '20140720',
289 'formats': 'mincount:11',
290 },
291 'params': {
292 # m3u8 downloads
293 'skip_download': True,
294 },
295 'skip': 'video gone',
296 },
297 # google redirect
298 {
299 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
300 'info_dict': {
301 'id': 'cmQHVoWB5FY',
302 'ext': 'mp4',
303 'upload_date': '20130224',
304 'uploader_id': 'TheVerge',
305 'description': r're:^Chris Ziegler takes a look at the\.*',
306 'uploader': 'The Verge',
307 'title': 'First Firefox OS phones side-by-side',
308 },
309 'params': {
310 'skip_download': False,
311 }
312 },
313 {
314 # redirect in Refresh HTTP header
315 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
316 'info_dict': {
317 'id': 'pO8h3EaFRdo',
318 'ext': 'mp4',
319 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
320 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
321 'upload_date': '20150917',
322 'uploader_id': 'brtvofficial',
323 'uploader': 'Boiler Room',
324 },
325 'params': {
326 'skip_download': False,
327 },
328 },
329 {
330 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
331 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
332 'info_dict': {
333 'id': '13601338388002',
334 'ext': 'mp4',
335 'uploader': 'www.hodiho.fr',
336 'title': 'R\u00e9gis plante sa Jeep',
337 }
338 },
339 # bandcamp page with custom domain
340 {
341 'add_ie': ['Bandcamp'],
342 'url': 'http://bronyrock.com/track/the-pony-mash',
343 'info_dict': {
344 'id': '3235767654',
345 'ext': 'mp3',
346 'title': 'The Pony Mash',
347 'uploader': 'M_Pallante',
348 },
349 'skip': 'There is a limit of 200 free downloads / month for the test song',
350 },
351 {
352 # embedded brightcove video
353 # it also tests brightcove videos that need to set the 'Referer'
354 # in the http requests
355 'add_ie': ['BrightcoveLegacy'],
356 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
357 'info_dict': {
358 'id': '2765128793001',
359 'ext': 'mp4',
360 'title': 'Le cours de bourse : l’analyse technique',
361 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
362 'uploader': 'BFM BUSINESS',
363 },
364 'params': {
365 'skip_download': True,
366 },
367 },
368 {
369 # embedded with itemprop embedURL and video id spelled as `idVideo`
370 'add_id': ['BrightcoveLegacy'],
371 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
372 'info_dict': {
373 'id': '5255628253001',
374 'ext': 'mp4',
375 'title': 'md5:37c519b1128915607601e75a87995fc0',
376 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
377 'uploader': 'BFM BUSINESS',
378 'uploader_id': '876450612001',
379 'timestamp': 1482255315,
380 'upload_date': '20161220',
381 },
382 'params': {
383 'skip_download': True,
384 },
385 },
386 {
387 # https://github.com/rg3/youtube-dl/issues/2253
388 'url': 'http://bcove.me/i6nfkrc3',
389 'md5': '0ba9446db037002366bab3b3eb30c88c',
390 'info_dict': {
391 'id': '3101154703001',
392 'ext': 'mp4',
393 'title': 'Still no power',
394 'uploader': 'thestar.com',
395 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
396 },
397 'add_ie': ['BrightcoveLegacy'],
398 'skip': 'video gone',
399 },
400 {
401 'url': 'http://www.championat.com/video/football/v/87/87499.html',
402 'md5': 'fb973ecf6e4a78a67453647444222983',
403 'info_dict': {
404 'id': '3414141473001',
405 'ext': 'mp4',
406 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
407 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
408 'uploader': 'Championat',
409 },
410 },
411 {
412 # https://github.com/rg3/youtube-dl/issues/3541
413 'add_ie': ['BrightcoveLegacy'],
414 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
415 'info_dict': {
416 'id': '3866516442001',
417 'ext': 'mp4',
418 'title': 'Leer mij vrouwen kennen: Aflevering 1',
419 'description': 'Leer mij vrouwen kennen: Aflevering 1',
420 'uploader': 'SBS Broadcasting',
421 },
422 'skip': 'Restricted to Netherlands',
423 'params': {
424 'skip_download': True, # m3u8 download
425 },
426 },
427 {
428 # Brightcove with alternative playerID key
429 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
430 'info_dict': {
431 'id': 'nmeth.2062_SV1',
432 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
433 },
434 'playlist': [{
435 'info_dict': {
436 'id': '2228375078001',
437 'ext': 'mp4',
438 'title': 'nmeth.2062-sv1',
439 'description': 'nmeth.2062-sv1',
440 'timestamp': 1363357591,
441 'upload_date': '20130315',
442 'uploader': 'Nature Publishing Group',
443 'uploader_id': '1964492299001',
444 },
445 }],
446 },
447 # ooyala video
448 {
449 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
450 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
451 'info_dict': {
452 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
453 'ext': 'mp4',
454 'title': '2cc213299525360.mov', # that's what we get
455 'duration': 238.231,
456 },
457 'add_ie': ['Ooyala'],
458 },
459 {
460 # ooyala video embedded with http://player.ooyala.com/iframe.js
461 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
462 'info_dict': {
463 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
464 'ext': 'mp4',
465 'title': '"Steve Jobs: Man in the Machine" trailer',
466 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
467 'duration': 135.427,
468 },
469 'params': {
470 'skip_download': True,
471 },
472 'skip': 'movie expired',
473 },
474 # embed.ly video
475 {
476 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
477 'info_dict': {
478 'id': '9ODmcdjQcHQ',
479 'ext': 'mp4',
480 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
481 'upload_date': '20140225',
482 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
483 'uploader': 'Tested',
484 'uploader_id': 'testedcom',
485 },
486 # No need to test YoutubeIE here
487 'params': {
488 'skip_download': True,
489 },
490 },
491 # funnyordie embed
492 {
493 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
494 'info_dict': {
495 'id': '18e820ec3f',
496 'ext': 'mp4',
497 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
498 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
499 },
500 # HEAD requests lead to endless 301, while GET is OK
501 'expected_warnings': ['301'],
502 },
503 # RUTV embed
504 {
505 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
506 'info_dict': {
507 'id': '776940',
508 'ext': 'mp4',
509 'title': 'Охотское море стало целиком российским',
510 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
511 },
512 'params': {
513 # m3u8 download
514 'skip_download': True,
515 },
516 },
517 # TVC embed
518 {
519 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
520 'info_dict': {
521 'id': '55304',
522 'ext': 'mp4',
523 'title': 'Дошкольное воспитание',
524 },
525 },
526 # SportBox embed
527 {
528 'url': 'http://www.vestifinance.ru/articles/25753',
529 'info_dict': {
530 'id': '25753',
531 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
532 },
533 'playlist': [{
534 'info_dict': {
535 'id': '370908',
536 'title': 'Госзаказ. День 3',
537 'ext': 'mp4',
538 }
539 }, {
540 'info_dict': {
541 'id': '370905',
542 'title': 'Госзаказ. День 2',
543 'ext': 'mp4',
544 }
545 }, {
546 'info_dict': {
547 'id': '370902',
548 'title': 'Госзаказ. День 1',
549 'ext': 'mp4',
550 }
551 }],
552 'params': {
553 # m3u8 download
554 'skip_download': True,
555 },
556 },
557 # Myvi.ru embed
558 {
559 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
560 'info_dict': {
561 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
562 'ext': 'mp4',
563 'title': 'Ужастики, русский трейлер (2015)',
564 'thumbnail': r're:^https?://.*\.jpg$',
565 'duration': 153,
566 }
567 },
568 # XHamster embed
569 {
570 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
571 'info_dict': {
572 'id': 'showthread',
573 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
574 },
575 'playlist_mincount': 7,
576 # This forum does not allow <iframe> syntaxes anymore
577 # Now HTML tags are displayed as-is
578 'skip': 'No videos on this page',
579 },
580 # Embedded TED video
581 {
582 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
583 'md5': '65fdff94098e4a607385a60c5177c638',
584 'info_dict': {
585 'id': '1969',
586 'ext': 'mp4',
587 'title': 'Hidden miracles of the natural world',
588 'uploader': 'Louie Schwartzberg',
589 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
590 }
591 },
592 # Embedded Ustream video
593 {
594 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
595 'md5': '27b99cdb639c9b12a79bca876a073417',
596 'info_dict': {
597 'id': '45734260',
598 'ext': 'flv',
599 'uploader': 'AU SPA: The NSA and Privacy',
600 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
601 }
602 },
603 # nowvideo embed hidden behind percent encoding
604 {
605 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
606 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
607 'info_dict': {
608 'id': '06e53103ca9aa',
609 'ext': 'flv',
610 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
611 'description': 'No description',
612 },
613 },
614 # arte embed
615 {
616 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
617 'md5': '7653032cbb25bf6c80d80f217055fa43',
618 'info_dict': {
619 'id': '048195-004_PLUS7-F',
620 'ext': 'flv',
621 'title': 'X:enius',
622 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
623 'upload_date': '20140320',
624 },
625 'params': {
626 'skip_download': 'Requires rtmpdump'
627 },
628 'skip': 'video gone',
629 },
630 # francetv embed
631 {
632 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
633 'info_dict': {
634 'id': 'EV_30231',
635 'ext': 'mp4',
636 'title': 'Alcaline, le concert avec Calogero',
637 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
638 'upload_date': '20150226',
639 'timestamp': 1424989860,
640 'duration': 5400,
641 },
642 'params': {
643 # m3u8 downloads
644 'skip_download': True,
645 },
646 'expected_warnings': [
647 'Forbidden'
648 ]
649 },
650 # Condé Nast embed
651 {
652 'url': 'http://www.wired.com/2014/04/honda-asimo/',
653 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
654 'info_dict': {
655 'id': '53501be369702d3275860000',
656 'ext': 'mp4',
657 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
658 }
659 },
660 # Dailymotion embed
661 {
662 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
663 'md5': '441aeeb82eb72c422c7f14ec533999cd',
664 'info_dict': {
665 'id': 'k2mm4bCdJ6CQ2i7c8o2',
666 'ext': 'mp4',
667 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
668 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
669 'uploader': 'Spi0n',
670 'uploader_id': 'xgditw',
671 'upload_date': '20140425',
672 'timestamp': 1398441542,
673 },
674 'add_ie': ['Dailymotion'],
675 },
676 # YouTube embed
677 {
678 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
679 'info_dict': {
680 'id': 'FXRb4ykk4S0',
681 'ext': 'mp4',
682 'title': 'The NBL Auction 2014',
683 'uploader': 'BADMINTON England',
684 'uploader_id': 'BADMINTONEvents',
685 'upload_date': '20140603',
686 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
687 },
688 'add_ie': ['Youtube'],
689 'params': {
690 'skip_download': True,
691 }
692 },
693 # MTVServices embed
694 {
695 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
696 'md5': 'ca1aef97695ef2c1d6973256a57e5252',
697 'info_dict': {
698 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
699 'ext': 'mp4',
700 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
701 'description': 'Two valets share their love for movie star Liam Neesons.',
702 'timestamp': 1349922600,
703 'upload_date': '20121011',
704 },
705 },
706 # YouTube embed via <data-embed-url="">
707 {
708 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
709 'info_dict': {
710 'id': '4vAffPZIT44',
711 'ext': 'mp4',
712 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
713 'uploader': 'Gameloft',
714 'uploader_id': 'gameloft',
715 'upload_date': '20140828',
716 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
717 },
718 'params': {
719 'skip_download': True,
720 }
721 },
722 # Camtasia studio
723 {
724 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
725 'playlist': [{
726 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
727 'info_dict': {
728 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
729 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
730 'ext': 'flv',
731 'duration': 2235.90,
732 }
733 }, {
734 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
735 'info_dict': {
736 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
737 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
738 'ext': 'flv',
739 'duration': 2235.93,
740 }
741 }],
742 'info_dict': {
743 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
744 }
745 },
746 # Flowplayer
747 {
748 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
749 'md5': '9d65602bf31c6e20014319c7d07fba27',
750 'info_dict': {
751 'id': '5123ea6d5e5a7',
752 'ext': 'mp4',
753 'age_limit': 18,
754 'uploader': 'www.handjobhub.com',
755 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
756 }
757 },
758 # Multiple brightcove videos
759 # https://github.com/rg3/youtube-dl/issues/2283
760 {
761 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
762 'info_dict': {
763 'id': 'always-never',
764 'title': 'Always / Never - The New Yorker',
765 },
766 'playlist_count': 3,
767 'params': {
768 'extract_flat': False,
769 'skip_download': True,
770 }
771 },
772 # MLB embed
773 {
774 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
775 'md5': '96f09a37e44da40dd083e12d9a683327',
776 'info_dict': {
777 'id': '33322633',
778 'ext': 'mp4',
779 'title': 'Ump changes call to ball',
780 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
781 'duration': 48,
782 'timestamp': 1401537900,
783 'upload_date': '20140531',
784 'thumbnail': r're:^https?://.*\.jpg$',
785 },
786 },
787 # Wistia embed
788 {
789 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
790 'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
791 'info_dict': {
792 'id': '6e2wtrbdaf',
793 'ext': 'mov',
794 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
795 'description': 'a Paywall Videos video from Remilon',
796 'duration': 644.072,
797 'uploader': 'study.com',
798 'timestamp': 1459678540,
799 'upload_date': '20160403',
800 'filesize': 24687186,
801 },
802 },
803 {
804 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
805 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
806 'info_dict': {
807 'id': 'uxjb0lwrcz',
808 'ext': 'mp4',
809 'title': 'Conversation about Hexagonal Rails Part 1',
810 'description': 'a Martin Fowler video from ThoughtWorks',
811 'duration': 1715.0,
812 'uploader': 'thoughtworks.wistia.com',
813 'timestamp': 1401832161,
814 'upload_date': '20140603',
815 },
816 },
817 # Wistia standard embed (async)
818 {
819 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
820 'info_dict': {
821 'id': '807fafadvk',
822 'ext': 'mp4',
823 'title': 'Drip Brennan Dunn Workshop',
824 'description': 'a JV Webinars video from getdrip-1',
825 'duration': 4986.95,
826 'timestamp': 1463607249,
827 'upload_date': '20160518',
828 },
829 'params': {
830 'skip_download': True,
831 }
832 },
833 # Soundcloud embed
834 {
835 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
836 'info_dict': {
837 'id': '174391317',
838 'ext': 'mp3',
839 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
840 'uploader': 'Sophos Security',
841 'title': 'Chet Chat 171 - Oct 29, 2014',
842 'upload_date': '20141029',
843 }
844 },
845 # Soundcloud multiple embeds
846 {
847 'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
848 'info_dict': {
849 'id': '52809',
850 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
851 },
852 'playlist_mincount': 7,
853 },
854 # TuneIn station embed
855 {
856 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
857 'info_dict': {
858 'id': '204146',
859 'ext': 'mp3',
860 'title': 'CNRV',
861 'location': 'Paris, France',
862 'is_live': True,
863 },
864 'params': {
865 # Live stream
866 'skip_download': True,
867 },
868 },
869 # Livestream embed
870 {
871 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
872 'info_dict': {
873 'id': '67864563',
874 'ext': 'flv',
875 'upload_date': '20141112',
876 'title': 'Rosetta #CometLanding webcast HL 10',
877 }
878 },
879 # Another Livestream embed, without 'new.' in URL
880 {
881 'url': 'https://www.freespeech.org/',
882 'info_dict': {
883 'id': '123537347',
884 'ext': 'mp4',
885 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
886 },
887 'params': {
888 # Live stream
889 'skip_download': True,
890 },
891 },
892 # LazyYT
893 {
894 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
895 'info_dict': {
896 'id': '1986',
897 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
898 },
899 'playlist_mincount': 2,
900 },
901 # Cinchcast embed
902 {
903 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
904 'info_dict': {
905 'id': '7141703',
906 'ext': 'mp3',
907 'upload_date': '20141126',
908 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
909 }
910 },
911 # Cinerama player
912 {
913 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
914 'info_dict': {
915 'id': '730m_DandD_1901_512k',
916 'ext': 'mp4',
917 'uploader': 'www.abc.net.au',
918 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
919 }
920 },
921 # embedded viddler video
922 {
923 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
924 'info_dict': {
925 'id': '4d03aad9',
926 'ext': 'mp4',
927 'uploader': 'deadspin',
928 'title': 'WALL-TO-GORTAT',
929 'timestamp': 1422285291,
930 'upload_date': '20150126',
931 },
932 'add_ie': ['Viddler'],
933 },
934 # Libsyn embed
935 {
936 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
937 'info_dict': {
938 'id': '3377616',
939 'ext': 'mp3',
940 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
941 'description': 'md5:601cb790edd05908957dae8aaa866465',
942 'upload_date': '20150220',
943 },
944 'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
945 },
946 # jwplayer YouTube
947 {
948 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
949 'info_dict': {
950 'id': 'Mrj4DVp2zeA',
951 'ext': 'mp4',
952 'upload_date': '20150212',
953 'uploader': 'The National Archives UK',
954 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
955 'uploader_id': 'NationalArchives08',
956 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
957 },
958 },
959 # rtl.nl embed
960 {
961 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
962 'playlist_mincount': 5,
963 'info_dict': {
964 'id': 'aanslagen-kopenhagen',
965 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
966 }
967 },
968 # Zapiks embed
969 {
970 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
971 'info_dict': {
972 'id': '118046',
973 'ext': 'mp4',
974 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
975 }
976 },
977 # Kaltura embed (different embed code)
978 {
979 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
980 'info_dict': {
981 'id': '1_a52wc67y',
982 'ext': 'flv',
983 'upload_date': '20150127',
984 'uploader_id': 'PremierMedia',
985 'timestamp': int,
986 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
987 },
988 },
989 # Kaltura embed protected with referrer
990 {
991 'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
992 'info_dict': {
993 'id': '1_g4fbemnq',
994 'ext': 'mp4',
995 'title': 'Violetta - Achter De Schermen - Ruggero',
996 'description': 'Achter de schermen met Ruggero',
997 'timestamp': 1435133761,
998 'upload_date': '20150624',
999 'uploader_id': 'echojecka',
1000 },
1001 },
1002 # Kaltura embed with single quotes
1003 {
1004 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
1005 'info_dict': {
1006 'id': '0_izeg5utt',
1007 'ext': 'mp4',
1008 'title': '35871',
1009 'timestamp': 1355743100,
1010 'upload_date': '20121217',
1011 'uploader_id': 'batchUser',
1012 },
1013 'add_ie': ['Kaltura'],
1014 },
1015 {
1016 # Kaltura embedded via quoted entry_id
1017 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
1018 'info_dict': {
1019 'id': '0_utuok90b',
1020 'ext': 'mp4',
1021 'title': '06_matthew_brender_raj_dutt',
1022 'timestamp': 1466638791,
1023 'upload_date': '20160622',
1024 },
1025 'add_ie': ['Kaltura'],
1026 'expected_warnings': [
1027 'Could not send HEAD request'
1028 ],
1029 'params': {
1030 'skip_download': True,
1031 }
1032 },
1033 {
1034 # Kaltura embedded, some fileExt broken (#11480)
1035 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
1036 'info_dict': {
1037 'id': '1_sgtvehim',
1038 'ext': 'mp4',
1039 'title': 'Our "Standard Models" of particle physics and cosmology',
1040 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
1041 'timestamp': 1321158993,
1042 'upload_date': '20111113',
1043 'uploader_id': 'kps1',
1044 },
1045 'add_ie': ['Kaltura'],
1046 },
1047 # Eagle.Platform embed (generic URL)
1048 {
1049 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
1050 # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1051 'info_dict': {
1052 'id': '227304',
1053 'ext': 'mp4',
1054 'title': 'Навальный вышел на свободу',
1055 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
1056 'thumbnail': r're:^https?://.*\.jpg$',
1057 'duration': 87,
1058 'view_count': int,
1059 'age_limit': 0,
1060 },
1061 },
1062 # ClipYou (Eagle.Platform) embed (custom URL)
1063 {
1064 'url': 'http://muz-tv.ru/play/7129/',
1065 # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1066 'info_dict': {
1067 'id': '12820',
1068 'ext': 'mp4',
1069 'title': "'O Sole Mio",
1070 'thumbnail': r're:^https?://.*\.jpg$',
1071 'duration': 216,
1072 'view_count': int,
1073 },
1074 },
1075 # Pladform embed
1076 {
1077 'url': 'http://muz-tv.ru/kinozal/view/7400/',
1078 'info_dict': {
1079 'id': '100183293',
1080 'ext': 'mp4',
1081 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
1082 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
1083 'thumbnail': r're:^https?://.*\.jpg$',
1084 'duration': 694,
1085 'age_limit': 0,
1086 },
1087 },
1088 # Playwire embed
1089 {
1090 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
1091 'info_dict': {
1092 'id': '3519514',
1093 'ext': 'mp4',
1094 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
1095 'thumbnail': r're:^https?://.*\.png$',
1096 'duration': 45.115,
1097 },
1098 },
1099 # 5min embed
1100 {
1101 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
1102 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
1103 'info_dict': {
1104 'id': '518726732',
1105 'ext': 'mp4',
1106 'title': 'Facebook Creates "On This Day" | Crunch Report',
1107 },
1108 },
1109 # SVT embed
1110 {
1111 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
1112 'info_dict': {
1113 'id': '2900353',
1114 'ext': 'flv',
1115 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
1116 'duration': 27,
1117 'age_limit': 0,
1118 },
1119 },
1120 # Crooks and Liars embed
1121 {
1122 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
1123 'info_dict': {
1124 'id': '8RUoRhRi',
1125 'ext': 'mp4',
1126 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1127 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1128 'timestamp': 1428207000,
1129 'upload_date': '20150405',
1130 'uploader': 'Heather',
1131 },
1132 },
1133 # Crooks and Liars external embed
1134 {
1135 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1136 'info_dict': {
1137 'id': 'MTE3MjUtMzQ2MzA',
1138 'ext': 'mp4',
1139 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1140 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1141 'timestamp': 1265032391,
1142 'upload_date': '20100201',
1143 'uploader': 'Heather',
1144 },
1145 },
1146 # NBC Sports vplayer embed
1147 {
1148 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
1149 'info_dict': {
1150 'id': 'ln7x1qSThw4k',
1151 'ext': 'flv',
1152 'title': "PFT Live: New leader in the 'new-look' defense",
1153 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
1154 'uploader': 'NBCU-SPORTS',
1155 'upload_date': '20140107',
1156 'timestamp': 1389118457,
1157 },
1158 },
1159 # NBC News embed
1160 {
1161 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
1162 'md5': '1aa589c675898ae6d37a17913cf68d66',
1163 'info_dict': {
1164 'id': '701714499682',
1165 'ext': 'mp4',
1166 'title': 'PREVIEW: On Assignment: David Letterman',
1167 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
1168 },
1169 },
1170 # UDN embed
1171 {
1172 'url': 'https://video.udn.com/news/300346',
1173 'md5': 'fd2060e988c326991037b9aff9df21a6',
1174 'info_dict': {
1175 'id': '300346',
1176 'ext': 'mp4',
1177 'title': '中一中男師變性 全校師生力挺',
1178 'thumbnail': r're:^https?://.*\.jpg$',
1179 },
1180 'params': {
1181 # m3u8 download
1182 'skip_download': True,
1183 },
1184 },
1185 # Ooyala embed
1186 {
1187 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1188 'info_dict': {
1189 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1190 'ext': 'mp4',
1191 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
1192 'title': 'This is what separates the Excel masters from the wannabes',
1193 'duration': 191.933,
1194 },
1195 'params': {
1196 # m3u8 downloads
1197 'skip_download': True,
1198 }
1199 },
1200 # Brightcove URL in single quotes
1201 {
1202 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1203 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1204 'info_dict': {
1205 'id': '4255764656001',
1206 'ext': 'mp4',
1207 'title': 'SN Presents: Russell Martin, World Citizen',
1208 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1209 'uploader': 'Rogers Sportsnet',
1210 'uploader_id': '1704050871',
1211 'upload_date': '20150525',
1212 'timestamp': 1432570283,
1213 },
1214 },
1215 # Dailymotion Cloud video
1216 {
1217 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1218 'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
1219 'info_dict': {
1220 'id': 'x2uy8t3',
1221 'ext': 'mp4',
1222 'title': 'Sauvons les abeilles ! - Le débat',
1223 'description': 'md5:d9082128b1c5277987825d684939ca26',
1224 'thumbnail': r're:^https?://.*\.jpe?g$',
1225 'timestamp': 1434970506,
1226 'upload_date': '20150622',
1227 'uploader': 'Public Sénat',
1228 'uploader_id': 'xa9gza',
1229 }
1230 },
1231 # OnionStudios embed
1232 {
1233 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1234 'info_dict': {
1235 'id': '2855',
1236 'ext': 'mp4',
1237 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1238 'thumbnail': r're:^https?://.*\.jpe?g$',
1239 'uploader': 'ClickHole',
1240 'uploader_id': 'clickhole',
1241 }
1242 },
1243 # SnagFilms embed
1244 {
1245 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1246 'info_dict': {
1247 'id': '74849a00-85a9-11e1-9660-123139220831',
1248 'ext': 'mp4',
1249 'title': '#whilewewatch',
1250 }
1251 },
1252 # AdobeTVVideo embed
1253 {
1254 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1255 'md5': '43662b577c018ad707a63766462b1e87',
1256 'info_dict': {
1257 'id': '2456',
1258 'ext': 'mp4',
1259 'title': 'New experience with Acrobat DC',
1260 'description': 'New experience with Acrobat DC',
1261 'duration': 248.667,
1262 },
1263 },
1264 # BrightcoveInPageEmbed embed
1265 {
1266 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1267 'info_dict': {
1268 'id': '4238694884001',
1269 'ext': 'flv',
1270 'title': 'Tabletop: Dread, Last Thoughts',
1271 'description': 'Tabletop: Dread, Last Thoughts',
1272 'duration': 51690,
1273 },
1274 },
1275 # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1276 # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1277 {
1278 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1279 'info_dict': {
1280 'id': '4785848093001',
1281 'ext': 'mp4',
1282 'title': 'The Cardinal Pell Interview',
1283 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1284 'uploader': 'GlobeCast Australia - GlobeStream',
1285 'uploader_id': '2733773828001',
1286 'upload_date': '20160304',
1287 'timestamp': 1457083087,
1288 },
1289 'params': {
1290 # m3u8 downloads
1291 'skip_download': True,
1292 },
1293 },
1294 # Another form of arte.tv embed
1295 {
1296 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1297 'md5': '850bfe45417ddf221288c88a0cffe2e2',
1298 'info_dict': {
1299 'id': '030273-562_PLUS7-F',
1300 'ext': 'mp4',
1301 'title': 'ARTE Reportage - Nulle part, en France',
1302 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1303 'upload_date': '20160409',
1304 },
1305 },
1306 # LiveLeak embed
1307 {
1308 'url': 'http://www.wykop.pl/link/3088787/',
1309 'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
1310 'info_dict': {
1311 'id': '874_1459135191',
1312 'ext': 'mp4',
1313 'title': 'Man shows poor quality of new apartment building',
1314 'description': 'The wall is like a sand pile.',
1315 'uploader': 'Lake8737',
1316 }
1317 },
1318 # Duplicated embedded video URLs
1319 {
1320 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1321 'info_dict': {
1322 'id': '149298443_480_16c25b74_2',
1323 'ext': 'mp4',
1324 'title': 'vs. Blue Orange Spring Game',
1325 'uploader': 'www.hudl.com',
1326 },
1327 },
1328 # twitter:player:stream embed
1329 {
1330 'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
1331 'info_dict': {
1332 'id': 'master',
1333 'ext': 'mp4',
1334 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
1335 'uploader': 'www.rtl.be',
1336 },
1337 'params': {
1338 # m3u8 downloads
1339 'skip_download': True,
1340 },
1341 },
1342 # twitter:player embed
1343 {
1344 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
1345 'md5': 'a3e0df96369831de324f0778e126653c',
1346 'info_dict': {
1347 'id': '4909620399001',
1348 'ext': 'mp4',
1349 'title': 'What Do Black Holes Sound Like?',
1350 'description': 'what do black holes sound like',
1351 'upload_date': '20160524',
1352 'uploader_id': '29913724001',
1353 'timestamp': 1464107587,
1354 'uploader': 'TheAtlantic',
1355 },
1356 'add_ie': ['BrightcoveLegacy'],
1357 },
1358 # Facebook <iframe> embed
1359 {
1360 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
1361 'md5': 'fbcde74f534176ecb015849146dd3aee',
1362 'info_dict': {
1363 'id': '599637780109885',
1364 'ext': 'mp4',
1365 'title': 'Facebook video #599637780109885',
1366 },
1367 },
1368 # Facebook API embed
1369 {
1370 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
1371 'md5': 'a47372ee61b39a7b90287094d447d94e',
1372 'info_dict': {
1373 'id': '10153467542406923',
1374 'ext': 'mp4',
1375 'title': 'Facebook video #10153467542406923',
1376 },
1377 },
1378 # Wordpress "YouTube Video Importer" plugin
1379 {
1380 'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
1381 'md5': 'd16797741b560b485194eddda8121b48',
1382 'info_dict': {
1383 'id': 'HNTXWDXV9Is',
1384 'ext': 'mp4',
1385 'title': 'Blue Devils Drumline Stanford lot 2016',
1386 'upload_date': '20160627',
1387 'uploader_id': 'GENOCIDE8GENERAL10',
1388 'uploader': 'cylus cyrus',
1389 },
1390 },
1391 {
1392 # video stored on custom kaltura server
1393 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
1394 'md5': '537617d06e64dfed891fa1593c4b30cc',
1395 'info_dict': {
1396 'id': '0_1iotm5bh',
1397 'ext': 'mp4',
1398 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
1399 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
1400 'uploader_id': 'videos.expansion@el-mundo.net',
1401 'upload_date': '20150429',
1402 'timestamp': 1430303472,
1403 },
1404 'add_ie': ['Kaltura'],
1405 },
1406 {
1407 # Non-standard Vimeo embed
1408 'url': 'https://openclassrooms.com/courses/understanding-the-web',
1409 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
1410 'info_dict': {
1411 'id': '148867247',
1412 'ext': 'mp4',
1413 'title': 'Understanding the web - Teaser',
1414 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
1415 'upload_date': '20151214',
1416 'uploader': 'OpenClassrooms',
1417 'uploader_id': 'openclassrooms',
1418 },
1419 'add_ie': ['Vimeo'],
1420 },
1421 {
1422 # generic vimeo embed that requires original URL passed as Referer
1423 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
1424 'only_matching': True,
1425 },
1426 {
1427 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
1428 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
1429 'info_dict': {
1430 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
1431 'ext': 'mp4',
1432 'title': 'Big Buck Bunny',
1433 'description': 'Royalty free test video',
1434 'timestamp': 1432816365,
1435 'upload_date': '20150528',
1436 'is_live': False,
1437 },
1438 'params': {
1439 'skip_download': True,
1440 },
1441 'add_ie': [ArkenaIE.ie_key()],
1442 },
1443 {
1444 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
1445 'info_dict': {
1446 'id': '1c7141f46c',
1447 'ext': 'mp4',
1448 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
1449 },
1450 'params': {
1451 'skip_download': True,
1452 },
1453 'add_ie': [Vbox7IE.ie_key()],
1454 },
1455 {
1456 # DBTV embeds
1457 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
1458 'info_dict': {
1459 'id': '43254897',
1460 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
1461 },
1462 'playlist_mincount': 3,
1463 },
1464 {
1465 # Videa embeds
1466 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
1467 'info_dict': {
1468 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
1469 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
1470 },
1471 'playlist_mincount': 2,
1472 },
1473 {
1474 # 20 minuten embed
1475 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
1476 'info_dict': {
1477 'id': '523629',
1478 'ext': 'mp4',
1479 'title': 'So kommen Sie bei Eis und Schnee sicher an',
1480 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
1481 },
1482 'params': {
1483 'skip_download': True,
1484 },
1485 'add_ie': [TwentyMinutenIE.ie_key()],
1486 }
1487 # {
1488 # # TODO: find another test
1489 # # http://schema.org/VideoObject
1490 # 'url': 'https://flipagram.com/f/nyvTSJMKId',
1491 # 'md5': '888dcf08b7ea671381f00fab74692755',
1492 # 'info_dict': {
1493 # 'id': 'nyvTSJMKId',
1494 # 'ext': 'mp4',
1495 # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
1496 # 'description': '#love for cats.',
1497 # 'timestamp': 1461244995,
1498 # 'upload_date': '20160421',
1499 # },
1500 # 'params': {
1501 # 'force_generic_extractor': True,
1502 # },
1503 # }
1504 ]
1505
def report_following_redirect(self, new_url):
    """Announce on the downloader's screen output that new_url is being followed.

    new_url is interpolated into a '[redirect] ...' message which is
    handed to self._downloader.to_screen().
    """
    notice = '[redirect] Following redirect to %s' % new_url
    self._downloader.to_screen(notice)
1509
1510 def _extract_rss(self, url, video_id, doc):
1511 playlist_title = doc.find('./channel/title').text
1512 playlist_desc_el = doc.find('./channel/description')
1513 playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1514
1515 entries = []
1516 for it in doc.findall('./channel/item'):
1517 next_url = xpath_text(it, 'link', fatal=False)
1518 if not next_url:
1519 enclosure_nodes = it.findall('./enclosure')
1520 for e in enclosure_nodes:
1521 next_url = e.attrib.get('url')
1522 if next_url:
1523 break
1524
1525 if not next_url:
1526 continue
1527
1528 entries.append({
1529 '_type': 'url',
1530 'url': next_url,
1531 'title': it.find('title').text,
1532 })
1533
1534 return {
1535 '_type': 'playlist',
1536 'id': url,
1537 'title': playlist_title,
1538 'description': playlist_desc,
1539 'entries': entries,
1540 }
1541
def _extract_camtasia(self, url, video_id, webpage):
    """Extract a Camtasia project embedded in webpage as a playlist.

    Returns None if no camtasia video can be found, i.e. when the page
    has no csConfigFile variable or the downloaded configuration has no
    ./playlist/array/fileset node.

    The configuration file path found in the page is resolved against
    url and downloaded as XML.  Each child of the fileset node that has a
    non-empty <uri> becomes one entry: its id is the URI's file name
    without extension, its title is '<page title> - <child tag>', and its
    duration is taken from <duration> when present (None otherwise).
    """
    camtasia_cfg = self._search_regex(
        r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
        webpage, 'camtasia configuration file', default=None)
    if camtasia_cfg is None:
        return None

    # The DC.title meta tag is looked up with fatal=True: once a
    # configuration file is referenced, a title is treated as mandatory.
    title = self._html_search_meta('DC.title', webpage, fatal=True)

    camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
    camtasia_cfg = self._download_xml(
        camtasia_url, video_id,
        note='Downloading camtasia configuration',
        errnote='Failed to download camtasia configuration')
    fileset_node = camtasia_cfg.find('./playlist/array/fileset')
    if fileset_node is None:
        # Configuration without a fileset: nothing to extract.
        return None

    entries = []
    # Iterate the element directly; Element.getchildren() was removed
    # in Python 3.9.
    for n in fileset_node:
        url_n = n.find('./uri')
        if url_n is None or not url_n.text:
            continue

        duration_n = n.find('./duration')
        entries.append({
            'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
            'title': '%s - %s' % (title, n.tag),
            'url': compat_urlparse.urljoin(url, url_n.text),
            'duration': float_or_none(
                None if duration_n is None else duration_n.text),
        })

    return {
        '_type': 'playlist',
        'entries': entries,
        'title': title,
    }
1578
1579 def _real_extract(self, url):
1580 if url.startswith('//'):
1581 return {
1582 '_type': 'url',
1583 'url': self.http_scheme() + url,
1584 }
1585
1586 parsed_url = compat_urlparse.urlparse(url)
1587 if not parsed_url.scheme:
1588 default_search = self._downloader.params.get('default_search')
1589 if default_search is None:
1590 default_search = 'fixup_error'
1591
1592 if default_search in ('auto', 'auto_warning', 'fixup_error'):
1593 if '/' in url:
1594 self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1595 return self.url_result('http://' + url)
1596 elif default_search != 'fixup_error':
1597 if default_search == 'auto_warning':
1598 if re.match(r'^(?:url|URL)$', url):
1599 raise ExtractorError(
1600 'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
1601 expected=True)
1602 else:
1603 self._downloader.report_warning(
1604 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
1605 return self.url_result('ytsearch:' + url)
1606
1607 if default_search in ('error', 'fixup_error'):
1608 raise ExtractorError(
1609 '%r is not a valid URL. '
1610 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
1611 % (url, url), expected=True)
1612 else:
1613 if ':' not in default_search:
1614 default_search += ':'
1615 return self.url_result(default_search + url)
1616
1617 url, smuggled_data = unsmuggle_url(url)
1618 force_videoid = None
1619 is_intentional = smuggled_data and smuggled_data.get('to_generic')
1620 if smuggled_data and 'force_videoid' in smuggled_data:
1621 force_videoid = smuggled_data['force_videoid']
1622 video_id = force_videoid
1623 else:
1624 video_id = self._generic_id(url)
1625
1626 self.to_screen('%s: Requesting header' % video_id)
1627
1628 head_req = HEADRequest(url)
1629 head_response = self._request_webpage(
1630 head_req, video_id,
1631 note=False, errnote='Could not send HEAD request to %s' % url,
1632 fatal=False)
1633
1634 if head_response is not False:
1635 # Check for redirect
1636 new_url = head_response.geturl()
1637 if url != new_url:
1638 self.report_following_redirect(new_url)
1639 if force_videoid:
1640 new_url = smuggle_url(
1641 new_url, {'force_videoid': force_videoid})
1642 return self.url_result(new_url)
1643
1644 full_response = None
1645 if head_response is False:
1646 request = sanitized_Request(url)
1647 request.add_header('Accept-Encoding', '*')
1648 full_response = self._request_webpage(request, video_id)
1649 head_response = full_response
1650
1651 info_dict = {
1652 'id': video_id,
1653 'title': self._generic_title(url),
1654 'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
1655 }
1656
1657 # Check for direct link to a video
1658 content_type = head_response.headers.get('Content-Type', '').lower()
1659 m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1660 if m:
1661 format_id = m.group('format_id')
1662 if format_id.endswith('mpegurl'):
1663 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1664 elif format_id == 'f4m':
1665 formats = self._extract_f4m_formats(url, video_id)
1666 else:
1667 formats = [{
1668 'format_id': m.group('format_id'),
1669 'url': url,
1670 'vcodec': 'none' if m.group('type') == 'audio' else None
1671 }]
1672 info_dict['direct'] = True
1673 self._sort_formats(formats)
1674 info_dict['formats'] = formats
1675 return info_dict
1676
1677 if not self._downloader.params.get('test', False) and not is_intentional:
1678 force = self._downloader.params.get('force_generic_extractor', False)
1679 self._downloader.report_warning(
1680 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1681
1682 if not full_response:
1683 request = sanitized_Request(url)
1684 # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1685 # making it impossible to download only chunk of the file (yet we need only 512kB to
1686 # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1687 # that will always result in downloading the whole file that is not desirable.
1688 # Therefore for extraction pass we have to override Accept-Encoding to any in order
1689 # to accept raw bytes and being able to download only a chunk.
1690 # It may probably better to solve this by checking Content-Type for application/octet-stream
1691 # after HEAD request finishes, but not sure if we can rely on this.
1692 request.add_header('Accept-Encoding', '*')
1693 full_response = self._request_webpage(request, video_id)
1694
1695 first_bytes = full_response.read(512)
1696
1697 # Is it an M3U playlist?
1698 if first_bytes.startswith(b'#EXTM3U'):
1699 info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
1700 self._sort_formats(info_dict['formats'])
1701 return info_dict
1702
1703 # Maybe it's a direct link to a video?
1704 # Be careful not to download the whole thing!
1705 if not is_html(first_bytes):
1706 self._downloader.report_warning(
1707 'URL could be a direct video link, returning it as such.')
1708 info_dict.update({
1709 'direct': True,
1710 'url': url,
1711 })
1712 return info_dict
1713
1714 webpage = self._webpage_read_content(
1715 full_response, url, video_id, prefix=first_bytes)
1716
1717 self.report_extraction(video_id)
1718
1719 # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1720 try:
1721 doc = compat_etree_fromstring(webpage.encode('utf-8'))
1722 if doc.tag == 'rss':
1723 return self._extract_rss(url, video_id, doc)
1724 elif doc.tag == 'SmoothStreamingMedia':
1725 info_dict['formats'] = self._parse_ism_formats(doc, url)
1726 self._sort_formats(info_dict['formats'])
1727 return info_dict
1728 elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1729 smil = self._parse_smil(doc, url, video_id)
1730 self._sort_formats(smil['formats'])
1731 return smil
1732 elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1733 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1734 elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1735 info_dict['formats'] = self._parse_mpd_formats(
1736 doc, video_id,
1737 mpd_base_url=full_response.geturl().rpartition('/')[0],
1738 mpd_url=url)
1739 self._sort_formats(info_dict['formats'])
1740 return info_dict
1741 elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1742 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1743 self._sort_formats(info_dict['formats'])
1744 return info_dict
1745 except compat_xml_parse_error:
1746 pass
1747
1748 # Is it a Camtasia project?
1749 camtasia_res = self._extract_camtasia(url, video_id, webpage)
1750 if camtasia_res is not None:
1751 return camtasia_res
1752
1753 # Sometimes embedded video player is hidden behind percent encoding
1754 # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1755 # Unescaping the whole page allows to handle those cases in a generic way
1756 webpage = compat_urllib_parse_unquote(webpage)
1757
1758 # it's tempting to parse this further, but you would
1759 # have to take into account all the variations like
1760 # Video Title - Site Name
1761 # Site Name | Video Title
1762 # Video Title - Tagline | Site Name
1763 # and so on and so forth; it's just not practical
1764 video_title = self._og_search_title(
1765 webpage, default=None) or self._html_search_regex(
1766 r'(?s)<title>(.*?)</title>', webpage, 'video title',
1767 default='video')
1768
1769 # Try to detect age limit automatically
1770 age_limit = self._rta_search(webpage)
1771 # And then there are the jokers who advertise that they use RTA,
1772 # but actually don't.
1773 AGE_LIMIT_MARKERS = [
1774 r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1775 ]
1776 if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1777 age_limit = 18
1778
1779 # video uploader is domain name
1780 video_uploader = self._search_regex(
1781 r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1782
1783 video_description = self._og_search_description(webpage, default=None)
1784 video_thumbnail = self._og_search_thumbnail(webpage, default=None)
1785
1786 # Helper method
1787 def _playlist_from_matches(matches, getter=None, ie=None):
1788 urlrs = orderedSet(
1789 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1790 for m in matches)
1791 return self.playlist_result(
1792 urlrs, playlist_id=video_id, playlist_title=video_title)
1793
1794 # Look for Brightcove Legacy Studio embeds
1795 bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1796 if bc_urls:
1797 self.to_screen('Brightcove video detected.')
1798 entries = [{
1799 '_type': 'url',
1800 'url': smuggle_url(bc_url, {'Referer': url}),
1801 'ie_key': 'BrightcoveLegacy'
1802 } for bc_url in bc_urls]
1803
1804 return {
1805 '_type': 'playlist',
1806 'title': video_title,
1807 'id': video_id,
1808 'entries': entries,
1809 }
1810
1811 # Look for Brightcove New Studio embeds
1812 bc_urls = BrightcoveNewIE._extract_urls(webpage)
1813 if bc_urls:
1814 return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
1815
1816 # Look for ThePlatform embeds
1817 tp_urls = ThePlatformIE._extract_urls(webpage)
1818 if tp_urls:
1819 return _playlist_from_matches(tp_urls, ie='ThePlatform')
1820
1821 # Look for Vessel embeds
1822 vessel_urls = VesselIE._extract_urls(webpage)
1823 if vessel_urls:
1824 return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
1825
1826 # Look for embedded rtl.nl player
1827 matches = re.findall(
1828 r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1829 webpage)
1830 if matches:
1831 return _playlist_from_matches(matches, ie='RtlNl')
1832
1833 vimeo_urls = VimeoIE._extract_urls(url, webpage)
1834 if vimeo_urls:
1835 return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
1836
1837 vid_me_embed_url = self._search_regex(
1838 r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1839 webpage, 'vid.me embed', default=None)
1840 if vid_me_embed_url is not None:
1841 return self.url_result(vid_me_embed_url, 'Vidme')
1842
1843 # Look for embedded YouTube player
1844 matches = re.findall(r'''(?x)
1845 (?:
1846 <iframe[^>]+?src=|
1847 data-video-url=|
1848 <embed[^>]+?src=|
1849 embedSWF\(?:\s*|
1850 new\s+SWFObject\(
1851 )
1852 (["\'])
1853 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1854 (?:embed|v|p)/.+?)
1855 \1''', webpage)
1856 if matches:
1857 return _playlist_from_matches(
1858 matches, lambda m: unescapeHTML(m[1]))
1859
1860 # Look for lazyYT YouTube embed
1861 matches = re.findall(
1862 r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1863 if matches:
1864 return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1865
1866 # Look for Wordpress "YouTube Video Importer" plugin
1867 matches = re.findall(r'''(?x)<div[^>]+
1868 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1869 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1870 if matches:
1871 return _playlist_from_matches(matches, lambda m: m[-1])
1872
1873 matches = DailymotionIE._extract_urls(webpage)
1874 if matches:
1875 return _playlist_from_matches(matches)
1876
1877 # Look for embedded Dailymotion playlist player (#3822)
1878 m = re.search(
1879 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1880 if m:
1881 playlists = re.findall(
1882 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1883 if playlists:
1884 return _playlist_from_matches(
1885 playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1886
1887 # Look for embedded Wistia player
1888 match = re.search(
1889 r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1890 if match:
1891 embed_url = self._proto_relative_url(
1892 unescapeHTML(match.group('url')))
1893 return {
1894 '_type': 'url_transparent',
1895 'url': embed_url,
1896 'ie_key': 'Wistia',
1897 'uploader': video_uploader,
1898 }
1899
1900 match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1901 if match:
1902 return {
1903 '_type': 'url_transparent',
1904 'url': 'wistia:%s' % match.group('id'),
1905 'ie_key': 'Wistia',
1906 'uploader': video_uploader,
1907 }
1908
1909 match = re.search(
1910 r'''(?sx)
1911 <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
1912 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
1913 ''', webpage)
1914 if match:
1915 return self.url_result(self._proto_relative_url(
1916 'wistia:%s' % match.group('id')), 'Wistia')
1917
1918 # Look for SVT player
1919 svt_url = SVTIE._extract_url(webpage)
1920 if svt_url:
1921 return self.url_result(svt_url, 'SVT')
1922
1923 # Look for embedded condenast player
1924 matches = re.findall(
1925 r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1926 webpage)
1927 if matches:
1928 return {
1929 '_type': 'playlist',
1930 'entries': [{
1931 '_type': 'url',
1932 'ie_key': 'CondeNast',
1933 'url': ma,
1934 } for ma in matches],
1935 'title': video_title,
1936 'id': video_id,
1937 }
1938
1939 # Look for Bandcamp pages with custom domain
1940 mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1941 if mobj is not None:
1942 burl = unescapeHTML(mobj.group(1))
1943 # Don't set the extractor because it can be a track url or an album
1944 return self.url_result(burl)
1945
1946 # Look for embedded Vevo player
1947 mobj = re.search(
1948 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1949 if mobj is not None:
1950 return self.url_result(mobj.group('url'))
1951
1952 # Look for embedded Viddler player
1953 mobj = re.search(
1954 r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1955 webpage)
1956 if mobj is not None:
1957 return self.url_result(mobj.group('url'))
1958
1959 # Look for NYTimes player
1960 mobj = re.search(
1961 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1962 webpage)
1963 if mobj is not None:
1964 return self.url_result(mobj.group('url'))
1965
1966 # Look for Libsyn player
1967 mobj = re.search(
1968 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1969 if mobj is not None:
1970 return self.url_result(mobj.group('url'))
1971
1972 # Look for Ooyala videos
1973 mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1974 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1975 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1976 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1977 if mobj is not None:
1978 embed_token = self._search_regex(
1979 r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
1980 webpage, 'ooyala embed token', default=None)
1981 return OoyalaIE._build_url_result(smuggle_url(
1982 mobj.group('ec'), {
1983 'domain': url,
1984 'embed_token': embed_token,
1985 }))
1986
1987 # Look for multiple Ooyala embeds on SBN network websites
1988 mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1989 if mobj is not None:
1990 embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1991 if embeds:
1992 return _playlist_from_matches(
1993 embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
1994
1995 # Look for Aparat videos
1996 mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1997 if mobj is not None:
1998 return self.url_result(mobj.group(1), 'Aparat')
1999
2000 # Look for MPORA videos
2001 mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
2002 if mobj is not None:
2003 return self.url_result(mobj.group(1), 'Mpora')
2004
2005 # Look for embedded NovaMov-based player
2006 mobj = re.search(
2007 r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
2008 (?P<url>http://(?:(?:embed|www)\.)?
2009 (?:novamov\.com|
2010 nowvideo\.(?:ch|sx|eu|at|ag|co)|
2011 videoweed\.(?:es|com)|
2012 movshare\.(?:net|sx|ag)|
2013 divxstage\.(?:eu|net|ch|co|at|ag))
2014 /embed\.php.+?)\1''', webpage)
2015 if mobj is not None:
2016 return self.url_result(mobj.group('url'))
2017
2018 # Look for embedded Facebook player
2019 facebook_url = FacebookIE._extract_url(webpage)
2020 if facebook_url is not None:
2021 return self.url_result(facebook_url, 'Facebook')
2022
2023 # Look for embedded VK player
2024 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
2025 if mobj is not None:
2026 return self.url_result(mobj.group('url'), 'VK')
2027
2028 # Look for embedded Odnoklassniki player
2029 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
2030 if mobj is not None:
2031 return self.url_result(mobj.group('url'), 'Odnoklassniki')
2032
2033 # Look for embedded ivi player
2034 mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
2035 if mobj is not None:
2036 return self.url_result(mobj.group('url'), 'Ivi')
2037
2038 # Look for embedded Huffington Post player
2039 mobj = re.search(
2040 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
2041 if mobj is not None:
2042 return self.url_result(mobj.group('url'), 'HuffPost')
2043
2044 # Look for embed.ly
2045 mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
2046 if mobj is not None:
2047 return self.url_result(mobj.group('url'))
2048 mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
2049 if mobj is not None:
2050 return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
2051
2052 # Look for funnyordie embed
2053 matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
2054 if matches:
2055 return _playlist_from_matches(
2056 matches, getter=unescapeHTML, ie='FunnyOrDie')
2057
2058 # Look for BBC iPlayer embed
2059 matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
2060 if matches:
2061 return _playlist_from_matches(matches, ie='BBCCoUk')
2062
2063 # Look for embedded RUTV player
2064 rutv_url = RUTVIE._extract_url(webpage)
2065 if rutv_url:
2066 return self.url_result(rutv_url, 'RUTV')
2067
2068 # Look for embedded TVC player
2069 tvc_url = TVCIE._extract_url(webpage)
2070 if tvc_url:
2071 return self.url_result(tvc_url, 'TVC')
2072
2073 # Look for embedded SportBox player
2074 sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
2075 if sportbox_urls:
2076 return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
2077
2078 # Look for embedded XHamster player
2079 xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
2080 if xhamster_urls:
2081 return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
2082
2083 # Look for embedded TNAFlixNetwork player
2084 tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
2085 if tnaflix_urls:
2086 return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
2087
2088 # Look for embedded PornHub player
2089 pornhub_urls = PornHubIE._extract_urls(webpage)
2090 if pornhub_urls:
2091 return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
2092
2093 # Look for embedded DrTuber player
2094 drtuber_urls = DrTuberIE._extract_urls(webpage)
2095 if drtuber_urls:
2096 return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
2097
2098 # Look for embedded RedTube player
2099 redtube_urls = RedTubeIE._extract_urls(webpage)
2100 if redtube_urls:
2101 return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
2102
2103 # Look for embedded Tvigle player
2104 mobj = re.search(
2105 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
2106 if mobj is not None:
2107 return self.url_result(mobj.group('url'), 'Tvigle')
2108
2109 # Look for embedded TED player
2110 mobj = re.search(
2111 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
2112 if mobj is not None:
2113 return self.url_result(mobj.group('url'), 'TED')
2114
2115 # Look for embedded Ustream videos
2116 ustream_url = UstreamIE._extract_url(webpage)
2117 if ustream_url:
2118 return self.url_result(ustream_url, UstreamIE.ie_key())
2119
2120 # Look for embedded arte.tv player
2121 mobj = re.search(
2122 r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
2123 webpage)
2124 if mobj is not None:
2125 return self.url_result(mobj.group('url'), 'ArteTVEmbed')
2126
2127 # Look for embedded francetv player
2128 mobj = re.search(
2129 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
2130 webpage)
2131 if mobj is not None:
2132 return self.url_result(mobj.group('url'))
2133
2134 # Look for embedded smotri.com player
2135 smotri_url = SmotriIE._extract_url(webpage)
2136 if smotri_url:
2137 return self.url_result(smotri_url, 'Smotri')
2138
2139 # Look for embedded Myvi.ru player
2140 myvi_url = MyviIE._extract_url(webpage)
2141 if myvi_url:
2142 return self.url_result(myvi_url)
2143
2144 # Look for embedded soundcloud player
2145 soundcloud_urls = SoundcloudIE._extract_urls(webpage)
2146 if soundcloud_urls:
2147 return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
2148
2149 # Look for tunein player
2150 tunein_urls = TuneInBaseIE._extract_urls(webpage)
2151 if tunein_urls:
2152 return _playlist_from_matches(tunein_urls)
2153
2154 # Look for embedded mtvservices player
2155 mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
2156 if mtvservices_url:
2157 return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
2158
2159 # Look for embedded yahoo player
2160 mobj = re.search(
2161 r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
2162 webpage)
2163 if mobj is not None:
2164 return self.url_result(mobj.group('url'), 'Yahoo')
2165
2166 # Look for embedded sbs.com.au player
2167 mobj = re.search(
2168 r'''(?x)
2169 (?:
2170 <meta\s+property="og:video"\s+content=|
2171 <iframe[^>]+?src=
2172 )
2173 (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2174 webpage)
2175 if mobj is not None:
2176 return self.url_result(mobj.group('url'), 'SBS')
2177
2178 # Look for embedded Cinchcast player
2179 mobj = re.search(
2180 r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
2181 webpage)
2182 if mobj is not None:
2183 return self.url_result(mobj.group('url'), 'Cinchcast')
2184
2185 mobj = re.search(
2186 r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
2187 webpage)
2188 if not mobj:
2189 mobj = re.search(
2190 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
2191 webpage)
2192 if mobj is not None:
2193 return self.url_result(mobj.group('url'), 'MLB')
2194
2195 mobj = re.search(
2196 r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
2197 webpage)
2198 if mobj is not None:
2199 return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
2200
2201 mobj = re.search(
2202 r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
2203 webpage)
2204 if mobj is not None:
2205 return self.url_result(mobj.group('url'), 'Livestream')
2206
2207 # Look for Zapiks embed
2208 mobj = re.search(
2209 r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
2210 if mobj is not None:
2211 return self.url_result(mobj.group('url'), 'Zapiks')
2212
2213 # Look for Kaltura embeds
2214 kaltura_url = KalturaIE._extract_url(webpage)
2215 if kaltura_url:
2216 return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
2217
2218 # Look for Eagle.Platform embeds
2219 eagleplatform_url = EaglePlatformIE._extract_url(webpage)
2220 if eagleplatform_url:
2221 return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
2222
2223 # Look for ClipYou (uses Eagle.Platform) embeds
2224 mobj = re.search(
2225 r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
2226 if mobj is not None:
2227 return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
2228
2229 # Look for Pladform embeds
2230 pladform_url = PladformIE._extract_url(webpage)
2231 if pladform_url:
2232 return self.url_result(pladform_url)
2233
2234 # Look for Videomore embeds
2235 videomore_url = VideomoreIE._extract_url(webpage)
2236 if videomore_url:
2237 return self.url_result(videomore_url)
2238
2239 # Look for Webcaster embeds
2240 webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
2241 if webcaster_url:
2242 return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
2243
2244 # Look for Playwire embeds
2245 mobj = re.search(
2246 r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
2247 if mobj is not None:
2248 return self.url_result(mobj.group('url'))
2249
2250 # Look for 5min embeds
2251 mobj = re.search(
2252 r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
2253 if mobj is not None:
2254 return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
2255
2256 # Look for Crooks and Liars embeds
2257 mobj = re.search(
2258 r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
2259 if mobj is not None:
2260 return self.url_result(mobj.group('url'))
2261
2262 # Look for NBC Sports VPlayer embeds
2263 nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
2264 if nbc_sports_url:
2265 return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
2266
2267 # Look for NBC News embeds
2268 nbc_news_embed_url = re.search(
2269 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
2270 if nbc_news_embed_url:
2271 return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
2272
2273 # Look for Google Drive embeds
2274 google_drive_url = GoogleDriveIE._extract_url(webpage)
2275 if google_drive_url:
2276 return self.url_result(google_drive_url, 'GoogleDrive')
2277
2278 # Look for UDN embeds
2279 mobj = re.search(
2280 r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
2281 if mobj is not None:
2282 return self.url_result(
2283 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
2284
2285 # Look for Senate ISVP iframe
2286 senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
2287 if senate_isvp_url:
2288 return self.url_result(senate_isvp_url, 'SenateISVP')
2289
2290 # Look for Dailymotion Cloud videos
2291 dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
2292 if dmcloud_url:
2293 return self.url_result(dmcloud_url, 'DailymotionCloud')
2294
2295 # Look for OnionStudios embeds
2296 onionstudios_url = OnionStudiosIE._extract_url(webpage)
2297 if onionstudios_url:
2298 return self.url_result(onionstudios_url)
2299
2300 # Look for ViewLift embeds
2301 viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
2302 if viewlift_url:
2303 return self.url_result(viewlift_url)
2304
2305 # Look for JWPlatform embeds
2306 jwplatform_url = JWPlatformIE._extract_url(webpage)
2307 if jwplatform_url:
2308 return self.url_result(jwplatform_url, 'JWPlatform')
2309
2310 # Look for Digiteka embeds
2311 digiteka_url = DigitekaIE._extract_url(webpage)
2312 if digiteka_url:
2313 return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
2314
2315 # Look for Arkena embeds
2316 arkena_url = ArkenaIE._extract_url(webpage)
2317 if arkena_url:
2318 return self.url_result(arkena_url, ArkenaIE.ie_key())
2319
2320 # Look for Piksel embeds
2321 piksel_url = PikselIE._extract_url(webpage)
2322 if piksel_url:
2323 return self.url_result(piksel_url, PikselIE.ie_key())
2324
2325 # Look for Limelight embeds
2326 mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
2327 if mobj:
2328 lm = {
2329 'Media': 'media',
2330 'Channel': 'channel',
2331 'ChannelList': 'channel_list',
2332 }
2333 return self.url_result('limelight:%s:%s' % (
2334 lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
2335
2336 mobj = re.search(
2337 r'''(?sx)
2338 <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
2339 <param[^>]+
2340 name=(["\'])flashVars\2[^>]+
2341 value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
2342 ''', webpage)
2343 if mobj:
2344 return self.url_result('limelight:media:%s' % mobj.group('id'))
2345
2346 # Look for AdobeTVVideo embeds
2347 mobj = re.search(
2348 r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
2349 webpage)
2350 if mobj is not None:
2351 return self.url_result(
2352 self._proto_relative_url(unescapeHTML(mobj.group(1))),
2353 'AdobeTVVideo')
2354
2355 # Look for Vine embeds
2356 mobj = re.search(
2357 r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
2358 webpage)
2359 if mobj is not None:
2360 return self.url_result(
2361 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
2362
2363 # Look for VODPlatform embeds
2364 mobj = re.search(
2365 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
2366 webpage)
2367 if mobj is not None:
2368 return self.url_result(
2369 self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
2370
2371 # Look for Mangomolo embeds
2372 mobj = re.search(
2373 r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
2374 (?:
2375 video\?.*?\bid=(?P<video_id>\d+)|
2376 index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
2377 ).+?)\1''', webpage)
2378 if mobj is not None:
2379 info = {
2380 '_type': 'url_transparent',
2381 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
2382 'title': video_title,
2383 'description': video_description,
2384 'thumbnail': video_thumbnail,
2385 'uploader': video_uploader,
2386 }
2387 video_id = mobj.group('video_id')
2388 if video_id:
2389 info.update({
2390 'ie_key': 'MangomoloVideo',
2391 'id': video_id,
2392 })
2393 else:
2394 info.update({
2395 'ie_key': 'MangomoloLive',
2396 'id': mobj.group('channel_id'),
2397 })
2398 return info
2399
2400 # Look for Instagram embeds
2401 instagram_embed_url = InstagramIE._extract_embed_url(webpage)
2402 if instagram_embed_url is not None:
2403 return self.url_result(
2404 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
2405
2406 # Look for LiveLeak embeds
2407 liveleak_url = LiveLeakIE._extract_url(webpage)
2408 if liveleak_url:
2409 return self.url_result(liveleak_url, 'LiveLeak')
2410
2411 # Look for 3Q SDN embeds
2412 threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2413 if threeqsdn_url:
2414 return {
2415 '_type': 'url_transparent',
2416 'ie_key': ThreeQSDNIE.ie_key(),
2417 'url': self._proto_relative_url(threeqsdn_url),
2418 'title': video_title,
2419 'description': video_description,
2420 'thumbnail': video_thumbnail,
2421 'uploader': video_uploader,
2422 }
2423
2424 # Look for VBOX7 embeds
2425 vbox7_url = Vbox7IE._extract_url(webpage)
2426 if vbox7_url:
2427 return self.url_result(vbox7_url, Vbox7IE.ie_key())
2428
2429 # Look for DBTV embeds
2430 dbtv_urls = DBTVIE._extract_urls(webpage)
2431 if dbtv_urls:
2432 return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
2433
2434 # Look for Videa embeds
2435 videa_urls = VideaIE._extract_urls(webpage)
2436 if videa_urls:
2437 return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
2438
2439 # Look for 20 minuten embeds
2440 twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
2441 if twentymin_urls:
2442 return _playlist_from_matches(
2443 twentymin_urls, ie=TwentyMinutenIE.ie_key())
2444
2445 # Looking for http://schema.org/VideoObject
2446 json_ld = self._search_json_ld(
2447 webpage, video_id, default={}, expected_type='VideoObject')
2448 if json_ld.get('url'):
2449 info_dict.update({
2450 'title': video_title or info_dict['title'],
2451 'description': video_description,
2452 'thumbnail': video_thumbnail,
2453 'age_limit': age_limit
2454 })
2455 info_dict.update(json_ld)
2456 return info_dict
2457
2458 # Look for HTML5 media
2459 entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
2460 if entries:
2461 for entry in entries:
2462 entry.update({
2463 'id': video_id,
2464 'title': video_title,
2465 })
2466 self._sort_formats(entry['formats'])
2467 return self.playlist_result(entries)
2468
def check_video(vurl):
    """Tell whether ``vurl`` should be kept as a candidate video URL.

    A URL that ``YoutubeIE.suitable`` accepts is always kept. Any other
    URL is kept only if its path component contains a dot and the
    extension reported by ``determine_ext`` for that path is not one of
    the listed non-video extensions.
    """
    # URLs recognised by the YouTube extractor bypass the extension test.
    if not YoutubeIE.suitable(vurl):
        url_path = compat_urlparse.urlparse(vurl).path
        extension = determine_ext(url_path)
        rejected_exts = ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
        # A path without any dot is rejected outright.
        if '.' not in url_path:
            return False
        return extension not in rejected_exts
    return True
2475
def filter_video(urls):
    """Return a list of the items of ``urls`` for which ``check_video`` is truthy.

    The relative order of the kept items is unchanged.
    """
    return [candidate for candidate in urls if check_video(candidate)]
2478
2479 # Start with something easy: JW Player in SWFObject
2480 found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
2481 if not found:
2482 # Look for gorilla-vid style embedding
2483 found = filter_video(re.findall(r'''(?sx)
2484 (?:
2485 jw_plugins|
2486 JWPlayerOptions|
2487 jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
2488 )
2489 .*?
2490 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
2491 if not found:
2492 # Broaden the search a little bit
2493 found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
2494 if not found:
2495 # Broaden the findall a little bit: JWPlayer JS loader
2496 found = filter_video(re.findall(
2497 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
2498 if not found:
2499 # Flow player
2500 found = filter_video(re.findall(r'''(?xs)
2501 flowplayer\("[^"]+",\s*
2502 \{[^}]+?\}\s*,
2503 \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
2504 ["']?url["']?\s*:\s*["']([^"']+)["']
2505 ''', webpage))
2506 if not found:
2507 # Cinerama player
2508 found = re.findall(
2509 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
2510 if not found:
2511 # Try to find twitter cards info
2512 # twitter:player:stream should be checked before twitter:player since
2513 # it is expected to contain a raw stream (see
2514 # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
2515 found = filter_video(re.findall(
2516 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
2517 if not found:
2518 # We look for Open Graph info:
2519 # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
2520 m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
2521 # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
2522 if m_video_type is not None:
2523 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
2524 if not found:
2525 REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
2526 found = re.search(
2527 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
2528 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
2529 webpage)
2530 if not found:
2531 # Look also in Refresh HTTP header
2532 refresh_header = head_response.headers.get('Refresh')
2533 if refresh_header:
2534 # In python 2 response HTTP headers are bytestrings
2535 if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2536 refresh_header = refresh_header.decode('iso-8859-1')
2537 found = re.search(REDIRECT_REGEX, refresh_header)
2538 if found:
2539 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
2540 self.report_following_redirect(new_url)
2541 return {
2542 '_type': 'url',
2543 'url': new_url,
2544 }
2545
2546 if not found:
2547 # twitter:player is a https URL to iframe player that may or may not
2548 # be supported by youtube-dl thus this is checked the very last (see
2549 # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
2550 embed_url = self._html_search_meta('twitter:player', webpage, default=None)
2551 if embed_url:
2552 return self.url_result(embed_url)
2553
2554 if not found:
2555 raise UnsupportedError(url)
2556
2557 entries = []
2558 for video_url in orderedSet(found):
2559 video_url = unescapeHTML(video_url)
2560 video_url = video_url.replace('\\/', '/')
2561 video_url = compat_urlparse.urljoin(url, video_url)
2562 video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
2563
2564 # Sometimes, jwplayer extraction will result in a YouTube URL
2565 if YoutubeIE.suitable(video_url):
2566 entries.append(self.url_result(video_url, 'Youtube'))
2567 continue
2568
2569 # here's a fun little line of code for you:
2570 video_id = os.path.splitext(video_id)[0]
2571
2572 entry_info_dict = {
2573 'id': video_id,
2574 'uploader': video_uploader,
2575 'title': video_title,
2576 'age_limit': age_limit,
2577 }
2578
2579 ext = determine_ext(video_url)
2580 if ext == 'smil':
2581 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
2582 elif ext == 'xspf':
2583 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
2584 elif ext == 'm3u8':
2585 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2586 elif ext == 'mpd':
2587 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2588 elif ext == 'f4m':
2589 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2590 elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
2591 # Just matching .ism/manifest is not enough to be reliably sure
2592 # whether it's actually an ISM manifest or some other streaming
2593 # manifest since there are various streaming URL formats
2594 # possible (see [1]) as well as some other shenanigans like
2595 # .smil/manifest URLs that actually serve an ISM (see [2]) and
2596 # so on.
2597 # Thus the most reasonable way to solve this is to delegate
2598 # to generic extractor in order to look into the contents of
2599 # the manifest itself.
2600 # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
2601 # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
2602 entry_info_dict = self.url_result(
2603 smuggle_url(video_url, {'to_generic': True}),
2604 GenericIE.ie_key())
2605 else:
2606 entry_info_dict['url'] = video_url
2607
2608 if entry_info_dict.get('formats'):
2609 self._sort_formats(entry_info_dict['formats'])
2610
2611 entries.append(entry_info_dict)
2612
2613 if len(entries) == 1:
2614 return entries[0]
2615 else:
2616 for num, e in enumerate(entries, start=1):
2617 # 'url' results don't have a title
2618 if e.get('title') is not None:
2619 e['title'] = '%s (%d)' % (e['title'], num)
2620 return {
2621 '_type': 'playlist',
2622 'entries': entries,
2623 }