youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7 import sys
   8
   9 from .common import InfoExtractor
  10 from .youtube import YoutubeIE
  11 from ..compat import (
  12     compat_etree_fromstring,
  13     compat_urllib_parse_unquote,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     orderedSet,
  24     sanitized_Request,
  25     smuggle_url,
  26     unescapeHTML,
  27     unified_strdate,
  28     unsmuggle_url,
  29     UnsupportedError,
  30     url_basename,
  31     xpath_text,
  32 )
  33 from .brightcove import (
  34     BrightcoveLegacyIE,
  35     BrightcoveNewIE,
  36 )
  37 from .nbc import NBCSportsVPlayerIE
  38 from .ooyala import OoyalaIE
  39 from .rutv import RUTVIE
  40 from .tvc import TVCIE
  41 from .sportbox import SportBoxEmbedIE
  42 from .smotri import SmotriIE
  43 from .myvi import MyviIE
  44 from .condenast import CondeNastIE
  45 from .udn import UDNEmbedIE
  46 from .senateisvp import SenateISVPIE
  47 from .svt import SVTIE
  48 from .pornhub import PornHubIE
  49 from .xhamster import XHamsterEmbedIE
  50 from .tnaflix import TNAFlixNetworkEmbedIE
  51 from .vimeo import VimeoIE
  52 from .dailymotion import DailymotionCloudIE
  53 from .onionstudios import OnionStudiosIE
  54 from .snagfilms import SnagFilmsEmbedIE
  55 from .screenwavemedia import ScreenwaveMediaIE
  56 from .mtv import MTVServicesEmbeddedIE
  57 from .pladform import PladformIE
  58 from .videomore import VideomoreIE
  59 from .googledrive import GoogleDriveIE
  60 from .jwplatform import JWPlatformIE
  61 from .digiteka import DigitekaIE
  62 from .instagram import InstagramIE
  63 from .liveleak import LiveLeakIE
  64
  65
  66 class GenericIE(InfoExtractor):
  67     IE_DESC = 'Generic downloader that works on some sites'
  68     _VALID_URL = r'.*'
  69     IE_NAME = 'generic'
  70     _TESTS = [
  71         # Direct link to a video
  72         {
  73             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  74             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  75             'info_dict': {
  76                 'id': 'trailer',
  77                 'ext': 'mp4',
  78                 'title': 'trailer',
  79                 'upload_date': '20100513',
  80             }
  81         },
  82         # Direct link to media delivered compressed (until Accept-Encoding is *)
  83         {
  84             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
  85             'md5': '128c42e68b13950268b648275386fc74',
  86             'info_dict': {
  87                 'id': 'FictionJunction-Parallel_Hearts',
  88                 'ext': 'flac',
  89                 'title': 'FictionJunction-Parallel_Hearts',
  90                 'upload_date': '20140522',
  91             },
  92             'expected_warnings': [
  93                 'URL could be a direct video link, returning it as such.'
  94             ]
  95         },
  96         # Direct download with broken HEAD
  97         {
  98             'url': 'http://ai-radio.org:8000/radio.opus',
  99             'info_dict': {
 100                 'id': 'radio',
 101                 'ext': 'opus',
 102                 'title': 'radio',
 103             },
 104             'params': {
 105                 'skip_download': True,  # infinite live stream
 106             },
 107             'expected_warnings': [
 108                 r'501.*Not Implemented',
 109                 r'400.*Bad Request',
 110             ],
 111         },
 112         # Direct link with incorrect MIME type
 113         {
 114             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 115             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 116             'info_dict': {
 117                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 118                 'id': '5_Lennart_Poettering_-_Systemd',
 119                 'ext': 'webm',
 120                 'title': '5_Lennart_Poettering_-_Systemd',
 121                 'upload_date': '20141120',
 122             },
 123             'expected_warnings': [
 124                 'URL could be a direct video link, returning it as such.'
 125             ]
 126         },
 127         # RSS feed
 128         {
 129             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 130             'info_dict': {
 131                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 132                 'title': 'Zero Punctuation',
 133                 'description': 're:.*groundbreaking video review series.*'
 134             },
 135             'playlist_mincount': 11,
 136         },
 137         # RSS feed with enclosure
 138         {
 139             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 140             'info_dict': {
 141                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 142                 'ext': 'm4v',
 143                 'upload_date': '20150228',
 144                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 145             }
 146         },
 147         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
 148         {
 149             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
 150             'info_dict': {
 151                 'id': 'smil',
 152                 'ext': 'mp4',
 153                 'title': 'Automatics, robotics and biocybernetics',
 154                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
 155                 'upload_date': '20130627',
 156                 'formats': 'mincount:16',
 157                 'subtitles': 'mincount:1',
 158             },
 159             'params': {
 160                 'force_generic_extractor': True,
 161                 'skip_download': True,
 162             },
 163         },
 164         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
 165         {
 166             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
 167             'info_dict': {
 168                 'id': 'hds',
 169                 'ext': 'flv',
 170                 'title': 'hds',
 171                 'formats': 'mincount:1',
 172             },
 173             'params': {
 174                 'skip_download': True,
 175             },
 176         },
 177         # SMIL from https://www.restudy.dk/video/play/id/1637
 178         {
 179             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
 180             'info_dict': {
 181                 'id': 'video_1637',
 182                 'ext': 'flv',
 183                 'title': 'video_1637',
 184                 'formats': 'mincount:3',
 185             },
 186             'params': {
 187                 'skip_download': True,
 188             },
 189         },
 190         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
 191         {
 192             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
 193             'info_dict': {
 194                 'id': 'smil-service',
 195                 'ext': 'flv',
 196                 'title': 'smil-service',
 197                 'formats': 'mincount:1',
 198             },
 199             'params': {
 200                 'skip_download': True,
 201             },
 202         },
 203         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
 204         {
 205             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
 206             'info_dict': {
 207                 'id': '4719370',
 208                 'ext': 'mp4',
 209                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
 210                 'formats': 'mincount:3',
 211             },
 212             'params': {
 213                 'skip_download': True,
 214             },
 215         },
 216         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
 217         {
 218             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
 219             'info_dict': {
 220                 'id': 'mZlp2ctYIUEB',
 221                 'ext': 'mp4',
 222                 'title': 'Tikibad ontruimd wegens brand',
 223                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
 224                 'thumbnail': 're:^https?://.*\.jpg$',
 225                 'duration': 33,
 226             },
 227             'params': {
 228                 'skip_download': True,
 229             },
 230         },
 231         # MPD from http://dash-mse-test.appspot.com/media.html
 232         {
 233             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
 234             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
 235             'info_dict': {
 236                 'id': 'car-20120827-manifest',
 237                 'ext': 'mp4',
 238                 'title': 'car-20120827-manifest',
 239                 'formats': 'mincount:9',
 240                 'upload_date': '20130904',
 241             },
 242             'params': {
 243                 'format': 'bestvideo',
 244             },
 245         },
 246         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
 247         {
 248             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
 249             'info_dict': {
 250                 'id': 'content',
 251                 'ext': 'mp4',
 252                 'title': 'content',
 253                 'formats': 'mincount:8',
 254             },
 255             'params': {
 256                 # m3u8 downloads
 257                 'skip_download': True,
 258             }
 259         },
 260         # m3u8 served with Content-Type: text/plain
 261         {
 262             'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
 263             'info_dict': {
 264                 'id': 'index',
 265                 'ext': 'mp4',
 266                 'title': 'index',
 267                 'upload_date': '20140720',
 268                 'formats': 'mincount:11',
 269             },
 270             'params': {
 271                 # m3u8 downloads
 272                 'skip_download': True,
 273             }
 274         },
 275         # google redirect
 276         {
 277             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 278             'info_dict': {
 279                 'id': 'cmQHVoWB5FY',
 280                 'ext': 'mp4',
 281                 'upload_date': '20130224',
 282                 'uploader_id': 'TheVerge',
 283                 'description': 're:^Chris Ziegler takes a look at the\.*',
 284                 'uploader': 'The Verge',
 285                 'title': 'First Firefox OS phones side-by-side',
 286             },
 287             'params': {
 288                 'skip_download': False,
 289             }
 290         },
 291         {
 292             # redirect in Refresh HTTP header
 293             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
 294             'info_dict': {
 295                 'id': 'pO8h3EaFRdo',
 296                 'ext': 'mp4',
 297                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
 298                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
 299                 'upload_date': '20150917',
 300                 'uploader_id': 'brtvofficial',
 301                 'uploader': 'Boiler Room',
 302             },
 303             'params': {
 304                 'skip_download': False,
 305             },
 306         },
 307         {
 308             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 309             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 310             'info_dict': {
 311                 'id': '13601338388002',
 312                 'ext': 'mp4',
 313                 'uploader': 'www.hodiho.fr',
 314                 'title': 'R\u00e9gis plante sa Jeep',
 315             }
 316         },
 317         # bandcamp page with custom domain
 318         {
 319             'add_ie': ['Bandcamp'],
 320             'url': 'http://bronyrock.com/track/the-pony-mash',
 321             'info_dict': {
 322                 'id': '3235767654',
 323                 'ext': 'mp3',
 324                 'title': 'The Pony Mash',
 325                 'uploader': 'M_Pallante',
 326             },
 327             'skip': 'There is a limit of 200 free downloads / month for the test song',
 328         },
 329         # embedded brightcove video
 330         # it also tests brightcove videos that need to set the 'Referer' in the
 331         # http requests
 332         {
 333             'add_ie': ['BrightcoveLegacy'],
 334             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 335             'info_dict': {
 336                 'id': '2765128793001',
 337                 'ext': 'mp4',
 338                 'title': 'Le cours de bourse : l’analyse technique',
 339                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 340                 'uploader': 'BFM BUSINESS',
 341             },
 342             'params': {
 343                 'skip_download': True,
 344             },
 345         },
 346         {
 347             # https://github.com/rg3/youtube-dl/issues/2253
 348             'url': 'http://bcove.me/i6nfkrc3',
 349             'md5': '0ba9446db037002366bab3b3eb30c88c',
 350             'info_dict': {
 351                 'id': '3101154703001',
 352                 'ext': 'mp4',
 353                 'title': 'Still no power',
 354                 'uploader': 'thestar.com',
 355                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 356             },
 357             'add_ie': ['BrightcoveLegacy'],
 358         },
 359         {
 360             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 361             'md5': 'fb973ecf6e4a78a67453647444222983',
 362             'info_dict': {
 363                 'id': '3414141473001',
 364                 'ext': 'mp4',
 365                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 366                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 367                 'uploader': 'Championat',
 368             },
 369         },
 370         {
 371             # https://github.com/rg3/youtube-dl/issues/3541
 372             'add_ie': ['BrightcoveLegacy'],
 373             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 374             'info_dict': {
 375                 'id': '3866516442001',
 376                 'ext': 'mp4',
 377                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 378                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 379                 'uploader': 'SBS Broadcasting',
 380             },
 381             'skip': 'Restricted to Netherlands',
 382             'params': {
 383                 'skip_download': True,  # m3u8 download
 384             },
 385         },
 386         # ooyala video
 387         {
 388             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 389             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 390             'info_dict': {
 391                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 392                 'ext': 'mp4',
 393                 'title': '2cc213299525360.mov',  # that's what we get
 394                 'duration': 238.231,
 395             },
 396             'add_ie': ['Ooyala'],
 397         },
 398         {
 399             # ooyala video embedded with http://player.ooyala.com/iframe.js
 400             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
 401             'info_dict': {
 402                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
 403                 'ext': 'mp4',
 404                 'title': '"Steve Jobs: Man in the Machine" trailer',
 405                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
 406                 'duration': 135.427,
 407             },
 408             'params': {
 409                 'skip_download': True,
 410             },
 411         },
 412         # embed.ly video
 413         {
 414             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 415             'info_dict': {
 416                 'id': '9ODmcdjQcHQ',
 417                 'ext': 'mp4',
 418                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 419                 'upload_date': '20140225',
 420                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 421                 'uploader': 'Tested',
 422                 'uploader_id': 'testedcom',
 423             },
 424             # No need to test YoutubeIE here
 425             'params': {
 426                 'skip_download': True,
 427             },
 428         },
 429         # funnyordie embed
 430         {
 431             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 432             'info_dict': {
 433                 'id': '18e820ec3f',
 434                 'ext': 'mp4',
 435                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 436                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 437             },
 438         },
 439         # RUTV embed
 440         {
 441             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 442             'info_dict': {
 443                 'id': '776940',
 444                 'ext': 'mp4',
 445                 'title': 'Охотское море стало целиком российским',
 446                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 447             },
 448             'params': {
 449                 # m3u8 download
 450                 'skip_download': True,
 451             },
 452         },
 453         # TVC embed
 454         {
 455             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 456             'info_dict': {
 457                 'id': '55304',
 458                 'ext': 'mp4',
 459                 'title': 'Дошкольное воспитание',
 460             },
 461         },
 462         # SportBox embed
 463         {
 464             'url': 'http://www.vestifinance.ru/articles/25753',
 465             'info_dict': {
 466                 'id': '25753',
 467                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 468             },
 469             'playlist': [{
 470                 'info_dict': {
 471                     'id': '370908',
 472                     'title': 'Госзаказ. День 3',
 473                     'ext': 'mp4',
 474                 }
 475             }, {
 476                 'info_dict': {
 477                     'id': '370905',
 478                     'title': 'Госзаказ. День 2',
 479                     'ext': 'mp4',
 480                 }
 481             }, {
 482                 'info_dict': {
 483                     'id': '370902',
 484                     'title': 'Госзаказ. День 1',
 485                     'ext': 'mp4',
 486                 }
 487             }],
 488             'params': {
 489                 # m3u8 download
 490                 'skip_download': True,
 491             },
 492         },
 493         # Myvi.ru embed
 494         {
 495             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 496             'info_dict': {
 497                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 498                 'ext': 'mp4',
 499                 'title': 'Ужастики, русский трейлер (2015)',
 500                 'thumbnail': 're:^https?://.*\.jpg$',
 501                 'duration': 153,
 502             }
 503         },
 504         # XHamster embed
 505         {
 506             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 507             'info_dict': {
 508                 'id': 'showthread',
 509                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 510             },
 511             'playlist_mincount': 7,
 512         },
 513         # Embedded TED video
 514         {
 515             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 516             'md5': '65fdff94098e4a607385a60c5177c638',
 517             'info_dict': {
 518                 'id': '1969',
 519                 'ext': 'mp4',
 520                 'title': 'Hidden miracles of the natural world',
 521                 'uploader': 'Louie Schwartzberg',
 522                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 523             }
 524         },
 525         # Embedded Ustream video
 526         {
 527             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 528             'md5': '27b99cdb639c9b12a79bca876a073417',
 529             'info_dict': {
 530                 'id': '45734260',
 531                 'ext': 'flv',
 532                 'uploader': 'AU SPA:  The NSA and Privacy',
 533                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 534             }
 535         },
 536         # nowvideo embed hidden behind percent encoding
 537         {
 538             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 539             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 540             'info_dict': {
 541                 'id': '06e53103ca9aa',
 542                 'ext': 'flv',
 543                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 544                 'description': 'No description',
 545             },
 546         },
 547         # arte embed
 548         {
 549             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 550             'md5': '7653032cbb25bf6c80d80f217055fa43',
 551             'info_dict': {
 552                 'id': '048195-004_PLUS7-F',
 553                 'ext': 'flv',
 554                 'title': 'X:enius',
 555                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 556                 'upload_date': '20140320',
 557             },
 558             'params': {
 559                 'skip_download': 'Requires rtmpdump'
 560             }
 561         },
 562         # francetv embed
 563         {
 564             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 565             'info_dict': {
 566                 'id': 'EV_30231',
 567                 'ext': 'mp4',
 568                 'title': 'Alcaline, le concert avec Calogero',
 569                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 570                 'upload_date': '20150226',
 571                 'timestamp': 1424989860,
 572                 'duration': 5400,
 573             },
 574             'params': {
 575                 # m3u8 downloads
 576                 'skip_download': True,
 577             },
 578             'expected_warnings': [
 579                 'Forbidden'
 580             ]
 581         },
 582         # Condé Nast embed
 583         {
 584             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 585             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 586             'info_dict': {
 587                 'id': '53501be369702d3275860000',
 588                 'ext': 'mp4',
 589                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 590             }
 591         },
 592         # Dailymotion embed
 593         {
 594             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 595             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 596             'info_dict': {
 597                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 598                 'ext': 'mp4',
 599                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 600                 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
 601                 'uploader': 'Spi0n',
 602                 'uploader_id': 'xgditw',
 603                 'upload_date': '20140425',
 604                 'timestamp': 1398441542,
 605             },
 606             'add_ie': ['Dailymotion'],
 607         },
 608         # YouTube embed
 609         {
 610             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 611             'info_dict': {
 612                 'id': 'FXRb4ykk4S0',
 613                 'ext': 'mp4',
 614                 'title': 'The NBL Auction 2014',
 615                 'uploader': 'BADMINTON England',
 616                 'uploader_id': 'BADMINTONEvents',
 617                 'upload_date': '20140603',
 618                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 619             },
 620             'add_ie': ['Youtube'],
 621             'params': {
 622                 'skip_download': True,
 623             }
 624         },
 625         # MTVServices embed
 626         {
 627             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 628             'md5': '35727f82f58c76d996fc188f9755b0d5',
 629             'info_dict': {
 630                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 631                 'ext': 'mp4',
 632                 'title': 'Review',
 633                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 634             },
 635         },
 636         # YouTube embed via <data-embed-url="">
 637         {
 638             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 639             'info_dict': {
 640                 'id': '4vAffPZIT44',
 641                 'ext': 'mp4',
 642                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 643                 'uploader': 'Gameloft',
 644                 'uploader_id': 'gameloft',
 645                 'upload_date': '20140828',
 646                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 647             },
 648             'params': {
 649                 'skip_download': True,
 650             }
 651         },
 652         # Camtasia studio
 653         {
 654             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 655             'playlist': [{
 656                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 657                 'info_dict': {
 658                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 659                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 660                     'ext': 'flv',
 661                     'duration': 2235.90,
 662                 }
 663             }, {
 664                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 665                 'info_dict': {
 666                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 667                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 668                     'ext': 'flv',
 669                     'duration': 2235.93,
 670                 }
 671             }],
 672             'info_dict': {
 673                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 674             }
 675         },
 676         # Flowplayer
 677         {
 678             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 679             'md5': '9d65602bf31c6e20014319c7d07fba27',
 680             'info_dict': {
 681                 'id': '5123ea6d5e5a7',
 682                 'ext': 'mp4',
 683                 'age_limit': 18,
 684                 'uploader': 'www.handjobhub.com',
 685                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 686             }
 687         },
 688         # Multiple brightcove videos
 689         # https://github.com/rg3/youtube-dl/issues/2283
 690         {
 691             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 692             'info_dict': {
 693                 'id': 'always-never',
 694                 'title': 'Always / Never - The New Yorker',
 695             },
 696             'playlist_count': 3,
 697             'params': {
 698                 'extract_flat': False,
 699                 'skip_download': True,
 700             }
 701         },
 702         # MLB embed
 703         {
 704             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 705             'md5': '96f09a37e44da40dd083e12d9a683327',
 706             'info_dict': {
 707                 'id': '33322633',
 708                 'ext': 'mp4',
 709                 'title': 'Ump changes call to ball',
 710                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 711                 'duration': 48,
 712                 'timestamp': 1401537900,
 713                 'upload_date': '20140531',
 714                 'thumbnail': 're:^https?://.*\.jpg$',
 715             },
 716         },
 717         # Wistia embed
 718         {
 719             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 720             'md5': '8788b683c777a5cf25621eaf286d0c23',
 721             'info_dict': {
 722                 'id': '1cfaf6b7ea',
 723                 'ext': 'mov',
 724                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 725                 'duration': 643.0,
 726                 'filesize': 182808282,
 727                 'uploader': 'education-portal.com',
 728             },
 729         },
 730         {
 731             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 732             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 733             'info_dict': {
 734                 'id': 'uxjb0lwrcz',
 735                 'ext': 'mp4',
 736                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 737                 'description': 'a Martin Fowler video from ThoughtWorks',
 738                 'duration': 1715.0,
 739                 'uploader': 'thoughtworks.wistia.com',
 740                 'upload_date': '20140603',
 741                 'timestamp': 1401832161,
 742             },
 743         },
 744         # Soundcloud embed
 745         {
 746             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 747             'info_dict': {
 748                 'id': '174391317',
 749                 'ext': 'mp3',
 750                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 751                 'uploader': 'Sophos Security',
 752                 'title': 'Chet Chat 171 - Oct 29, 2014',
 753                 'upload_date': '20141029',
 754             }
 755         },
 756         # Livestream embed
 757         {
 758             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 759             'info_dict': {
 760                 'id': '67864563',
 761                 'ext': 'flv',
 762                 'upload_date': '20141112',
 763                 'title': 'Rosetta #CometLanding webcast HL 10',
 764             }
 765         },
 766         # LazyYT
 767         {
 768             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 769             'info_dict': {
 770                 'id': '1986',
 771                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 772             },
 773             'playlist_mincount': 2,
 774         },
 775         # Cinchcast embed
 776         {
 777             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 778             'info_dict': {
 779                 'id': '7141703',
 780                 'ext': 'mp3',
 781                 'upload_date': '20141126',
 782                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 783             }
 784         },
 785         # Cinerama player
 786         {
 787             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 788             'info_dict': {
 789                 'id': '730m_DandD_1901_512k',
 790                 'ext': 'mp4',
 791                 'uploader': 'www.abc.net.au',
 792                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 793             }
 794         },
 795         # embedded viddler video
 796         {
 797             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 798             'info_dict': {
 799                 'id': '4d03aad9',
 800                 'ext': 'mp4',
 801                 'uploader': 'deadspin',
 802                 'title': 'WALL-TO-GORTAT',
 803                 'timestamp': 1422285291,
 804                 'upload_date': '20150126',
 805             },
 806             'add_ie': ['Viddler'],
 807         },
 808         # Libsyn embed
 809         {
 810             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 811             'info_dict': {
 812                 'id': '3377616',
 813                 'ext': 'mp3',
 814                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 815                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 816                 'upload_date': '20150220',
 817             },
 818         },
 819         # jwplayer YouTube
 820         {
 821             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 822             'info_dict': {
 823                 'id': 'Mrj4DVp2zeA',
 824                 'ext': 'mp4',
 825                 'upload_date': '20150212',
 826                 'uploader': 'The National Archives UK',
 827                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 828                 'uploader_id': 'NationalArchives08',
 829                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 830             },
 831         },
 832         # rtl.nl embed
 833         {
 834             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 835             'playlist_mincount': 5,
 836             'info_dict': {
 837                 'id': 'aanslagen-kopenhagen',
 838                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 839             }
 840         },
 841         # Zapiks embed
 842         {
 843             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 844             'info_dict': {
 845                 'id': '118046',
 846                 'ext': 'mp4',
 847                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 848             }
 849         },
 850         # Kaltura embed
 851         {
 852             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 853             'info_dict': {
 854                 'id': '1_eergr3h1',
 855                 'ext': 'mp4',
 856                 'upload_date': '20150226',
 857                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 858                 'timestamp': int,
 859                 'title': 'John Carlson Postgame 2/25/15',
 860             },
 861         },
 862         # Kaltura embed (different embed code)
 863         {
 864             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 865             'info_dict': {
 866                 'id': '1_a52wc67y',
 867                 'ext': 'flv',
 868                 'upload_date': '20150127',
 869                 'uploader_id': 'PremierMedia',
 870                 'timestamp': int,
 871                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
 872             },
 873         },
 874         # Kaltura embed protected with referrer
 875         {
 876             'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
 877             'info_dict': {
 878                 'id': '1_g4fbemnq',
 879                 'ext': 'mp4',
 880                 'title': 'Violetta - Achter De Schermen - Ruggero',
 881                 'description': 'Achter de schermen met Ruggero',
 882                 'timestamp': 1435133761,
 883                 'upload_date': '20150624',
 884                 'uploader_id': 'echojecka',
 885             },
 886         },
 887         # Eagle.Platform embed (generic URL)
 888         {
 889             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 890             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
 891             'info_dict': {
 892                 'id': '227304',
 893                 'ext': 'mp4',
 894                 'title': 'Навальный вышел на свободу',
 895                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 896                 'thumbnail': 're:^https?://.*\.jpg$',
 897                 'duration': 87,
 898                 'view_count': int,
 899                 'age_limit': 0,
 900             },
 901         },
 902         # ClipYou (Eagle.Platform) embed (custom URL)
 903         {
 904             'url': 'http://muz-tv.ru/play/7129/',
 905             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
 906             'info_dict': {
 907                 'id': '12820',
 908                 'ext': 'mp4',
 909                 'title': "'O Sole Mio",
 910                 'thumbnail': 're:^https?://.*\.jpg$',
 911                 'duration': 216,
 912                 'view_count': int,
 913             },
 914         },
 915         # Pladform embed
 916         {
 917             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 918             'info_dict': {
 919                 'id': '100183293',
 920                 'ext': 'mp4',
 921                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 922                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 923                 'thumbnail': 're:^https?://.*\.jpg$',
 924                 'duration': 694,
 925                 'age_limit': 0,
 926             },
 927         },
 928         # Playwire embed
 929         {
 930             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 931             'info_dict': {
 932                 'id': '3519514',
 933                 'ext': 'mp4',
 934                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 935                 'thumbnail': 're:^https?://.*\.png$',
 936                 'duration': 45.115,
 937             },
 938         },
 939         # 5min embed
 940         {
 941             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 942             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 943             'info_dict': {
 944                 'id': '518726732',
 945                 'ext': 'mp4',
 946                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 947             },
 948         },
 949         # SVT embed
 950         {
 951             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 952             'info_dict': {
 953                 'id': '2900353',
 954                 'ext': 'flv',
 955                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 956                 'duration': 27,
 957                 'age_limit': 0,
 958             },
 959         },
 960         # Crooks and Liars embed
 961         {
 962             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 963             'info_dict': {
 964                 'id': '8RUoRhRi',
 965                 'ext': 'mp4',
 966                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 967                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 968                 'timestamp': 1428207000,
 969                 'upload_date': '20150405',
 970                 'uploader': 'Heather',
 971             },
 972         },
 973         # Crooks and Liars external embed
 974         {
 975             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 976             'info_dict': {
 977                 'id': 'MTE3MjUtMzQ2MzA',
 978                 'ext': 'mp4',
 979                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 980                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 981                 'timestamp': 1265032391,
 982                 'upload_date': '20100201',
 983                 'uploader': 'Heather',
 984             },
 985         },
 986         # NBC Sports vplayer embed
 987         {
 988             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 989             'info_dict': {
 990                 'id': 'ln7x1qSThw4k',
 991                 'ext': 'flv',
 992                 'title': "PFT Live: New leader in the 'new-look' defense",
 993                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 994                 'uploader': 'NBCU-SPORTS',
 995                 'upload_date': '20140107',
 996                 'timestamp': 1389118457,
 997             },
 998         },
 999         # UDN embed
1000         {
1001             'url': 'http://www.udn.com/news/story/7314/822787',
1002             'md5': 'fd2060e988c326991037b9aff9df21a6',
1003             'info_dict': {
1004                 'id': '300346',
1005                 'ext': 'mp4',
1006                 'title': '中一中男師變性 全校師生力挺',
1007                 'thumbnail': 're:^https?://.*\.jpg$',
1008             }
1009         },
1010         # Ooyala embed
1011         {
1012             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1013             'info_dict': {
1014                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1015                 'ext': 'mp4',
1016                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
1017                 'title': 'This is what separates the Excel masters from the wannabes',
1018                 'duration': 191.933,
1019             },
1020             'params': {
1021                 # m3u8 downloads
1022                 'skip_download': True,
1023             }
1024         },
1025         # Contains a SMIL manifest
1026         {
1027             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
1028             'info_dict': {
1029                 'id': 'file',
1030                 'ext': 'flv',
1031                 'title': '+ Football: Lottery Champions League Europe',
1032                 'uploader': 'www.telewebion.com',
1033             },
1034             'params': {
1035                 # rtmpe downloads
1036                 'skip_download': True,
1037             }
1038         },
1039         # Brightcove URL in single quotes
1040         {
1041             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1042             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1043             'info_dict': {
1044                 'id': '4255764656001',
1045                 'ext': 'mp4',
1046                 'title': 'SN Presents: Russell Martin, World Citizen',
1047                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1048                 'uploader': 'Rogers Sportsnet',
1049                 'uploader_id': '1704050871',
1050                 'upload_date': '20150525',
1051                 'timestamp': 1432570283,
1052             },
1053         },
1054         # Dailymotion Cloud video
1055         {
1056             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1057             'md5': '49444254273501a64675a7e68c502681',
1058             'info_dict': {
1059                 'id': '5585de919473990de4bee11b',
1060                 'ext': 'mp4',
1061                 'title': 'Le débat',
1062                 'thumbnail': 're:^https?://.*\.jpe?g$',
1063             }
1064         },
1065         # OnionStudios embed
1066         {
1067             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1068             'info_dict': {
1069                 'id': '2855',
1070                 'ext': 'mp4',
1071                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1072                 'thumbnail': 're:^https?://.*\.jpe?g$',
1073                 'uploader': 'ClickHole',
1074                 'uploader_id': 'clickhole',
1075             }
1076         },
1077         # SnagFilms embed
1078         {
1079             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1080             'info_dict': {
1081                 'id': '74849a00-85a9-11e1-9660-123139220831',
1082                 'ext': 'mp4',
1083                 'title': '#whilewewatch',
1084             }
1085         },
1086         # AdobeTVVideo embed
1087         {
1088             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1089             'md5': '43662b577c018ad707a63766462b1e87',
1090             'info_dict': {
1091                 'id': '2456',
1092                 'ext': 'mp4',
1093                 'title': 'New experience with Acrobat DC',
1094                 'description': 'New experience with Acrobat DC',
1095                 'duration': 248.667,
1096             },
1097         },
1098         # ScreenwaveMedia embed
1099         {
1100             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1101             'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1102             'info_dict': {
1103                 'id': 'cinemasnob-55d26273809dd',
1104                 'ext': 'mp4',
1105                 'title': 'cinemasnob',
1106             },
1107         },
1108         # BrightcoveInPageEmbed embed
1109         {
1110             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1111             'info_dict': {
1112                 'id': '4238694884001',
1113                 'ext': 'flv',
1114                 'title': 'Tabletop: Dread, Last Thoughts',
1115                 'description': 'Tabletop: Dread, Last Thoughts',
1116                 'duration': 51690,
1117             },
1118         },
1119         # JWPlayer with M3U8
1120         {
1121             'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
1122             'info_dict': {
1123                 'id': 'playlist',
1124                 'ext': 'mp4',
1125                 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
1126                 'uploader': 'ren.tv',
1127             },
1128             'params': {
1129                 # m3u8 downloads
1130                 'skip_download': True,
1131             }
1132         },
1133         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1134         # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1135         {
1136             'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1137             'info_dict': {
1138                 'id': '4785848093001',
1139                 'ext': 'mp4',
1140                 'title': 'The Cardinal Pell Interview',
1141                 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1142                 'uploader': 'GlobeCast Australia - GlobeStream',
1143                 'uploader_id': '2733773828001',
1144                 'upload_date': '20160304',
1145                 'timestamp': 1457083087,
1146             },
1147             'params': {
1148                 # m3u8 downloads
1149                 'skip_download': True,
1150             },
1151         },
1152         # Another form of arte.tv embed
1153         {
1154             'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1155             'md5': '850bfe45417ddf221288c88a0cffe2e2',
1156             'info_dict': {
1157                 'id': '030273-562_PLUS7-F',
1158                 'ext': 'mp4',
1159                 'title': 'ARTE Reportage - Nulle part, en France',
1160                 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1161                 'upload_date': '20160409',
1162             },
1163         },
1164         # LiveLeak embed
1165         {
1166             'url': 'http://www.wykop.pl/link/3088787/',
1167             'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
1168             'info_dict': {
1169                 'id': '874_1459135191',
1170                 'ext': 'mp4',
1171                 'title': 'Man shows poor quality of new apartment building',
1172                 'description': 'The wall is like a sand pile.',
1173                 'uploader': 'Lake8737',
1174             }
1175         },
1176     ]
1177
1178     def report_following_redirect(self, new_url):
1179         """Report information extraction."""
1180         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1181
1182     def _extract_rss(self, url, video_id, doc):
1183         playlist_title = doc.find('./channel/title').text
1184         playlist_desc_el = doc.find('./channel/description')
1185         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1186
1187         entries = []
1188         for it in doc.findall('./channel/item'):
1189             next_url = xpath_text(it, 'link', fatal=False)
1190             if not next_url:
1191                 enclosure_nodes = it.findall('./enclosure')
1192                 for e in enclosure_nodes:
1193                     next_url = e.attrib.get('url')
1194                     if next_url:
1195                         break
1196
1197             if not next_url:
1198                 continue
1199
1200             entries.append({
1201                 '_type': 'url',
1202                 'url': next_url,
1203                 'title': it.find('title').text,
1204             })
1205
1206         return {
1207             '_type': 'playlist',
1208             'id': url,
1209             'title': playlist_title,
1210             'description': playlist_desc,
1211             'entries': entries,
1212         }
1213
1214     def _extract_camtasia(self, url, video_id, webpage):
1215         """ Returns None if no camtasia video can be found. """
1216
1217         camtasia_cfg = self._search_regex(
1218             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1219             webpage, 'camtasia configuration file', default=None)
1220         if camtasia_cfg is None:
1221             return None
1222
1223         title = self._html_search_meta('DC.title', webpage, fatal=True)
1224
1225         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1226         camtasia_cfg = self._download_xml(
1227             camtasia_url, video_id,
1228             note='Downloading camtasia configuration',
1229             errnote='Failed to download camtasia configuration')
1230         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1231
1232         entries = []
1233         for n in fileset_node.getchildren():
1234             url_n = n.find('./uri')
1235             if url_n is None:
1236                 continue
1237
1238             entries.append({
1239                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1240                 'title': '%s - %s' % (title, n.tag),
1241                 'url': compat_urlparse.urljoin(url, url_n.text),
1242                 'duration': float_or_none(n.find('./duration').text),
1243             })
1244
1245         return {
1246             '_type': 'playlist',
1247             'entries': entries,
1248             'title': title,
1249         }
1250
1251     def _real_extract(self, url):
1252         if url.startswith('//'):
1253             return {
1254                 '_type': 'url',
1255                 'url': self.http_scheme() + url,
1256             }
1257
1258         parsed_url = compat_urlparse.urlparse(url)
1259         if not parsed_url.scheme:
1260             default_search = self._downloader.params.get('default_search')
1261             if default_search is None:
1262                 default_search = 'fixup_error'
1263
1264             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1265                 if '/' in url:
1266                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1267                     return self.url_result('http://' + url)
1268                 elif default_search != 'fixup_error':
1269                     if default_search == 'auto_warning':
1270                         if re.match(r'^(?:url|URL)$', url):
1271                             raise ExtractorError(
1272                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1273                                 expected=True)
1274                         else:
1275                             self._downloader.report_warning(
1276                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1277                     return self.url_result('ytsearch:' + url)
1278
1279             if default_search in ('error', 'fixup_error'):
1280                 raise ExtractorError(
1281                     '%r is not a valid URL. '
1282                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1283                     % (url, url), expected=True)
1284             else:
1285                 if ':' not in default_search:
1286                     default_search += ':'
1287                 return self.url_result(default_search + url)
1288
1289         url, smuggled_data = unsmuggle_url(url)
1290         force_videoid = None
1291         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1292         if smuggled_data and 'force_videoid' in smuggled_data:
1293             force_videoid = smuggled_data['force_videoid']
1294             video_id = force_videoid
1295         else:
1296             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1297
1298         self.to_screen('%s: Requesting header' % video_id)
1299
1300         head_req = HEADRequest(url)
1301         head_response = self._request_webpage(
1302             head_req, video_id,
1303             note=False, errnote='Could not send HEAD request to %s' % url,
1304             fatal=False)
1305
1306         if head_response is not False:
1307             # Check for redirect
1308             new_url = head_response.geturl()
1309             if url != new_url:
1310                 self.report_following_redirect(new_url)
1311                 if force_videoid:
1312                     new_url = smuggle_url(
1313                         new_url, {'force_videoid': force_videoid})
1314                 return self.url_result(new_url)
1315
1316         full_response = None
1317         if head_response is False:
1318             request = sanitized_Request(url)
1319             request.add_header('Accept-Encoding', '*')
1320             full_response = self._request_webpage(request, video_id)
1321             head_response = full_response
1322
1323         info_dict = {
1324             'id': video_id,
1325             'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1326             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
1327         }
1328
1329         # Check for direct link to a video
1330         content_type = head_response.headers.get('Content-Type', '').lower()
1331         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1332         if m:
1333             format_id = m.group('format_id')
1334             if format_id.endswith('mpegurl'):
1335                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1336             elif format_id == 'f4m':
1337                 formats = self._extract_f4m_formats(url, video_id)
1338             else:
1339                 formats = [{
1340                     'format_id': m.group('format_id'),
1341                     'url': url,
1342                     'vcodec': 'none' if m.group('type') == 'audio' else None
1343                 }]
1344                 info_dict['direct'] = True
1345             self._sort_formats(formats)
1346             info_dict['formats'] = formats
1347             return info_dict
1348
1349         if not self._downloader.params.get('test', False) and not is_intentional:
1350             force = self._downloader.params.get('force_generic_extractor', False)
1351             self._downloader.report_warning(
1352                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1353
1354         if not full_response:
1355             request = sanitized_Request(url)
1356             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1357             # making it impossible to download only chunk of the file (yet we need only 512kB to
1358             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1359             # that will always result in downloading the whole file that is not desirable.
1360             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1361             # to accept raw bytes and being able to download only a chunk.
1362             # It may probably better to solve this by checking Content-Type for application/octet-stream
1363             # after HEAD request finishes, but not sure if we can rely on this.
1364             request.add_header('Accept-Encoding', '*')
1365             full_response = self._request_webpage(request, video_id)
1366
1367         first_bytes = full_response.read(512)
1368
1369         # Is it an M3U playlist?
1370         if first_bytes.startswith(b'#EXTM3U'):
1371             info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
1372             self._sort_formats(info_dict['formats'])
1373             return info_dict
1374
1375         # Maybe it's a direct link to a video?
1376         # Be careful not to download the whole thing!
1377         if not is_html(first_bytes):
1378             self._downloader.report_warning(
1379                 'URL could be a direct video link, returning it as such.')
1380             info_dict.update({
1381                 'direct': True,
1382                 'url': url,
1383             })
1384             return info_dict
1385
1386         webpage = self._webpage_read_content(
1387             full_response, url, video_id, prefix=first_bytes)
1388
1389         self.report_extraction(video_id)
1390
1391         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1392         try:
1393             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1394             if doc.tag == 'rss':
1395                 return self._extract_rss(url, video_id, doc)
1396             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1397                 smil = self._parse_smil(doc, url, video_id)
1398                 self._sort_formats(smil['formats'])
1399                 return smil
1400             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1401                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1402             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1403                 info_dict['formats'] = self._parse_mpd_formats(
1404                     doc, video_id, mpd_base_url=url.rpartition('/')[0])
1405                 self._sort_formats(info_dict['formats'])
1406                 return info_dict
1407             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1408                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1409                 self._sort_formats(info_dict['formats'])
1410                 return info_dict
1411         except compat_xml_parse_error:
1412             pass
1413
1414         # Is it a Camtasia project?
1415         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1416         if camtasia_res is not None:
1417             return camtasia_res
1418
1419         # Sometimes embedded video player is hidden behind percent encoding
1420         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1421         # Unescaping the whole page allows to handle those cases in a generic way
1422         webpage = compat_urllib_parse_unquote(webpage)
1423
1424         # it's tempting to parse this further, but you would
1425         # have to take into account all the variations like
1426         #   Video Title - Site Name
1427         #   Site Name | Video Title
1428         #   Video Title - Tagline | Site Name
1429         # and so on and so forth; it's just not practical
1430         video_title = self._html_search_regex(
1431             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1432             default='video')
1433
1434         # Try to detect age limit automatically
1435         age_limit = self._rta_search(webpage)
1436         # And then there are the jokers who advertise that they use RTA,
1437         # but actually don't.
1438         AGE_LIMIT_MARKERS = [
1439             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1440         ]
1441         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1442             age_limit = 18
1443
1444         # video uploader is domain name
1445         video_uploader = self._search_regex(
1446             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1447
1448         # Helper method
1449         def _playlist_from_matches(matches, getter=None, ie=None):
1450             urlrs = orderedSet(
1451                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1452                 for m in matches)
1453             return self.playlist_result(
1454                 urlrs, playlist_id=video_id, playlist_title=video_title)
1455
1456         # Look for Brightcove Legacy Studio embeds
1457         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1458         if bc_urls:
1459             self.to_screen('Brightcove video detected.')
1460             entries = [{
1461                 '_type': 'url',
1462                 'url': smuggle_url(bc_url, {'Referer': url}),
1463                 'ie_key': 'BrightcoveLegacy'
1464             } for bc_url in bc_urls]
1465
1466             return {
1467                 '_type': 'playlist',
1468                 'title': video_title,
1469                 'id': video_id,
1470                 'entries': entries,
1471             }
1472
1473         # Look for Brightcove New Studio embeds
1474         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1475         if bc_urls:
1476             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
1477
1478         # Look for embedded rtl.nl player
1479         matches = re.findall(
1480             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1481             webpage)
1482         if matches:
1483             return _playlist_from_matches(matches, ie='RtlNl')
1484
1485         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1486         if vimeo_url is not None:
1487             return self.url_result(vimeo_url)
1488
1489         vid_me_embed_url = self._search_regex(
1490             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1491             webpage, 'vid.me embed', default=None)
1492         if vid_me_embed_url is not None:
1493             return self.url_result(vid_me_embed_url, 'Vidme')
1494
1495         # Look for embedded YouTube player
1496         matches = re.findall(r'''(?x)
1497             (?:
1498                 <iframe[^>]+?src=|
1499                 data-video-url=|
1500                 <embed[^>]+?src=|
1501                 embedSWF\(?:\s*|
1502                 new\s+SWFObject\(
1503             )
1504             (["\'])
1505                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1506                 (?:embed|v|p)/.+?)
1507             \1''', webpage)
1508         if matches:
1509             return _playlist_from_matches(
1510                 matches, lambda m: unescapeHTML(m[1]))
1511
1512         # Look for lazyYT YouTube embed
1513         matches = re.findall(
1514             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1515         if matches:
1516             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1517
1518         # Look for embedded Dailymotion player
1519         matches = re.findall(
1520             r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
1521         if matches:
1522             return _playlist_from_matches(
1523                 matches, lambda m: unescapeHTML(m[1]))
1524
1525         # Look for embedded Dailymotion playlist player (#3822)
1526         m = re.search(
1527             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1528         if m:
1529             playlists = re.findall(
1530                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1531             if playlists:
1532                 return _playlist_from_matches(
1533                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1534
1535         # Look for embedded Wistia player
1536         match = re.search(
1537             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1538         if match:
1539             embed_url = self._proto_relative_url(
1540                 unescapeHTML(match.group('url')))
1541             return {
1542                 '_type': 'url_transparent',
1543                 'url': embed_url,
1544                 'ie_key': 'Wistia',
1545                 'uploader': video_uploader,
1546                 'title': video_title,
1547                 'id': video_id,
1548             }
1549
1550         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1551         if match:
1552             return {
1553                 '_type': 'url_transparent',
1554                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1555                 'ie_key': 'Wistia',
1556                 'uploader': video_uploader,
1557                 'title': video_title,
1558                 'id': match.group('id')
1559             }
1560
1561         # Look for SVT player
1562         svt_url = SVTIE._extract_url(webpage)
1563         if svt_url:
1564             return self.url_result(svt_url, 'SVT')
1565
1566         # Look for embedded condenast player
1567         matches = re.findall(
1568             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1569             webpage)
1570         if matches:
1571             return {
1572                 '_type': 'playlist',
1573                 'entries': [{
1574                     '_type': 'url',
1575                     'ie_key': 'CondeNast',
1576                     'url': ma,
1577                 } for ma in matches],
1578                 'title': video_title,
1579                 'id': video_id,
1580             }
1581
1582         # Look for Bandcamp pages with custom domain
1583         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1584         if mobj is not None:
1585             burl = unescapeHTML(mobj.group(1))
1586             # Don't set the extractor because it can be a track url or an album
1587             return self.url_result(burl)
1588
1589         # Look for embedded Vevo player
1590         mobj = re.search(
1591             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1592         if mobj is not None:
1593             return self.url_result(mobj.group('url'))
1594
1595         # Look for embedded Viddler player
1596         mobj = re.search(
1597             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1598             webpage)
1599         if mobj is not None:
1600             return self.url_result(mobj.group('url'))
1601
1602         # Look for NYTimes player
1603         mobj = re.search(
1604             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1605             webpage)
1606         if mobj is not None:
1607             return self.url_result(mobj.group('url'))
1608
1609         # Look for Libsyn player
1610         mobj = re.search(
1611             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1612         if mobj is not None:
1613             return self.url_result(mobj.group('url'))
1614
1615         # Look for Ooyala videos
1616         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1617                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1618                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1619                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1620         if mobj is not None:
1621             return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
1622
1623         # Look for multiple Ooyala embeds on SBN network websites
1624         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1625         if mobj is not None:
1626             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1627             if embeds:
1628                 return _playlist_from_matches(
1629                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
1630
1631         # Look for Aparat videos
1632         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1633         if mobj is not None:
1634             return self.url_result(mobj.group(1), 'Aparat')
1635
1636         # Look for MPORA videos
1637         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1638         if mobj is not None:
1639             return self.url_result(mobj.group(1), 'Mpora')
1640
1641         # Look for embedded NovaMov-based player
1642         mobj = re.search(
1643             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1644                     (?P<url>http://(?:(?:embed|www)\.)?
1645                         (?:novamov\.com|
1646                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1647                            videoweed\.(?:es|com)|
1648                            movshare\.(?:net|sx|ag)|
1649                            divxstage\.(?:eu|net|ch|co|at|ag))
1650                         /embed\.php.+?)\1''', webpage)
1651         if mobj is not None:
1652             return self.url_result(mobj.group('url'))
1653
1654         # Look for embedded Facebook player
1655         mobj = re.search(
1656             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1657         if mobj is not None:
1658             return self.url_result(mobj.group('url'), 'Facebook')
1659
1660         # Look for embedded VK player
1661         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1662         if mobj is not None:
1663             return self.url_result(mobj.group('url'), 'VK')
1664
1665         # Look for embedded Odnoklassniki player
1666         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
1667         if mobj is not None:
1668             return self.url_result(mobj.group('url'), 'Odnoklassniki')
1669
1670         # Look for embedded ivi player
1671         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1672         if mobj is not None:
1673             return self.url_result(mobj.group('url'), 'Ivi')
1674
1675         # Look for embedded Huffington Post player
1676         mobj = re.search(
1677             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1678         if mobj is not None:
1679             return self.url_result(mobj.group('url'), 'HuffPost')
1680
1681         # Look for embed.ly
1682         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1683         if mobj is not None:
1684             return self.url_result(mobj.group('url'))
1685         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1686         if mobj is not None:
1687             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1688
1689         # Look for funnyordie embed
1690         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1691         if matches:
1692             return _playlist_from_matches(
1693                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1694
1695         # Look for BBC iPlayer embed
1696         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1697         if matches:
1698             return _playlist_from_matches(matches, ie='BBCCoUk')
1699
1700         # Look for embedded RUTV player
1701         rutv_url = RUTVIE._extract_url(webpage)
1702         if rutv_url:
1703             return self.url_result(rutv_url, 'RUTV')
1704
1705         # Look for embedded TVC player
1706         tvc_url = TVCIE._extract_url(webpage)
1707         if tvc_url:
1708             return self.url_result(tvc_url, 'TVC')
1709
1710         # Look for embedded SportBox player
1711         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1712         if sportbox_urls:
1713             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1714
1715         # Look for embedded PornHub player
1716         pornhub_url = PornHubIE._extract_url(webpage)
1717         if pornhub_url:
1718             return self.url_result(pornhub_url, 'PornHub')
1719
1720         # Look for embedded XHamster player
1721         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1722         if xhamster_urls:
1723             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1724
1725         # Look for embedded TNAFlixNetwork player
1726         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
1727         if tnaflix_urls:
1728             return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
1729
1730         # Look for embedded Tvigle player
1731         mobj = re.search(
1732             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1733         if mobj is not None:
1734             return self.url_result(mobj.group('url'), 'Tvigle')
1735
1736         # Look for embedded TED player
1737         mobj = re.search(
1738             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1739         if mobj is not None:
1740             return self.url_result(mobj.group('url'), 'TED')
1741
1742         # Look for embedded Ustream videos
1743         mobj = re.search(
1744             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1745         if mobj is not None:
1746             return self.url_result(mobj.group('url'), 'Ustream')
1747
1748         # Look for embedded arte.tv player
1749         mobj = re.search(
1750             r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
1751             webpage)
1752         if mobj is not None:
1753             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1754
1755         # Look for embedded francetv player
1756         mobj = re.search(
1757             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1758             webpage)
1759         if mobj is not None:
1760             return self.url_result(mobj.group('url'))
1761
1762         # Look for embedded smotri.com player
1763         smotri_url = SmotriIE._extract_url(webpage)
1764         if smotri_url:
1765             return self.url_result(smotri_url, 'Smotri')
1766
1767         # Look for embedded Myvi.ru player
1768         myvi_url = MyviIE._extract_url(webpage)
1769         if myvi_url:
1770             return self.url_result(myvi_url)
1771
1772         # Look for embedded soundcloud player
1773         mobj = re.search(
1774             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1775             webpage)
1776         if mobj is not None:
1777             url = unescapeHTML(mobj.group('url'))
1778             return self.url_result(url)
1779
1780         # Look for embedded vulture.com player
1781         mobj = re.search(
1782             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1783             webpage)
1784         if mobj is not None:
1785             url = unescapeHTML(mobj.group('url'))
1786             return self.url_result(url, ie='Vulture')
1787
1788         # Look for embedded mtvservices player
1789         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1790         if mtvservices_url:
1791             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
1792
1793         # Look for embedded yahoo player
1794         mobj = re.search(
1795             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1796             webpage)
1797         if mobj is not None:
1798             return self.url_result(mobj.group('url'), 'Yahoo')
1799
1800         # Look for embedded sbs.com.au player
1801         mobj = re.search(
1802             r'''(?x)
1803             (?:
1804                 <meta\s+property="og:video"\s+content=|
1805                 <iframe[^>]+?src=
1806             )
1807             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1808             webpage)
1809         if mobj is not None:
1810             return self.url_result(mobj.group('url'), 'SBS')
1811
1812         # Look for embedded Cinchcast player
1813         mobj = re.search(
1814             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1815             webpage)
1816         if mobj is not None:
1817             return self.url_result(mobj.group('url'), 'Cinchcast')
1818
1819         mobj = re.search(
1820             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1821             webpage)
1822         if not mobj:
1823             mobj = re.search(
1824                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1825                 webpage)
1826         if mobj is not None:
1827             return self.url_result(mobj.group('url'), 'MLB')
1828
1829         mobj = re.search(
1830             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1831             webpage)
1832         if mobj is not None:
1833             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1834
1835         mobj = re.search(
1836             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1837             webpage)
1838         if mobj is not None:
1839             return self.url_result(mobj.group('url'), 'Livestream')
1840
1841         # Look for Zapiks embed
1842         mobj = re.search(
1843             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1844         if mobj is not None:
1845             return self.url_result(mobj.group('url'), 'Zapiks')
1846
1847         # Look for Kaltura embeds
1848         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
1849                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
1850         if mobj is not None:
1851             return self.url_result(smuggle_url(
1852                 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
1853                 {'source_url': url}), 'Kaltura')
1854
1855         # Look for Eagle.Platform embeds
1856         mobj = re.search(
1857             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1858         if mobj is not None:
1859             return self.url_result(mobj.group('url'), 'EaglePlatform')
1860
1861         # Look for ClipYou (uses Eagle.Platform) embeds
1862         mobj = re.search(
1863             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1864         if mobj is not None:
1865             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1866
1867         # Look for Pladform embeds
1868         pladform_url = PladformIE._extract_url(webpage)
1869         if pladform_url:
1870             return self.url_result(pladform_url)
1871
1872         # Look for Videomore embeds
1873         videomore_url = VideomoreIE._extract_url(webpage)
1874         if videomore_url:
1875             return self.url_result(videomore_url)
1876
1877         # Look for Playwire embeds
1878         mobj = re.search(
1879             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1880         if mobj is not None:
1881             return self.url_result(mobj.group('url'))
1882
1883         # Look for 5min embeds
1884         mobj = re.search(
1885             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1886         if mobj is not None:
1887             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1888
1889         # Look for Crooks and Liars embeds
1890         mobj = re.search(
1891             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1892         if mobj is not None:
1893             return self.url_result(mobj.group('url'))
1894
1895         # Look for NBC Sports VPlayer embeds
1896         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1897         if nbc_sports_url:
1898             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1899
1900         # Look for Google Drive embeds
1901         google_drive_url = GoogleDriveIE._extract_url(webpage)
1902         if google_drive_url:
1903             return self.url_result(google_drive_url, 'GoogleDrive')
1904
1905         # Look for UDN embeds
1906         mobj = re.search(
1907             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
1908         if mobj is not None:
1909             return self.url_result(
1910                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1911
1912         # Look for Senate ISVP iframe
1913         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1914         if senate_isvp_url:
1915             return self.url_result(senate_isvp_url, 'SenateISVP')
1916
1917         # Look for Dailymotion Cloud videos
1918         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1919         if dmcloud_url:
1920             return self.url_result(dmcloud_url, 'DailymotionCloud')
1921
1922         # Look for OnionStudios embeds
1923         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1924         if onionstudios_url:
1925             return self.url_result(onionstudios_url)
1926
1927         # Look for SnagFilms embeds
1928         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1929         if snagfilms_url:
1930             return self.url_result(snagfilms_url)
1931
1932         # Look for JWPlatform embeds
1933         jwplatform_url = JWPlatformIE._extract_url(webpage)
1934         if jwplatform_url:
1935             return self.url_result(jwplatform_url, 'JWPlatform')
1936
1937         # Look for ScreenwaveMedia embeds
1938         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
1939         if mobj is not None:
1940             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
1941
1942         # Look for Digiteka embeds
1943         digiteka_url = DigitekaIE._extract_url(webpage)
1944         if digiteka_url:
1945             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
1946
1947         # Look for Limelight embeds
1948         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
1949         if mobj:
1950             lm = {
1951                 'Media': 'media',
1952                 'Channel': 'channel',
1953                 'ChannelList': 'channel_list',
1954             }
1955             return self.url_result('limelight:%s:%s' % (
1956                 lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
1957
1958         # Look for AdobeTVVideo embeds
1959         mobj = re.search(
1960             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1961             webpage)
1962         if mobj is not None:
1963             return self.url_result(
1964                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1965                 'AdobeTVVideo')
1966
1967         # Look for Vine embeds
1968         mobj = re.search(
1969             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
1970             webpage)
1971         if mobj is not None:
1972             return self.url_result(
1973                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
1974
1975         # Look for Instagram embeds
1976         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
1977         if instagram_embed_url is not None:
1978             return self.url_result(
1979                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
1980
1981         # Look for LiveLeak embeds
1982         liveleak_url = LiveLeakIE._extract_url(webpage)
1983         if liveleak_url:
1984             return self.url_result(liveleak_url, 'LiveLeak')
1985
1986         def check_video(vurl):
1987             if YoutubeIE.suitable(vurl):
1988                 return True
1989             vpath = compat_urlparse.urlparse(vurl).path
1990             vext = determine_ext(vpath)
1991             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1992
1993         def filter_video(urls):
1994             return list(filter(check_video, urls))
1995
1996         # Start with something easy: JW Player in SWFObject
1997         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1998         if not found:
1999             # Look for gorilla-vid style embedding
2000             found = filter_video(re.findall(r'''(?sx)
2001                 (?:
2002                     jw_plugins|
2003                     JWPlayerOptions|
2004                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
2005                 )
2006                 .*?
2007                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
2008         if not found:
2009             # Broaden the search a little bit
2010             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
2011         if not found:
2012             # Broaden the findall a little bit: JWPlayer JS loader
2013             found = filter_video(re.findall(
2014                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
2015         if not found:
2016             # Flow player
2017             found = filter_video(re.findall(r'''(?xs)
2018                 flowplayer\("[^"]+",\s*
2019                     \{[^}]+?\}\s*,
2020                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
2021                         ["']?url["']?\s*:\s*["']([^"']+)["']
2022             ''', webpage))
2023         if not found:
2024             # Cinerama player
2025             found = re.findall(
2026                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
2027         if not found:
2028             # Try to find twitter cards info
2029             found = filter_video(re.findall(
2030                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
2031         if not found:
2032             # We look for Open Graph info:
2033             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
2034             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
2035             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
2036             if m_video_type is not None:
2037                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
2038         if not found:
2039             # HTML5 video
2040             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
2041         if not found:
2042             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
2043             found = re.search(
2044                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
2045                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
2046                 webpage)
2047             if not found:
2048                 # Look also in Refresh HTTP header
2049                 refresh_header = head_response.headers.get('Refresh')
2050                 if refresh_header:
2051                     # In python 2 response HTTP headers are bytestrings
2052                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2053                         refresh_header = refresh_header.decode('iso-8859-1')
2054                     found = re.search(REDIRECT_REGEX, refresh_header)
2055             if found:
2056                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
2057                 self.report_following_redirect(new_url)
2058                 return {
2059                     '_type': 'url',
2060                     'url': new_url,
2061                 }
2062         if not found:
2063             raise UnsupportedError(url)
2064
2065         entries = []
2066         for video_url in found:
2067             video_url = unescapeHTML(video_url)
2068             video_url = video_url.replace('\\/', '/')
2069             video_url = compat_urlparse.urljoin(url, video_url)
2070             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
2071
2072             # Sometimes, jwplayer extraction will result in a YouTube URL
2073             if YoutubeIE.suitable(video_url):
2074                 entries.append(self.url_result(video_url, 'Youtube'))
2075                 continue
2076
2077             # here's a fun little line of code for you:
2078             video_id = os.path.splitext(video_id)[0]
2079
2080             entry_info_dict = {
2081                 'id': video_id,
2082                 'uploader': video_uploader,
2083                 'title': video_title,
2084                 'age_limit': age_limit,
2085             }
2086
2087             ext = determine_ext(video_url)
2088             if ext == 'smil':
2089                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
2090             elif ext == 'xspf':
2091                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
2092             elif ext == 'm3u8':
2093                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2094             elif ext == 'mpd':
2095                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2096             elif ext == 'f4m':
2097                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2098             else:
2099                 entry_info_dict['url'] = video_url
2100
2101             if entry_info_dict.get('formats'):
2102                 self._sort_formats(entry_info_dict['formats'])
2103
2104             entries.append(entry_info_dict)
2105
2106         if len(entries) == 1:
2107             return entries[0]
2108         else:
2109             for num, e in enumerate(entries, start=1):
2110                 # 'url' results don't have a title
2111                 if e.get('title') is not None:
2112                     e['title'] = '%s (%d)' % (e['title'], num)
2113             return {
2114                 '_type': 'playlist',
2115                 'entries': entries,
2116             }