youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7
   8 from .common import InfoExtractor
   9 from .youtube import YoutubeIE
  10 from ..compat import (
  11     compat_urllib_parse_unquote,
  12     compat_urllib_request,
  13     compat_urlparse,
  14     compat_xml_parse_error,
  15 )
  16 from ..utils import (
  17     determine_ext,
  18     ExtractorError,
  19     float_or_none,
  20     HEADRequest,
  21     is_html,
  22     orderedSet,
  23     parse_xml,
  24     smuggle_url,
  25     unescapeHTML,
  26     unified_strdate,
  27     unsmuggle_url,
  28     UnsupportedError,
  29     url_basename,
  30     xpath_text,
  31 )
  32 from .brightcove import BrightcoveIE
  33 from .nbc import NBCSportsVPlayerIE
  34 from .ooyala import OoyalaIE
  35 from .rutv import RUTVIE
  36 from .tvc import TVCIE
  37 from .sportbox import SportBoxEmbedIE
  38 from .smotri import SmotriIE
  39 from .myvi import MyviIE
  40 from .condenast import CondeNastIE
  41 from .udn import UDNEmbedIE
  42 from .senateisvp import SenateISVPIE
  43 from .bliptv import BlipTVIE
  44 from .svt import SVTIE
  45 from .pornhub import PornHubIE
  46 from .xhamster import XHamsterEmbedIE
  47 from .vimeo import VimeoIE
  48 from .dailymotion import DailymotionCloudIE
  49 from .onionstudios import OnionStudiosIE
  50 from .snagfilms import SnagFilmsEmbedIE
  51
  52
  53 class GenericIE(InfoExtractor):
  54     IE_DESC = 'Generic downloader that works on some sites'
  55     _VALID_URL = r'.*'
  56     IE_NAME = 'generic'
  57     _TESTS = [
  58         # Direct link to a video
  59         {
  60             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  61             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  62             'info_dict': {
  63                 'id': 'trailer',
  64                 'ext': 'mp4',
  65                 'title': 'trailer',
  66                 'upload_date': '20100513',
  67             }
  68         },
  69         # Direct link to media delivered compressed (until Accept-Encoding is *)
  70         {
  71             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
  72             'md5': '128c42e68b13950268b648275386fc74',
  73             'info_dict': {
  74                 'id': 'FictionJunction-Parallel_Hearts',
  75                 'ext': 'flac',
  76                 'title': 'FictionJunction-Parallel_Hearts',
  77                 'upload_date': '20140522',
  78             },
  79             'expected_warnings': [
  80                 'URL could be a direct video link, returning it as such.'
  81             ]
  82         },
  83         # Direct download with broken HEAD
  84         {
  85             'url': 'http://ai-radio.org:8000/radio.opus',
  86             'info_dict': {
  87                 'id': 'radio',
  88                 'ext': 'opus',
  89                 'title': 'radio',
  90             },
  91             'params': {
  92                 'skip_download': True,  # infinite live stream
  93             },
  94             'expected_warnings': [
  95                 r'501.*Not Implemented'
  96             ],
  97         },
  98         # Direct link with incorrect MIME type
  99         {
 100             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 101             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 102             'info_dict': {
 103                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 104                 'id': '5_Lennart_Poettering_-_Systemd',
 105                 'ext': 'webm',
 106                 'title': '5_Lennart_Poettering_-_Systemd',
 107                 'upload_date': '20141120',
 108             },
 109             'expected_warnings': [
 110                 'URL could be a direct video link, returning it as such.'
 111             ]
 112         },
 113         # RSS feed
 114         {
 115             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 116             'info_dict': {
 117                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 118                 'title': 'Zero Punctuation',
 119                 'description': 're:.*groundbreaking video review series.*'
 120             },
 121             'playlist_mincount': 11,
 122         },
 123         # RSS feed with enclosure
 124         {
 125             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 126             'info_dict': {
 127                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 128                 'ext': 'm4v',
 129                 'upload_date': '20150228',
 130                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 131             }
 132         },
 133         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
 134         {
 135             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
 136             'info_dict': {
 137                 'id': 'smil',
 138                 'ext': 'mp4',
 139                 'title': 'Automatics, robotics and biocybernetics',
 140                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
 141                 'formats': 'mincount:16',
 142                 'subtitles': 'mincount:1',
 143             },
 144             'params': {
 145                 'force_generic_extractor': True,
 146                 'skip_download': True,
 147             },
 148         },
 149         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
 150         {
 151             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
 152             'info_dict': {
 153                 'id': 'hds',
 154                 'ext': 'flv',
 155                 'title': 'hds',
 156                 'formats': 'mincount:1',
 157             },
 158             'params': {
 159                 'skip_download': True,
 160             },
 161         },
 162         # SMIL from https://www.restudy.dk/video/play/id/1637
 163         {
 164             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
 165             'info_dict': {
 166                 'id': 'video_1637',
 167                 'ext': 'flv',
 168                 'title': 'video_1637',
 169                 'formats': 'mincount:3',
 170             },
 171             'params': {
 172                 'skip_download': True,
 173             },
 174         },
 175         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
 176         {
 177             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
 178             'info_dict': {
 179                 'id': 'smil-service',
 180                 'ext': 'flv',
 181                 'title': 'smil-service',
 182                 'formats': 'mincount:1',
 183             },
 184             'params': {
 185                 'skip_download': True,
 186             },
 187         },
 188         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
 189         {
 190             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
 191             'info_dict': {
 192                 'id': '4719370',
 193                 'ext': 'mp4',
 194                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
 195                 'formats': 'mincount:3',
 196             },
 197             'params': {
 198                 'skip_download': True,
 199             },
 200         },
 201         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
 202         {
 203             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
 204             'info_dict': {
 205                 'id': 'mZlp2ctYIUEB',
 206                 'ext': 'mp4',
 207                 'title': 'Tikibad ontruimd wegens brand',
 208                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
 209                 'thumbnail': 're:^https?://.*\.jpg$',
 210                 'duration': 33,
 211             },
 212             'params': {
 213                 'skip_download': True,
 214             },
 215         },
 216         # google redirect
 217         {
 218             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 219             'info_dict': {
 220                 'id': 'cmQHVoWB5FY',
 221                 'ext': 'mp4',
 222                 'upload_date': '20130224',
 223                 'uploader_id': 'TheVerge',
 224                 'description': 're:^Chris Ziegler takes a look at the\.*',
 225                 'uploader': 'The Verge',
 226                 'title': 'First Firefox OS phones side-by-side',
 227             },
 228             'params': {
 229                 'skip_download': False,
 230             }
 231         },
 232         {
 233             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 234             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 235             'info_dict': {
 236                 'id': '13601338388002',
 237                 'ext': 'mp4',
 238                 'uploader': 'www.hodiho.fr',
 239                 'title': 'R\u00e9gis plante sa Jeep',
 240             }
 241         },
 242         # bandcamp page with custom domain
 243         {
 244             'add_ie': ['Bandcamp'],
 245             'url': 'http://bronyrock.com/track/the-pony-mash',
 246             'info_dict': {
 247                 'id': '3235767654',
 248                 'ext': 'mp3',
 249                 'title': 'The Pony Mash',
 250                 'uploader': 'M_Pallante',
 251             },
 252             'skip': 'There is a limit of 200 free downloads / month for the test song',
 253         },
 254         # embedded brightcove video
 255         # it also tests brightcove videos that need to set the 'Referer' in the
 256         # http requests
 257         {
 258             'add_ie': ['Brightcove'],
 259             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 260             'info_dict': {
 261                 'id': '2765128793001',
 262                 'ext': 'mp4',
 263                 'title': 'Le cours de bourse : l’analyse technique',
 264                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 265                 'uploader': 'BFM BUSINESS',
 266             },
 267             'params': {
 268                 'skip_download': True,
 269             },
 270         },
 271         {
 272             # https://github.com/rg3/youtube-dl/issues/2253
 273             'url': 'http://bcove.me/i6nfkrc3',
 274             'md5': '0ba9446db037002366bab3b3eb30c88c',
 275             'info_dict': {
 276                 'id': '3101154703001',
 277                 'ext': 'mp4',
 278                 'title': 'Still no power',
 279                 'uploader': 'thestar.com',
 280                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 281             },
 282             'add_ie': ['Brightcove'],
 283         },
 284         {
 285             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 286             'md5': 'fb973ecf6e4a78a67453647444222983',
 287             'info_dict': {
 288                 'id': '3414141473001',
 289                 'ext': 'mp4',
 290                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 291                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 292                 'uploader': 'Championat',
 293             },
 294         },
 295         {
 296             # https://github.com/rg3/youtube-dl/issues/3541
 297             'add_ie': ['Brightcove'],
 298             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 299             'info_dict': {
 300                 'id': '3866516442001',
 301                 'ext': 'mp4',
 302                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 303                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 304                 'uploader': 'SBS Broadcasting',
 305             },
 306             'skip': 'Restricted to Netherlands',
 307             'params': {
 308                 'skip_download': True,  # m3u8 download
 309             },
 310         },
 311         # ooyala video
 312         {
 313             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 314             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 315             'info_dict': {
 316                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 317                 'ext': 'mp4',
 318                 'title': '2cc213299525360.mov',  # that's what we get
 319             },
 320             'add_ie': ['Ooyala'],
 321         },
 322         {
 323             # ooyala video embedded with http://player.ooyala.com/iframe.js
 324             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
 325             'info_dict': {
 326                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
 327                 'ext': 'mp4',
 328                 'title': '"Steve Jobs: Man in the Machine" trailer',
 329                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
 330             },
 331             'params': {
 332                 'skip_download': True,
 333             },
 334         },
 335         # multiple ooyala embeds on SBN network websites
 336         {
 337             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 338             'info_dict': {
 339                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 340                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 341             },
 342             'playlist_mincount': 3,
 343             'params': {
 344                 'skip_download': True,
 345             },
 346             'add_ie': ['Ooyala'],
 347         },
 348         # embed.ly video
 349         {
 350             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 351             'info_dict': {
 352                 'id': '9ODmcdjQcHQ',
 353                 'ext': 'mp4',
 354                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 355                 'upload_date': '20140225',
 356                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 357                 'uploader': 'Tested',
 358                 'uploader_id': 'testedcom',
 359             },
 360             # No need to test YoutubeIE here
 361             'params': {
 362                 'skip_download': True,
 363             },
 364         },
 365         # funnyordie embed
 366         {
 367             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 368             'info_dict': {
 369                 'id': '18e820ec3f',
 370                 'ext': 'mp4',
 371                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 372                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 373             },
 374         },
 375         # RUTV embed
 376         {
 377             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 378             'info_dict': {
 379                 'id': '776940',
 380                 'ext': 'mp4',
 381                 'title': 'Охотское море стало целиком российским',
 382                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 383             },
 384             'params': {
 385                 # m3u8 download
 386                 'skip_download': True,
 387             },
 388         },
 389         # TVC embed
 390         {
 391             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 392             'info_dict': {
 393                 'id': '55304',
 394                 'ext': 'mp4',
 395                 'title': 'Дошкольное воспитание',
 396             },
 397         },
 398         # SportBox embed
 399         {
 400             'url': 'http://www.vestifinance.ru/articles/25753',
 401             'info_dict': {
 402                 'id': '25753',
 403                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 404             },
 405             'playlist': [{
 406                 'info_dict': {
 407                     'id': '370908',
 408                     'title': 'Госзаказ. День 3',
 409                     'ext': 'mp4',
 410                 }
 411             }, {
 412                 'info_dict': {
 413                     'id': '370905',
 414                     'title': 'Госзаказ. День 2',
 415                     'ext': 'mp4',
 416                 }
 417             }, {
 418                 'info_dict': {
 419                     'id': '370902',
 420                     'title': 'Госзаказ. День 1',
 421                     'ext': 'mp4',
 422                 }
 423             }],
 424             'params': {
 425                 # m3u8 download
 426                 'skip_download': True,
 427             },
 428         },
 429         # Myvi.ru embed
 430         {
 431             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 432             'info_dict': {
 433                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 434                 'ext': 'mp4',
 435                 'title': 'Ужастики, русский трейлер (2015)',
 436                 'thumbnail': 're:^https?://.*\.jpg$',
 437                 'duration': 153,
 438             }
 439         },
 440         # XHamster embed
 441         {
 442             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 443             'info_dict': {
 444                 'id': 'showthread',
 445                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 446             },
 447             'playlist_mincount': 7,
 448         },
 449         # Embedded TED video
 450         {
 451             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 452             'md5': '65fdff94098e4a607385a60c5177c638',
 453             'info_dict': {
 454                 'id': '1969',
 455                 'ext': 'mp4',
 456                 'title': 'Hidden miracles of the natural world',
 457                 'uploader': 'Louie Schwartzberg',
 458                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 459             }
 460         },
 461         # Embedded Ustream video
 462         {
 463             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 464             'md5': '27b99cdb639c9b12a79bca876a073417',
 465             'info_dict': {
 466                 'id': '45734260',
 467                 'ext': 'flv',
 468                 'uploader': 'AU SPA:  The NSA and Privacy',
 469                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 470             }
 471         },
 472         # nowvideo embed hidden behind percent encoding
 473         {
 474             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 475             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 476             'info_dict': {
 477                 'id': '06e53103ca9aa',
 478                 'ext': 'flv',
 479                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 480                 'description': 'No description',
 481             },
 482         },
 483         # arte embed
 484         {
 485             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 486             'md5': '7653032cbb25bf6c80d80f217055fa43',
 487             'info_dict': {
 488                 'id': '048195-004_PLUS7-F',
 489                 'ext': 'flv',
 490                 'title': 'X:enius',
 491                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 492                 'upload_date': '20140320',
 493             },
 494             'params': {
 495                 'skip_download': 'Requires rtmpdump'
 496             }
 497         },
 498         # francetv embed
 499         {
 500             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 501             'info_dict': {
 502                 'id': 'EV_30231',
 503                 'ext': 'mp4',
 504                 'title': 'Alcaline, le concert avec Calogero',
 505                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 506                 'upload_date': '20150226',
 507                 'timestamp': 1424989860,
 508                 'duration': 5400,
 509             },
 510             'params': {
 511                 # m3u8 downloads
 512                 'skip_download': True,
 513             },
 514             'expected_warnings': [
 515                 'Forbidden'
 516             ]
 517         },
 518         # Condé Nast embed
 519         {
 520             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 521             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 522             'info_dict': {
 523                 'id': '53501be369702d3275860000',
 524                 'ext': 'mp4',
 525                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 526             }
 527         },
 528         # Dailymotion embed
 529         {
 530             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 531             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 532             'info_dict': {
 533                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 534                 'ext': 'mp4',
 535                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 536                 'uploader': 'Spi0n',
 537             },
 538             'add_ie': ['Dailymotion'],
 539         },
 540         # YouTube embed
 541         {
 542             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 543             'info_dict': {
 544                 'id': 'FXRb4ykk4S0',
 545                 'ext': 'mp4',
 546                 'title': 'The NBL Auction 2014',
 547                 'uploader': 'BADMINTON England',
 548                 'uploader_id': 'BADMINTONEvents',
 549                 'upload_date': '20140603',
 550                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 551             },
 552             'add_ie': ['Youtube'],
 553             'params': {
 554                 'skip_download': True,
 555             }
 556         },
 557         # MTVServices embed
 558         {
 559             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 560             'md5': '35727f82f58c76d996fc188f9755b0d5',
 561             'info_dict': {
 562                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 563                 'ext': 'mp4',
 564                 'title': 'Review',
 565                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 566             },
 567         },
 568         # YouTube embed via <data-embed-url="">
 569         {
 570             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 571             'info_dict': {
 572                 'id': '4vAffPZIT44',
 573                 'ext': 'mp4',
 574                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 575                 'uploader': 'Gameloft',
 576                 'uploader_id': 'gameloft',
 577                 'upload_date': '20140828',
 578                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 579             },
 580             'params': {
 581                 'skip_download': True,
 582             }
 583         },
 584         # Camtasia studio
 585         {
 586             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 587             'playlist': [{
 588                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 589                 'info_dict': {
 590                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 591                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 592                     'ext': 'flv',
 593                     'duration': 2235.90,
 594                 }
 595             }, {
 596                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 597                 'info_dict': {
 598                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 599                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 600                     'ext': 'flv',
 601                     'duration': 2235.93,
 602                 }
 603             }],
 604             'info_dict': {
 605                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 606             }
 607         },
 608         # Flowplayer
 609         {
 610             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 611             'md5': '9d65602bf31c6e20014319c7d07fba27',
 612             'info_dict': {
 613                 'id': '5123ea6d5e5a7',
 614                 'ext': 'mp4',
 615                 'age_limit': 18,
 616                 'uploader': 'www.handjobhub.com',
 617                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 618             }
 619         },
 620         # Multiple brightcove videos
 621         # https://github.com/rg3/youtube-dl/issues/2283
 622         {
 623             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 624             'info_dict': {
 625                 'id': 'always-never',
 626                 'title': 'Always / Never - The New Yorker',
 627             },
 628             'playlist_count': 3,
 629             'params': {
 630                 'extract_flat': False,
 631                 'skip_download': True,
 632             }
 633         },
 634         # MLB embed
 635         {
 636             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 637             'md5': '96f09a37e44da40dd083e12d9a683327',
 638             'info_dict': {
 639                 'id': '33322633',
 640                 'ext': 'mp4',
 641                 'title': 'Ump changes call to ball',
 642                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 643                 'duration': 48,
 644                 'timestamp': 1401537900,
 645                 'upload_date': '20140531',
 646                 'thumbnail': 're:^https?://.*\.jpg$',
 647             },
 648         },
 649         # Wistia embed
 650         {
 651             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 652             'md5': '8788b683c777a5cf25621eaf286d0c23',
 653             'info_dict': {
 654                 'id': '1cfaf6b7ea',
 655                 'ext': 'mov',
 656                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 657                 'duration': 643.0,
 658                 'filesize': 182808282,
 659                 'uploader': 'education-portal.com',
 660             },
 661         },
 662         {
 663             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 664             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 665             'info_dict': {
 666                 'id': 'uxjb0lwrcz',
 667                 'ext': 'mp4',
 668                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 669                 'duration': 1715.0,
 670                 'uploader': 'thoughtworks.wistia.com',
 671             },
 672         },
 673         # Soundcloud embed
 674         {
 675             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 676             'info_dict': {
 677                 'id': '174391317',
 678                 'ext': 'mp3',
 679                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 680                 'uploader': 'Sophos Security',
 681                 'title': 'Chet Chat 171 - Oct 29, 2014',
 682                 'upload_date': '20141029',
 683             }
 684         },
 685         # Livestream embed
 686         {
 687             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 688             'info_dict': {
 689                 'id': '67864563',
 690                 'ext': 'flv',
 691                 'upload_date': '20141112',
 692                 'title': 'Rosetta #CometLanding webcast HL 10',
 693             }
 694         },
 695         # LazyYT
 696         {
 697             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 698             'info_dict': {
 699                 'id': '1986',
 700                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 701             },
 702             'playlist_mincount': 2,
 703         },
 704         # Cinchcast embed
 705         {
 706             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 707             'info_dict': {
 708                 'id': '7141703',
 709                 'ext': 'mp3',
 710                 'upload_date': '20141126',
 711                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 712             }
 713         },
 714         # Cinerama player
 715         {
 716             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 717             'info_dict': {
 718                 'id': '730m_DandD_1901_512k',
 719                 'ext': 'mp4',
 720                 'uploader': 'www.abc.net.au',
 721                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 722             }
 723         },
 724         # embedded viddler video
 725         {
 726             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 727             'info_dict': {
 728                 'id': '4d03aad9',
 729                 'ext': 'mp4',
 730                 'uploader': 'deadspin',
 731                 'title': 'WALL-TO-GORTAT',
 732                 'timestamp': 1422285291,
 733                 'upload_date': '20150126',
 734             },
 735             'add_ie': ['Viddler'],
 736         },
 737         # Libsyn embed
 738         {
 739             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 740             'info_dict': {
 741                 'id': '3377616',
 742                 'ext': 'mp3',
 743                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 744                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 745                 'upload_date': '20150220',
 746             },
 747         },
 748         # jwplayer YouTube
 749         {
 750             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 751             'info_dict': {
 752                 'id': 'Mrj4DVp2zeA',
 753                 'ext': 'mp4',
 754                 'upload_date': '20150212',
 755                 'uploader': 'The National Archives UK',
 756                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 757                 'uploader_id': 'NationalArchives08',
 758                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 759             },
 760         },
 761         # rtl.nl embed
 762         {
 763             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 764             'playlist_mincount': 5,
 765             'info_dict': {
 766                 'id': 'aanslagen-kopenhagen',
 767                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 768             }
 769         },
 770         # Zapiks embed
 771         {
 772             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 773             'info_dict': {
 774                 'id': '118046',
 775                 'ext': 'mp4',
 776                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 777             }
 778         },
 779         # Kaltura embed
 780         {
 781             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 782             'info_dict': {
 783                 'id': '1_eergr3h1',
 784                 'ext': 'mp4',
 785                 'upload_date': '20150226',
 786                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 787                 'timestamp': int,
 788                 'title': 'John Carlson Postgame 2/25/15',
 789             },
 790         },
 791         # Kaltura embed (different embed code)
 792         {
 793             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 794             'info_dict': {
 795                 'id': '1_a52wc67y',
 796                 'ext': 'flv',
 797                 'upload_date': '20150127',
 798                 'uploader_id': 'PremierMedia',
 799                 'timestamp': int,
 800                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
 801             },
 802         },
 803         # Eagle.Platform embed (generic URL)
 804         {
 805             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 806             'info_dict': {
 807                 'id': '227304',
 808                 'ext': 'mp4',
 809                 'title': 'Навальный вышел на свободу',
 810                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 811                 'thumbnail': 're:^https?://.*\.jpg$',
 812                 'duration': 87,
 813                 'view_count': int,
 814                 'age_limit': 0,
 815             },
 816         },
 817         # ClipYou (Eagle.Platform) embed (custom URL)
 818         {
 819             'url': 'http://muz-tv.ru/play/7129/',
 820             'info_dict': {
 821                 'id': '12820',
 822                 'ext': 'mp4',
 823                 'title': "'O Sole Mio",
 824                 'thumbnail': 're:^https?://.*\.jpg$',
 825                 'duration': 216,
 826                 'view_count': int,
 827             },
 828         },
 829         # Pladform embed
 830         {
 831             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 832             'info_dict': {
 833                 'id': '100183293',
 834                 'ext': 'mp4',
 835                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 836                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 837                 'thumbnail': 're:^https?://.*\.jpg$',
 838                 'duration': 694,
 839                 'age_limit': 0,
 840             },
 841         },
 842         # Playwire embed
 843         {
 844             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 845             'info_dict': {
 846                 'id': '3519514',
 847                 'ext': 'mp4',
 848                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 849                 'thumbnail': 're:^https?://.*\.png$',
 850                 'duration': 45.115,
 851             },
 852         },
 853         # 5min embed
 854         {
 855             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 856             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 857             'info_dict': {
 858                 'id': '518726732',
 859                 'ext': 'mp4',
 860                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 861             },
 862         },
 863         # SVT embed
 864         {
 865             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 866             'info_dict': {
 867                 'id': '2900353',
 868                 'ext': 'flv',
 869                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 870                 'duration': 27,
 871                 'age_limit': 0,
 872             },
 873         },
 874         # Crooks and Liars embed
 875         {
 876             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 877             'info_dict': {
 878                 'id': '8RUoRhRi',
 879                 'ext': 'mp4',
 880                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 881                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 882                 'timestamp': 1428207000,
 883                 'upload_date': '20150405',
 884                 'uploader': 'Heather',
 885             },
 886         },
 887         # Crooks and Liars external embed
 888         {
 889             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 890             'info_dict': {
 891                 'id': 'MTE3MjUtMzQ2MzA',
 892                 'ext': 'mp4',
 893                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 894                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 895                 'timestamp': 1265032391,
 896                 'upload_date': '20100201',
 897                 'uploader': 'Heather',
 898             },
 899         },
 900         # NBC Sports vplayer embed
 901         {
 902             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 903             'info_dict': {
 904                 'id': 'ln7x1qSThw4k',
 905                 'ext': 'flv',
 906                 'title': "PFT Live: New leader in the 'new-look' defense",
 907                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 908             },
 909         },
 910         # UDN embed
 911         {
 912             'url': 'http://www.udn.com/news/story/7314/822787',
 913             'md5': 'fd2060e988c326991037b9aff9df21a6',
 914             'info_dict': {
 915                 'id': '300346',
 916                 'ext': 'mp4',
 917                 'title': '中一中男師變性 全校師生力挺',
 918                 'thumbnail': 're:^https?://.*\.jpg$',
 919             }
 920         },
 921         # Ooyala embed
 922         {
 923             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 924             'info_dict': {
 925                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 926                 'ext': 'mp4',
 927                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
 928                 'title': 'This is what separates the Excel masters from the wannabes',
 929             },
 930             'params': {
 931                 # m3u8 downloads
 932                 'skip_download': True,
 933             }
 934         },
 935         # Contains a SMIL manifest
 936         {
 937             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
 938             'info_dict': {
 939                 'id': 'file',
 940                 'ext': 'flv',
 941                 'title': '+ Football: Lottery Champions League Europe',
 942                 'uploader': 'www.telewebion.com',
 943             },
 944             'params': {
 945                 # rtmpe downloads
 946                 'skip_download': True,
 947             }
 948         },
 949         # Brightcove URL in single quotes
 950         {
 951             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
 952             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
 953             'info_dict': {
 954                 'id': '4255764656001',
 955                 'ext': 'mp4',
 956                 'title': 'SN Presents: Russell Martin, World Citizen',
 957                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
 958                 'uploader': 'Rogers Sportsnet',
 959             },
 960         },
 961         # Dailymotion Cloud video
 962         {
 963             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
 964             'md5': '49444254273501a64675a7e68c502681',
 965             'info_dict': {
 966                 'id': '5585de919473990de4bee11b',
 967                 'ext': 'mp4',
 968                 'title': 'Le débat',
 969                 'thumbnail': 're:^https?://.*\.jpe?g$',
 970             }
 971         },
 972         # OnionStudios embed
 973         {
 974             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
 975             'info_dict': {
 976                 'id': '2855',
 977                 'ext': 'mp4',
 978                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
 979                 'thumbnail': 're:^https?://.*\.jpe?g$',
 980                 'uploader': 'ClickHole',
 981                 'uploader_id': 'clickhole',
 982             }
 983         },
 984         # SnagFilms embed
 985         {
 986             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
 987             'info_dict': {
 988                 'id': '74849a00-85a9-11e1-9660-123139220831',
 989                 'ext': 'mp4',
 990                 'title': '#whilewewatch',
 991             }
 992         },
 993         # AdobeTVVideo embed
 994         {
 995             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
 996             'md5': '43662b577c018ad707a63766462b1e87',
 997             'info_dict': {
 998                 'id': '2456',
 999                 'ext': 'mp4',
1000                 'title': 'New experience with Acrobat DC',
1001                 'description': 'New experience with Acrobat DC',
1002                 'duration': 248.667,
1003             },
1004         }
1005     ]
1006
1007     def report_following_redirect(self, new_url):
1008         """Report information extraction."""
1009         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1010
1011     def _extract_rss(self, url, video_id, doc):
1012         playlist_title = doc.find('./channel/title').text
1013         playlist_desc_el = doc.find('./channel/description')
1014         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1015
1016         entries = []
1017         for it in doc.findall('./channel/item'):
1018             next_url = xpath_text(it, 'link', fatal=False)
1019             if not next_url:
1020                 enclosure_nodes = it.findall('./enclosure')
1021                 for e in enclosure_nodes:
1022                     next_url = e.attrib.get('url')
1023                     if next_url:
1024                         break
1025
1026             if not next_url:
1027                 continue
1028
1029             entries.append({
1030                 '_type': 'url',
1031                 'url': next_url,
1032                 'title': it.find('title').text,
1033             })
1034
1035         return {
1036             '_type': 'playlist',
1037             'id': url,
1038             'title': playlist_title,
1039             'description': playlist_desc,
1040             'entries': entries,
1041         }
1042
1043     def _extract_camtasia(self, url, video_id, webpage):
1044         """ Returns None if no camtasia video can be found. """
1045
1046         camtasia_cfg = self._search_regex(
1047             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1048             webpage, 'camtasia configuration file', default=None)
1049         if camtasia_cfg is None:
1050             return None
1051
1052         title = self._html_search_meta('DC.title', webpage, fatal=True)
1053
1054         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1055         camtasia_cfg = self._download_xml(
1056             camtasia_url, video_id,
1057             note='Downloading camtasia configuration',
1058             errnote='Failed to download camtasia configuration')
1059         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1060
1061         entries = []
1062         for n in fileset_node.getchildren():
1063             url_n = n.find('./uri')
1064             if url_n is None:
1065                 continue
1066
1067             entries.append({
1068                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1069                 'title': '%s - %s' % (title, n.tag),
1070                 'url': compat_urlparse.urljoin(url, url_n.text),
1071                 'duration': float_or_none(n.find('./duration').text),
1072             })
1073
1074         return {
1075             '_type': 'playlist',
1076             'entries': entries,
1077             'title': title,
1078         }
1079
1080     def _real_extract(self, url):
1081         if url.startswith('//'):
1082             return {
1083                 '_type': 'url',
1084                 'url': self.http_scheme() + url,
1085             }
1086
1087         parsed_url = compat_urlparse.urlparse(url)
1088         if not parsed_url.scheme:
1089             default_search = self._downloader.params.get('default_search')
1090             if default_search is None:
1091                 default_search = 'fixup_error'
1092
1093             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1094                 if '/' in url:
1095                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1096                     return self.url_result('http://' + url)
1097                 elif default_search != 'fixup_error':
1098                     if default_search == 'auto_warning':
1099                         if re.match(r'^(?:url|URL)$', url):
1100                             raise ExtractorError(
1101                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1102                                 expected=True)
1103                         else:
1104                             self._downloader.report_warning(
1105                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1106                     return self.url_result('ytsearch:' + url)
1107
1108             if default_search in ('error', 'fixup_error'):
1109                 raise ExtractorError(
1110                     '%r is not a valid URL. '
1111                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1112                     % (url, url), expected=True)
1113             else:
1114                 if ':' not in default_search:
1115                     default_search += ':'
1116                 return self.url_result(default_search + url)
1117
1118         url, smuggled_data = unsmuggle_url(url)
1119         force_videoid = None
1120         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1121         if smuggled_data and 'force_videoid' in smuggled_data:
1122             force_videoid = smuggled_data['force_videoid']
1123             video_id = force_videoid
1124         else:
1125             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1126
1127         self.to_screen('%s: Requesting header' % video_id)
1128
1129         head_req = HEADRequest(url)
1130         head_response = self._request_webpage(
1131             head_req, video_id,
1132             note=False, errnote='Could not send HEAD request to %s' % url,
1133             fatal=False)
1134
1135         if head_response is not False:
1136             # Check for redirect
1137             new_url = head_response.geturl()
1138             if url != new_url:
1139                 self.report_following_redirect(new_url)
1140                 if force_videoid:
1141                     new_url = smuggle_url(
1142                         new_url, {'force_videoid': force_videoid})
1143                 return self.url_result(new_url)
1144
1145         full_response = None
1146         if head_response is False:
1147             request = compat_urllib_request.Request(url)
1148             request.add_header('Accept-Encoding', '*')
1149             full_response = self._request_webpage(request, video_id)
1150             head_response = full_response
1151
1152         # Check for direct link to a video
1153         content_type = head_response.headers.get('Content-Type', '')
1154         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
1155         if m:
1156             upload_date = unified_strdate(
1157                 head_response.headers.get('Last-Modified'))
1158             return {
1159                 'id': video_id,
1160                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1161                 'direct': True,
1162                 'formats': [{
1163                     'format_id': m.group('format_id'),
1164                     'url': url,
1165                     'vcodec': 'none' if m.group('type') == 'audio' else None
1166                 }],
1167                 'upload_date': upload_date,
1168             }
1169
1170         if not self._downloader.params.get('test', False) and not is_intentional:
1171             force = self._downloader.params.get('force_generic_extractor', False)
1172             self._downloader.report_warning(
1173                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1174
1175         if not full_response:
1176             request = compat_urllib_request.Request(url)
1177             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1178             # making it impossible to download only chunk of the file (yet we need only 512kB to
1179             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1180             # that will always result in downloading the whole file that is not desirable.
1181             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1182             # to accept raw bytes and being able to download only a chunk.
1183             # It may probably better to solve this by checking Content-Type for application/octet-stream
1184             # after HEAD request finishes, but not sure if we can rely on this.
1185             request.add_header('Accept-Encoding', '*')
1186             full_response = self._request_webpage(request, video_id)
1187
1188         # Maybe it's a direct link to a video?
1189         # Be careful not to download the whole thing!
1190         first_bytes = full_response.read(512)
1191         if not is_html(first_bytes):
1192             self._downloader.report_warning(
1193                 'URL could be a direct video link, returning it as such.')
1194             upload_date = unified_strdate(
1195                 head_response.headers.get('Last-Modified'))
1196             return {
1197                 'id': video_id,
1198                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1199                 'direct': True,
1200                 'url': url,
1201                 'upload_date': upload_date,
1202             }
1203
1204         webpage = self._webpage_read_content(
1205             full_response, url, video_id, prefix=first_bytes)
1206
1207         self.report_extraction(video_id)
1208
1209         # Is it an RSS feed or a SMIL file?
1210         try:
1211             doc = parse_xml(webpage)
1212             if doc.tag == 'rss':
1213                 return self._extract_rss(url, video_id, doc)
1214             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1215                 return self._parse_smil(doc, url, video_id)
1216             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1217                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1218         except compat_xml_parse_error:
1219             pass
1220
1221         # Is it a Camtasia project?
1222         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1223         if camtasia_res is not None:
1224             return camtasia_res
1225
1226         # Sometimes embedded video player is hidden behind percent encoding
1227         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1228         # Unescaping the whole page allows to handle those cases in a generic way
1229         webpage = compat_urllib_parse_unquote(webpage)
1230
1231         # it's tempting to parse this further, but you would
1232         # have to take into account all the variations like
1233         #   Video Title - Site Name
1234         #   Site Name | Video Title
1235         #   Video Title - Tagline | Site Name
1236         # and so on and so forth; it's just not practical
1237         video_title = self._html_search_regex(
1238             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1239             default='video')
1240
1241         # Try to detect age limit automatically
1242         age_limit = self._rta_search(webpage)
1243         # And then there are the jokers who advertise that they use RTA,
1244         # but actually don't.
1245         AGE_LIMIT_MARKERS = [
1246             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1247         ]
1248         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1249             age_limit = 18
1250
1251         # video uploader is domain name
1252         video_uploader = self._search_regex(
1253             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1254
1255         # Helper method
1256         def _playlist_from_matches(matches, getter=None, ie=None):
1257             urlrs = orderedSet(
1258                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1259                 for m in matches)
1260             return self.playlist_result(
1261                 urlrs, playlist_id=video_id, playlist_title=video_title)
1262
1263         # Look for BrightCove:
1264         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1265         if bc_urls:
1266             self.to_screen('Brightcove video detected.')
1267             entries = [{
1268                 '_type': 'url',
1269                 'url': smuggle_url(bc_url, {'Referer': url}),
1270                 'ie_key': 'Brightcove'
1271             } for bc_url in bc_urls]
1272
1273             return {
1274                 '_type': 'playlist',
1275                 'title': video_title,
1276                 'id': video_id,
1277                 'entries': entries,
1278             }
1279
1280         # Look for embedded rtl.nl player
1281         matches = re.findall(
1282             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1283             webpage)
1284         if matches:
1285             return _playlist_from_matches(matches, ie='RtlNl')
1286
1287         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1288         if vimeo_url is not None:
1289             return self.url_result(vimeo_url)
1290
1291         vid_me_embed_url = self._search_regex(
1292             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1293             webpage, 'vid.me embed', default=None)
1294         if vid_me_embed_url is not None:
1295             return self.url_result(vid_me_embed_url, 'Vidme')
1296
1297         # Look for embedded YouTube player
1298         matches = re.findall(r'''(?x)
1299             (?:
1300                 <iframe[^>]+?src=|
1301                 data-video-url=|
1302                 <embed[^>]+?src=|
1303                 embedSWF\(?:\s*|
1304                 new\s+SWFObject\(
1305             )
1306             (["\'])
1307                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1308                 (?:embed|v|p)/.+?)
1309             \1''', webpage)
1310         if matches:
1311             return _playlist_from_matches(
1312                 matches, lambda m: unescapeHTML(m[1]))
1313
1314         # Look for lazyYT YouTube embed
1315         matches = re.findall(
1316             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1317         if matches:
1318             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1319
1320         # Look for embedded Dailymotion player
1321         matches = re.findall(
1322             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1323         if matches:
1324             return _playlist_from_matches(
1325                 matches, lambda m: unescapeHTML(m[1]))
1326
1327         # Look for embedded Dailymotion playlist player (#3822)
1328         m = re.search(
1329             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1330         if m:
1331             playlists = re.findall(
1332                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1333             if playlists:
1334                 return _playlist_from_matches(
1335                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1336
1337         # Look for embedded Wistia player
1338         match = re.search(
1339             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1340         if match:
1341             embed_url = self._proto_relative_url(
1342                 unescapeHTML(match.group('url')))
1343             return {
1344                 '_type': 'url_transparent',
1345                 'url': embed_url,
1346                 'ie_key': 'Wistia',
1347                 'uploader': video_uploader,
1348                 'title': video_title,
1349                 'id': video_id,
1350             }
1351
1352         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1353         if match:
1354             return {
1355                 '_type': 'url_transparent',
1356                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1357                 'ie_key': 'Wistia',
1358                 'uploader': video_uploader,
1359                 'title': video_title,
1360                 'id': match.group('id')
1361             }
1362
1363         # Look for embedded blip.tv player
1364         bliptv_url = BlipTVIE._extract_url(webpage)
1365         if bliptv_url:
1366             return self.url_result(bliptv_url, 'BlipTV')
1367
1368         # Look for SVT player
1369         svt_url = SVTIE._extract_url(webpage)
1370         if svt_url:
1371             return self.url_result(svt_url, 'SVT')
1372
1373         # Look for embedded condenast player
1374         matches = re.findall(
1375             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1376             webpage)
1377         if matches:
1378             return {
1379                 '_type': 'playlist',
1380                 'entries': [{
1381                     '_type': 'url',
1382                     'ie_key': 'CondeNast',
1383                     'url': ma,
1384                 } for ma in matches],
1385                 'title': video_title,
1386                 'id': video_id,
1387             }
1388
1389         # Look for Bandcamp pages with custom domain
1390         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1391         if mobj is not None:
1392             burl = unescapeHTML(mobj.group(1))
1393             # Don't set the extractor because it can be a track url or an album
1394             return self.url_result(burl)
1395
1396         # Look for embedded Vevo player
1397         mobj = re.search(
1398             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1399         if mobj is not None:
1400             return self.url_result(mobj.group('url'))
1401
1402         # Look for embedded Viddler player
1403         mobj = re.search(
1404             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1405             webpage)
1406         if mobj is not None:
1407             return self.url_result(mobj.group('url'))
1408
1409         # Look for NYTimes player
1410         mobj = re.search(
1411             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1412             webpage)
1413         if mobj is not None:
1414             return self.url_result(mobj.group('url'))
1415
1416         # Look for Libsyn player
1417         mobj = re.search(
1418             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1419         if mobj is not None:
1420             return self.url_result(mobj.group('url'))
1421
1422         # Look for Ooyala videos
1423         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1424                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1425                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1426                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1427         if mobj is not None:
1428             return OoyalaIE._build_url_result(mobj.group('ec'))
1429
1430         # Look for multiple Ooyala embeds on SBN network websites
1431         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1432         if mobj is not None:
1433             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1434             if embeds:
1435                 return _playlist_from_matches(
1436                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1437
1438         # Look for Aparat videos
1439         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1440         if mobj is not None:
1441             return self.url_result(mobj.group(1), 'Aparat')
1442
1443         # Look for MPORA videos
1444         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1445         if mobj is not None:
1446             return self.url_result(mobj.group(1), 'Mpora')
1447
1448         # Look for embedded NovaMov-based player
1449         mobj = re.search(
1450             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1451                     (?P<url>http://(?:(?:embed|www)\.)?
1452                         (?:novamov\.com|
1453                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1454                            videoweed\.(?:es|com)|
1455                            movshare\.(?:net|sx|ag)|
1456                            divxstage\.(?:eu|net|ch|co|at|ag))
1457                         /embed\.php.+?)\1''', webpage)
1458         if mobj is not None:
1459             return self.url_result(mobj.group('url'))
1460
1461         # Look for embedded Facebook player
1462         mobj = re.search(
1463             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1464         if mobj is not None:
1465             return self.url_result(mobj.group('url'), 'Facebook')
1466
1467         # Look for embedded VK player
1468         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1469         if mobj is not None:
1470             return self.url_result(mobj.group('url'), 'VK')
1471
1472         # Look for embedded ivi player
1473         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1474         if mobj is not None:
1475             return self.url_result(mobj.group('url'), 'Ivi')
1476
1477         # Look for embedded Huffington Post player
1478         mobj = re.search(
1479             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1480         if mobj is not None:
1481             return self.url_result(mobj.group('url'), 'HuffPost')
1482
1483         # Look for embed.ly
1484         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1485         if mobj is not None:
1486             return self.url_result(mobj.group('url'))
1487         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1488         if mobj is not None:
1489             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1490
1491         # Look for funnyordie embed
1492         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1493         if matches:
1494             return _playlist_from_matches(
1495                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1496
1497         # Look for BBC iPlayer embed
1498         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1499         if matches:
1500             return _playlist_from_matches(matches, ie='BBCCoUk')
1501
1502         # Look for embedded RUTV player
1503         rutv_url = RUTVIE._extract_url(webpage)
1504         if rutv_url:
1505             return self.url_result(rutv_url, 'RUTV')
1506
1507         # Look for embedded TVC player
1508         tvc_url = TVCIE._extract_url(webpage)
1509         if tvc_url:
1510             return self.url_result(tvc_url, 'TVC')
1511
1512         # Look for embedded SportBox player
1513         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1514         if sportbox_urls:
1515             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1516
1517         # Look for embedded PornHub player
1518         pornhub_url = PornHubIE._extract_url(webpage)
1519         if pornhub_url:
1520             return self.url_result(pornhub_url, 'PornHub')
1521
1522         # Look for embedded XHamster player
1523         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1524         if xhamster_urls:
1525             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1526
1527         # Look for embedded Tvigle player
1528         mobj = re.search(
1529             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1530         if mobj is not None:
1531             return self.url_result(mobj.group('url'), 'Tvigle')
1532
1533         # Look for embedded TED player
1534         mobj = re.search(
1535             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1536         if mobj is not None:
1537             return self.url_result(mobj.group('url'), 'TED')
1538
1539         # Look for embedded Ustream videos
1540         mobj = re.search(
1541             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1542         if mobj is not None:
1543             return self.url_result(mobj.group('url'), 'Ustream')
1544
1545         # Look for embedded arte.tv player
1546         mobj = re.search(
1547             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1548             webpage)
1549         if mobj is not None:
1550             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1551
1552         # Look for embedded francetv player
1553         mobj = re.search(
1554             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1555             webpage)
1556         if mobj is not None:
1557             return self.url_result(mobj.group('url'))
1558
1559         # Look for embedded smotri.com player
1560         smotri_url = SmotriIE._extract_url(webpage)
1561         if smotri_url:
1562             return self.url_result(smotri_url, 'Smotri')
1563
1564         # Look for embedded Myvi.ru player
1565         myvi_url = MyviIE._extract_url(webpage)
1566         if myvi_url:
1567             return self.url_result(myvi_url)
1568
1569         # Look for embedded soundcloud player
1570         mobj = re.search(
1571             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1572             webpage)
1573         if mobj is not None:
1574             url = unescapeHTML(mobj.group('url'))
1575             return self.url_result(url)
1576
1577         # Look for embedded vulture.com player
1578         mobj = re.search(
1579             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1580             webpage)
1581         if mobj is not None:
1582             url = unescapeHTML(mobj.group('url'))
1583             return self.url_result(url, ie='Vulture')
1584
1585         # Look for embedded mtvservices player
1586         mobj = re.search(
1587             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1588             webpage)
1589         if mobj is not None:
1590             url = unescapeHTML(mobj.group('url'))
1591             return self.url_result(url, ie='MTVServicesEmbedded')
1592
1593         # Look for embedded yahoo player
1594         mobj = re.search(
1595             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1596             webpage)
1597         if mobj is not None:
1598             return self.url_result(mobj.group('url'), 'Yahoo')
1599
1600         # Look for embedded sbs.com.au player
1601         mobj = re.search(
1602             r'''(?x)
1603             (?:
1604                 <meta\s+property="og:video"\s+content=|
1605                 <iframe[^>]+?src=
1606             )
1607             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1608             webpage)
1609         if mobj is not None:
1610             return self.url_result(mobj.group('url'), 'SBS')
1611
1612         # Look for embedded Cinchcast player
1613         mobj = re.search(
1614             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1615             webpage)
1616         if mobj is not None:
1617             return self.url_result(mobj.group('url'), 'Cinchcast')
1618
1619         mobj = re.search(
1620             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1621             webpage)
1622         if not mobj:
1623             mobj = re.search(
1624                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1625                 webpage)
1626         if mobj is not None:
1627             return self.url_result(mobj.group('url'), 'MLB')
1628
1629         mobj = re.search(
1630             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1631             webpage)
1632         if mobj is not None:
1633             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1634
1635         mobj = re.search(
1636             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1637             webpage)
1638         if mobj is not None:
1639             return self.url_result(mobj.group('url'), 'Livestream')
1640
1641         # Look for Zapiks embed
1642         mobj = re.search(
1643             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1644         if mobj is not None:
1645             return self.url_result(mobj.group('url'), 'Zapiks')
1646
1647         # Look for Kaltura embeds
1648         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
1649                 re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
1650         if mobj is not None:
1651             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1652
1653         # Look for Eagle.Platform embeds
1654         mobj = re.search(
1655             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1656         if mobj is not None:
1657             return self.url_result(mobj.group('url'), 'EaglePlatform')
1658
1659         # Look for ClipYou (uses Eagle.Platform) embeds
1660         mobj = re.search(
1661             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1662         if mobj is not None:
1663             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1664
1665         # Look for Pladform embeds
1666         mobj = re.search(
1667             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1668         if mobj is not None:
1669             return self.url_result(mobj.group('url'), 'Pladform')
1670
1671         # Look for Playwire embeds
1672         mobj = re.search(
1673             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1674         if mobj is not None:
1675             return self.url_result(mobj.group('url'))
1676
1677         # Look for 5min embeds
1678         mobj = re.search(
1679             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1680         if mobj is not None:
1681             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1682
1683         # Look for Crooks and Liars embeds
1684         mobj = re.search(
1685             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1686         if mobj is not None:
1687             return self.url_result(mobj.group('url'))
1688
1689         # Look for NBC Sports VPlayer embeds
1690         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1691         if nbc_sports_url:
1692             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1693
1694         # Look for UDN embeds
1695         mobj = re.search(
1696             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1697         if mobj is not None:
1698             return self.url_result(
1699                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1700
1701         # Look for Senate ISVP iframe
1702         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1703         if senate_isvp_url:
1704             return self.url_result(senate_isvp_url, 'SenateISVP')
1705
1706         # Look for Dailymotion Cloud videos
1707         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1708         if dmcloud_url:
1709             return self.url_result(dmcloud_url, 'DailymotionCloud')
1710
1711         # Look for OnionStudios embeds
1712         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1713         if onionstudios_url:
1714             return self.url_result(onionstudios_url)
1715
1716         # Look for SnagFilms embeds
1717         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1718         if snagfilms_url:
1719             return self.url_result(snagfilms_url)
1720
1721         # Look for AdobeTVVideo embeds
1722         mobj = re.search(
1723             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1724             webpage)
1725         if mobj is not None:
1726             return self.url_result(
1727                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1728                 'AdobeTVVideo')
1729
1730         def check_video(vurl):
1731             if YoutubeIE.suitable(vurl):
1732                 return True
1733             vpath = compat_urlparse.urlparse(vurl).path
1734             vext = determine_ext(vpath)
1735             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1736
1737         def filter_video(urls):
1738             return list(filter(check_video, urls))
1739
1740         # Start with something easy: JW Player in SWFObject
1741         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1742         if not found:
1743             # Look for gorilla-vid style embedding
1744             found = filter_video(re.findall(r'''(?sx)
1745                 (?:
1746                     jw_plugins|
1747                     JWPlayerOptions|
1748                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1749                 )
1750                 .*?
1751                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1752         if not found:
1753             # Broaden the search a little bit
1754             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1755         if not found:
1756             # Broaden the findall a little bit: JWPlayer JS loader
1757             found = filter_video(re.findall(
1758                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1759         if not found:
1760             # Flow player
1761             found = filter_video(re.findall(r'''(?xs)
1762                 flowplayer\("[^"]+",\s*
1763                     \{[^}]+?\}\s*,
1764                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1765                         ["']?url["']?\s*:\s*["']([^"']+)["']
1766             ''', webpage))
1767         if not found:
1768             # Cinerama player
1769             found = re.findall(
1770                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1771         if not found:
1772             # Try to find twitter cards info
1773             found = filter_video(re.findall(
1774                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1775         if not found:
1776             # We look for Open Graph info:
1777             # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
1778             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1779             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1780             if m_video_type is not None:
1781                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1782         if not found:
1783             # HTML5 video
1784             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1785         if not found:
1786             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1787             found = re.search(
1788                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1789                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1790                 webpage)
1791             if not found:
1792                 # Look also in Refresh HTTP header
1793                 refresh_header = head_response.headers.get('Refresh')
1794                 if refresh_header:
1795                     found = re.search(REDIRECT_REGEX, refresh_header)
1796             if found:
1797                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
1798                 self.report_following_redirect(new_url)
1799                 return {
1800                     '_type': 'url',
1801                     'url': new_url,
1802                 }
1803         if not found:
1804             raise UnsupportedError(url)
1805
1806         entries = []
1807         for video_url in found:
1808             video_url = compat_urlparse.urljoin(url, video_url)
1809             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
1810
1811             # Sometimes, jwplayer extraction will result in a YouTube URL
1812             if YoutubeIE.suitable(video_url):
1813                 entries.append(self.url_result(video_url, 'Youtube'))
1814                 continue
1815
1816             # here's a fun little line of code for you:
1817             video_id = os.path.splitext(video_id)[0]
1818
1819             ext = determine_ext(video_url)
1820             if ext == 'smil':
1821                 entries.append({
1822                     'id': video_id,
1823                     'formats': self._extract_smil_formats(video_url, video_id),
1824                     'uploader': video_uploader,
1825                     'title': video_title,
1826                     'age_limit': age_limit,
1827                 })
1828             elif ext == 'xspf':
1829                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
1830             else:
1831                 entries.append({
1832                     'id': video_id,
1833                     'url': video_url,
1834                     'uploader': video_uploader,
1835                     'title': video_title,
1836                     'age_limit': age_limit,
1837                 })
1838
1839         if len(entries) == 1:
1840             return entries[0]
1841         else:
1842             for num, e in enumerate(entries, start=1):
1843                 # 'url' results don't have a title
1844                 if e.get('title') is not None:
1845                     e['title'] = '%s (%d)' % (e['title'], num)
1846             return {
1847                 '_type': 'playlist',
1848                 'entries': entries,
1849             }