jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/generic.py
[downloader/fragment] Respect --no-part
[yt-dlp.git] / youtube_dl / extractor / generic.py
Commit Line Data
dcdb292f 1# coding: utf-8
cfe50f04 2
79649588
PH
3from __future__ import unicode_literals
4
9b122384
PH
5import os
6import re
6c91a5a7 7import sys
9b122384
PH
8
9from .common import InfoExtractor
fc9713a1 10from .youtube import YoutubeIE
8c25f81b 11from ..compat import (
f7854627 12 compat_etree_fromstring,
1ddb9456 13 compat_urllib_parse_unquote,
a5caba1e 14 compat_urlparse,
f7300c5c 15 compat_xml_parse_error,
8c25f81b
PH
16)
17from ..utils import (
b759a0d4 18 determine_ext,
9b122384 19 ExtractorError,
c8e9a235 20 float_or_none,
aa94a6d3 21 HEADRequest,
61ca9a80 22 is_html,
ed2d6a19 23 orderedSet,
5c2266df 24 sanitized_Request,
9d4660ca
PH
25 smuggle_url,
26 unescapeHTML,
42393ce2 27 unified_strdate,
4d54ef20 28 unsmuggle_url,
416c7fcb 29 UnsupportedError,
76c73715 30 xpath_text,
9b122384 31)
ed126900 32from .brightcove import (
4fcaa4f4 33 BrightcoveLegacyIE,
5c17f0a6 34 BrightcoveNewIE,
ed126900 35)
a2edf2e7 36from .nbc import NBCSportsVPlayerIE
c0d0b01f 37from .ooyala import OoyalaIE
93d020dd 38from .rutv import RUTVIE
954c1d05 39from .tvc import TVCIE
d40a3b5b 40from .sportbox import SportBoxEmbedIE
cb3ac1c6 41from .smotri import SmotriIE
6dd94d3a 42from .myvi import MyviIE
1419fafd 43from .condenast import CondeNastIE
418c5cc3 44from .udn import UDNEmbedIE
2fe1b5bd 45from .senateisvp import SenateISVPIE
bab19a8e 46from .svt import SVTIE
65d161c4 47from .pornhub import PornHubIE
2bb5b6d0 48from .xhamster import XHamsterEmbedIE
2c9ca782 49from .tnaflix import TNAFlixNetworkEmbedIE
37e7a71c 50from .drtuber import DrTuberIE
e28ed498 51from .redtube import RedTubeIE
b407e173 52from .vimeo import VimeoIE
ad213a1d
YCH
53from .dailymotion import (
54 DailymotionIE,
55 DailymotionCloudIE,
56)
1ac1c4c2 57from .onionstudios import OnionStudiosIE
67167920 58from .viewlift import ViewLiftEmbedIE
46fde8a1 59from .mtv import MTVServicesEmbeddedIE
45dad7ba 60from .pladform import PladformIE
ff18735c 61from .videomore import VideomoreIE
83f1481b 62from .webcaster import WebcasterFeedIE
5b251628 63from .googledrive import GoogleDriveIE
7cb09524 64from .jwplatform import JWPlatformIE
aecfcd4e 65from .digiteka import DigitekaIE
1979969f 66from .arkena import ArkenaIE
5a51775a 67from .instagram import InstagramIE
b8f67449 68from .liveleak import LiveLeakIE
5d39176f 69from .threeqsdn import ThreeQSDNIE
4d8819d2 70from .theplatform import ThePlatformIE
48a5eabc 71from .vessel import VesselIE
c287f2bc 72from .kaltura import KalturaIE
06a96da1 73from .eagleplatform import EaglePlatformIE
fd6ca382 74from .facebook import FacebookIE
94aae015 75from .soundcloud import SoundcloudIE
027e2312 76from .tunein import TuneInBaseIE
2a1321a2 77from .vbox7 import Vbox7IE
b0c8f2e9 78from .dbtv import DBTVIE
b1c35797 79from .piksel import PikselIE
e186a9ec 80from .videa import VideaIE
b687c85e 81from .twentymin import TwentyMinutenIE
d77ac737 82from .ustream import UstreamIE
17f8deeb 83from .openload import OpenloadIE
6ef3e65a 84from .videopress import VideoPressIE
9b122384 85
0838239e 86
9b122384 87class GenericIE(InfoExtractor):
79649588 88 IE_DESC = 'Generic downloader that works on some sites'
9b122384 89 _VALID_URL = r'.*'
79649588 90 IE_NAME = 'generic'
cfe50f04 91 _TESTS = [
c5fa81fe
S
92 # Direct link to a video
93 {
94 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
95 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
96 'info_dict': {
97 'id': 'trailer',
98 'ext': 'mp4',
99 'title': 'trailer',
100 'upload_date': '20100513',
101 }
102 },
c5138a7c 103 # Direct link to media delivered compressed (until Accept-Encoding is *)
c5fa81fe
S
104 {
105 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
106 'md5': '128c42e68b13950268b648275386fc74',
107 'info_dict': {
108 'id': 'FictionJunction-Parallel_Hearts',
109 'ext': 'flac',
110 'title': 'FictionJunction-Parallel_Hearts',
111 'upload_date': '20140522',
112 },
113 'expected_warnings': [
114 'URL could be a direct video link, returning it as such.'
39efc6e3
YCH
115 ],
116 'skip': 'URL invalid',
c5fa81fe
S
117 },
118 # Direct download with broken HEAD
119 {
120 'url': 'http://ai-radio.org:8000/radio.opus',
121 'info_dict': {
122 'id': 'radio',
123 'ext': 'opus',
124 'title': 'radio',
125 },
126 'params': {
127 'skip_download': True, # infinite live stream
128 },
129 'expected_warnings': [
ef0e4e7b
YCH
130 r'501.*Not Implemented',
131 r'400.*Bad Request',
c5fa81fe
S
132 ],
133 },
134 # Direct link with incorrect MIME type
135 {
136 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
137 'md5': '4ccbebe5f36706d85221f204d7eb5913',
138 'info_dict': {
139 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
140 'id': '5_Lennart_Poettering_-_Systemd',
141 'ext': 'webm',
142 'title': '5_Lennart_Poettering_-_Systemd',
143 'upload_date': '20141120',
144 },
145 'expected_warnings': [
146 'URL could be a direct video link, returning it as such.'
147 ]
148 },
149 # RSS feed
150 {
151 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
152 'info_dict': {
153 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
154 'title': 'Zero Punctuation',
155 'description': 're:.*groundbreaking video review series.*'
156 },
157 'playlist_mincount': 11,
158 },
159 # RSS feed with enclosure
160 {
161 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
162 'info_dict': {
163 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
164 'ext': 'm4v',
165 'upload_date': '20150228',
166 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
167 }
168 },
8765222d
S
169 # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
170 {
171 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
172 'info_dict': {
173 'id': 'smil',
174 'ext': 'mp4',
175 'title': 'Automatics, robotics and biocybernetics',
176 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
e327b736 177 'upload_date': '20130627',
8765222d
S
178 'formats': 'mincount:16',
179 'subtitles': 'mincount:1',
180 },
181 'params': {
182 'force_generic_extractor': True,
183 'skip_download': True,
184 },
185 },
186 # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
187 {
188 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
189 'info_dict': {
190 'id': 'hds',
191 'ext': 'flv',
192 'title': 'hds',
193 'formats': 'mincount:1',
194 },
195 'params': {
196 'skip_download': True,
197 },
198 },
199 # SMIL from https://www.restudy.dk/video/play/id/1637
200 {
201 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
202 'info_dict': {
203 'id': 'video_1637',
204 'ext': 'flv',
205 'title': 'video_1637',
206 'formats': 'mincount:3',
207 },
208 'params': {
209 'skip_download': True,
210 },
211 },
212 # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
213 {
214 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
215 'info_dict': {
216 'id': 'smil-service',
217 'ext': 'flv',
218 'title': 'smil-service',
219 'formats': 'mincount:1',
220 },
221 'params': {
222 'skip_download': True,
223 },
224 },
225 # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
226 {
227 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
228 'info_dict': {
229 'id': '4719370',
230 'ext': 'mp4',
231 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
232 'formats': 'mincount:3',
233 },
234 'params': {
235 'skip_download': True,
236 },
237 },
1de5cd3b
S
238 # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
239 {
240 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
241 'info_dict': {
242 'id': 'mZlp2ctYIUEB',
243 'ext': 'mp4',
244 'title': 'Tikibad ontruimd wegens brand',
245 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
ec85ded8 246 'thumbnail': r're:^https?://.*\.jpg$',
1de5cd3b
S
247 'duration': 33,
248 },
249 'params': {
250 'skip_download': True,
251 },
252 },
9d939cec
S
253 # MPD from http://dash-mse-test.appspot.com/media.html
254 {
255 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
256 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
257 'info_dict': {
258 'id': 'car-20120827-manifest',
259 'ext': 'mp4',
260 'title': 'car-20120827-manifest',
261 'formats': 'mincount:9',
0738187f 262 'upload_date': '20130904',
9d939cec
S
263 },
264 'params': {
265 'format': 'bestvideo',
266 },
267 },
20938f76
S
268 # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
269 {
270 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
271 'info_dict': {
272 'id': 'content',
273 'ext': 'mp4',
274 'title': 'content',
275 'formats': 'mincount:8',
276 },
277 'params': {
278 # m3u8 downloads
279 'skip_download': True,
39efc6e3
YCH
280 },
281 'skip': 'video gone',
20938f76 282 },
edd9b71c
S
283 # m3u8 served with Content-Type: text/plain
284 {
285 'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
286 'info_dict': {
287 'id': 'index',
288 'ext': 'mp4',
289 'title': 'index',
290 'upload_date': '20140720',
291 'formats': 'mincount:11',
292 },
293 'params': {
294 # m3u8 downloads
295 'skip_download': True,
39efc6e3
YCH
296 },
297 'skip': 'video gone',
edd9b71c 298 },
c5fa81fe
S
299 # google redirect
300 {
301 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
302 'info_dict': {
303 'id': 'cmQHVoWB5FY',
304 'ext': 'mp4',
305 'upload_date': '20130224',
306 'uploader_id': 'TheVerge',
ec85ded8 307 'description': r're:^Chris Ziegler takes a look at the\.*',
c5fa81fe
S
308 'uploader': 'The Verge',
309 'title': 'First Firefox OS phones side-by-side',
310 },
311 'params': {
312 'skip_download': False,
313 }
314 },
6c91a5a7
S
315 {
316 # redirect in Refresh HTTP header
317 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
318 'info_dict': {
319 'id': 'pO8h3EaFRdo',
320 'ext': 'mp4',
321 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
322 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
323 'upload_date': '20150917',
324 'uploader_id': 'brtvofficial',
325 'uploader': 'Boiler Room',
326 },
327 'params': {
328 'skip_download': False,
329 },
330 },
cfe50f04 331 {
79649588 332 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
d360a146 333 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
79649588 334 'info_dict': {
d360a146
S
335 'id': '13601338388002',
336 'ext': 'mp4',
79649588
PH
337 'uploader': 'www.hodiho.fr',
338 'title': 'R\u00e9gis plante sa Jeep',
cfe50f04
JMF
339 }
340 },
c19f7764
JMF
341 # bandcamp page with custom domain
342 {
79649588
PH
343 'add_ie': ['Bandcamp'],
344 'url': 'http://bronyrock.com/track/the-pony-mash',
79649588 345 'info_dict': {
fd50bf62
S
346 'id': '3235767654',
347 'ext': 'mp3',
79649588
PH
348 'title': 'The Pony Mash',
349 'uploader': 'M_Pallante',
c19f7764 350 },
79649588 351 'skip': 'There is a limit of 200 free downloads / month for the test song',
c19f7764 352 },
eeb165e6 353 {
53a664ed
S
354 # embedded brightcove video
355 # it also tests brightcove videos that need to set the 'Referer'
356 # in the http requests
3b7d9aa4 357 'add_ie': ['BrightcoveLegacy'],
79649588
PH
358 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
359 'info_dict': {
360 'id': '2765128793001',
361 'ext': 'mp4',
362 'title': 'Le cours de bourse : l’analyse technique',
363 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
364 'uploader': 'BFM BUSINESS',
eeb165e6 365 },
79649588
PH
366 'params': {
367 'skip_download': True,
eeb165e6
JMF
368 },
369 },
53a664ed
S
370 {
371 # embedded with itemprop embedURL and video id spelled as `idVideo`
372 'add_id': ['BrightcoveLegacy'],
373 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
374 'info_dict': {
375 'id': '5255628253001',
376 'ext': 'mp4',
377 'title': 'md5:37c519b1128915607601e75a87995fc0',
378 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
379 'uploader': 'BFM BUSINESS',
380 'uploader_id': '876450612001',
381 'timestamp': 1482255315,
382 'upload_date': '20161220',
383 },
384 'params': {
385 'skip_download': True,
386 },
387 },
17ab4d3b
PH
388 {
389 # https://github.com/rg3/youtube-dl/issues/2253
390 'url': 'http://bcove.me/i6nfkrc3',
17ab4d3b
PH
391 'md5': '0ba9446db037002366bab3b3eb30c88c',
392 'info_dict': {
fd50bf62
S
393 'id': '3101154703001',
394 'ext': 'mp4',
17ab4d3b
PH
395 'title': 'Still no power',
396 'uploader': 'thestar.com',
397 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
398 },
3b7d9aa4 399 'add_ie': ['BrightcoveLegacy'],
39efc6e3 400 'skip': 'video gone',
17ab4d3b 401 },
0479c625
S
402 {
403 'url': 'http://www.championat.com/video/football/v/87/87499.html',
404 'md5': 'fb973ecf6e4a78a67453647444222983',
405 'info_dict': {
406 'id': '3414141473001',
407 'ext': 'mp4',
408 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
409 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
410 'uploader': 'Championat',
411 },
412 },
bdf97017 413 {
37aab278 414 # https://github.com/rg3/youtube-dl/issues/3541
3b7d9aa4 415 'add_ie': ['BrightcoveLegacy'],
bdf97017
NJ
416 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
417 'info_dict': {
418 'id': '3866516442001',
37aab278 419 'ext': 'mp4',
bdf97017
NJ
420 'title': 'Leer mij vrouwen kennen: Aflevering 1',
421 'description': 'Leer mij vrouwen kennen: Aflevering 1',
422 'uploader': 'SBS Broadcasting',
423 },
37aab278 424 'skip': 'Restricted to Netherlands',
bdf97017 425 'params': {
37aab278 426 'skip_download': True, # m3u8 download
bdf97017
NJ
427 },
428 },
16e2c8f7
YCH
429 {
430 # Brightcove with alternative playerID key
431 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
432 'info_dict': {
433 'id': 'nmeth.2062_SV1',
434 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
435 },
436 'playlist': [{
437 'info_dict': {
438 'id': '2228375078001',
439 'ext': 'mp4',
440 'title': 'nmeth.2062-sv1',
441 'description': 'nmeth.2062-sv1',
442 'timestamp': 1363357591,
443 'upload_date': '20130315',
444 'uploader': 'Nature Publishing Group',
445 'uploader_id': '1964492299001',
446 },
447 }],
448 },
c0d0b01f
JMF
449 # ooyala video
450 {
79649588 451 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
87830900 452 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
79649588
PH
453 'info_dict': {
454 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
455 'ext': 'mp4',
3486df38 456 'title': '2cc213299525360.mov', # that's what we get
53e06b25 457 'duration': 238.231,
c0d0b01f 458 },
87830900 459 'add_ie': ['Ooyala'],
c0d0b01f 460 },
bf94d763
S
461 {
462 # ooyala video embedded with http://player.ooyala.com/iframe.js
463 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
464 'info_dict': {
465 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
466 'ext': 'mp4',
467 'title': '"Steve Jobs: Man in the Machine" trailer',
468 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
53e06b25 469 'duration': 135.427,
bf94d763
S
470 },
471 'params': {
472 'skip_download': True,
473 },
39efc6e3 474 'skip': 'movie expired',
bf94d763 475 },
1b86cc41 476 # embed.ly video
477 {
478 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
479 'info_dict': {
480 'id': '9ODmcdjQcHQ',
481 'ext': 'mp4',
0a5bce56
PH
482 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
483 'upload_date': '20140225',
484 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
485 'uploader': 'Tested',
486 'uploader_id': 'testedcom',
1b86cc41 487 },
488 # No need to test YoutubeIE here
489 'params': {
490 'skip_download': True,
491 },
492 },
60cc4dc4
PH
493 # funnyordie embed
494 {
495 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
60cc4dc4
PH
496 'info_dict': {
497 'id': '18e820ec3f',
498 'ext': 'mp4',
499 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
500 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
93d020dd 501 },
39efc6e3
YCH
502 # HEAD requests lead to endless 301, while GET is OK
503 'expected_warnings': ['301'],
60cc4dc4 504 },
93d020dd
S
505 # RUTV embed
506 {
507 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
508 'info_dict': {
509 'id': '776940',
510 'ext': 'mp4',
511 'title': 'Охотское море стало целиком российским',
512 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
513 },
514 'params': {
515 # m3u8 download
516 'skip_download': True,
517 },
aab74fa1 518 },
f37bdbe5
S
519 # TVC embed
520 {
521 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
522 'info_dict': {
523 'id': '55304',
524 'ext': 'mp4',
525 'title': 'Дошкольное воспитание',
526 },
527 },
b827a601
S
528 # SportBox embed
529 {
530 'url': 'http://www.vestifinance.ru/articles/25753',
531 'info_dict': {
532 'id': '25753',
05d1e7aa 533 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
b827a601
S
534 },
535 'playlist': [{
536 'info_dict': {
537 'id': '370908',
538 'title': 'Госзаказ. День 3',
539 'ext': 'mp4',
540 }
541 }, {
542 'info_dict': {
543 'id': '370905',
544 'title': 'Госзаказ. День 2',
545 'ext': 'mp4',
546 }
547 }, {
548 'info_dict': {
549 'id': '370902',
550 'title': 'Госзаказ. День 1',
551 'ext': 'mp4',
552 }
553 }],
554 'params': {
555 # m3u8 download
556 'skip_download': True,
557 },
558 },
bf20b9c5
S
559 # Myvi.ru embed
560 {
561 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
562 'info_dict': {
563 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
564 'ext': 'mp4',
565 'title': 'Ужастики, русский трейлер (2015)',
ec85ded8 566 'thumbnail': r're:^https?://.*\.jpg$',
bf20b9c5
S
567 'duration': 153,
568 }
569 },
c76799c5
S
570 # XHamster embed
571 {
572 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
573 'info_dict': {
574 'id': 'showthread',
575 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
576 },
577 'playlist_mincount': 7,
39efc6e3
YCH
578 # This forum does not allow <iframe> syntaxes anymore
579 # Now HTML tags are displayed as-is
580 'skip': 'No videos on this page',
c76799c5 581 },
aab74fa1
PH
582 # Embedded TED video
583 {
584 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
a8eb5a8e 585 'md5': '65fdff94098e4a607385a60c5177c638',
aab74fa1 586 'info_dict': {
a8eb5a8e 587 'id': '1969',
aab74fa1 588 'ext': 'mp4',
a8eb5a8e
PH
589 'title': 'Hidden miracles of the natural world',
590 'uploader': 'Louie Schwartzberg',
591 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
aab74fa1 592 }
60cc4dc4 593 },
d95e35d6
S
594 # nowvideo embed hidden behind percent encoding
595 {
596 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
597 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
598 'info_dict': {
599 'id': '06e53103ca9aa',
600 'ext': 'flv',
601 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
602 'description': 'No description',
603 },
0f2a2ba1 604 },
893f8832
PH
605 # arte embed
606 {
607 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
608 'md5': '7653032cbb25bf6c80d80f217055fa43',
609 'info_dict': {
610 'id': '048195-004_PLUS7-F',
611 'ext': 'flv',
612 'title': 'X:enius',
613 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
614 'upload_date': '20140320',
615 },
616 'params': {
617 'skip_download': 'Requires rtmpdump'
39efc6e3
YCH
618 },
619 'skip': 'video gone',
893f8832 620 },
cbd55ade
S
621 # francetv embed
622 {
623 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
624 'info_dict': {
625 'id': 'EV_30231',
626 'ext': 'mp4',
627 'title': 'Alcaline, le concert avec Calogero',
628 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
629 'upload_date': '20150226',
630 'timestamp': 1424989860,
631 'duration': 5400,
632 },
633 'params': {
634 # m3u8 downloads
635 'skip_download': True,
636 },
637 'expected_warnings': [
638 'Forbidden'
639 ]
640 },
fa35cdad
PH
641 # Condé Nast embed
642 {
643 'url': 'http://www.wired.com/2014/04/honda-asimo/',
644 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
645 'info_dict': {
646 'id': '53501be369702d3275860000',
647 'ext': 'mp4',
648 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
649 }
ebd3c7b3
PH
650 },
651 # Dailymotion embed
652 {
653 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
654 'md5': '441aeeb82eb72c422c7f14ec533999cd',
655 'info_dict': {
656 'id': 'k2mm4bCdJ6CQ2i7c8o2',
657 'ext': 'mp4',
658 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
0738187f 659 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
ebd3c7b3 660 'uploader': 'Spi0n',
0738187f
YCH
661 'uploader_id': 'xgditw',
662 'upload_date': '20140425',
663 'timestamp': 1398441542,
ebd3c7b3
PH
664 },
665 'add_ie': ['Dailymotion'],
2b88feed
PH
666 },
667 # YouTube embed
668 {
669 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
670 'info_dict': {
671 'id': 'FXRb4ykk4S0',
672 'ext': 'mp4',
673 'title': 'The NBL Auction 2014',
674 'uploader': 'BADMINTON England',
675 'uploader_id': 'BADMINTONEvents',
676 'upload_date': '20140603',
677 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
678 },
679 'add_ie': ['Youtube'],
680 'params': {
681 'skip_download': True,
682 }
683 },
c5cd249e
JMF
684 # MTVServices embed
685 {
1fa309da
YCH
686 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
687 'md5': 'ca1aef97695ef2c1d6973256a57e5252',
c5cd249e 688 'info_dict': {
1fa309da 689 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
c5cd249e 690 'ext': 'mp4',
1fa309da
YCH
691 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
692 'description': 'Two valets share their love for movie star Liam Neesons.',
05d1e7aa
YCH
693 'timestamp': 1349922600,
694 'upload_date': '20121011',
c5cd249e
JMF
695 },
696 },
61013473 697 # YouTube embed via <data-embed-url="">
698 {
699 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
61013473 700 'info_dict': {
a8eb5a8e 701 'id': '4vAffPZIT44',
61013473 702 'ext': 'mp4',
a8eb5a8e 703 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
ed2d6a19
PH
704 'uploader': 'Gameloft',
705 'uploader_id': 'gameloft',
a8eb5a8e
PH
706 'upload_date': '20140828',
707 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
ed2d6a19
PH
708 },
709 'params': {
710 'skip_download': True,
61013473 711 }
c8e9a235
PH
712 },
713 # Camtasia studio
714 {
715 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
716 'playlist': [{
717 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
718 'info_dict': {
719 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
720 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
721 'ext': 'flv',
722 'duration': 2235.90,
723 }
724 }, {
725 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
726 'info_dict': {
727 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
728 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
729 'ext': 'flv',
730 'duration': 2235.93,
731 }
732 }],
733 'info_dict': {
734 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
735 }
4d805e06
PH
736 },
737 # Flowplayer
738 {
739 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
740 'md5': '9d65602bf31c6e20014319c7d07fba27',
741 'info_dict': {
742 'id': '5123ea6d5e5a7',
743 'ext': 'mp4',
744 'age_limit': 18,
745 'uploader': 'www.handjobhub.com',
d6d9186f 746 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
4d805e06 747 }
0990305d 748 },
22a6f150
PH
749 # Multiple brightcove videos
750 # https://github.com/rg3/youtube-dl/issues/2283
751 {
752 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
753 'info_dict': {
754 'id': 'always-never',
755 'title': 'Always / Never - The New Yorker',
756 },
757 'playlist_count': 3,
758 'params': {
759 'extract_flat': False,
760 'skip_download': True,
761 }
1a94ff68
S
762 },
763 # MLB embed
764 {
765 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
766 'md5': '96f09a37e44da40dd083e12d9a683327',
767 'info_dict': {
768 'id': '33322633',
769 'ext': 'mp4',
770 'title': 'Ump changes call to ball',
771 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
772 'duration': 48,
773 'timestamp': 1401537900,
774 'upload_date': '20140531',
ec85ded8 775 'thumbnail': r're:^https?://.*\.jpg$',
1a94ff68
S
776 },
777 },
746c67d7
NJ
778 # Wistia embed
779 {
6c114b12
S
780 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
781 'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
746c67d7 782 'info_dict': {
6c114b12 783 'id': '6e2wtrbdaf',
746c67d7 784 'ext': 'mov',
6c114b12
S
785 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
786 'description': 'a Paywall Videos video from Remilon',
787 'duration': 644.072,
788 'uploader': 'study.com',
789 'timestamp': 1459678540,
790 'upload_date': '20160403',
791 'filesize': 24687186,
746c67d7
NJ
792 },
793 },
52cffcb1 794 {
795 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
796 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
797 'info_dict': {
798 'id': 'uxjb0lwrcz',
799 'ext': 'mp4',
6c114b12 800 'title': 'Conversation about Hexagonal Rails Part 1',
0738187f 801 'description': 'a Martin Fowler video from ThoughtWorks',
52cffcb1 802 'duration': 1715.0,
85d7b765 803 'uploader': 'thoughtworks.wistia.com',
0738187f 804 'timestamp': 1401832161,
6c114b12 805 'upload_date': '20140603',
70b7e3fb 806 },
52cffcb1 807 },
7ded6545
S
808 # Wistia standard embed (async)
809 {
810 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
811 'info_dict': {
812 'id': '807fafadvk',
813 'ext': 'mp4',
814 'title': 'Drip Brennan Dunn Workshop',
815 'description': 'a JV Webinars video from getdrip-1',
816 'duration': 4986.95,
7ded6545 817 'timestamp': 1463607249,
6c114b12 818 'upload_date': '20160518',
7ded6545
S
819 },
820 'params': {
821 'skip_download': True,
822 }
823 },
ac645ac7
PH
824 # Soundcloud embed
825 {
826 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
827 'info_dict': {
828 'id': '174391317',
829 'ext': 'mp3',
830 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
831 'uploader': 'Sophos Security',
832 'title': 'Chet Chat 171 - Oct 29, 2014',
833 'upload_date': '20141029',
834 }
af63fed7 835 },
db19df6c
S
836 # Soundcloud multiple embeds
837 {
838 'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
839 'info_dict': {
840 'id': '52809',
841 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
842 },
843 'playlist_mincount': 7,
844 },
027e2312
S
845 # TuneIn station embed
846 {
847 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
848 'info_dict': {
849 'id': '204146',
850 'ext': 'mp3',
851 'title': 'CNRV',
852 'location': 'Paris, France',
853 'is_live': True,
854 },
855 'params': {
856 # Live stream
857 'skip_download': True,
858 },
859 },
af63fed7
PH
860 # Livestream embed
861 {
862 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
863 'info_dict': {
864 'id': '67864563',
865 'ext': 'flv',
866 'upload_date': '20141112',
867 'title': 'Rosetta #CometLanding webcast HL 10',
868 }
869 },
78d3b3e2
YCH
870 # Another Livestream embed, without 'new.' in URL
871 {
872 'url': 'https://www.freespeech.org/',
873 'info_dict': {
874 'id': '123537347',
875 'ext': 'mp4',
876 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
877 },
878 'params': {
879 # Live stream
880 'skip_download': True,
881 },
882 },
65f3a228
PH
883 # LazyYT
884 {
885 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
886 'info_dict': {
11e611a7 887 'id': '1986',
65f3a228
PH
888 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
889 },
890 'playlist_mincount': 2,
4e262a88 891 },
42bdd9d0
PH
892 # Cinchcast embed
893 {
894 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
895 'info_dict': {
896 'id': '7141703',
897 'ext': 'mp3',
898 'upload_date': '20141126',
899 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
900 }
901 },
501f13fb
PH
902 # Cinerama player
903 {
904 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
905 'info_dict': {
906 'id': '730m_DandD_1901_512k',
907 'ext': 'mp4',
908 'uploader': 'www.abc.net.au',
909 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
910 }
796df3c6
S
911 },
912 # embedded viddler video
913 {
914 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
915 'info_dict': {
916 'id': '4d03aad9',
917 'ext': 'mp4',
918 'uploader': 'deadspin',
919 'title': 'WALL-TO-GORTAT',
920 'timestamp': 1422285291,
921 'upload_date': '20150126',
922 },
923 'add_ie': ['Viddler'],
a0f71985 924 },
2051acde
S
925 # Libsyn embed
926 {
927 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
928 'info_dict': {
929 'id': '3377616',
930 'ext': 'mp3',
931 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
932 'description': 'md5:601cb790edd05908957dae8aaa866465',
933 'upload_date': '20150220',
934 },
326fa4e6 935 'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
2051acde 936 },
a0f71985
PH
937 # jwplayer YouTube
938 {
939 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
940 'info_dict': {
941 'id': 'Mrj4DVp2zeA',
942 'ext': 'mp4',
f37e3f99 943 'upload_date': '20150212',
a0f71985
PH
944 'uploader': 'The National Archives UK',
945 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
946 'uploader_id': 'NationalArchives08',
947 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
948 },
59b8ab58
PH
949 },
950 # rtl.nl embed
951 {
952 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
953 'playlist_mincount': 5,
954 'info_dict': {
955 'id': 'aanslagen-kopenhagen',
956 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
957 }
255fca5e
S
958 },
959 # Zapiks embed
960 {
961 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
962 'info_dict': {
963 'id': '118046',
964 'ext': 'mp4',
965 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
966 }
967 },
66e568de
S
968 # Kaltura embed (different embed code)
969 {
970 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
971 'info_dict': {
972 'id': '1_a52wc67y',
973 'ext': 'flv',
974 'upload_date': '20150127',
975 'uploader_id': 'PremierMedia',
976 'timestamp': int,
977 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
978 },
979 },
6da620de
S
980 # Kaltura embed protected with referrer
981 {
982 'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
983 'info_dict': {
984 'id': '1_g4fbemnq',
985 'ext': 'mp4',
986 'title': 'Violetta - Achter De Schermen - Ruggero',
987 'description': 'Achter de schermen met Ruggero',
988 'timestamp': 1435133761,
989 'upload_date': '20150624',
990 'uploader_id': 'echojecka',
991 },
992 },
87703231
YCH
993 # Kaltura embed with single quotes
994 {
995 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
996 'info_dict': {
997 'id': '0_izeg5utt',
998 'ext': 'mp4',
999 'title': '35871',
1000 'timestamp': 1355743100,
1001 'upload_date': '20121217',
1002 'uploader_id': 'batchUser',
1003 },
1004 'add_ie': ['Kaltura'],
1005 },
427cd050
S
1006 {
1007 # Kaltura embedded via quoted entry_id
1008 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
1009 'info_dict': {
1010 'id': '0_utuok90b',
1011 'ext': 'mp4',
1012 'title': '06_matthew_brender_raj_dutt',
1013 'timestamp': 1466638791,
1014 'upload_date': '20160622',
1015 },
1016 'add_ie': ['Kaltura'],
1017 'expected_warnings': [
1018 'Could not send HEAD request'
1019 ],
1020 'params': {
1021 'skip_download': True,
1022 }
1023 },
8ab7e6c4
YCH
1024 {
1025 # Kaltura embedded, some fileExt broken (#11480)
1026 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
1027 'info_dict': {
1028 'id': '1_sgtvehim',
1029 'ext': 'mp4',
1030 'title': 'Our "Standard Models" of particle physics and cosmology',
1031 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
1032 'timestamp': 1321158993,
1033 'upload_date': '20111113',
1034 'uploader_id': 'kps1',
1035 },
1036 'add_ie': ['Kaltura'],
1037 },
135c9c42
S
1038 # Eagle.Platform embed (generic URL)
1039 {
1040 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
4645432d 1041 # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
135c9c42
S
1042 'info_dict': {
1043 'id': '227304',
1044 'ext': 'mp4',
1045 'title': 'Навальный вышел на свободу',
1046 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
ec85ded8 1047 'thumbnail': r're:^https?://.*\.jpg$',
135c9c42
S
1048 'duration': 87,
1049 'view_count': int,
1050 'age_limit': 0,
1051 },
1052 },
d47ae7f6
S
1053 # ClipYou (Eagle.Platform) embed (custom URL)
1054 {
1055 'url': 'http://muz-tv.ru/play/7129/',
4645432d 1056 # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
d47ae7f6
S
1057 'info_dict': {
1058 'id': '12820',
1059 'ext': 'mp4',
1060 'title': "'O Sole Mio",
ec85ded8 1061 'thumbnail': r're:^https?://.*\.jpg$',
d47ae7f6
S
1062 'duration': 216,
1063 'view_count': int,
1064 },
1065 },
f8388757
S
1066 # Pladform embed
1067 {
1068 'url': 'http://muz-tv.ru/kinozal/view/7400/',
1069 'info_dict': {
1070 'id': '100183293',
1071 'ext': 'mp4',
62259846 1072 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
f8388757 1073 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
ec85ded8 1074 'thumbnail': r're:^https?://.*\.jpg$',
f8388757
S
1075 'duration': 694,
1076 'age_limit': 0,
1077 },
1078 },
c798f15b
S
1079 # Playwire embed
1080 {
1081 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
1082 'info_dict': {
1083 'id': '3519514',
1084 'ext': 'mp4',
1085 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
ec85ded8 1086 'thumbnail': r're:^https?://.*\.png$',
c798f15b
S
1087 'duration': 45.115,
1088 },
1089 },
ad320e9b
NJ
1090 # 5min embed
1091 {
1092 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
1093 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
1094 'info_dict': {
1095 'id': '518726732',
1096 'ext': 'mp4',
1097 'title': 'Facebook Creates "On This Day" | Crunch Report',
1098 },
1099 },
dc455a5f
S
1100 # SVT embed
1101 {
1102 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
1103 'info_dict': {
1104 'id': '2900353',
1105 'ext': 'flv',
1106 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
1107 'duration': 27,
1108 'age_limit': 0,
1109 },
1110 },
a4257017
S
1111 # Crooks and Liars embed
1112 {
1113 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
1114 'info_dict': {
1115 'id': '8RUoRhRi',
1116 'ext': 'mp4',
1117 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1118 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1119 'timestamp': 1428207000,
1120 'upload_date': '20150405',
1121 'uploader': 'Heather',
1122 },
1123 },
1124 # Crooks and Liars external embed
1125 {
1126 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1127 'info_dict': {
1128 'id': 'MTE3MjUtMzQ2MzA',
1129 'ext': 'mp4',
1130 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1131 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1132 'timestamp': 1265032391,
1133 'upload_date': '20100201',
1134 'uploader': 'Heather',
1135 },
1136 },
facecb84 1137 # NBC Sports vplayer embed
a2edf2e7 1138 {
facecb84 1139 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
a2edf2e7 1140 'info_dict': {
facecb84
S
1141 'id': 'ln7x1qSThw4k',
1142 'ext': 'flv',
1143 'title': "PFT Live: New leader in the 'new-look' defense",
1144 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
0738187f
YCH
1145 'uploader': 'NBCU-SPORTS',
1146 'upload_date': '20140107',
1147 'timestamp': 1389118457,
a2edf2e7 1148 },
418c5cc3 1149 },
de3eb07e
YCH
1150 # NBC News embed
1151 {
1152 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
1153 'md5': '1aa589c675898ae6d37a17913cf68d66',
1154 'info_dict': {
1155 'id': '701714499682',
1156 'ext': 'mp4',
1157 'title': 'PREVIEW: On Assignment: David Letterman',
1158 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
1159 },
1160 },
418c5cc3
YCH
1161 # UDN embed
1162 {
811586eb 1163 'url': 'https://video.udn.com/news/300346',
01c58f84 1164 'md5': 'fd2060e988c326991037b9aff9df21a6',
418c5cc3 1165 'info_dict': {
01c58f84 1166 'id': '300346',
418c5cc3 1167 'ext': 'mp4',
01c58f84 1168 'title': '中一中男師變性 全校師生力挺',
ec85ded8 1169 'thumbnail': r're:^https?://.*\.jpg$',
811586eb
YCH
1170 },
1171 'params': {
1172 # m3u8 download
1173 'skip_download': True,
1174 },
edfcf7ab
YCH
1175 },
1176 # Ooyala embed
1177 {
1178 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1179 'info_dict': {
1180 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1181 'ext': 'mp4',
cce9d15d 1182 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
edfcf7ab 1183 'title': 'This is what separates the Excel masters from the wannabes',
53e06b25 1184 'duration': 191.933,
edfcf7ab
YCH
1185 },
1186 'params': {
1187 # m3u8 downloads
1188 'skip_download': True,
1189 }
d6fd958c 1190 },
b26733ba
YCH
1191 # Brightcove URL in single quotes
1192 {
1193 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1194 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1195 'info_dict': {
1196 'id': '4255764656001',
1197 'ext': 'mp4',
1198 'title': 'SN Presents: Russell Martin, World Citizen',
1199 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1200 'uploader': 'Rogers Sportsnet',
0738187f
YCH
1201 'uploader_id': '1704050871',
1202 'upload_date': '20150525',
1203 'timestamp': 1432570283,
b26733ba 1204 },
756f574e
YCH
1205 },
1206 # Dailymotion Cloud video
1207 {
1208 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
7d52c052 1209 'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
756f574e 1210 'info_dict': {
7d52c052 1211 'id': 'x2uy8t3',
756f574e 1212 'ext': 'mp4',
7d52c052
YCH
1213 'title': 'Sauvons les abeilles ! - Le débat',
1214 'description': 'md5:d9082128b1c5277987825d684939ca26',
ec85ded8 1215 'thumbnail': r're:^https?://.*\.jpe?g$',
7d52c052
YCH
1216 'timestamp': 1434970506,
1217 'upload_date': '20150622',
1218 'uploader': 'Public Sénat',
1219 'uploader_id': 'xa9gza',
756f574e 1220 }
a5158f38 1221 },
8084be78
S
1222 # OnionStudios embed
1223 {
1224 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1225 'info_dict': {
1226 'id': '2855',
1227 'ext': 'mp4',
1228 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
ec85ded8 1229 'thumbnail': r're:^https?://.*\.jpe?g$',
8084be78
S
1230 'uploader': 'ClickHole',
1231 'uploader_id': 'clickhole',
1232 }
1233 },
b8c1cc1a
S
1234 # SnagFilms embed
1235 {
1236 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1237 'info_dict': {
1238 'id': '74849a00-85a9-11e1-9660-123139220831',
1239 'ext': 'mp4',
1240 'title': '#whilewewatch',
1241 }
1242 },
a5158f38
YCH
1243 # AdobeTVVideo embed
1244 {
1245 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1246 'md5': '43662b577c018ad707a63766462b1e87',
1247 'info_dict': {
1248 'id': '2456',
1249 'ext': 'mp4',
1250 'title': 'New experience with Acrobat DC',
1251 'description': 'New experience with Acrobat DC',
1252 'duration': 248.667,
1253 },
1f812580 1254 },
ed126900 1255 # BrightcoveInPageEmbed embed
1256 {
1257 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1258 'info_dict': {
1259 'id': '4238694884001',
1260 'ext': 'flv',
1261 'title': 'Tabletop: Dread, Last Thoughts',
1262 'description': 'Tabletop: Dread, Last Thoughts',
1263 'duration': 51690,
1264 },
750b9ff0 1265 },
d10fe835
YCH
1266 # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1267 # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1268 {
1269 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1270 'info_dict': {
1271 'id': '4785848093001',
1272 'ext': 'mp4',
1273 'title': 'The Cardinal Pell Interview',
1274 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1275 'uploader': 'GlobeCast Australia - GlobeStream',
0738187f
YCH
1276 'uploader_id': '2733773828001',
1277 'upload_date': '20160304',
1278 'timestamp': 1457083087,
d10fe835
YCH
1279 },
1280 'params': {
1281 # m3u8 downloads
1282 'skip_download': True,
1283 },
1284 },
134c207e
YCH
1285 # Another form of arte.tv embed
1286 {
1287 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1288 'md5': '850bfe45417ddf221288c88a0cffe2e2',
1289 'info_dict': {
1290 'id': '030273-562_PLUS7-F',
1291 'ext': 'mp4',
1292 'title': 'ARTE Reportage - Nulle part, en France',
1293 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1294 'upload_date': '20160409',
1295 },
1296 },
b8f67449
KM
1297 # LiveLeak embed
1298 {
1299 'url': 'http://www.wykop.pl/link/3088787/',
1300 'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
1301 'info_dict': {
1302 'id': '874_1459135191',
1303 'ext': 'mp4',
1304 'title': 'Man shows poor quality of new apartment building',
1305 'description': 'The wall is like a sand pile.',
1306 'uploader': 'Lake8737',
1307 }
1308 },
4a120778
YCH
1309 # Duplicated embedded video URLs
1310 {
1311 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1312 'info_dict': {
1313 'id': '149298443_480_16c25b74_2',
1314 'ext': 'mp4',
1315 'title': 'vs. Blue Orange Spring Game',
1316 'uploader': 'www.hudl.com',
1317 },
1318 },
371ddb14
S
1319 # twitter:player:stream embed
1320 {
1321 'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
1322 'info_dict': {
1323 'id': 'master',
1324 'ext': 'mp4',
1325 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
1326 'uploader': 'www.rtl.be',
1327 },
1328 'params': {
1329 # m3u8 downloads
1330 'skip_download': True,
1331 },
1332 },
32917907
RA
1333 # twitter:player embed
1334 {
1335 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
1336 'md5': 'a3e0df96369831de324f0778e126653c',
1337 'info_dict': {
1338 'id': '4909620399001',
1339 'ext': 'mp4',
1340 'title': 'What Do Black Holes Sound Like?',
1341 'description': 'what do black holes sound like',
1342 'upload_date': '20160524',
1343 'uploader_id': '29913724001',
1344 'timestamp': 1464107587,
1345 'uploader': 'TheAtlantic',
1346 },
1347 'add_ie': ['BrightcoveLegacy'],
fd6ca382
YCH
1348 },
1349 # Facebook <iframe> embed
1350 {
1351 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
dbf0157a 1352 'md5': 'fbcde74f534176ecb015849146dd3aee',
fd6ca382
YCH
1353 'info_dict': {
1354 'id': '599637780109885',
1355 'ext': 'mp4',
1356 'title': 'Facebook video #599637780109885',
1357 },
1358 },
1359 # Facebook API embed
1360 {
1361 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
dbf0157a 1362 'md5': 'a47372ee61b39a7b90287094d447d94e',
fd6ca382
YCH
1363 'info_dict': {
1364 'id': '10153467542406923',
1365 'ext': 'mp4',
1366 'title': 'Facebook video #10153467542406923',
1367 },
7deef1ba
YCH
1368 },
1369 # Wordpress "YouTube Video Importer" plugin
1370 {
1371 'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
dbf0157a 1372 'md5': 'd16797741b560b485194eddda8121b48',
7deef1ba
YCH
1373 'info_dict': {
1374 'id': 'HNTXWDXV9Is',
1375 'ext': 'mp4',
1376 'title': 'Blue Devils Drumline Stanford lot 2016',
1377 'upload_date': '20160627',
1378 'uploader_id': 'GENOCIDE8GENERAL10',
1379 'uploader': 'cylus cyrus',
1380 },
1381 },
81953d1a
RA
1382 {
1383 # video stored on custom kaltura server
1384 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
1385 'md5': '537617d06e64dfed891fa1593c4b30cc',
1386 'info_dict': {
1387 'id': '0_1iotm5bh',
1388 'ext': 'mp4',
1389 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
1390 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
1391 'uploader_id': 'videos.expansion@el-mundo.net',
1392 'upload_date': '20150429',
1393 'timestamp': 1430303472,
1394 },
1395 'add_ie': ['Kaltura'],
1396 },
c03adf90
YCH
1397 {
1398 # Non-standard Vimeo embed
1399 'url': 'https://openclassrooms.com/courses/understanding-the-web',
1400 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
1401 'info_dict': {
1402 'id': '148867247',
1403 'ext': 'mp4',
1404 'title': 'Understanding the web - Teaser',
1405 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
1406 'upload_date': '20151214',
1407 'uploader': 'OpenClassrooms',
1408 'uploader_id': 'openclassrooms',
1409 },
1410 'add_ie': ['Vimeo'],
1411 },
a5ff05df
S
1412 {
1413 # generic vimeo embed that requires original URL passed as Referer
1414 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
1415 'only_matching': True,
1416 },
1979969f
S
1417 {
1418 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
1419 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
1420 'info_dict': {
1421 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
1422 'ext': 'mp4',
1423 'title': 'Big Buck Bunny',
1424 'description': 'Royalty free test video',
1425 'timestamp': 1432816365,
1426 'upload_date': '20150528',
1427 'is_live': False,
1428 },
1429 'params': {
1430 'skip_download': True,
1431 },
1432 'add_ie': [ArkenaIE.ie_key()],
1433 },
2a1321a2
S
1434 {
1435 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
1436 'info_dict': {
1437 'id': '1c7141f46c',
1438 'ext': 'mp4',
1439 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
1440 },
1441 'params': {
1442 'skip_download': True,
1443 },
1444 'add_ie': [Vbox7IE.ie_key()],
1445 },
b0c8f2e9
DR
1446 {
1447 # DBTV embeds
1448 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
fd3ec986
S
1449 'info_dict': {
1450 'id': '43254897',
1451 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
1452 },
b0c8f2e9
DR
1453 'playlist_mincount': 3,
1454 },
e186a9ec
S
1455 {
1456 # Videa embeds
1457 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
1458 'info_dict': {
1459 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
1460 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
1461 },
1462 'playlist_mincount': 2,
1463 },
b687c85e
S
1464 {
1465 # 20 minuten embed
1466 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
1467 'info_dict': {
1468 'id': '523629',
1469 'ext': 'mp4',
1470 'title': 'So kommen Sie bei Eis und Schnee sicher an',
1471 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
1472 },
1473 'params': {
1474 'skip_download': True,
1475 },
1476 'add_ie': [TwentyMinutenIE.ie_key()],
6ef3e65a
S
1477 },
1478 {
1479 # VideoPress embed
1480 'url': 'https://en.support.wordpress.com/videopress/',
1481 'info_dict': {
1482 'id': 'OcobLTqC',
1483 'ext': 'm4v',
1484 'title': 'IMG_5786',
1485 'timestamp': 1435711927,
1486 'upload_date': '20150701',
1487 },
1488 'params': {
1489 'skip_download': True,
1490 },
1491 'add_ie': [VideoPressIE.ie_key()],
b687c85e 1492 }
6e6b70d6
S
1493 # {
1494 # # TODO: find another test
1495 # # http://schema.org/VideoObject
1496 # 'url': 'https://flipagram.com/f/nyvTSJMKId',
1497 # 'md5': '888dcf08b7ea671381f00fab74692755',
1498 # 'info_dict': {
1499 # 'id': 'nyvTSJMKId',
1500 # 'ext': 'mp4',
1501 # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
1502 # 'description': '#love for cats.',
1503 # 'timestamp': 1461244995,
1504 # 'upload_date': '20160421',
1505 # },
1506 # 'params': {
1507 # 'force_generic_extractor': True,
1508 # },
1509 # }
cfe50f04 1510 ]
9b122384 1511
9b122384
PH
def report_following_redirect(self, new_url):
    """Print a notice that extraction continues at the redirect target new_url."""
    message = '[redirect] Following redirect to %s' % new_url
    self._downloader.to_screen(message)
9b122384 1515
4fc946b5
PH
def _extract_rss(self, url, video_id, doc):
    """Turn a parsed RSS document into a playlist result.

    doc is the root element of the feed. Every ./channel/item becomes a
    'url' entry that points at the item's <link> or, when the link is
    missing or empty, at the first <enclosure> carrying a non-empty url
    attribute. Items offering neither are skipped.

    Returns a playlist dict whose id is the feed URL. The playlist title,
    the playlist description and each entry title are None when the feed
    does not contain the corresponding element.
    """
    def text_or_none(parent, path):
        # Elements may legitimately be absent (all <item> sub-elements are
        # optional in RSS 2.0), so never dereference .text on a missing node.
        node = parent.find(path)
        return None if node is None else node.text

    playlist_title = text_or_none(doc, './channel/title')
    playlist_desc = text_or_none(doc, './channel/description')

    entries = []
    for it in doc.findall('./channel/item'):
        next_url = xpath_text(it, 'link', fatal=False)
        if not next_url:
            # Fall back to the first enclosure that actually has a URL.
            next_url = next(
                (e.attrib['url'] for e in it.findall('./enclosure')
                 if e.attrib.get('url')),
                None)

        if not next_url:
            # Nothing to follow for this item.
            continue

        entries.append({
            '_type': 'url',
            'url': next_url,
            'title': text_or_none(it, 'title'),
        })

    return {
        '_type': 'playlist',
        'id': url,
        'title': playlist_title,
        'description': playlist_desc,
        'entries': entries,
    }
1547
c8e9a235
PH
def _extract_camtasia(self, url, video_id, webpage):
    """Extract a Camtasia project referenced by webpage as a playlist.

    The page is searched for the "csConfigFile" player variable; the
    configuration XML it names is downloaded and every child of its
    ./playlist/array/fileset node that has a <uri> becomes one entry.

    Returns None if no camtasia video can be found, otherwise a playlist
    dict with the page's DC.title as title.
    """
    camtasia_cfg = self._search_regex(
        r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
        webpage, 'camtasia configuration file', default=None)
    if camtasia_cfg is None:
        return None

    title = self._html_search_meta('DC.title', webpage, fatal=True)

    # The configuration path is given relative to the page URL.
    camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
    camtasia_cfg = self._download_xml(
        camtasia_url, video_id,
        note='Downloading camtasia configuration',
        errnote='Failed to download camtasia configuration')
    fileset_node = camtasia_cfg.find('./playlist/array/fileset')
    if fileset_node is None:
        # Configuration without a fileset: nothing playable to report.
        return None

    entries = []
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    for n in fileset_node:
        url_n = n.find('./uri')
        if url_n is None or not url_n.text:
            continue

        duration_n = n.find('./duration')
        entries.append({
            # File name of the URI without its extension.
            'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
            'title': '%s - %s' % (title, n.tag),
            'url': compat_urlparse.urljoin(url, url_n.text),
            'duration': None if duration_n is None else float_or_none(duration_n.text),
        })

    return {
        '_type': 'playlist',
        'entries': entries,
        'title': title,
    }
1584
9b122384 1585 def _real_extract(self, url):
ebd3c7b3
PH
1586 if url.startswith('//'):
1587 return {
1588 '_type': 'url',
20991253 1589 'url': self.http_scheme() + url,
ebd3c7b3
PH
1590 }
1591
a7130543
JMF
1592 parsed_url = compat_urlparse.urlparse(url)
1593 if not parsed_url.scheme:
04b4d394
PH
1594 default_search = self._downloader.params.get('default_search')
1595 if default_search is None:
1f7ccb90 1596 default_search = 'fixup_error'
04b4d394 1597
1f7ccb90 1598 if default_search in ('auto', 'auto_warning', 'fixup_error'):
04b4d394
PH
1599 if '/' in url:
1600 self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1601 return self.url_result('http://' + url)
1f7ccb90 1602 elif default_search != 'fixup_error':
9c1fc022 1603 if default_search == 'auto_warning':
0e67ab0d
PH
1604 if re.match(r'^(?:url|URL)$', url):
1605 raise ExtractorError(
1606 'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
1607 expected=True)
1608 else:
1609 self._downloader.report_warning(
7571c02c 1610 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
04b4d394 1611 return self.url_result('ytsearch:' + url)
1f7ccb90
PH
1612
1613 if default_search in ('error', 'fixup_error'):
7571c02c 1614 raise ExtractorError(
b74e86f4
PH
1615 '%r is not a valid URL. '
1616 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
1617 % (url, url), expected=True)
04b4d394 1618 else:
f2f2c0c2
PH
1619 if ':' not in default_search:
1620 default_search += ':'
04b4d394 1621 return self.url_result(default_search + url)
4d54ef20
PH
1622
1623 url, smuggled_data = unsmuggle_url(url)
1624 force_videoid = None
d6e6a422 1625 is_intentional = smuggled_data and smuggled_data.get('to_generic')
4d54ef20
PH
1626 if smuggled_data and 'force_videoid' in smuggled_data:
1627 force_videoid = smuggled_data['force_videoid']
1628 video_id = force_videoid
1629 else:
9dcd6fd3 1630 video_id = self._generic_id(url)
3d83a1ae 1631
79649588 1632 self.to_screen('%s: Requesting header' % video_id)
c1d1facd 1633
ebab4520 1634 head_req = HEADRequest(url)
23be51d8 1635 head_response = self._request_webpage(
ebab4520
PH
1636 head_req, video_id,
1637 note=False, errnote='Could not send HEAD request to %s' % url,
1638 fatal=False)
42393ce2 1639
23be51d8 1640 if head_response is not False:
42393ce2 1641 # Check for redirect
23be51d8 1642 new_url = head_response.geturl()
42393ce2
PH
1643 if url != new_url:
1644 self.report_following_redirect(new_url)
4d54ef20
PH
1645 if force_videoid:
1646 new_url = smuggle_url(
1647 new_url, {'force_videoid': force_videoid})
cecaaf3f 1648 return self.url_result(new_url)
42393ce2 1649
23be51d8
PH
1650 full_response = None
1651 if head_response is False:
5c2266df 1652 request = sanitized_Request(url)
58bde34a
S
1653 request.add_header('Accept-Encoding', '*')
1654 full_response = self._request_webpage(request, video_id)
23be51d8
PH
1655 head_response = full_response
1656
f930e0c7
S
1657 info_dict = {
1658 'id': video_id,
9dcd6fd3 1659 'title': self._generic_title(url),
303dcdb9 1660 'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
f930e0c7
S
1661 }
1662
23be51d8 1663 # Check for direct link to a video
955737b2 1664 content_type = head_response.headers.get('Content-Type', '').lower()
263eff95 1665 m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
23be51d8 1666 if m:
f930e0c7
S
1667 format_id = m.group('format_id')
1668 if format_id.endswith('mpegurl'):
eadc3ccd 1669 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
f930e0c7
S
1670 elif format_id == 'f4m':
1671 formats = self._extract_f4m_formats(url, video_id)
eadc3ccd 1672 else:
1673 formats = [{
1674 'format_id': m.group('format_id'),
1675 'url': url,
1676 'vcodec': 'none' if m.group('type') == 'audio' else None
1677 }]
de6c51e8 1678 info_dict['direct'] = True
19dbaeec 1679 self._sort_formats(formats)
de6c51e8 1680 info_dict['formats'] = formats
f930e0c7 1681 return info_dict
42393ce2 1682
d6e6a422 1683 if not self._downloader.params.get('test', False) and not is_intentional:
2fece970
S
1684 force = self._downloader.params.get('force_generic_extractor', False)
1685 self._downloader.report_warning(
1686 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
d6e6a422 1687
4e262a88 1688 if not full_response:
5c2266df 1689 request = sanitized_Request(url)
58bde34a
S
1690 # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1691 # making it impossible to download only chunk of the file (yet we need only 512kB to
1692 # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1693 # that will always result in downloading the whole file that is not desirable.
1694 # Therefore for extraction pass we have to override Accept-Encoding to any in order
1695 # to accept raw bytes and being able to download only a chunk.
1696 # It may probably better to solve this by checking Content-Type for application/octet-stream
1697 # after HEAD request finishes, but not sure if we can rely on this.
1698 request.add_header('Accept-Encoding', '*')
1699 full_response = self._request_webpage(request, video_id)
4e262a88 1700
5940862d
S
1701 first_bytes = full_response.read(512)
1702
1703 # Is it an M3U playlist?
0d769bcb 1704 if first_bytes.startswith(b'#EXTM3U'):
5940862d 1705 info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
19dbaeec 1706 self._sort_formats(info_dict['formats'])
5940862d
S
1707 return info_dict
1708
4e262a88
PH
1709 # Maybe it's a direct link to a video?
1710 # Be careful not to download the whole thing!
61ca9a80 1711 if not is_html(first_bytes):
4e262a88
PH
1712 self._downloader.report_warning(
1713 'URL could be a direct video link, returning it as such.')
f930e0c7 1714 info_dict.update({
4e262a88
PH
1715 'direct': True,
1716 'url': url,
f930e0c7
S
1717 })
1718 return info_dict
4e262a88
PH
1719
1720 webpage = self._webpage_read_content(
1721 full_response, url, video_id, prefix=first_bytes)
1722
9b122384 1723 self.report_extraction(video_id)
887c6acd 1724
1b840245 1725 # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
4fc946b5 1726 try:
f7854627 1727 doc = compat_etree_fromstring(webpage.encode('utf-8'))
4fc946b5
PH
1728 if doc.tag == 'rss':
1729 return self._extract_rss(url, video_id, doc)
cc99a77a
S
1730 elif doc.tag == 'SmoothStreamingMedia':
1731 info_dict['formats'] = self._parse_ism_formats(doc, url)
1732 self._sort_formats(info_dict['formats'])
1733 return info_dict
e5e8d20a 1734 elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
19dbaeec
S
1735 smil = self._parse_smil(doc, url, video_id)
1736 self._sort_formats(smil['formats'])
1737 return smil
729accb4
S
1738 elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1739 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1b840245 1740 elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
f930e0c7 1741 info_dict['formats'] = self._parse_mpd_formats(
86f4d14f
S
1742 doc, video_id,
1743 mpd_base_url=full_response.geturl().rpartition('/')[0],
1744 mpd_url=url)
19dbaeec 1745 self._sort_formats(info_dict['formats'])
f930e0c7
S
1746 return info_dict
1747 elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1748 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
19dbaeec 1749 self._sort_formats(info_dict['formats'])
f930e0c7 1750 return info_dict
f7300c5c 1751 except compat_xml_parse_error:
4fc946b5
PH
1752 pass
1753
c8e9a235
PH
1754 # Is it a Camtasia project?
1755 camtasia_res = self._extract_camtasia(url, video_id, webpage)
1756 if camtasia_res is not None:
1757 return camtasia_res
1758
14390730
S
1759 # Sometimes embedded video player is hidden behind percent encoding
1760 # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1761 # Unescaping the whole page allows to handle those cases in a generic way
45eedbe5 1762 webpage = compat_urllib_parse_unquote(webpage)
1f7659db 1763
887c6acd
PH
1764 # it's tempting to parse this further, but you would
1765 # have to take into account all the variations like
1766 # Video Title - Site Name
1767 # Site Name | Video Title
1768 # Video Title - Tagline | Site Name
1769 # and so on and so forth; it's just not practical
6f41b2bc
S
1770 video_title = self._og_search_title(
1771 webpage, default=None) or self._html_search_regex(
79649588
PH
1772 r'(?s)<title>(.*?)</title>', webpage, 'video title',
1773 default='video')
ef4fd848 1774
4d805e06
PH
1775 # Try to detect age limit automatically
1776 age_limit = self._rta_search(webpage)
1777 # And then there are the jokers who advertise that they use RTA,
1778 # but actually don't.
1779 AGE_LIMIT_MARKERS = [
1780 r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1781 ]
1782 if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1783 age_limit = 18
1784
ef4fd848
PH
1785 # video uploader is domain name
1786 video_uploader = self._search_regex(
79649588 1787 r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
887c6acd 1788
6f41b2bc
S
1789 video_description = self._og_search_description(webpage, default=None)
1790 video_thumbnail = self._og_search_thumbnail(webpage, default=None)
1791
ed2d6a19 1792 # Helper method
83992676 1793 def _playlist_from_matches(matches, getter=None, ie=None):
3b2f933b 1794 urlrs = orderedSet(
83992676 1795 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
3b2f933b 1796 for m in matches)
ed2d6a19
PH
1797 return self.playlist_result(
1798 urlrs, playlist_id=video_id, playlist_title=video_title)
1799
1f4b722b 1800 # Look for Brightcove Legacy Studio embeds
4fcaa4f4 1801 bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
99877772 1802 if bc_urls:
79649588 1803 self.to_screen('Brightcove video detected.')
99877772
PH
1804 entries = [{
1805 '_type': 'url',
1806 'url': smuggle_url(bc_url, {'Referer': url}),
3b7d9aa4 1807 'ie_key': 'BrightcoveLegacy'
99877772
PH
1808 } for bc_url in bc_urls]
1809
1810 return {
1811 '_type': 'playlist',
1812 'title': video_title,
1813 'id': video_id,
1814 'entries': entries,
1815 }
cfe50f04 1816
f6519f89
S
1817 # Look for Brightcove New Studio embeds
1818 bc_urls = BrightcoveNewIE._extract_urls(webpage)
1819 if bc_urls:
1820 return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
ed126900 1821
4d8819d2
S
1822 # Look for ThePlatform embeds
1823 tp_urls = ThePlatformIE._extract_urls(webpage)
1824 if tp_urls:
1825 return _playlist_from_matches(tp_urls, ie='ThePlatform')
1826
48a5eabc
S
1827 # Look for Vessel embeds
1828 vessel_urls = VesselIE._extract_urls(webpage)
1829 if vessel_urls:
1830 return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
1831
59b8ab58
PH
1832 # Look for embedded rtl.nl player
1833 matches = re.findall(
97b570a9 1834 r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
59b8ab58
PH
1835 webpage)
1836 if matches:
1837 return _playlist_from_matches(matches, ie='RtlNl')
1838
09b9c45e
S
1839 vimeo_urls = VimeoIE._extract_urls(url, webpage)
1840 if vimeo_urls:
1841 return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
7115ca84 1842
a1b85269
YCH
1843 vid_me_embed_url = self._search_regex(
1844 r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1845 webpage, 'vid.me embed', default=None)
1846 if vid_me_embed_url is not None:
1847 return self.url_result(vid_me_embed_url, 'Vidme')
1848
53c1d3ef 1849 # Look for embedded YouTube player
1f9da904 1850 matches = re.findall(r'''(?x)
2b88feed
PH
1851 (?:
1852 <iframe[^>]+?src=|
c71dfccc 1853 data-video-url=|
2b88feed 1854 <embed[^>]+?src=|
a7e97f6d
PH
1855 embedSWF\(?:\s*|
1856 new\s+SWFObject\(
2b88feed
PH
1857 )
1858 (["\'])
1bf5423e 1859 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
6b08cdf6 1860 (?:embed|v|p)/.+?)
1f9da904 1861 \1''', webpage)
887c6acd 1862 if matches:
ed2d6a19 1863 return _playlist_from_matches(
3b2f933b 1864 matches, lambda m: unescapeHTML(m[1]))
53c1d3ef 1865
65f3a228
PH
1866 # Look for lazyYT YouTube embed
1867 matches = re.findall(
1868 r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1869 if matches:
1870 return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1871
7deef1ba
YCH
1872 # Look for Wordpress "YouTube Video Importer" plugin
1873 matches = re.findall(r'''(?x)<div[^>]+
1874 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1875 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1876 if matches:
1877 return _playlist_from_matches(matches, lambda m: m[-1])
1878
ad213a1d 1879 matches = DailymotionIE._extract_urls(webpage)
355e4fd0 1880 if matches:
ad213a1d 1881 return _playlist_from_matches(matches)
355e4fd0 1882
8489578d
NJ
1883 # Look for embedded Dailymotion playlist player (#3822)
1884 m = re.search(
1885 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1886 if m:
1887 playlists = re.findall(
1888 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1889 if playlists:
1890 return _playlist_from_matches(
1891 playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1892
ef4fd848
PH
1893 # Look for embedded Wistia player
1894 match = re.search(
281d3f1d 1895 r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
ef4fd848 1896 if match:
9471c444
NJ
1897 embed_url = self._proto_relative_url(
1898 unescapeHTML(match.group('url')))
ef4fd848
PH
1899 return {
1900 '_type': 'url_transparent',
9471c444 1901 'url': embed_url,
ef4fd848
PH
1902 'ie_key': 'Wistia',
1903 'uploader': video_uploader,
ef4fd848 1904 }
5f6a1245 1905
9471c444 1906 match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
746c67d7
NJ
1907 if match:
1908 return {
1909 '_type': 'url_transparent',
6c114b12 1910 'url': 'wistia:%s' % match.group('id'),
746c67d7
NJ
1911 'ie_key': 'Wistia',
1912 'uploader': video_uploader,
746c67d7 1913 }
ef4fd848 1914
aa5957ac
S
1915 match = re.search(
1916 r'''(?sx)
1917 <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
1918 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
1919 ''', webpage)
1920 if match:
1921 return self.url_result(self._proto_relative_url(
1922 'wistia:%s' % match.group('id')), 'Wistia')
1923
bab19a8e
S
1924 # Look for SVT player
1925 svt_url = SVTIE._extract_url(webpage)
1926 if svt_url:
1927 return self.url_result(svt_url, 'SVT')
1928
fa35cdad
PH
1929 # Look for embedded condenast player
1930 matches = re.findall(
1931 r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1932 webpage)
1933 if matches:
1934 return {
1935 '_type': 'playlist',
1936 'entries': [{
1937 '_type': 'url',
1938 'ie_key': 'CondeNast',
1939 'url': ma,
1940 } for ma in matches],
1941 'title': video_title,
1942 'id': video_id,
1943 }
1944
c19f7764
JMF
1945 # Look for Bandcamp pages with custom domain
1946 mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1947 if mobj is not None:
1948 burl = unescapeHTML(mobj.group(1))
09804265
JMF
1949 # Don't set the extractor because it can be a track url or an album
1950 return self.url_result(burl)
c19f7764 1951
f25571ff
PH
1952 # Look for embedded Vevo player
1953 mobj = re.search(
1954 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1955 if mobj is not None:
1956 return self.url_result(mobj.group('url'))
796df3c6
S
1957
1958 # Look for embedded Viddler player
cb454b33
S
1959 mobj = re.search(
1960 r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1961 webpage)
796df3c6
S
1962 if mobj is not None:
1963 return self.url_result(mobj.group('url'))
f25571ff 1964
3378d67a
S
1965 # Look for NYTimes player
1966 mobj = re.search(
1967 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1968 webpage)
1969 if mobj is not None:
1970 return self.url_result(mobj.group('url'))
1971
cefdf970
S
1972 # Look for Libsyn player
1973 mobj = re.search(
1974 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1975 if mobj is not None:
1976 return self.url_result(mobj.group('url'))
1977
c0d0b01f 1978 # Look for Ooyala videos
8a37aa15 1979 mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
f076b638 1980 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
edfcf7ab
YCH
1981 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1982 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
c0d0b01f 1983 if mobj is not None:
9837cb75
RA
1984 embed_token = self._search_regex(
1985 r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
1986 webpage, 'ooyala embed token', default=None)
1987 return OoyalaIE._build_url_result(smuggle_url(
1988 mobj.group('ec'), {
1989 'domain': url,
1990 'embed_token': embed_token,
1991 }))
c0d0b01f 1992
f076b638 1993 # Look for multiple Ooyala embeds on SBN network websites
1994 mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1995 if mobj is not None:
1996 embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1997 if embeds:
1998 return _playlist_from_matches(
cce9d15d 1999 embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
f076b638 2000
aa94a6d3 2001 # Look for Aparat videos
48099643 2002 mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
aa94a6d3
PH
2003 if mobj is not None:
2004 return self.url_result(mobj.group(1), 'Aparat')
2005
c93c2ab1 2006 # Look for MPORA videos
c3f51436 2007 mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
c93c2ab1
PH
2008 if mobj is not None:
2009 return self.url_result(mobj.group(1), 'Mpora')
5f59ee79 2010
15c0e8e7 2011 # Look for embedded NovaMov-based player
8f89e687 2012 mobj = re.search(
8dfa187b 2013 r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
15c0e8e7
S
2014 (?P<url>http://(?:(?:embed|www)\.)?
2015 (?:novamov\.com|
2016 nowvideo\.(?:ch|sx|eu|at|ag|co)|
2017 videoweed\.(?:es|com)|
2018 movshare\.(?:net|sx|ag)|
2019 divxstage\.(?:eu|net|ch|co|at|ag))
2020 /embed\.php.+?)\1''', webpage)
8f89e687 2021 if mobj is not None:
15c0e8e7 2022 return self.url_result(mobj.group('url'))
50f56607 2023
9834872b 2024 # Look for embedded Facebook player
fd6ca382
YCH
2025 facebook_url = FacebookIE._extract_url(webpage)
2026 if facebook_url is not None:
2027 return self.url_result(facebook_url, 'Facebook')
9834872b 2028
ca97a56e
S
2029 # Look for embedded VK player
2030 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
2031 if mobj is not None:
2032 return self.url_result(mobj.group('url'), 'VK')
2033
33d4fdab
S
2034 # Look for embedded Odnoklassniki player
2035 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
2036 if mobj is not None:
2037 return self.url_result(mobj.group('url'), 'Odnoklassniki')
2038
0364fa8b
S
2039 # Look for embedded ivi player
2040 mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
2041 if mobj is not None:
2042 return self.url_result(mobj.group('url'), 'Ivi')
2043
db1f3888
PH
2044 # Look for embedded Huffington Post player
2045 mobj = re.search(
c3f51436 2046 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
db1f3888
PH
2047 if mobj is not None:
2048 return self.url_result(mobj.group('url'), 'HuffPost')
2049
1b86cc41 2050 # Look for embed.ly
2051 mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
2052 if mobj is not None:
2053 return self.url_result(mobj.group('url'))
2054 mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
2055 if mobj is not None:
f7e6f7fa 2056 return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1b86cc41 2057
60cc4dc4
PH
2058 # Look for funnyordie embed
2059 matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
2060 if matches:
ed2d6a19
PH
2061 return _playlist_from_matches(
2062 matches, getter=unescapeHTML, ie='FunnyOrDie')
60cc4dc4 2063
db546cf8
S
2064 # Look for BBC iPlayer embed
2065 matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
2066 if matches:
476eae0c 2067 return _playlist_from_matches(matches, ie='BBCCoUk')
db546cf8 2068
93d020dd
S
2069 # Look for embedded RUTV player
2070 rutv_url = RUTVIE._extract_url(webpage)
2071 if rutv_url:
2072 return self.url_result(rutv_url, 'RUTV')
2073
494f20cb 2074 # Look for embedded TVC player
b8599718
S
2075 tvc_url = TVCIE._extract_url(webpage)
2076 if tvc_url:
2077 return self.url_result(tvc_url, 'TVC')
494f20cb 2078
d40a3b5b
S
2079 # Look for embedded SportBox player
2080 sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
2081 if sportbox_urls:
2082 return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
2083
2bb5b6d0
S
2084 # Look for embedded XHamster player
2085 xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
2086 if xhamster_urls:
2087 return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
2088
2c9ca782
S
2089 # Look for embedded TNAFlixNetwork player
2090 tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
2091 if tnaflix_urls:
2092 return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
2093
b52c9ef1
S
2094 # Look for embedded PornHub player
2095 pornhub_urls = PornHubIE._extract_urls(webpage)
2096 if pornhub_urls:
2097 return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
2098
37e7a71c
S
2099 # Look for embedded DrTuber player
2100 drtuber_urls = DrTuberIE._extract_urls(webpage)
2101 if drtuber_urls:
2102 return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
2103
e28ed498
S
2104 # Look for embedded RedTube player
2105 redtube_urls = RedTubeIE._extract_urls(webpage)
2106 if redtube_urls:
2107 return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
2108
9872d311
S
2109 # Look for embedded Tvigle player
2110 mobj = re.search(
2111 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
2112 if mobj is not None:
2113 return self.url_result(mobj.group('url'), 'Tvigle')
2114
7e2ede98
JMF
2115 # Look for embedded TED player
2116 mobj = re.search(
d7cc31b6 2117 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
7e2ede98
JMF
2118 if mobj is not None:
2119 return self.url_result(mobj.group('url'), 'TED')
2120
5c386252 2121 # Look for embedded Ustream videos
d77ac737
YCH
2122 ustream_url = UstreamIE._extract_url(webpage)
2123 if ustream_url:
2124 return self.url_result(ustream_url, UstreamIE.ie_key())
5c386252 2125
893f8832
PH
2126 # Look for embedded arte.tv player
2127 mobj = re.search(
134c207e 2128 r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
893f8832
PH
2129 webpage)
2130 if mobj is not None:
2131 return self.url_result(mobj.group('url'), 'ArteTVEmbed')
2132
cbd55ade
S
2133 # Look for embedded francetv player
2134 mobj = re.search(
2135 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
2136 webpage)
2137 if mobj is not None:
2138 return self.url_result(mobj.group('url'))
2139
cb3ac1c6
S
2140 # Look for embedded smotri.com player
2141 smotri_url = SmotriIE._extract_url(webpage)
2142 if smotri_url:
2143 return self.url_result(smotri_url, 'Smotri')
2144
e6c2d9ad 2145 # Look for embedded Myvi.ru player
6dd94d3a 2146 myvi_url = MyviIE._extract_url(webpage)
e6c2d9ad
S
2147 if myvi_url:
2148 return self.url_result(myvi_url)
2149
dfb1b146 2150 # Look for embedded soundcloud player
94aae015
S
2151 soundcloud_urls = SoundcloudIE._extract_urls(webpage)
2152 if soundcloud_urls:
2153 return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
20991253 2154
027e2312
S
2155 # Look for tunein player
2156 tunein_urls = TuneInBaseIE._extract_urls(webpage)
2157 if tunein_urls:
2158 return _playlist_from_matches(tunein_urls)
2159
c5cd249e 2160 # Look for embedded mtvservices player
46fde8a1
S
2161 mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
2162 if mtvservices_url:
2163 return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
c5cd249e 2164
49807b4a
S
2165 # Look for embedded yahoo player
2166 mobj = re.search(
2167 r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
2168 webpage)
2169 if mobj is not None:
2170 return self.url_result(mobj.group('url'), 'Yahoo')
2171
2ef6fcb5
PH
2172 # Look for embedded sbs.com.au player
2173 mobj = re.search(
e98b8e79
PH
2174 r'''(?x)
2175 (?:
2176 <meta\s+property="og:video"\s+content=|
2177 <iframe[^>]+?src=
2178 )
2179 (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2ef6fcb5
PH
2180 webpage)
2181 if mobj is not None:
2182 return self.url_result(mobj.group('url'), 'SBS')
2183
42bdd9d0
PH
2184 # Look for embedded Cinchcast player
2185 mobj = re.search(
2186 r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
2187 webpage)
2188 if mobj is not None:
2189 return self.url_result(mobj.group('url'), 'Cinchcast')
2190
1a94ff68 2191 mobj = re.search(
5263cdfc 2192 r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1a94ff68 2193 webpage)
8001607e
YCH
2194 if not mobj:
2195 mobj = re.search(
2196 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
2197 webpage)
1a94ff68
S
2198 if mobj is not None:
2199 return self.url_result(mobj.group('url'), 'MLB')
2200
1419fafd 2201 mobj = re.search(
dd467d33 2202 r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1419fafd
S
2203 webpage)
2204 if mobj is not None:
2205 return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
2206
af63fed7 2207 mobj = re.search(
78d3b3e2 2208 r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
af63fed7
PH
2209 webpage)
2210 if mobj is not None:
2211 return self.url_result(mobj.group('url'), 'Livestream')
2212
255fca5e
S
2213 # Look for Zapiks embed
2214 mobj = re.search(
2215 r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
2216 if mobj is not None:
2217 return self.url_result(mobj.group('url'), 'Zapiks')
2218
e3216b82 2219 # Look for Kaltura embeds
c287f2bc
S
2220 kaltura_url = KalturaIE._extract_url(webpage)
2221 if kaltura_url:
2222 return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
e3216b82 2223
135c9c42 2224 # Look for Eagle.Platform embeds
06a96da1
S
2225 eagleplatform_url = EaglePlatformIE._extract_url(webpage)
2226 if eagleplatform_url:
2227 return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
135c9c42 2228
d47ae7f6
S
2229 # Look for ClipYou (uses Eagle.Platform) embeds
2230 mobj = re.search(
2231 r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
2232 if mobj is not None:
2233 return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
2234
f8388757 2235 # Look for Pladform embeds
45dad7ba
S
2236 pladform_url = PladformIE._extract_url(webpage)
2237 if pladform_url:
2238 return self.url_result(pladform_url)
f8388757 2239
ff18735c
S
2240 # Look for Videomore embeds
2241 videomore_url = VideomoreIE._extract_url(webpage)
2242 if videomore_url:
2243 return self.url_result(videomore_url)
2244
83f1481b
S
2245 # Look for Webcaster embeds
2246 webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
2247 if webcaster_url:
2248 return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
2249
2dcc114f
S
2250 # Look for Playwire embeds
2251 mobj = re.search(
2252 r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
2253 if mobj is not None:
2254 return self.url_result(mobj.group('url'))
2255
ad320e9b
NJ
2256 # Look for 5min embeds
2257 mobj = re.search(
2258 r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
2259 if mobj is not None:
2260 return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
2261
18153f1b
S
2262 # Look for Crooks and Liars embeds
2263 mobj = re.search(
2264 r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
2265 if mobj is not None:
2266 return self.url_result(mobj.group('url'))
2267
a2edf2e7
YCH
2268 # Look for NBC Sports VPlayer embeds
2269 nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
2270 if nbc_sports_url:
2271 return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
2272
de3eb07e
YCH
2273 # Look for NBC News embeds
2274 nbc_news_embed_url = re.search(
2275 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
2276 if nbc_news_embed_url:
2277 return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
2278
653789af 2279 # Look for Google Drive embeds
5b251628 2280 google_drive_url = GoogleDriveIE._extract_url(webpage)
653789af 2281 if google_drive_url:
2282 return self.url_result(google_drive_url, 'GoogleDrive')
2283
418c5cc3
YCH
2284 # Look for UDN embeds
2285 mobj = re.search(
c39fd7b1 2286 r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
418c5cc3
YCH
2287 if mobj is not None:
2288 return self.url_result(
0a160363 2289 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
418c5cc3 2290
2fe1b5bd
YCH
2291 # Look for Senate ISVP iframe
2292 senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
2293 if senate_isvp_url:
25c3a734 2294 return self.url_result(senate_isvp_url, 'SenateISVP')
2fe1b5bd 2295
756f574e
YCH
2296 # Look for Dailymotion Cloud videos
2297 dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
2298 if dmcloud_url:
2299 return self.url_result(dmcloud_url, 'DailymotionCloud')
2300
1ac1c4c2
S
2301 # Look for OnionStudios embeds
2302 onionstudios_url = OnionStudiosIE._extract_url(webpage)
2303 if onionstudios_url:
2304 return self.url_result(onionstudios_url)
2305
67167920 2306 # Look for ViewLift embeds
2307 viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
2308 if viewlift_url:
2309 return self.url_result(viewlift_url)
eedd20ef 2310
7cb09524 2311 # Look for JWPlatform embeds
2312 jwplatform_url = JWPlatformIE._extract_url(webpage)
2313 if jwplatform_url:
2314 return self.url_result(jwplatform_url, 'JWPlatform')
2315
aecfcd4e
S
2316 # Look for Digiteka embeds
2317 digiteka_url = DigitekaIE._extract_url(webpage)
2318 if digiteka_url:
2319 return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
6aeba407 2320
1979969f
S
2321 # Look for Arkena embeds
2322 arkena_url = ArkenaIE._extract_url(webpage)
2323 if arkena_url:
2324 return self.url_result(arkena_url, ArkenaIE.ie_key())
2325
b1c35797
RA
2326 # Look for Piksel embeds
2327 piksel_url = PikselIE._extract_url(webpage)
2328 if piksel_url:
2329 return self.url_result(piksel_url, PikselIE.ie_key())
2330
1bf996fa 2331 # Look for Limelight embeds
2332 mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
2333 if mobj:
2334 lm = {
2335 'Media': 'media',
2336 'Channel': 'channel',
2337 'ChannelList': 'channel_list',
2338 }
2339 return self.url_result('limelight:%s:%s' % (
2340 lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
2341
8f8f182d
S
2342 mobj = re.search(
2343 r'''(?sx)
2344 <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
2345 <param[^>]+
2346 name=(["\'])flashVars\2[^>]+
2347 value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
2348 ''', webpage)
2349 if mobj:
2350 return self.url_result('limelight:media:%s' % mobj.group('id'))
2351
a5158f38
YCH
2352 # Look for AdobeTVVideo embeds
2353 mobj = re.search(
2354 r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
2355 webpage)
2356 if mobj is not None:
2357 return self.url_result(
2358 self._proto_relative_url(unescapeHTML(mobj.group(1))),
2359 'AdobeTVVideo')
2360
088e1aac
YCH
2361 # Look for Vine embeds
2362 mobj = re.search(
2363 r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
2364 webpage)
2365 if mobj is not None:
2366 return self.url_result(
2367 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
2368
217d5ae0
RA
2369 # Look for VODPlatform embeds
2370 mobj = re.search(
93b84045 2371 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
217d5ae0
RA
2372 webpage)
2373 if mobj is not None:
2374 return self.url_result(
93b84045 2375 self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
217d5ae0 2376
7d273a38
RA
2377 # Look for Mangomolo embeds
2378 mobj = re.search(
d05ef09d 2379 r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
7d273a38
RA
2380 (?:
2381 video\?.*?\bid=(?P<video_id>\d+)|
2382 index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
2383 ).+?)\1''', webpage)
2384 if mobj is not None:
2385 info = {
2386 '_type': 'url_transparent',
2387 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
2388 'title': video_title,
2389 'description': video_description,
2390 'thumbnail': video_thumbnail,
2391 'uploader': video_uploader,
2392 }
2393 video_id = mobj.group('video_id')
2394 if video_id:
2395 info.update({
2396 'ie_key': 'MangomoloVideo',
2397 'id': video_id,
2398 })
2399 else:
2400 info.update({
2401 'ie_key': 'MangomoloLive',
2402 'id': mobj.group('channel_id'),
2403 })
2404 return info
2405
5a51775a
YCH
2406 # Look for Instagram embeds
2407 instagram_embed_url = InstagramIE._extract_embed_url(webpage)
2408 if instagram_embed_url is not None:
11e60fca
S
2409 return self.url_result(
2410 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
5a51775a 2411
b8f67449
KM
2412 # Look for LiveLeak embeds
2413 liveleak_url = LiveLeakIE._extract_url(webpage)
2414 if liveleak_url:
2415 return self.url_result(liveleak_url, 'LiveLeak')
2416
5d39176f
S
2417 # Look for 3Q SDN embeds
2418 threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2419 if threeqsdn_url:
6f41b2bc
S
2420 return {
2421 '_type': 'url_transparent',
2422 'ie_key': ThreeQSDNIE.ie_key(),
2423 'url': self._proto_relative_url(threeqsdn_url),
2424 'title': video_title,
2425 'description': video_description,
2426 'thumbnail': video_thumbnail,
2427 'uploader': video_uploader,
2428 }
5d39176f 2429
2a1321a2
S
2430 # Look for VBOX7 embeds
2431 vbox7_url = Vbox7IE._extract_url(webpage)
2432 if vbox7_url:
2433 return self.url_result(vbox7_url, Vbox7IE.ie_key())
2434
b0c8f2e9
DR
2435 # Look for DBTV embeds
2436 dbtv_urls = DBTVIE._extract_urls(webpage)
2437 if dbtv_urls:
2438 return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
2439
e186a9ec
S
2440 # Look for Videa embeds
2441 videa_urls = VideaIE._extract_urls(webpage)
2442 if videa_urls:
2443 return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
2444
b687c85e
S
2445 # Look for 20 minuten embeds
2446 twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
2447 if twentymin_urls:
2448 return _playlist_from_matches(
2449 twentymin_urls, ie=TwentyMinutenIE.ie_key())
2450
17f8deeb
S
2451 # Look for Openload embeds
2452 openload_urls = OpenloadIE._extract_urls(webpage)
2453 if openload_urls:
2454 return _playlist_from_matches(
2455 openload_urls, ie=OpenloadIE.ie_key())
2456
6ef3e65a
S
2457 # Look for VideoPress embeds
2458 videopress_urls = VideoPressIE._extract_urls(webpage)
2459 if videopress_urls:
2460 return _playlist_from_matches(
2461 videopress_urls, ie=VideoPressIE.ie_key())
2462
0de168f7
S
2463 # Looking for http://schema.org/VideoObject
2464 json_ld = self._search_json_ld(
082395d0
S
2465 webpage, video_id, default={}, expected_type='VideoObject')
2466 if json_ld.get('url'):
0de168f7
S
2467 info_dict.update({
2468 'title': video_title or info_dict['title'],
2469 'description': video_description,
2470 'thumbnail': video_thumbnail,
2471 'age_limit': age_limit
2472 })
2473 info_dict.update(json_ld)
2474 return info_dict
2475
bd264412
YCH
2476 # Look for HTML5 media
2477 entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
2478 if entries:
2479 for entry in entries:
2480 entry.update({
2481 'id': video_id,
2482 'title': video_title,
2483 })
2484 self._sort_formats(entry['formats'])
2485 return self.playlist_result(entries)
2486
def check_video(vurl):
    # Decide whether a candidate URL is worth treating as a video.
    #
    # Anything the YouTube extractor accepts is kept unconditionally.
    # Otherwise the URL path must contain a dot and its extension must not
    # be one of the known non-video kinds (flash, images, subtitles, js).
    if YoutubeIE.suitable(vurl):
        return True

    non_video_exts = (
        'swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
    url_path = compat_urlparse.urlparse(vurl).path
    extension = determine_ext(url_path)
    if '.' not in url_path:
        return False
    return extension not in non_video_exts
ced659bb
S
2493
def filter_video(urls):
    # Keep only the candidate URLs accepted by check_video, preserving
    # their order, and return them as a list.
    return [candidate for candidate in urls if check_video(candidate)]
2496
9b122384 2497 # Start with something easy: JW Player in SWFObject
ced659bb 2498 found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
b30b8698 2499 if not found:
d981cef6 2500 # Look for gorilla-vid style embedding
ced659bb 2501 found = filter_video(re.findall(r'''(?sx)
c0292e8a
PH
2502 (?:
2503 jw_plugins|
2504 JWPlayerOptions|
2505 jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
2506 )
a0f71985
PH
2507 .*?
2508 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
b30b8698 2509 if not found:
9b122384 2510 # Broaden the search a little bit
ced659bb 2511 found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
b30b8698
PH
2512 if not found:
2513 # Broaden the findall a little bit: JWPlayer JS loader
ced659bb 2514 found = filter_video(re.findall(
54a9328b 2515 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
4d805e06
PH
2516 if not found:
2517 # Flow player
ced659bb 2518 found = filter_video(re.findall(r'''(?xs)
4d805e06
PH
2519 flowplayer\("[^"]+",\s*
2520 \{[^}]+?\}\s*,
52585fd6 2521 \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
4d805e06 2522 ["']?url["']?\s*:\s*["']([^"']+)["']
ced659bb 2523 ''', webpage))
501f13fb
PH
2524 if not found:
2525 # Cinerama player
2526 found = re.findall(
2527 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
b30b8698 2528 if not found:
9b122384 2529 # Try to find twitter cards info
371ddb14
S
2530 # twitter:player:stream should be checked before twitter:player since
2531 # it is expected to contain a raw stream (see
2532 # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
ced659bb
S
2533 found = filter_video(re.findall(
2534 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
b30b8698 2535 if not found:
9b122384
PH
2536 # We look for Open Graph info:
2537 # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
b30b8698 2538 m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
9b122384
PH
2539 # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
2540 if m_video_type is not None:
ced659bb 2541 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
b30b8698 2542 if not found:
ed9a25dd 2543 REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
a5a45015 2544 found = re.search(
89ef304b 2545 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
ed9a25dd 2546 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
89ef304b 2547 webpage)
84f81016
S
2548 if not found:
2549 # Look also in Refresh HTTP header
2550 refresh_header = head_response.headers.get('Refresh')
2551 if refresh_header:
6c91a5a7
S
2552 # In python 2 response HTTP headers are bytestrings
2553 if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2554 refresh_header = refresh_header.decode('iso-8859-1')
ed9a25dd 2555 found = re.search(REDIRECT_REGEX, refresh_header)
b30b8698 2556 if found:
b37317d8 2557 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
89ef304b
PH
2558 self.report_following_redirect(new_url)
2559 return {
2560 '_type': 'url',
2561 'url': new_url,
2562 }
371ddb14
S
2563
2564 if not found:
2565 # twitter:player is a https URL to iframe player that may or may not
2566 # be supported by youtube-dl thus this is checked the very last (see
2567 # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
2568 embed_url = self._html_search_meta('twitter:player', webpage, default=None)
2569 if embed_url:
2570 return self.url_result(embed_url)
2571
b30b8698 2572 if not found:
416c7fcb 2573 raise UnsupportedError(url)
9b122384 2574
b30b8698 2575 entries = []
4a120778 2576 for video_url in orderedSet(found):
949b6497 2577 video_url = unescapeHTML(video_url)
6cc37c69 2578 video_url = video_url.replace('\\/', '/')
b30b8698 2579 video_url = compat_urlparse.urljoin(url, video_url)
f7e6f7fa 2580 video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
9b122384 2581
b30b8698
PH
2582 # Sometimes, jwplayer extraction will result in a YouTube URL
2583 if YoutubeIE.suitable(video_url):
2584 entries.append(self.url_result(video_url, 'Youtube'))
2585 continue
9b122384 2586
b30b8698
PH
2587 # here's a fun little line of code for you:
2588 video_id = os.path.splitext(video_id)[0]
fc9713a1 2589
28602e74
YCH
2590 entry_info_dict = {
2591 'id': video_id,
2592 'uploader': video_uploader,
2593 'title': video_title,
2594 'age_limit': age_limit,
2595 }
2596
729accb4
S
2597 ext = determine_ext(video_url)
2598 if ext == 'smil':
28602e74 2599 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
729accb4
S
2600 elif ext == 'xspf':
2601 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
750b9ff0
YCH
2602 elif ext == 'm3u8':
2603 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
79a35085
S
2604 elif ext == 'mpd':
2605 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
3f2f4a94
S
2606 elif ext == 'f4m':
2607 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
4119a96c 2608 elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
26aae566
S
2609 # Just matching .ism/manifest is not enough to be reliably sure
2610 # whether it's actually an ISM manifest or some other streaming
2611 # manifest since there are various streaming URL formats
2612 # possible (see [1]) as well as some other shenanigans like
2613 # .smil/manifest URLs that actually serve an ISM (see [2]) and
2614 # so on.
2615 # Thus the most reasonable way to solve this is to delegate
2616 # to generic extractor in order to look into the contents of
2617 # the manifest itself.
2618 # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
2619 # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
2620 entry_info_dict = self.url_result(
2621 smuggle_url(video_url, {'to_generic': True}),
2622 GenericIE.ie_key())
d6fd958c 2623 else:
28602e74
YCH
2624 entry_info_dict['url'] = video_url
2625
19dbaeec
S
2626 if entry_info_dict.get('formats'):
2627 self._sort_formats(entry_info_dict['formats'])
2628
28602e74 2629 entries.append(entry_info_dict)
b30b8698
PH
2630
2631 if len(entries) == 1:
669f0e7c 2632 return entries[0]
b30b8698
PH
2633 else:
2634 for num, e in enumerate(entries, start=1):
13d8fbef
JMF
2635 # 'url' results don't have a title
2636 if e.get('title') is not None:
2637 e['title'] = '%s (%d)' % (e['title'], num)
b30b8698
PH
2638 return {
2639 '_type': 'playlist',
2640 'entries': entries,
2641 }