X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/10dc85924a74ae69bcf3170c37b351036eacca58..5dbac313ae4e3e8521dfe2e1a6a048a98ff4b4fe:/yt_dlp/extractor/generic.py
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index bf3c9c1e8..3b8e1e957 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -4,7 +4,7 @@
import urllib.parse
import xml.etree.ElementTree
-from .common import InfoExtractor # isort: split
+from .common import InfoExtractor
from .commonprotocols import RtmpIE
from .youtube import YoutubeIE
from ..compat import compat_etree_fromstring
@@ -14,7 +14,10 @@
ExtractorError,
UnsupportedError,
determine_ext,
+ determine_protocol,
dict_get,
+ extract_basic_auth,
+ filter_dict,
format_field,
int_or_none,
is_html,
@@ -31,7 +34,10 @@
unescapeHTML,
unified_timestamp,
unsmuggle_url,
+ update_url_query,
url_or_none,
+ urlhandle_detect_ext,
+ urljoin,
variadic,
xpath_attr,
xpath_text,
@@ -54,7 +60,9 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': 'trailer',
'upload_date': '20100513',
- }
+ 'direct': True,
+ 'timestamp': 1273772943.0,
+ },
},
# Direct link to media delivered compressed (until Accept-Encoding is *)
{
@@ -67,7 +75,7 @@ class GenericIE(InfoExtractor):
'upload_date': '20140522',
},
'expected_warnings': [
- 'URL could be a direct video link, returning it as such.'
+ 'URL could be a direct video link, returning it as such.',
],
'skip': 'URL invalid',
},
@@ -97,10 +105,12 @@ class GenericIE(InfoExtractor):
'ext': 'webm',
'title': '5_Lennart_Poettering_-_Systemd',
'upload_date': '20141120',
+ 'direct': True,
+ 'timestamp': 1416498816.0,
},
'expected_warnings': [
- 'URL could be a direct video link, returning it as such.'
- ]
+ 'URL could be a direct video link, returning it as such.',
+ ],
},
# RSS feed
{
@@ -108,7 +118,7 @@ class GenericIE(InfoExtractor):
'info_dict': {
'id': 'https://phihag.de/2014/youtube-dl/rss2.xml',
'title': 'Zero Punctuation',
- 'description': 're:.*groundbreaking video review series.*'
+ 'description': 're:.*groundbreaking video review series.*',
},
'playlist_mincount': 11,
},
@@ -129,6 +139,7 @@ class GenericIE(InfoExtractor):
'upload_date': '20201204',
},
}],
+ 'skip': 'Dead link',
},
# RSS feed with item with description and thumbnails
{
@@ -141,12 +152,12 @@ class GenericIE(InfoExtractor):
'playlist': [{
'info_dict': {
'ext': 'm4a',
- 'id': 'c1c879525ce2cb640b344507e682c36d',
+ 'id': '818a5d38-01cd-152f-2231-ee479677fa82',
'title': 're:Hydrogen!',
'description': 're:.*In this episode we are going.*',
'timestamp': 1567977776,
'upload_date': '20190908',
- 'duration': 459,
+ 'duration': 423,
'thumbnail': r're:^https?://.*\.jpg$',
'episode_number': 1,
'season_number': 1,
@@ -263,6 +274,7 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ 'skip': '404 Not Found',
},
# MPD from http://dash-mse-test.appspot.com/media.html
{
@@ -274,6 +286,7 @@ class GenericIE(InfoExtractor):
'title': 'car-20120827-manifest',
'formats': 'mincount:9',
'upload_date': '20130904',
+ 'timestamp': 1378272859.0,
},
},
# m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
@@ -314,14 +327,14 @@ class GenericIE(InfoExtractor):
'id': 'cmQHVoWB5FY',
'ext': 'mp4',
'upload_date': '20130224',
- 'uploader_id': 'TheVerge',
+ 'uploader_id': '@TheVerge',
'description': r're:^Chris Ziegler takes a look at the\.*',
'uploader': 'The Verge',
'title': 'First Firefox OS phones side-by-side',
},
'params': {
'skip_download': False,
- }
+ },
},
{
# redirect in Refresh HTTP header
@@ -347,7 +360,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'uploader': 'www.hodiho.fr',
'title': 'R\u00e9gis plante sa Jeep',
- }
+ },
},
# bandcamp page with custom domain
{
@@ -361,46 +374,6 @@ class GenericIE(InfoExtractor):
},
'skip': 'There is a limit of 200 free downloads / month for the test song',
},
- # ooyala video
- {
- 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
- 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
- 'info_dict': {
- 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
- 'ext': 'mp4',
- 'title': '2cc213299525360.mov', # that's what we get
- 'duration': 238.231,
- },
- 'add_ie': ['Ooyala'],
- },
- {
- # ooyala video embedded with http://player.ooyala.com/iframe.js
- 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
- 'info_dict': {
- 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
- 'ext': 'mp4',
- 'title': '"Steve Jobs: Man in the Machine" trailer',
- 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
- 'duration': 135.427,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'movie expired',
- },
- # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
- {
- 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
- 'info_dict': {
- 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
- 'ext': 'mp4',
- 'title': 'Steampunk Fest Comes to Honesdale',
- 'duration': 43.276,
- },
- 'params': {
- 'skip_download': True,
- }
- },
# embed.ly video
{
'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
@@ -465,19 +438,19 @@ class GenericIE(InfoExtractor):
'id': '370908',
'title': 'Госзаказ. День 3',
'ext': 'mp4',
- }
+ },
}, {
'info_dict': {
'id': '370905',
'title': 'Госзаказ. День 2',
'ext': 'mp4',
- }
+ },
}, {
'info_dict': {
'id': '370902',
'title': 'Госзаказ. День 1',
'ext': 'mp4',
- }
+ },
}],
'params': {
# m3u8 download
@@ -493,7 +466,8 @@ class GenericIE(InfoExtractor):
'title': 'Ужастики, русский трейлер (2015)',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 153,
- }
+ },
+ 'skip': 'Site dead',
},
# XHamster embed
{
@@ -517,7 +491,7 @@ class GenericIE(InfoExtractor):
'title': 'Hidden miracles of the natural world',
'uploader': 'Louie Schwartzberg',
'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
- }
+ },
},
# nowvideo embed hidden behind percent encoding
{
@@ -542,7 +516,7 @@ class GenericIE(InfoExtractor):
'upload_date': '20140320',
},
'params': {
- 'skip_download': 'Requires rtmpdump'
+ 'skip_download': 'Requires rtmpdump',
},
'skip': 'video gone',
},
@@ -563,8 +537,8 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
'expected_warnings': [
- 'Forbidden'
- ]
+ 'Forbidden',
+ ],
},
# Condé Nast embed
{
@@ -574,7 +548,7 @@ class GenericIE(InfoExtractor):
'id': '53501be369702d3275860000',
'ext': 'mp4',
'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
- }
+ },
},
# Dailymotion embed
{
@@ -621,7 +595,7 @@ class GenericIE(InfoExtractor):
'add_ie': ['Youtube'],
'params': {
'skip_download': True,
- }
+ },
},
# MTVServices embed
{
@@ -650,7 +624,7 @@ class GenericIE(InfoExtractor):
},
'params': {
'skip_download': True,
- }
+ },
},
# Flowplayer
{
@@ -662,7 +636,7 @@ class GenericIE(InfoExtractor):
'age_limit': 18,
'uploader': 'www.handjobhub.com',
'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
- }
+ },
},
# MLB embed
{
@@ -706,7 +680,7 @@ class GenericIE(InfoExtractor):
'uploader': 'Sophos Security',
'title': 'Chet Chat 171 - Oct 29, 2014',
'upload_date': '20141029',
- }
+ },
},
# Soundcloud multiple embeds
{
@@ -740,7 +714,7 @@ class GenericIE(InfoExtractor):
'ext': 'flv',
'upload_date': '20141112',
'title': 'Rosetta #CometLanding webcast HL 10',
- }
+ },
},
# Another Livestream embed, without 'new.' in URL
{
@@ -765,15 +739,17 @@ class GenericIE(InfoExtractor):
'playlist_mincount': 1,
'add_ie': ['Youtube'],
},
- # Cinchcast embed
+ # Libsyn embed
{
'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
'info_dict': {
- 'id': '7141703',
+ 'id': '3793998',
'ext': 'mp3',
'upload_date': '20141126',
- 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
- }
+ 'title': 'Underground Wellness Radio - Jack Tips: 5 Steps to Permanent Gut Healing',
+ 'thumbnail': 'https://assets.libsyn.com/secure/item/3793998/?height=90&width=90',
+ 'duration': 3989.0,
+ },
},
# Cinerama player
{
@@ -783,7 +759,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'uploader': 'www.abc.net.au',
'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
- }
+ },
},
# embedded viddler video
{
@@ -864,21 +840,7 @@ class GenericIE(InfoExtractor):
},
},
{
- # JWPlayer config passed as variable
- 'url': 'http://www.txxx.com/videos/3326530/ariele/',
- 'info_dict': {
- 'id': '3326530_hq',
- 'ext': 'mp4',
- 'title': 'ARIELE | Tube Cup',
- 'uploader': 'www.txxx.com',
- 'age_limit': 18,
- },
- 'params': {
- 'skip_download': True,
- }
- },
- {
- # Video.js embed, multiple formats
+ # Youtube embed, formerly: Video.js embed, multiple formats
'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
'info_dict': {
'id': 'yygqldloqIk',
@@ -905,6 +867,7 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ 'skip': '404 Not Found',
},
# rtl.nl embed
{
@@ -913,7 +876,7 @@ class GenericIE(InfoExtractor):
'info_dict': {
'id': 'aanslagen-kopenhagen',
'title': 'Aanslagen Kopenhagen',
- }
+ },
},
# Zapiks embed
{
@@ -922,7 +885,7 @@ class GenericIE(InfoExtractor):
'id': '118046',
'ext': 'mp4',
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
- }
+ },
},
# Kaltura embed (different embed code)
{
@@ -961,11 +924,11 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Kaltura'],
'expected_warnings': [
- 'Could not send HEAD request'
+ 'Could not send HEAD request',
],
'params': {
'skip_download': True,
- }
+ },
},
{
# Kaltura embedded, some fileExt broken (#11480)
@@ -1092,7 +1055,7 @@ class GenericIE(InfoExtractor):
'info_dict': {
'id': '8RUoRhRi',
'ext': 'mp4',
- 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
+ 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!',
'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
'timestamp': 1428207000,
'upload_date': '20150405',
@@ -1168,7 +1131,7 @@ class GenericIE(InfoExtractor):
'uploader': 'clickhole',
'upload_date': '20150527',
'timestamp': 1432744860,
- }
+ },
},
# SnagFilms embed
{
@@ -1177,7 +1140,7 @@ class GenericIE(InfoExtractor):
'id': '74849a00-85a9-11e1-9660-123139220831',
'ext': 'mp4',
'title': '#whilewewatch',
- }
+ },
},
# AdobeTVVideo embed
{
@@ -1473,7 +1436,7 @@ class GenericIE(InfoExtractor):
'upload_date': '20211217',
'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/12/tsiodras-mitsotakis-1024x545.jpg',
},
- }]
+ }],
},
{
'url': 'https://www.ertnews.gr/video/manolis-goyalles-o-anthropos-piso-apo-ti-diadiktyaki-vasilopita/',
@@ -1567,16 +1530,6 @@ class GenericIE(InfoExtractor):
'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
},
},
- {
- # vzaar embed
- 'url': 'http://help.vzaar.com/article/165-embedding-video',
- 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
- 'info_dict': {
- 'id': '8707641',
- 'ext': 'mp4',
- 'title': 'Building A Business Online: Principal Chairs Q & A',
- },
- },
{
# multiple HTML5 videos on one page
'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
@@ -1594,7 +1547,7 @@ class GenericIE(InfoExtractor):
'id': '0f64ce6',
'title': 'vl14062007715967',
'ext': 'mp4',
- }
+ },
},
{
'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
@@ -1606,7 +1559,7 @@ class GenericIE(InfoExtractor):
'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
'timestamp': 1474354800,
'upload_date': '20160920',
- }
+ },
},
{
'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
@@ -1698,7 +1651,7 @@ class GenericIE(InfoExtractor):
'info_dict': {
'id': '83645793',
'title': 'Lock up and get excited',
- 'ext': 'mp4'
+ 'ext': 'mp4',
},
'skip': 'TODO: fix nested playlists processing in tests',
},
@@ -1774,7 +1727,7 @@ class GenericIE(InfoExtractor):
'upload_date': '20220110',
'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/70bc39fa-895b-4918-a364-c39d2135fc6d.jpg',
- }
+ },
},
{
# blogger embed
@@ -1867,11 +1820,13 @@ class GenericIE(InfoExtractor):
'display_id': 'kelis-4th-of-july',
'ext': 'mp4',
'title': 'Kelis - 4th Of July',
- 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
+ 'description': 'Kelis - 4th Of July',
+ 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
},
'params': {
'skip_download': True,
},
+ 'expected_warnings': ['Untested major version'],
}, {
# KVS Player
'url': 'https://www.kvs-demo.com/embed/105/',
@@ -1880,35 +1835,12 @@ class GenericIE(InfoExtractor):
'display_id': 'kelis-4th-of-july',
'ext': 'mp4',
'title': 'Kelis - 4th Of July / Embed Player',
- 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
+ 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
},
'params': {
'skip_download': True,
},
}, {
- # KVS Player
- 'url': 'https://thisvid.com/videos/french-boy-pantsed/',
- 'md5': '3397979512c682f6b85b3b04989df224',
- 'info_dict': {
- 'id': '2400174',
- 'display_id': 'french-boy-pantsed',
- 'ext': 'mp4',
- 'title': 'French Boy Pantsed - ThisVid.com',
- 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
- }
- }, {
- # KVS Player
- 'url': 'https://thisvid.com/embed/2400174/',
- 'md5': '3397979512c682f6b85b3b04989df224',
- 'info_dict': {
- 'id': '2400174',
- 'display_id': 'french-boy-pantsed',
- 'ext': 'mp4',
- 'title': 'French Boy Pantsed - ThisVid.com',
- 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
- }
- }, {
- # KVS Player
'url': 'https://youix.com/video/leningrad-zoj/',
'md5': '94f96ba95706dc3880812b27b7d8a2b8',
'info_dict': {
@@ -1916,8 +1848,8 @@ class GenericIE(InfoExtractor):
'display_id': 'leningrad-zoj',
'ext': 'mp4',
'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com',
- 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
- }
+ 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
+ },
}, {
# KVS Player
'url': 'https://youix.com/embed/18485',
@@ -1927,19 +1859,20 @@ class GenericIE(InfoExtractor):
'display_id': 'leningrad-zoj',
'ext': 'mp4',
'title': 'Ленинград - ЗОЖ',
- 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
- }
+ 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
+ },
}, {
# KVS Player
'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/',
'md5': '94166bdb26b4cb1fb9214319a629fc51',
'info_dict': {
'id': '21217',
- 'display_id': '40-nochey-40-nights-2016',
+ 'display_id': '40-nochey-2016',
'ext': 'mp4',
'title': '40 ночей (2016) - BogMedia.org',
+ 'description': 'md5:4e6d7d622636eb7948275432eb256dc3',
'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
- }
+ },
},
{
# KVS Player (for sites that serve kt_player.js via non-https urls)
@@ -1949,9 +1882,9 @@ class GenericIE(InfoExtractor):
'id': '389508',
'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source',
'ext': 'mp4',
- 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер',
- 'thumbnail': 'http://www.camhub.world/contents/videos_screenshots/389000/389508/preview.mp4.jpg',
- }
+ 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер',
+ 'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg',
+ },
},
{
# Reddit-hosted video that will redirect and be processed by RedditIE
@@ -1964,8 +1897,8 @@ class GenericIE(InfoExtractor):
'timestamp': 1501941939.0,
'title': 'That small heart attack.',
'upload_date': '20170805',
- 'uploader': 'Antw87'
- }
+ 'uploader': 'Antw87',
+ },
},
{
# 1080p Reddit-hosted video that will redirect and be processed by RedditIE
@@ -1977,8 +1910,8 @@ class GenericIE(InfoExtractor):
'title': "The game Didn't want me to Knife that Guy I guess",
'uploader': 'paraf1ve',
'timestamp': 1636788683.0,
- 'upload_date': '20211113'
- }
+ 'upload_date': '20211113',
+ },
},
{
# MainStreaming player
@@ -1990,15 +1923,15 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'live_status': 'not_live',
'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
- 'duration': 1512
- }
+ 'duration': 1512,
+ },
},
{
# Multiple gfycat iframe embeds
'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=613422',
'info_dict': {
'title': '재이, 윤, 세은 황금 드레스를 입고 빛난다',
- 'id': 'board'
+ 'id': 'board',
},
'playlist_count': 8,
},
@@ -2007,18 +1940,18 @@ class GenericIE(InfoExtractor):
'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=612199',
'info_dict': {
'title': '옳게 된 크롭 니트 스테이씨 아이사',
- 'id': 'board'
+ 'id': 'board',
},
- 'playlist_count': 6
+ 'playlist_count': 6,
},
{
# Multiple gfycat embeds, with uppercase "IFR" in urls
'url': 'https://kkzz.kr/?vid=2295',
'info_dict': {
'title': '지방시 앰버서더 에스파 카리나 움짤',
- 'id': '?vid=2295'
+ 'id': '?vid=2295',
},
- 'playlist_count': 9
+ 'playlist_count': 9,
},
{
# Panopto embeds
@@ -2051,9 +1984,9 @@ class GenericIE(InfoExtractor):
'url': 'https://www.hs.fi/kotimaa/art-2000008762560.html',
'info_dict': {
'title': 'Koronavirus | Epidemiahuippu voi olla Suomessa ohi, mutta koronaviruksen poistamista yleisvaarallisten tautien joukosta harkitaan vasta syksyllä',
- 'id': 'art-2000008762560'
+ 'id': 'art-2000008762560',
},
- 'playlist_count': 3
+ 'playlist_count': 3,
},
{
# Ruutu embed in hs.fi with a single video
@@ -2082,7 +2015,7 @@ class GenericIE(InfoExtractor):
'thumbnail': 'https://www.filmarkivet.se/wp-content/uploads/parisdmoll2.jpg',
'timestamp': 1652833414,
'age_limit': 0,
- }
+ },
},
{
'url': 'https://www.mollymovieclub.com/p/interstellar?s=r#details',
@@ -2122,7 +2055,7 @@ class GenericIE(InfoExtractor):
'thumbnail': 'https://cdn.jwplayer.com/v2/media/YTmgRiNU/poster.jpg?width=720',
'duration': 5688.0,
'upload_date': '20210111',
- }
+ },
},
{
'note': 'JSON LD with multiple @type',
@@ -2138,7 +2071,7 @@ class GenericIE(InfoExtractor):
'upload_date': '20200411',
'age_limit': 0,
'duration': 111.0,
- }
+ },
},
{
'note': 'JSON LD with unexpected data type',
@@ -2153,13 +2086,69 @@ class GenericIE(InfoExtractor):
'thumbnail': r're:^https://media.autoweek.nl/m/.+\.jpg$',
'age_limit': 0,
'direct': True,
- }
- }
+ },
+ },
+ {
+ 'note': 'server returns data in brotli compression by default if `accept-encoding: *` is specified.',
+ 'url': 'https://www.extra.cz/cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867',
+ 'info_dict': {
+ 'id': 'cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867',
+ 'ext': 'mp4',
+ 'title': 'čauky lidi 70 finall',
+ 'description': 'čauky lidi 70 finall',
+ 'thumbnail': 'h',
+ 'upload_date': '20220606',
+ 'timestamp': 1654513791,
+ 'duration': 318.0,
+ 'direct': True,
+ 'age_limit': 0,
+ },
+ },
+ {
+ 'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
+ 'md5': 'e2f0a4c329f7986280b7328e24036d60',
+ 'info_dict': {
+ 'id': '284002',
+ 'display_id': 'just-out-of-the-shower-joi',
+ 'ext': 'mp4',
+ 'title': 'Just Out Of The Shower JOI - Shooshtime',
+ 'thumbnail': 'https://i.shoosh.co/contents/videos_screenshots/284000/284002/preview.mp4.jpg',
+ 'height': 720,
+ 'age_limit': 18,
+ },
+ },
+ {
+ 'note': 'Live HLS direct link',
+ 'url': 'https://d18j67ugtrocuq.cloudfront.net/out/v1/2767aec339144787926bd0322f72c6e9/index.m3u8',
+ 'info_dict': {
+ 'id': 'index',
+ 'title': r're:index',
+ 'ext': 'mp4',
+ 'live_status': 'is_live',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ },
+ {
+ 'note': 'Video.js VOD HLS',
+ 'url': 'https://gist.githubusercontent.com/bashonly/2aae0862c50f4a4b84f220c315767208/raw/e3380d413749dabbe804c9c2d8fd9a45142475c7/videojs_hls_test.html',
+ 'info_dict': {
+ 'id': 'videojs_hls_test',
+ 'title': 'video',
+ 'ext': 'mp4',
+ 'age_limit': 0,
+ 'duration': 1800,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ },
]
def report_following_redirect(self, new_url):
"""Report information extraction."""
- self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
+ self._downloader.to_screen(f'[redirect] Following redirect to {new_url}')
def report_detected(self, name, num=1, note=None):
if num > 1:
@@ -2171,12 +2160,49 @@ def report_detected(self, name, num=1, note=None):
self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')
- def _fragment_query(self, url):
- if self._configuration_arg('fragment_query'):
- query_string = urllib.parse.urlparse(url).query
- if query_string:
- return {'extra_param_to_segment_url': query_string}
- return {}
+ def _extra_manifest_info(self, info, manifest_url):
+ fragment_query = self._configuration_arg('fragment_query', [None], casesense=True)[0]
+ if fragment_query is not None:
+ info['extra_param_to_segment_url'] = (
+ urllib.parse.urlparse(fragment_query).query or fragment_query
+ or urllib.parse.urlparse(manifest_url).query or None)
+
+ key_query = self._configuration_arg('key_query', [None], casesense=True)[0]
+ if key_query is not None:
+ info['extra_param_to_key_url'] = (
+ urllib.parse.urlparse(key_query).query or key_query
+ or urllib.parse.urlparse(manifest_url).query or None)
+
+ def hex_or_none(value):
+ return value if re.fullmatch(r'(0x)?[\da-f]+', value, re.IGNORECASE) else None
+
+ info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key', casesense=True), {
+ 'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
+ }) or None
+
+ variant_query = self._configuration_arg('variant_query', [None], casesense=True)[0]
+ if variant_query is not None:
+ query = urllib.parse.parse_qs(
+ urllib.parse.urlparse(variant_query).query or variant_query
+ or urllib.parse.urlparse(manifest_url).query)
+ for fmt in self._downloader._get_formats(info):
+ fmt['url'] = update_url_query(fmt['url'], query)
+
+ # Attempt to detect live HLS or set VOD duration
+ m3u8_format = next((f for f in self._downloader._get_formats(info)
+ if determine_protocol(f) == 'm3u8_native'), None)
+ if m3u8_format:
+ is_live = self._configuration_arg('is_live', [None])[0]
+ if is_live is not None:
+ info['live_status'] = 'not_live' if is_live == 'false' else 'is_live'
+ return
+ headers = m3u8_format.get('http_headers') or info.get('http_headers')
+ duration = self._extract_m3u8_vod_duration(
+ m3u8_format['url'], info.get('id'), note='Checking m3u8 live status',
+ errnote='Failed to download m3u8 media playlist', headers=headers)
+ if not duration:
+ info['live_status'] = 'is_live'
+ info['duration'] = info.get('duration') or duration
def _extract_rss(self, url, video_id, doc):
NS_MAP = {
@@ -2220,43 +2246,87 @@ def itunes(key):
'entries': entries,
}
- def _kvs_getrealurl(self, video_url, license_code):
+ @classmethod
+ def _kvs_get_real_url(cls, video_url, license_code):
if not video_url.startswith('function/0/'):
return video_url # not obfuscated
- url_path, _, url_query = video_url.partition('?')
- urlparts = url_path.split('/')[2:]
- license = self._kvs_getlicensetoken(license_code)
- newmagic = urlparts[5][:32]
+ parsed = urllib.parse.urlparse(video_url[len('function/0/'):])
+ license_token = cls._kvs_get_license_token(license_code)
+ urlparts = parsed.path.split('/')
- for o in range(len(newmagic) - 1, -1, -1):
- new = ''
- l = (o + sum(int(n) for n in license[o:])) % 32
+ HASH_LENGTH = 32
+ hash_ = urlparts[3][:HASH_LENGTH]
+ indices = list(range(HASH_LENGTH))
- for i in range(0, len(newmagic)):
- if i == o:
- new += newmagic[l]
- elif i == l:
- new += newmagic[o]
- else:
- new += newmagic[i]
- newmagic = new
+ # Swap indices of hash according to the destination calculated from the license token
+ accum = 0
+ for src in reversed(range(HASH_LENGTH)):
+ accum += license_token[src]
+ dest = (src + accum) % HASH_LENGTH
+ indices[src], indices[dest] = indices[dest], indices[src]
+
+ urlparts[3] = ''.join(hash_[index] for index in indices) + urlparts[3][HASH_LENGTH:]
+ return urllib.parse.urlunparse(parsed._replace(path='/'.join(urlparts)))
- urlparts[5] = newmagic + urlparts[5][32:]
- return '/'.join(urlparts) + '?' + url_query
+ @staticmethod
+ def _kvs_get_license_token(license_code):
+ license_code = license_code.replace('$', '')
+ license_values = [int(char) for char in license_code]
- def _kvs_getlicensetoken(self, license):
- modlicense = license.replace('$', '').replace('0', '1')
- center = int(len(modlicense) / 2)
+ modlicense = license_code.replace('0', '1')
+ center = len(modlicense) // 2
fronthalf = int(modlicense[:center + 1])
backhalf = int(modlicense[center:])
+ modlicense = str(4 * abs(fronthalf - backhalf))[:center + 1]
+
+ return [
+ (license_values[index + offset] + current) % 10
+ for index, current in enumerate(map(int, modlicense))
+ for offset in range(4)
+ ]
+
+ def _extract_kvs(self, url, webpage, video_id):
+ flashvars = self._search_json(
+ r'(?s:', webpage)
- flashvars = self._parse_json(flashvars.group(1), video_id, transform_source=js_to_json)
-
- # extract the part after the last / as the display_id from the
- # canonical URL.
- display_id = self._search_regex(
- r'(?:'
- r'|)',
- webpage, 'display_id', fatal=False
- )
- title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)(?:h1|title)>', webpage, 'title')
-
- thumbnail = flashvars['preview_url']
- if thumbnail.startswith('//'):
- protocol, _, _ = url.partition('/')
- thumbnail = protocol + thumbnail
-
- url_keys = list(filter(re.compile(r'video_url|video_alt_url\d*').fullmatch, flashvars.keys()))
- formats = []
- for key in url_keys:
- if '/get_file/' not in flashvars[key]:
- continue
- format_id = flashvars.get(f'{key}_text', key)
- formats.append({
- 'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']),
- 'format_id': format_id,
- 'ext': 'mp4',
- **(parse_resolution(format_id) or parse_resolution(flashvars[key]))
- })
- if not formats[-1].get('height'):
- formats[-1]['quality'] = 1
-
- return [{
- 'id': flashvars['video_id'],
- 'display_id': display_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'formats': formats,
- }]
if not found:
# Broaden the search a little bit
found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
@@ -2702,7 +2735,7 @@ def filter_video(urls):
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search(
r'(?i)