import base64
from yt_dlp.aes import (
- BLOCK_SIZE_BYTES,
aes_cbc_decrypt,
aes_cbc_decrypt_bytes,
aes_cbc_encrypt,
def test_ecb_encrypt(self):
    """ECB-encrypt the shared secret message and compare to a known vector.

    aes_ecb_encrypt applies PKCS#7 padding itself, so the plaintext is
    passed unpadded and no IV argument is given (ECB mode has no IV).
    """
    data = bytes_to_intlist(self.secret_msg)
    encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key))
    self.assertEqual(
        encrypted,
        b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs))
# AES operates on 128-bit (16-byte) blocks regardless of key size.
BLOCK_SIZE_BYTES = 16
def unpad_pkcs7(data):
    """Strip PKCS#7 padding: the final byte encodes how many bytes to drop."""
    pad_len = compat_ord(data[-1])
    return data[:-pad_len]
-BLOCK_SIZE_BYTES = 16
def pkcs7_padding(data):
    """
    PKCS#7 padding

    Appends N copies of the byte value N, where N is the number of bytes
    needed to reach the next BLOCK_SIZE_BYTES boundary (a full extra block
    when len(data) is already a multiple of the block size).

    @param {int[]} data cleartext
    @returns {int[]} padded data
    """
    remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES
    return data + [remaining_length] * remaining_length
def pad_block(block, padding_mode):
def aes_ecb_encrypt(data, key, iv=None):
"""
- Encrypt with aes in ECB mode
+ Encrypt with aes in ECB mode. Using PKCS#7 padding
@param {int[]} data cleartext
@param {int[]} key 16/24/32-Byte cipher key
encrypted_data = []
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
- encrypted_data += aes_encrypt(block, expanded_key)
- encrypted_data = encrypted_data[:len(data)]
+ encrypted_data += aes_encrypt(pkcs7_padding(block), expanded_key)
return encrypted_data
'key_expansion',
'pad_block',
+ 'pkcs7_padding',
'unpad_pkcs7',
]
# Python-2-era compatibility aliases: on Python 3 these map directly onto
# builtins and stdlib modules.
compat_basestring = str
compat_casefold = str.casefold
compat_chr = chr
compat_collections_abc = collections.abc
compat_cookiejar = http.cookiejar
class ADNIE(InfoExtractor):
- IE_DESC = 'Anime Digital Network'
- _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
- _TEST = {
- 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
- 'md5': '0319c99885ff5547565cacb4f3f9348d',
+ IE_DESC = 'Animation Digital Network'
+ _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
+ 'md5': '1c9ef066ceb302c86f80c2b371615261',
'info_dict': {
- 'id': '7778',
+ 'id': '9841',
'ext': 'mp4',
- 'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
- 'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
- 'series': 'Blue Exorcist - Kyôto Saga',
- 'duration': 1467,
- 'release_date': '20170106',
+ 'title': 'Fruits Basket - Episode 1',
+ 'description': 'md5:14be2f72c3c96809b0ca424b0097d336',
+ 'series': 'Fruits Basket',
+ 'duration': 1437,
+ 'release_date': '20190405',
'comment_count': int,
'average_rating': float,
- 'season_number': 2,
- 'episode': 'Début des hostilités',
+ 'season_number': 1,
+ 'episode': 'À ce soir !',
'episode_number': 1,
- }
- }
+ },
+ 'skip': 'Only available in region (FR, ...)',
+ }, {
+ 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
+ 'only_matching': True,
+ }]
- _NETRC_MACHINE = 'animedigitalnetwork'
- _BASE_URL = 'http://animedigitalnetwork.fr'
- _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
+ _NETRC_MACHINE = 'animationdigitalnetwork'
+ _BASE = 'animationdigitalnetwork.fr'
+ _API_BASE_URL = 'https://gw.api.' + _BASE + '/'
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
_HEADERS = {}
_LOGIN_ERR_MESSAGE = 'Unable to log in'
if subtitle_location:
enc_subtitles = self._download_webpage(
subtitle_location, video_id, 'Downloading subtitles data',
- fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
+ fatal=False, headers={'Origin': 'https://' + self._BASE})
if not enc_subtitles:
return None
- # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
+ # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
compat_b64decode(enc_subtitles[24:]),
binascii.unhexlify(self._K + '7fac1178830cfe0c'),
ExtractorError,
float_or_none,
sanitized_Request,
+ str_or_none,
traverse_obj,
urlencode_postdata,
USER_AGENTS,
class CeskaTelevizeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
+ _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
'info_dict': {
'id': '61924494877028507',
'ext': 'mp4',
- 'title': 'Hyde Park Civilizace: Bonus 01 - En',
+ 'title': 'Bonus 01 - En - Hyde Park Civilizace',
'description': 'English Subtittles',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 81.3,
},
}, {
# live stream
- 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
+ 'url': 'http://www.ceskatelevize.cz/zive/ct1/',
'info_dict': {
- 'id': 402,
+ 'id': '102',
'ext': 'mp4',
- 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'title': r'ČT1 - živé vysílání online',
+ 'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
'is_live': True,
},
'params': {
# m3u8 download
'skip_download': True,
},
- 'skip': 'Georestricted to Czech Republic',
+ }, {
+ # another
+ 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
+ 'only_matching': True,
+ 'info_dict': {
+ 'id': 402,
+ 'ext': 'mp4',
+ 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'is_live': True,
+ },
+ # 'skip': 'Georestricted to Czech Republic',
}, {
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
'only_matching': True,
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
'info_dict': {
'id': '215562210900007-bogotart',
- 'title': 'Queer: Bogotart',
- 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti. Připravil Peter Serge Butko',
+ 'title': 'Bogotart - Queer',
+ 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti',
},
'playlist': [{
'info_dict': {
'id': '61924494877311053',
'ext': 'mp4',
- 'title': 'Queer: Bogotart (Varování 18+)',
+ 'title': 'Bogotart - Queer (Varování 18+)',
'duration': 11.9,
},
}, {
'info_dict': {
'id': '61924494877068022',
'ext': 'mp4',
- 'title': 'Queer: Bogotart (Queer)',
+ 'title': 'Bogotart - Queer (Queer)',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 1558.3,
},
def _real_extract(self, url):
playlist_id = self._match_id(url)
- parsed_url = compat_urllib_parse_urlparse(url)
- webpage = self._download_webpage(url, playlist_id)
- site_name = self._og_search_property('site_name', webpage, fatal=False, default=None)
+ webpage, urlh = self._download_webpage_handle(url, playlist_id)
+ parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
+ site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
playlist_title = self._og_search_title(webpage, default=None)
if site_name and playlist_title:
- playlist_title = playlist_title.replace(f' — {site_name}', '', 1)
+ playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
playlist_description = self._og_search_description(webpage, default=None)
if playlist_description:
playlist_description = playlist_description.replace('\xa0', ' ')
- if parsed_url.path.startswith('/porady/'):
+ type_ = 'IDEC'
+ if re.search(r'(^/porady|/zive)/', parsed_url.path):
next_data = self._search_nextjs_data(webpage, playlist_id)
- idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
+ if '/zive/' in parsed_url.path:
+ idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
+ else:
+ idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
+ if not idec:
+ idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
+ if idec:
+ type_ = 'bonus'
if not idec:
raise ExtractorError('Failed to find IDEC id')
- iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id)
- webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id,
- query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec})
+ iframe_hash = self._download_webpage(
+ 'https://www.ceskatelevize.cz/v-api/iframe-hash/',
+ playlist_id, note='Getting IFRAME hash')
+ query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, }
+ webpage = self._download_webpage(
+ 'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php',
+ playlist_id, note='Downloading player', query=query)
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
- raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
+ self.raise_geo_restricted(NOT_AVAILABLE_STRING)
+ if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )):
+ raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)
type_ = None
episode_id = None
is_live = item.get('type') == 'LIVE'
formats = []
for format_id, stream_url in item.get('streamUrls', {}).items():
- stream_url = stream_url.replace('https://', 'http://')
if 'playerType=flash' in stream_url:
stream_formats = self._extract_m3u8_formats(
stream_url, playlist_id, 'mp4', 'm3u8_native',
entries[num]['formats'].extend(formats)
continue
- item_id = item.get('id') or item['assetId']
+ item_id = str_or_none(item.get('id') or item['assetId'])
title = item['title']
duration = float_or_none(item.get('duration'))
for e in entries:
self._sort_formats(e['formats'])
+ if len(entries) == 1:
+ return entries[0]
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
def _get_subtitles(self, episode_id, subs):
+import re
+
from .common import InfoExtractor
from ..utils import (
determine_ext,
+ extract_attributes,
int_or_none,
str_to_int,
+ url_or_none,
urlencode_postdata,
)
'id': '133957',
'ext': 'mp4',
'title': 'everthing about me (Preview)',
+ 'uploader': 'ellyxxix',
'view_count': int,
'like_count': int,
},
}, {
# full video
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
- 'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
+ 'md5': 'bb47bab0e0802c2a60c24ef079dfe60f',
'info_dict': {
'id': '935718',
'ext': 'mp4',
'title': 'MY FACE REVEAL',
+ 'description': 'md5:ec5901d41808b3746fed90face161612',
+ 'uploader': 'Sarah Calanthe',
'view_count': int,
'like_count': int,
},
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, )
+ try:
+ webpage = self._download_webpage(real_url, video_id)
+ except Exception:
+ # probably useless fallback
+ webpage = self._download_webpage(url, video_id)
+
+ info = self._search_regex(
+ r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
+ webpage, 'meta details', default='')
+ info = extract_attributes(info)
+
+ player = self._search_regex(
+ r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
+ webpage, 'player details', default='')
+ player = extract_attributes(player)
+
+ video_urls_and_ids = (
+ (info.get('data-meta-video'), 'video'),
+ (player.get('data-video-transcoded'), 'transcoded'),
+ (player.get('data-video-filepath'), 'filepath'),
+ (self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
+ )
+
+ def txt_or_none(s, default=None):
+ return (s.strip() or default) if isinstance(s, compat_str) else default
+
+ uploader = txt_or_none(info.get('data-meta-author'))
- video_url = self._search_regex(
- r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
- webpage, 'video URL', group='url')
+ def mung_title(s):
+ if uploader:
+ s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s)
+ return txt_or_none(s)
- title = self._html_search_regex(
- (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
- r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
- webpage, 'title', default=None) or self._html_search_meta(
- 'twitter:title', webpage, 'title', fatal=True)
+ title = (
+ mung_title(info.get('data-meta-title'))
+ or self._html_search_regex(
+ (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
+ r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
+ webpage, 'title', default=None)
+ or self._html_search_meta(
+ 'twitter:title', webpage, 'title', fatal=True))
+
+ title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
title += ' (Preview)'
# Sets some cookies
self._download_webpage(
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
- video_id, fatal=False, data=urlencode_postdata({
+ video_id, note='Setting format cookies', fatal=False,
+ data=urlencode_postdata({
'mvtoken': mv_token,
'vid': video_id,
}), headers={
'X-Requested-With': 'XMLHttpRequest'
})
- if determine_ext(video_url) == 'm3u8':
- formats = self._extract_m3u8_formats(
- video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
- else:
- formats = [{'url': video_url}]
+ formats = []
+ for v_url, fmt in video_urls_and_ids:
+ v_url = url_or_none(v_url)
+ if not v_url:
+ continue
+ if determine_ext(v_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ v_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls'))
+ else:
+ formats.append({
+ 'url': v_url,
+ 'format_id': fmt,
+ })
+
+ self._remove_duplicate_formats(formats)
+
+ for f in formats:
+ if f.get('height') is None:
+ f['height'] = int_or_none(
+ self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
+ if '/preview/' in f['url']:
+ f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
+ f['preference'] = -10
+ if 'transcoded' in f['format_id']:
+ f['preference'] = f.get('preference', -1) - 1
+
+ self._sort_formats(formats)
+
+ def get_likes():
+ likes = self._search_regex(
+ r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ),
+ webpage, 'likes', default='')
+ likes = extract_attributes(likes)
+ return int_or_none(likes.get('data-likes'))
- like_count = int_or_none(self._search_regex(
- r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
- view_count = str_to_int(self._html_search_regex(
- r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
- 'view count', default=None))
+ def get_views():
+ return str_to_int(self._html_search_regex(
+ r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
+ webpage, 'view count', default=None))
return {
'id': video_id,
'title': title,
- 'view_count': view_count,
- 'like_count': like_count,
'formats': formats,
- 'uploader': self._html_search_regex(r'<meta[^>]+name="author"[^>]*>([^<]+)', webpage, 'uploader'),
+ 'description': txt_or_none(info.get('data-meta-description')),
+ 'uploader': txt_or_none(info.get('data-meta-author')),
+ 'thumbnail': (
+ url_or_none(info.get('data-meta-image'))
+ or url_or_none(player.get('data-video-screenshot'))),
+ 'view_count': get_views(),
+ 'like_count': get_likes(),
}
'title': 'a/ Hot Teens',
'categories': list,
'upload_date': '20210104',
- 'uploader_id': 'yonbiw',
+ 'uploader_id': 'anonymous',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
},
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
- comment_count = webpage.count('class="media-comment-contents"')
+ comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
uploader_id = self._html_search_regex(
- (r'"media-meta-member">\s+<a href="/m/([^"]+)"',
- r'<span\b[^>]+\bclass="username">([^<]+)</span>'),
+ (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
+ r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
webpage, 'uploader_id', fatal=False)
+
categories = self._html_search_meta('keywords', webpage, default=None)
if categories:
categories = [cat.strip() for cat in categories.split(',')]
r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
description = self._html_search_meta(
'description', webpage, fatal=False)
- page_count = self._int(self._search_regex(
- r'(\d+)</(?:a|span)><(?:a|span)[^>]+rel="next">',
- webpage, 'page_count', default=0), 'page_count')
+ page_count = str_to_int(self._search_regex(
+ r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
+ webpage, 'page_count', default=0))
if not page_count:
message = self._search_regex(
- r'class="error-page"[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*',
+ r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''',
webpage, 'error_msg', default=None) or 'This group has no videos.'
self.report_warning(message, group_id)
+ page_count = 1
PAGE_SIZE = 80
def _get_page(idx):
- if not page_count:
- return
- webpage = self._download_webpage(
- page_url, group_id, query={'page': idx + 1},
- note='Downloading page %d/%d' % (idx + 1, page_count)
- )
+ if idx > 0:
+ webpage = self._download_webpage(
+ page_url, group_id, query={'page': idx + 1},
+ note='Downloading page %d/%d' % (idx + 1, page_count)
+ )
for entry in self._extract_entries(webpage, url):
yield entry
import json
import re
import time
from base64 import b64encode
from binascii import hexlify
from datetime import datetime
from hashlib import md5
from random import randint

from .common import InfoExtractor
from ..aes import aes_ecb_encrypt, pkcs7_padding
from ..compat import compat_urllib_parse_urlencode
from ..utils import (
    ExtractorError,
    bytes_to_intlist,
    error_to_compat_str,
    float_or_none,
    int_or_none,
    intlist_to_bytes,
    sanitized_Request,
    try_get,
)
class NetEaseMusicBaseIE(InfoExtractor):
@classmethod
def _encrypt(cls, dfsid):
salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
- string_bytes = bytearray(compat_str(dfsid).encode('ascii'))
+ string_bytes = bytearray(str(dfsid).encode('ascii'))
salt_len = len(salt_bytes)
for i in range(len(string_bytes)):
string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
result = b64encode(m.digest()).decode('ascii')
return result.replace('/', '_').replace('+', '-')
@classmethod
def make_player_api_request_data_and_headers(cls, song_id, bitrate):
    """Build the encrypted form body and HTTP headers for the eapi player call.

    Mimics the official desktop client: a compact JSON payload (including a
    fake device "cookie" header) is combined with a salted MD5 digest,
    PKCS#7-padded, AES-ECB encrypted and hex-encoded into the `params`
    form field.

    @param song_id  numeric song id
    @param bitrate  requested bitrate in bps
    @returns        ('params=<hex>', headers dict)
    """
    KEY = b'e82ckenh8dichen8'
    URL = '/api/song/enhance/player/url'
    now = int(time.time() * 1000)
    rand = randint(0, 1000)
    cookie = {
        'osver': None,
        'deviceId': None,
        'appver': '8.0.0',
        'versioncode': '140',
        'mobilename': None,
        'buildver': '1623435496',
        'resolution': '1920x1080',
        '__csrf': '',
        'os': 'pc',
        'channel': None,
        'requestId': '{0}_{1:04}'.format(now, rand),
    }
    request_text = json.dumps(
        {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
        separators=(',', ':'))
    # Digest salt string is part of the eapi request protocol.
    message = 'nobody{0}use{1}md5forencrypt'.format(
        URL, request_text).encode('latin1')
    msg_digest = md5(message).hexdigest()

    data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
        URL, request_text, msg_digest)
    data = pkcs7_padding(bytes_to_intlist(data))
    encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
    encrypted_params = hexlify(encrypted).decode('ascii').upper()

    cookie = '; '.join(
        ['{0}={1}'.format(k, v if v is not None else 'undefined')
         for [k, v] in cookie.items()])

    headers = {
        # BUG FIX: previous code read `self.extractor...` inside this
        # classmethod, where `self` is undefined and raised NameError on
        # every call. No downloader instance is reachable from a
        # classmethod, so use a fixed desktop browser UA instead.
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': 'https://music.163.com',
        'Cookie': cookie,
    }
    return ('params={0}'.format(encrypted_params), headers)
+
def _call_player_api(self, song_id, bitrate):
    """POST the encrypted player request; return decoded JSON, or {} on failure.

    JSON decode failures (ExtractorError whose cause is ValueError/TypeError)
    are re-raised as hard errors; any other failure — network error, HTTP
    error, or an empty response — is reduced to a warning plus an empty
    dict so format extraction can continue with the remaining formats.
    """
    url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
    data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
    try:
        msg = 'empty result'
        result = self._download_json(
            url, song_id, data=data.encode('ascii'), headers=headers)
        if result:
            return result
    except ExtractorError as e:
        if type(e.cause) in (ValueError, TypeError):
            # JSON load failure
            raise
    except Exception as e:
        msg = error_to_compat_str(e)
    # Reached on empty result, non-JSON ExtractorError, or generic failure.
    self.report_warning('%s API call (%s) failed: %s' % (
        song_id, bitrate, msg))
    return {}
+
def extract_formats(self, info):
    """Resolve playable URLs for each format a song's metadata declares.

    For every key of self._FORMATS present in `info`, queries the player
    API at the advertised bitrate (default 999000) and collects valid
    media URLs. If nothing at all was found, raises for a non-2xx/3xx
    site code, otherwise reports the song as geo-restricted to CN.
    """
    err = 0  # first non-success site code seen; 0 = none recorded
    formats = []
    song_id = info['id']
    for song_format in self._FORMATS:
        details = info.get(song_format)
        if not details:
            continue

        bitrate = int_or_none(details.get('bitrate')) or 999000
        data = self._call_player_api(song_id, bitrate)
        for song in try_get(data, lambda x: x['data'], list) or []:
            song_url = try_get(song, lambda x: x['url'])
            if not song_url:
                continue
            if self._is_valid_url(song_url, info['id'], 'song'):
                formats.append({
                    'url': song_url,
                    'ext': details.get('extension'),
                    'abr': float_or_none(song.get('br'), scale=1000),
                    'format_id': song_format,
                    'filesize': int_or_none(song.get('size')),
                    'asr': int_or_none(details.get('sr')),
                })
            elif err == 0:
                # Remember the first failure code; `or 0` guards against a
                # missing/non-int 'code' so the range checks below cannot
                # compare None with an int (TypeError).
                err = try_get(song, lambda x: x['code'], int) or 0

    if not formats:
        msg = 'No media links found'
        if err != 0 and (err < 200 or err >= 400):
            raise ExtractorError(
                '%s (site code %d)' % (msg, err, ), expected=True)
        else:
            self.raise_geo_restricted(
                msg + ': probably this video is not available from your location due to geo restriction.',
                countries=['CN'])

    return formats
@classmethod
class NetEaseMusicIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:song'
IE_DESC = '网易云音乐'
- _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://music.163.com/#/song?id=32102397',
- 'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
+ 'md5': '3e909614ce09b1ccef4a3eb205441190',
'info_dict': {
'id': '32102397',
'ext': 'mp3',
- 'title': 'Bad Blood (feat. Kendrick Lamar)',
+ 'title': 'Bad Blood',
'creator': 'Taylor Swift / Kendrick Lamar',
- 'upload_date': '20150517',
- 'timestamp': 1431878400,
- 'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
- },
- 'skip': 'Blocked outside Mainland China',
- }, {
- 'note': 'No lyrics translation.',
- 'url': 'http://music.163.com/#/song?id=29822014',
- 'info_dict': {
- 'id': '29822014',
- 'ext': 'mp3',
- 'title': '听见下雨的声音',
- 'creator': '周杰伦',
- 'upload_date': '20141225',
- 'timestamp': 1419523200,
- 'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
+ 'upload_date': '20150516',
+ 'timestamp': 1431792000,
+ 'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
},
- 'skip': 'Blocked outside Mainland China',
}, {
'note': 'No lyrics.',
'url': 'http://music.163.com/song?id=17241424',
'title': 'Opus 28',
'creator': 'Dustin O\'Halloran',
'upload_date': '20080211',
+ 'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
'timestamp': 1202745600,
},
- 'skip': 'Blocked outside Mainland China',
}, {
'note': 'Has translated name.',
'url': 'http://music.163.com/#/song?id=22735043',
'timestamp': 1264608000,
'alt_title': '说出愿望吧(Genie)',
},
- 'skip': 'Blocked outside Mainland China',
+ }, {
+ 'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
+ 'md5': '95826c73ea50b1c288b22180ec9e754d',
+ 'info_dict': {
+ 'id': '95670',
+ 'ext': 'mp3',
+ 'title': '国际歌',
+ 'creator': '马备',
+ 'upload_date': '19911130',
+ 'timestamp': 691516800,
+ 'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
+ },
}]
def _process_lyrics(self, lyrics_info):
return self._download_json(
urljoin('https://psapi.nrk.no/', path),
video_id, note or 'Downloading %s JSON' % item,
- fatal=fatal, query=query,
- headers={'Accept-Encoding': 'gzip, deflate, br'})
+ fatal=fatal, query=query)
class NRKIE(NRKBaseIE):
if '://player.vimeo.com/video/' in url:
config = self._parse_json(self._search_regex(
- r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
+ r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
if config.get('view') == 4:
config = self._verify_player_video_password(
redirect_url, video_id, headers)
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ NO_DEFAULT,
+ ExtractorError,
determine_ext,
+ extract_attributes,
float_or_none,
int_or_none,
join_nonempty,
merge_dicts,
- NO_DEFAULT,
- orderedSet,
parse_codecs,
qualities,
traverse_obj,
},
}, {
'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
- 'md5': '57af4423db0455a3975d2dc4578536bc',
+ 'md5': '1b93bdec7d02fc0b703c5e7687461628',
'info_dict': {
'ext': 'mp4',
'id': 'video_funk_1770473',
title = content.get('title') or content['teaserHeadline']
t = content['mainVideoContent']['http://zdf.de/rels/target']
-
- ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
-
+ ptmd_path = traverse_obj(t, (
+ (('streams', 'default'), None),
+ ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template')
+ ), get_all=False)
if not ptmd_path:
- ptmd_path = traverse_obj(
- t, ('streams', 'default', 'http://zdf.de/rels/streams/ptmd-template'),
- 'http://zdf.de/rels/streams/ptmd-template').replace(
- '{playerId}', 'ngplayer_2_4')
+ raise ExtractorError('Could not extract ptmd_path')
info = self._extract_ptmd(
- urljoin(url, ptmd_path), video_id, player['apiToken'], url)
+ urljoin(url, ptmd_path.replace('{playerId}', 'ngplayer_2_4')), video_id, player['apiToken'], url)
thumbnails = []
layouts = try_get(
'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
video_id)
- document = video['document']
-
- title = document['titel']
- content_id = document['basename']
-
formats = []
- format_urls = set()
- for f in document['formitaeten']:
- self._extract_format(content_id, formats, format_urls, f)
+ formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
+ document = formitaeten and video['document']
+ if formitaeten:
+ title = document['titel']
+ content_id = document['basename']
+
+ format_urls = set()
+ for f in formitaeten or []:
+ self._extract_format(content_id, formats, format_urls, f)
self._sort_formats(formats)
thumbnails = []
'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
'info_dict': {
'id': 'das-aktuelle-sportstudio',
- 'title': 'das aktuelle sportstudio | ZDF',
+ 'title': 'das aktuelle sportstudio',
},
- 'playlist_mincount': 23,
+ 'playlist_mincount': 18,
}, {
'url': 'https://www.zdf.de/dokumentation/planet-e',
'info_dict': {
'title': 'planet e.',
},
'playlist_mincount': 50,
+ }, {
+ 'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
+ 'info_dict': {
+ 'id': 'aktenzeichen-xy-ungeloest',
+ 'title': 'Aktenzeichen XY... ungelöst',
+ 'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
+ },
+ 'playlist_mincount': 2,
}, {
'url': 'https://www.zdf.de/filme/taunuskrimi/',
'only_matching': True,
def suitable(cls, url):
    """Match channel URLs, but defer to ZDFIE for URLs it already handles."""
    if ZDFIE.suitable(url):
        return False
    return super(ZDFChannelIE, cls).suitable(url)
def _og_search_title(self, webpage, fatal=False):
    """og:title with any trailing ' - ZDF' / ' | ZDFmediathek' suffix removed.

    Returns None when no usable title remains after stripping.
    """
    title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal)
    return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None
+
def _real_extract(self, url):
    """Extract a ZDF channel page as a playlist of its "plusbar" videos.

    With --no-playlist only the first matched video is returned. Teasers
    marked data-target-video-type="novideo" are filtered out of the
    playlist.
    """
    channel_id = self._match_id(url)
    webpage = self._download_webpage(url, channel_id)
    matches = re.finditer(
        r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL,
        webpage)

    if self._downloader.params.get('noplaylist', False):
        entry = next(
            (self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
            None)
        self.to_screen('Downloading just the main video because of --no-playlist')
        if entry:
            return entry
    else:
        self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, ))

    def check_video(m):
        # Probe the teaser link for this plusbar id: entries without a
        # playable video carry data-target-video-type="novideo".
        v_ref = self._search_regex(
            r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ),
            webpage, 'check id', default='')
        v_ref = extract_attributes(v_ref)
        return v_ref.get('data-target-video-type') != 'novideo'

    return self.playlist_from_matches(
        (m.group('url') for m in matches if check_video(m)),
        channel_id, self._og_search_title(webpage, fatal=False))
return '\0_'
return char
- if restricted and is_id is NO_DEFAULT:
+ # Replace look-alike Unicode glyphs
+ if restricted and (is_id is NO_DEFAULT or not is_id):
s = unicodedata.normalize('NFKC', s)
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
result = ''.join(map(replace_insane, s))