jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	import itertools
	2	import re
	3	import urllib.parse
	4
	5	from .common import InfoExtractor
	6	from ..utils import (
	7	int_or_none,
	8	mimetype2ext,
	9	remove_end,
	10	strip_or_none,
	11	unified_strdate,
	12	url_or_none,
	13	urljoin,
	14	)
	15
	16
	class IwaraBaseIE(InfoExtractor):
	    """Shared pieces for the Iwara extractors.

	    Provides the host-matching regex fragment reused by every subclass's
	    _VALID_URL and a helper that turns a listing page into url_result entries.
	    """

	    # Site root with an optional `www.` or `ecchi.` subdomain, captured as the
	    # named group `base_url`. The alternation has to be a bare `|`: an escaped
	    # `\|` matches a literal pipe character, which makes the optional group
	    # useless and rejects every `www.` / `ecchi.` URL.
	    _BASE_REGEX = r'(?P<base_url>https?://(?:www\.|ecchi\.)?iwara\.tv)'

	    def _extract_playlist(self, base_url, webpage):
	        """Yield one url_result per titled link found in `webpage`.

	        Links are taken from the first `<a href>` following a `class="title">`
	        marker; each href is resolved against `base_url` before being wrapped.
	        """
	        for path in re.findall(r'class="title">\s*<a[^<]+href="([^"]+)', webpage):
	            yield self.url_result(urljoin(base_url, path))
	23
	24
	class IwaraIE(IwaraBaseIE):
	    """Extractor for single Iwara video pages (`/videos/<id>`)."""

	    _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/videos/(?P<id>[a-zA-Z0-9]+)'
	    _TESTS = [{
	        'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
	        # md5 is unstable
	        'info_dict': {
	            'id': 'amVwUl1EHpAD9RD',
	            'ext': 'mp4',
	            'title': '【MMD R-18】ガールフレンド carry_me_off',
	            'age_limit': 18,
	            'thumbnail': 'https://i.iwara.tv/sites/default/files/videos/thumbnails/7951/thumbnail-7951_0001.png',
	            'uploader': 'Reimu丨Action',
	            'upload_date': '20150828',
	            'description': 'md5:1d4905ce48c66c9299c617f08e106e0f',
	        },
	    }, {
	        'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO',
	        'md5': '7e5f1f359cd51a027ba4a7b7710a50f0',
	        'info_dict': {
	            'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
	            'ext': 'mp4',
	            'title': '[3D Hentai] Kyonyu × Genkai × Emaki Shinobi Girls.mp4',
	            'age_limit': 18,
	        },
	        'add_ie': ['GoogleDrive'],
	    }, {
	        'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
	        # md5 is unstable
	        'info_dict': {
	            'id': '6liAP9s2Ojc',
	            'ext': 'mp4',
	            'age_limit': 18,
	            'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
	            'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
	            'upload_date': '20160910',
	            'uploader': 'aMMDsork',
	            'uploader_id': 'UCVOFyOSCyFkXTYYHITtqB7A',
	        },
	        'add_ie': ['Youtube'],
	    }]

	    def _real_extract(self, url):
	        """Return the info dict for one video page.

	        When the video API returns no data, the first iframe on the page is
	        handed off as a `url_transparent` result instead. Otherwise the
	        metadata is scraped from the page HTML and the formats are built from
	        the API response.
	        """
	        video_id = self._match_id(url)

	        webpage, urlh = self._download_webpage_handle(url, video_id)

	        # Use the final URL of the response (not the requested one) to decide
	        # which subdomain served the page.
	        hostname = urllib.parse.urlparse(urlh.geturl()).hostname
	        # ecchi is 'sexy' in Japanese
	        age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0

	        video_data = self._download_json(f'http://www.iwara.tv/api/video/{video_id}', video_id)

	        if not video_data:
	            # No hosted formats: fall back to whatever is embedded in the page.
	            iframe_url = self._html_search_regex(
	                r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
	                webpage, 'iframe URL', group='url')
	            return {
	                '_type': 'url_transparent',
	                'url': iframe_url,
	                'age_limit': age_limit,
	            }

	        # The suffix is a plain ' | Iwara'; a backslash before the pipe would
	        # never match the page title and would leave the suffix in place.
	        title = remove_end(self._html_extract_title(webpage), ' | Iwara')

	        thumbnail = self._html_search_regex(
	            r'poster=[\'"]([^\'"]+)', webpage, 'thumbnail', default=None)

	        uploader = self._html_search_regex(
	            r'class="username">([^<]+)', webpage, 'uploader', fatal=False)

	        upload_date = unified_strdate(self._html_search_regex(
	            r'作成日:([^\s]+)', webpage, 'upload_date', fatal=False))

	        description = strip_or_none(self._search_regex(
	            r'<p>(.+?(?=</div))', webpage, 'description', fatal=False,
	            flags=re.DOTALL))

	        formats = []
	        for a_format in video_data:
	            format_uri = url_or_none(a_format.get('uri'))
	            if not format_uri:
	                continue
	            format_id = a_format.get('resolution')
	            # `resolution` may be absent; search an empty string in that case
	            # rather than passing None to the regex engine.
	            height = int_or_none(self._search_regex(
	                r'(\d+)p', format_id or '', 'height', default=None))
	            formats.append({
	                'url': self._proto_relative_url(format_uri, 'https:'),
	                'format_id': format_id,
	                'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
	                'height': height,
	                # Width is derived from the height using a 16:9 ratio.
	                'width': int_or_none(height / 9.0 * 16.0 if height else None),
	                # Rank the 'Source' rendition above every other one.
	                'quality': 1 if format_id == 'Source' else 0,
	            })

	        return {
	            'id': video_id,
	            'title': title,
	            'age_limit': age_limit,
	            'formats': formats,
	            'thumbnail': self._proto_relative_url(thumbnail, 'https:'),
	            'uploader': uploader,
	            'upload_date': upload_date,
	            'description': description,
	        }
	129
	130
	class IwaraPlaylistIE(IwaraBaseIE):
	    """Extractor for Iwara playlist pages (`/playlist/<id>`)."""

	    IE_NAME = 'iwara:playlist'
	    _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/playlist/(?P<id>[^/?#&]+)'

	    _TESTS = [{
	        'url': 'https://ecchi.iwara.tv/playlist/best-enf',
	        'info_dict': {
	            'title': 'Best enf',
	            'uploader': 'Jared98112',
	            'id': 'best-enf',
	        },
	        'playlist_mincount': 1097,
	    }, {
	        # urlencoded
	        'url': 'https://ecchi.iwara.tv/playlist/%E3%83%97%E3%83%AC%E3%82%A4%E3%83%AA%E3%82%B9%E3%83%88-2',
	        'info_dict': {
	            'id': 'プレイリスト-2',
	            'title': 'プレイリスト',
	            'uploader': 'mainyu',
	        },
	        'playlist_mincount': 91,
	    }]

	    def _real_extract(self, url):
	        """Download the playlist page and return a playlist info dict.

	        The id is percent-decoded from the URL; title and uploader are scraped
	        from the page (both optional), and entries are produced lazily from
	        the titled links on that same page.
	        """
	        mobj = self._match_valid_url(url)
	        base_url = mobj.group('base_url')
	        playlist_id = urllib.parse.unquote(mobj.group('id'))

	        webpage = self._download_webpage(url, playlist_id)

	        playlist_title = self._html_search_regex(
	            r'class="title"[^>]*>([^<]+)', webpage, 'title', fatal=False)
	        playlist_uploader = self._html_search_regex(
	            r'<h2>([^<]+)', webpage, 'uploader', fatal=False)
	        entries = self._extract_playlist(base_url, webpage)

	        return {
	            '_type': 'playlist',
	            'id': playlist_id,
	            'title': playlist_title,
	            'uploader': playlist_uploader,
	            'entries': entries,
	        }
	166
	167
	class IwaraUserIE(IwaraBaseIE):
	    """Extractor for Iwara user pages (`/users/<id>`), yielding the user's videos."""

	    IE_NAME = 'iwara:user'
	    _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/users/(?P<id>[^/?#&]+)'

	    _TESTS = [{
	        'note': 'number of all videos page is just 1 page. less than 40 videos',
	        'url': 'https://ecchi.iwara.tv/users/infinityyukarip',
	        'info_dict': {
	            'title': 'Uploaded videos from Infinity_YukariP',
	            'id': 'infinityyukarip',
	            'uploader': 'Infinity_YukariP',
	            'uploader_id': 'infinityyukarip',
	        },
	        'playlist_mincount': 39,
	    }, {
	        'note': 'no even all videos page. probably less than 10 videos',
	        'url': 'https://ecchi.iwara.tv/users/mmd-quintet',
	        'info_dict': {
	            'title': 'Uploaded videos from mmd quintet',
	            'id': 'mmd-quintet',
	            'uploader': 'mmd quintet',
	            'uploader_id': 'mmd-quintet',
	        },
	        'playlist_mincount': 6,
	    }, {
	        'note': 'has paging. more than 40 videos',
	        'url': 'https://ecchi.iwara.tv/users/theblackbirdcalls',
	        'info_dict': {
	            'title': 'Uploaded videos from TheBlackbirdCalls',
	            'id': 'theblackbirdcalls',
	            'uploader': 'TheBlackbirdCalls',
	            'uploader_id': 'theblackbirdcalls',
	        },
	        'playlist_mincount': 420,
	    }, {
	        'note': 'foreign chars in URL. there must be foreign characters in URL',
	        'url': 'https://ecchi.iwara.tv/users/ぶた丼',
	        'info_dict': {
	            'title': 'Uploaded videos from ぶた丼',
	            'id': 'ぶた丼',
	            'uploader': 'ぶた丼',
	            'uploader_id': 'ぶた丼',
	        },
	        'playlist_mincount': 170,
	    }]

	    def _entries(self, playlist_id, base_url):
	        """Lazily yield url_result entries for every video of the user.

	        If the profile page links to an "all videos" listing, that listing is
	        walked page by page; otherwise the entries are taken directly from the
	        profile page itself.
	        """
	        profile_page = self._download_webpage(
	            f'{base_url}/users/{playlist_id}', playlist_id)
	        listing_path = self._search_regex(
	            r'<a href="(/users/[^/]+/videos)(?:\?[^"]+)?">', profile_page,
	            'all videos url', default=None)

	        if listing_path:
	            listing_url = urljoin(base_url, listing_path)

	            for page_num in itertools.count(1):
	                # The first request carries no query; later ones pass a
	                # zero-based `page` parameter.
	                query = {}
	                if page_num > 1:
	                    query = {'page': str(page_num - 1)}

	                listing_page = self._download_webpage(
	                    listing_url, playlist_id,
	                    note=f'Downloading playlist page {page_num}', query=query)
	                yield from self._extract_playlist(base_url, listing_page)

	                # Stop once the page no longer references the next page index.
	                if f'page={page_num}' not in listing_page:
	                    return
	        else:
	            yield from self._extract_playlist(base_url, profile_page)

	    def _real_extract(self, url):
	        """Return a playlist result whose id is the percent-decoded user id."""
	        mobj = self._match_valid_url(url)
	        base_url = mobj.group('base_url')
	        playlist_id = urllib.parse.unquote(mobj.group('id'))

	        entries = self._entries(playlist_id, base_url)
	        return self.playlist_result(entries, playlist_id)