jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	from .common import InfoExtractor
	5	from ..utils import parse_iso8601
	6
	7
	8	class NextMediaIE(InfoExtractor):
	9	IE_DESC = '蘋果日報'
	10	_VALID_URL = r'http://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
	11	_TESTS = [{
	12	'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
	13	'md5': 'dff9fad7009311c421176d1ac90bfe4f',
	14	'info_dict': {
	15	'id': '53109199',
	16	'ext': 'mp4',
	17	'title': '【佔領金鐘】50外國領事議員撐場讚學生勇敢香港有希望',
	18	'thumbnail': 're:^https?://.*\.jpg$',
	19	'description': 'md5:28222b9912b6665a21011b034c70fcc7',
	20	'timestamp': 1415456273,
	21	'upload_date': '20141108',
	22	}
	23	}]
	24
	25	_URL_PATTERN = r'\{ url: \'(.+)\' \}'
	26
	27	def _real_extract(self, url):
	28	news_id = self._match_id(url)
	29	page = self._download_webpage(url, news_id)
	30	return self._extract_from_nextmedia_page(news_id, url, page)
	31
	32	def _extract_from_nextmedia_page(self, news_id, url, page):
	33	title = self._fetch_title(page)
	34	video_url = self._search_regex(self._URL_PATTERN, page, 'video url')
	35
	36	attrs = {
	37	'id': news_id,
	38	'title': title,
	39	'url': video_url, # ext can be inferred from url
	40	'thumbnail': self._fetch_thumbnail(page),
	41	'description': self._fetch_description(page),
	42	}
	43
	44	timestamp = self._fetch_timestamp(page)
	45	if timestamp:
	46	attrs['timestamp'] = timestamp
	47	else:
	48	attrs['upload_date'] = self._fetch_upload_date(url)
	49
	50	return attrs
	51
	52	def _fetch_title(self, page):
	53	return self._og_search_title(page)
	54
	55	def _fetch_thumbnail(self, page):
	56	return self._og_search_thumbnail(page)
	57
	58	def _fetch_timestamp(self, page):
	59	dateCreated = self._search_regex('"dateCreated":"([^"]+)"', page, 'created time')
	60	return parse_iso8601(dateCreated)
	61
	62	def _fetch_upload_date(self, url):
	63	return self._search_regex(self._VALID_URL, url, 'upload date', group='date')
	64
	65	def _fetch_description(self, page):
	66	return self._og_search_property('description', page)
	67
	68
	69	class NextMediaActionNewsIE(NextMediaIE):
	70	IE_DESC = '蘋果日報 - 動新聞'
	71	_VALID_URL = r'http://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
	72	_TESTS = [{
	73	'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
	74	'md5': '05fce8ffeed7a5e00665d4b7cf0f9201',
	75	'info_dict': {
	76	'id': '19009428',
	77	'ext': 'mp4',
	78	'title': '【壹週刊】細10年男友偷食　50歲邵美琪再失戀',
	79	'thumbnail': 're:^https?://.*\.jpg$',
	80	'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659',
	81	'timestamp': 1421791200,
	82	'upload_date': '20150120',
	83	}
	84	}]
	85
	86	def _real_extract(self, url):
	87	news_id = self._match_id(url)
	88	actionnews_page = self._download_webpage(url, news_id)
	89	article_url = self._og_search_url(actionnews_page)
	90	article_page = self._download_webpage(article_url, news_id)
	91	return self._extract_from_nextmedia_page(news_id, url, article_page)
	92
	93
	94	class AppleDailyIE(NextMediaIE):
	95	IE_DESC = '臺灣蘋果日報'
	96	_VALID_URL = r'http://(www\|ent).appledaily.com.tw/(?:animation\|appledaily\|enews\|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
	97	_TESTS = [{
	98	'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
	99	'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
	100	'info_dict': {
	101	'id': '36354694',
	102	'ext': 'mp4',
	103	'title': '周亭羽走過摩鐵陰霾2男陪吃九把刀孤寒看醫生',
	104	'thumbnail': 're:^https?://.*\.jpg$',
	105	'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4',
	106	'upload_date': '20150128',
	107	}
	108	}, {
	109	'url': 'http://www.appledaily.com.tw/realtimenews/article/strange/20150128/550549/%E4%B8%8D%E6%BB%BF%E8%A2%AB%E8%B8%A9%E8%85%B3%E3%80%80%E5%B1%B1%E6%9D%B1%E5%85%A9%E5%A4%A7%E5%AA%BD%E4%B8%80%E8%B7%AF%E6%89%93%E4%B8%8B%E8%BB%8A',
	110	'md5': '86b4e9132d158279c7883822d94ccc49',
	111	'info_dict': {
	112	'id': '550549',
	113	'ext': 'mp4',
	114	'title': '不滿被踩腳　山東兩大媽一路打下車',
	115	'thumbnail': 're:^https?://.*\.jpg$',
	116	'description': 'md5:175b4260c1d7c085993474217e4ab1b4',
	117	'upload_date': '20150128',
	118	}
	119	}, {
	120	'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
	121	'md5': '03df296d95dedc2d5886debbb80cb43f',
	122	'info_dict': {
	123	'id': '5003671',
	124	'ext': 'mp4',
	125	'title': '20正妹熱舞　《刀龍傳說Online》火辣上市',
	126	'thumbnail': 're:^https?://.*\.jpg$',
	127	'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd',
	128	'upload_date': '20150128',
	129	},
	130	'skip': 'redirect to http://www.appledaily.com.tw/animation/',
	131	}, {
	132	# No thumbnail
	133	'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003673/',
	134	'md5': 'b06182cd386ea7bc6115ec7ff0f72aeb',
	135	'info_dict': {
	136	'id': '5003673',
	137	'ext': 'mp4',
	138	'title': '半夜尿尿　好像會看到___',
	139	'description': 'md5:61d2da7fe117fede148706cdb85ac066',
	140	'upload_date': '20150128',
	141	},
	142	'expected_warnings': [
	143	'video thumbnail',
	144	],
	145	'skip': 'redirect to http://www.appledaily.com.tw/animation/',
	146	}, {
	147	'url': 'http://www.appledaily.com.tw/appledaily/article/supplement/20140417/35770334/',
	148	'md5': 'eaa20e6b9df418c912d7f5dec2ba734d',
	149	'info_dict': {
	150	'id': '35770334',
	151	'ext': 'mp4',
	152	'title': '咖啡占卜測 XU裝熟指數',
	153	'thumbnail': 're:^https?://.*\.jpg$',
	154	'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748',
	155	'upload_date': '20140417',
	156	},
	157	}]
	158
	159	_URL_PATTERN = r'\{url: \'(.+)\'\}'
	160
	161	def _fetch_title(self, page):
	162	return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None) or
	163	self._html_search_meta('description', page, 'news title'))
	164
	165	def _fetch_thumbnail(self, page):
	166	return self._html_search_regex(r"setInitialImage$\'([^']+)'$", page, 'video thumbnail', fatal=False)
	167
	168	def _fetch_timestamp(self, page):
	169	return None
	170
	171	def _fetch_description(self, page):
	172	return self._html_search_meta('description', page, 'news description')