]>
Commit | Line | Data |
---|---|---|
72e450c5 AL |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | unified_strdate | |
9 | ) | |
10 | ||
11 | ||
class THVideoIE(InfoExtractor):
    """Extractor for a single thvideo.tv video.

    Matches the regular page URL (``/v/th<id>``) as well as the mobile
    player URL (``/mobile.php?cid=<id>``); the numeric part is the video id.
    """

    _VALID_URL = r'http://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://thvideo.tv/v/th1987/',
        'md5': 'fa107b1f73817e325e9433505a70db50',
        'info_dict': {
            'id': '1987',
            'ext': 'mp4',
            'title': '【动画】秘封活动记录 ~ The Sealed Esoteric History.分镜稿预览',
            'display_id': 'th1987',
            'thumbnail': 'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg',
            'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ~ The Sealed Esoteric History.」 本视频是该动画第一期的分镜草稿...',
            'upload_date': '20140722'
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Two pages are fetched: the mobile player page, which carries the
        direct media link in a ``<source>`` tag, and the main video page,
        which carries the metadata (Open Graph tags and the
        ``datePublished`` microdata attribute).

        Returns a dict with the keys ``id``, ``ext``, ``url``, ``title``,
        ``display_id``, ``thumbnail``, ``description`` and ``upload_date``.
        ``upload_date`` is None when the publication date is not found.
        """
        video_id = self._match_id(url)

        # extract download link from mobile player page
        webpage_player = self._download_webpage(
            'http://thvideo.tv/mobile.php?cid=%s-0' % (video_id),
            video_id, note='Downloading video source page')
        video_url = self._html_search_regex(
            r'<source src="(.*?)" type', webpage_player, 'video url')

        # extract video info from main page
        webpage = self._download_webpage(
            'http://thvideo.tv/v/th%s' % (video_id), video_id)
        title = self._og_search_title(webpage)
        display_id = 'th%s' % video_id
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)

        # The date lookup is non-fatal, so it may come back as None; only
        # hand an actual string to unified_strdate so that a missing date
        # stays a best-effort omission instead of depending on how the
        # helper treats None.
        raw_upload_date = self._html_search_regex(
            r'span itemprop="datePublished" content="(.*?)">', webpage,
            'upload date', fatal=False)
        upload_date = None
        if raw_upload_date is not None:
            upload_date = unified_strdate(raw_upload_date)

        return {
            'id': video_id,
            'ext': 'mp4',
            'url': video_url,
            'title': title,
            'display_id': display_id,
            'thumbnail': thumbnail,
            'description': description,
            'upload_date': upload_date
        }
67077b18 AL |
59 | |
60 | ||
class THVideoPlaylistIE(InfoExtractor):
    """Extractor for thvideo.tv playlists (``/mylist<id>`` pages)."""

    # ``https?`` accepts both schemes; the previous ``http?`` made the "p"
    # optional instead, so it matched "htt://" and rejected "https://".
    _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/mylist(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://thvideo.tv/mylist2',
        'info_dict': {
            'id': '2',
            'title': '幻想万華鏡',
        },
        'playlist_mincount': 23,
    }

    def _real_extract(self, url):
        """Return a playlist result for the list page at *url*.

        The playlist page is downloaded once; its title is read from the
        ``show_title`` heading (non-fatal, so it may be None) and every
        linked ``/v/th<number>/`` video becomes a URL entry delegated to
        the ``THVideo`` extractor.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)
        list_title = self._html_search_regex(
            r'<h1 class="show_title">(.*?)<b id', webpage, 'playlist title',
            fatal=False)

        # Dots are escaped so only the literal host name is matched.
        video_ids = re.findall(
            r'<dd><a href="http://thvideo\.tv/v/th(\d+)/" target=', webpage)
        entries = [
            self.url_result('http://thvideo.tv/v/th' + video_id, 'THVideo')
            for video_id in video_ids]

        return self.playlist_result(entries, playlist_id, list_title)