]>
Commit | Line | Data |
---|---|---|
0c56a3f7 PH |
1 | # coding: utf-8 |
2 | ||
7aca14a1 PH |
3 | import datetime |
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | ExtractorError, | |
9 | ) | |
10 | ||
11 | ||
class GooglePlusIE(InfoExtractor):
    """Extractor for videos embedded in Google+ (plus.google.com) posts.

    Downloads the post page, scrapes upload date / uploader / title from the
    post's HTML, follows the photo link to the video page, and returns the
    highest-resolution googlevideo redirector URL found there.
    """
    IE_DESC = u'Google Plus'
    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
    IE_NAME = u'plus.google'
    _TEST = {
        u"url": u"https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
        u"file": u"ZButuJc6CtH.flv",
        u"info_dict": {
            u"upload_date": u"20120613",
            u"uploader": u"井上ヨシマサ",
            u"title": u"嘆きの天使 降臨"
        }
    }

    def _real_extract(self, url):
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        post_url = mobj.group(0)
        video_id = mobj.group(1)

        video_extension = 'flv'

        # Step 1, Retrieve post webpage to extract further information
        webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage')

        self.report_extraction(video_id)

        # Extract update date (hidden <a> element holding an ISO date)
        upload_date = self._html_search_regex(
            r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
                    ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
            webpage, u'upload date', fatal=False, flags=re.VERBOSE)
        if upload_date:
            # Convert timestring to a format suitable for filename
            upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
            upload_date = upload_date.strftime('%Y%m%d')

        # Extract uploader
        uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
            webpage, u'uploader', fatal=False)

        # Extract title: first line of the post's meta description
        video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
            webpage, 'title', default=u'NA')

        # Step 2, Simulate clicking the image box to launch video
        DOMAIN = 'https://plus.google.com/'
        video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
            webpage, u'video page URL')
        if not video_page.startswith(DOMAIN):
            video_page = DOMAIN + video_page

        webpage = self._download_webpage(video_page, video_id, u'Downloading video page')

        # Extract video links of all sizes: (width, height, resolution, url)
        pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
        mobj = re.findall(pattern, webpage)
        if len(mobj) == 0:
            raise ExtractorError(u'Unable to extract video links')

        # Sort numerically by resolution. The captured resolution is a string,
        # so a plain sorted() would compare lexicographically (e.g. "720" would
        # sort after "1080") and could pick a lower-resolution link.
        links = sorted(mobj, key=lambda link: int(link[0]))

        # Take the last entry of the sort, i.e. the highest resolution, and
        # keep only the URL; the resolution part of the tuple has no use anymore.
        video_url = links[-1][-1]
        # Treat escaped \u0026 style hex embedded in the page's JavaScript
        try:
            video_url = video_url.decode("unicode_escape")
        except AttributeError: # Python 3
            video_url = bytes(video_url, 'ascii').decode('unicode-escape')

        return [{
            'id': video_id,
            'url': video_url,
            'uploader': uploader,
            'upload_date': upload_date,
            'title': video_title,
            'ext': video_extension,
        }]