]>
Commit | Line | Data |
---|---|---|
97d6faac PH |
1 | import datetime |
2 | import json | |
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | ||
7 | from ..utils import ( | |
8 | ExtractorError, | |
9 | ) | |
10 | ||
class PhotobucketIE(InfoExtractor):
    """Information extractor for photobucket.com."""

    # TODO: the original _VALID_URL was:
    # r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
    # Check if it's necessary to keep the old extraction process
    _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
    IE_NAME = u'photobucket'

    def _real_extract(self, url):
        """Extract the video information for a Photobucket URL.

        The page is first searched for the JSON blob passed to
        ``Pb.Data.Shared.put(Pb.Data.Shared.MEDIA, ...)``; when that is
        missing, the ``video_src`` <link> tag and the <title> tag are used
        instead.

        Returns a list holding a single info dictionary with the keys
        'id', 'url', 'uploader', 'upload_date', 'title' and 'ext' (plus
        'thumbnail' when the JSON blob is available).

        Raises ExtractorError if the URL does not match _VALID_URL or if
        the page title cannot be parsed in the fallback path.
        """
        # Extract id and extension from the URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        video_extension = mobj.group('ext')

        # Retrieve video webpage to extract further information
        webpage = self._download_webpage(url, video_id)

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)

        # We try first by looking at the javascript code:
        mobj = re.search(
            r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P<json>.*?)\);',
            webpage)
        if mobj is not None:
            info = json.loads(mobj.group('json'))
            # NOTE(review): date.fromtimestamp() converts using the local
            # timezone, so the reported day can differ between machines.
            upload_date = datetime.date.fromtimestamp(
                info[u'creationDate']).strftime('%Y%m%d')
            return [{
                'id': video_id,
                'url': info[u'downloadUrl'],
                'uploader': info[u'username'],
                'upload_date': upload_date,
                'title': info[u'title'],
                'ext': video_extension,
                'thumbnail': info[u'thumbUrl'],
            }]

        # We try looking in other parts of the webpage
        # (presumably _search_regex reports the failure itself when the
        # pattern is not found -- verify against InfoExtractor).
        video_url = self._search_regex(
            r'<link rel="video_src" href=".*\?file=([^"]+)" />',
            webpage, u'video URL')

        mobj = re.search(
            r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')

        # The matched groups, the id and the extension are used as-is,
        # exactly like in the JSON branch above.  They come from matching
        # str patterns against `url` and `webpage`, so on Python 3 they are
        # already text and calling .decode('utf-8') on them raises
        # AttributeError.  (Assumes _download_webpage returns decoded text
        # on Python 2 as well -- TODO confirm.)
        video_title = mobj.group(1)
        video_uploader = mobj.group(2)

        return [{
            'id': video_id,
            'url': video_url,
            'uploader': video_uploader,
            'upload_date': None,
            'title': video_title,
            'ext': video_extension,
        }]