]>
Commit | Line | Data |
---|---|---|
97d6faac PH |
1 | import datetime |
2 | import json | |
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | ||
7 | from ..utils import ( | |
8 | ExtractorError, | |
9 | ) | |
10 | ||
class PhotobucketIE(InfoExtractor):
    """Information extractor for photobucket.com."""

    # TODO: the original _VALID_URL was:
    # r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
    # Check if it's necessary to keep the old extraction process
    _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
    IE_NAME = u'photobucket'
    _TEST = {
        u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
        u'file': u'zpsc0c3b9fa.mp4',
        u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99',
        u'info_dict': {
            u"upload_date": u"20130504",
            u"uploader": u"rachaneronas",
            u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!"
        }
    }

    def _real_extract(self, url):
        """Extract the video information for a photobucket.com URL.

        The id and file extension are taken from ``url`` via ``_VALID_URL``.
        The page is then downloaded and searched, first for the JSON blob
        passed to ``Pb.Data.Shared.put(Pb.Data.Shared.MEDIA, ...)``, and
        failing that for the ``<link rel="video_src">`` and ``<title>`` tags.

        Returns a one-element list holding the info dictionary.

        Raises ExtractorError if ``url`` does not match ``_VALID_URL`` or if
        the fallback path cannot find the page title.
        """
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        video_extension = mobj.group('ext')

        # Retrieve video webpage to extract further information
        webpage = self._download_webpage(url, video_id)

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)

        # We try first by looking at the javascript code:
        mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P<json>.*?)\);', webpage)
        if mobj is not None:
            info = json.loads(mobj.group('json'))
            # creationDate is treated as a POSIX timestamp. Convert it in UTC
            # so the resulting date does not depend on the local time zone of
            # the machine running the extractor.
            created = datetime.datetime(1970, 1, 1) + datetime.timedelta(
                seconds=info[u'creationDate'])
            return [{
                'id': video_id,
                'url': info[u'downloadUrl'],
                'uploader': info[u'username'],
                'upload_date': created.strftime('%Y%m%d'),
                'title': info[u'title'],
                'ext': video_extension,
                'thumbnail': info[u'thumbUrl'],
            }]

        # We try looking in other parts of the webpage
        video_url = self._search_regex(
            r'<link rel="video_src" href=".*\?file=([^"]+)" />',
            webpage, u'video URL')

        mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')

        # The matched groups are used as-is, exactly like the JSON branch
        # above does. Calling .decode('utf-8') on them raises AttributeError
        # on Python 3 and can fail on non-ASCII text on Python 2.
        # NOTE(review): this assumes _download_webpage returns decoded text,
        # which the JSON branch already relies on - confirm in InfoExtractor.
        video_title = mobj.group(1)
        video_uploader = mobj.group(2)

        return [{
            'id': video_id,
            'url': video_url,
            'uploader': video_uploader,
            'upload_date': None,
            'title': video_title,
            'ext': video_extension,
        }]