]>
Commit | Line | Data |
---|---|---|
cb3bb2cf PH |
1 | from __future__ import unicode_literals |
2 | ||
9eae41dd | 3 | import json |
9eae41dd PH |
4 | import re |
5 | import socket | |
6 | ||
7 | from .common import InfoExtractor | |
8 | from ..utils import ( | |
9 | compat_http_client, | |
10 | compat_str, | |
11 | compat_urllib_error, | |
12 | compat_urllib_parse, | |
13 | compat_urllib_request, | |
b74fa8cd | 14 | urlencode_postdata, |
9eae41dd PH |
15 | |
16 | ExtractorError, | |
17 | ) | |
18 | ||
19 | ||
class FacebookIE(InfoExtractor):
    """Information extractor for videos hosted on facebook.com.

    Matches ``video/video.php``, ``photo.php`` and ``video/embed`` URLs that
    carry the numeric video id in a ``v`` or ``video_id`` query parameter.
    When credentials are available, a login is attempted during
    initialization; login failures are reported as warnings and extraction
    continues.
    """

    # Verbose-mode pattern: whitespace and line breaks inside it are ignored.
    _VALID_URL = r'''(?x)
        https?://(?:\w+\.)?facebook\.com/
        (?:[^#]*?\#!/)?
        (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
        (?:v|video_id)=(?P<id>[0-9]+)
        (?:.*)'''
    _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
    _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
    # NOTE(review): presumably consumed by the base class when resolving
    # credentials in _get_login_info() -- confirm against InfoExtractor.
    _NETRC_MACHINE = 'facebook'
    IE_NAME = 'facebook'
    _TEST = {
        'url': 'https://www.facebook.com/photo.php?v=120708114770723',
        'md5': '48975a41ccc4b7a581abd68651c1a5a8',
        'info_dict': {
            'id': '120708114770723',
            'ext': 'mp4',
            'duration': 279,
            'title': 'PEOPLE ARE AWESOME 2013',
        }
    }

    def _login(self):
        """Try to log in to Facebook with the configured credentials.

        Does nothing when no user e-mail is configured. A rejected login,
        an unconfirmed checkpoint, or a network-level error is reported
        through the downloader as a warning; none of them is raised here.
        """
        (useremail, password) = self._get_login_info()
        if useremail is None:
            return

        # Fetch the login form first: it carries the hidden "lsd" and
        # "lgnrnd" values that are posted back together with the credentials.
        login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
        login_page_req.add_header('Cookie', 'locale=en_US')
        login_page = self._download_webpage(
            login_page_req, None,
            note='Downloading login page',
            errnote='Unable to download login page')
        lsd = self._search_regex(
            r'<input type="hidden" name="lsd" value="([^"]*)"',
            login_page, 'lsd')
        lgnrnd = self._search_regex(
            r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')

        login_form = {
            'email': useremail,
            'pass': password,
            'lsd': lsd,
            'lgnrnd': lgnrnd,
            'next': 'http://facebook.com/home.php',
            'default_persistent': '0',
            'legacy_return': '1',
            'timezone': '-60',
            'trynum': '1',
        }
        request = compat_urllib_request.Request(
            self._LOGIN_URL, urlencode_postdata(login_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        try:
            login_results = self._download_webpage(
                request, None,
                note='Logging in', errnote='unable to fetch login page')
            # Being served the login form again means the attempt was rejected.
            if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
                self._downloader.report_warning(
                    'unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
                return

            # Answer the checkpoint form with "dont_save", echoing back the
            # "fb_dtsg" and "h" values taken from the login response.
            check_form = {
                'fb_dtsg': self._search_regex(
                    r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
                'h': self._search_regex(
                    r'name="h" value="(\w*?)"', login_results, 'h'),
                'name_action_selected': 'dont_save',
            }
            check_req = compat_urllib_request.Request(
                self._CHECKPOINT_URL, urlencode_postdata(check_form))
            check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
            check_response = self._download_webpage(
                check_req, None,
                note='Confirming login')
            # The submit button still being present means the checkpoint
            # was not cleared by the request above.
            if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
                self._downloader.report_warning(
                    'Unable to confirm login, you have to login in your browser and authorize the login.')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning('unable to log in: %s' % compat_str(err))
            return

    def _real_initialize(self):
        """Attempt the (optional) login before any extraction takes place."""
        self._login()

    def _real_extract(self, url):
        """Extract the metadata of a single Facebook video.

        Returns a dict with the keys ``id``, ``title``, ``url``,
        ``duration`` and ``thumbnail``. ``duration`` and ``thumbnail`` are
        ``None`` when the page data does not provide them.

        Raises ExtractorError when the embedded player data cannot be
        located (carrying Facebook's own message when the page shows one)
        or when neither an HD nor an SD source URL is present.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Always fetch the canonical video page, whichever URL form was given.
        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
        webpage = self._download_webpage(url, video_id)

        # The player configuration is a JSON literal located between these
        # two script fragments of the page.
        BEFORE = '{swf.addParam(param[0], param[1]);});\n'
        AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
        m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
        if not m:
            m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
            if m_msg is not None:
                raise ExtractorError(
                    'The video is not available, Facebook said: "%s"' % m_msg.group(1),
                    expected=True)
            raise ExtractorError('Cannot parse data')

        data = dict(json.loads(m.group(1)))
        # "params" is itself URL-quoted JSON.
        params_raw = compat_urllib_parse.unquote(data['params'])
        params = json.loads(params_raw)
        video_data = params['video_data'][0]

        # Prefer the HD source and fall back to SD. Both are read with .get()
        # so that a missing key ends in the ExtractorError below instead of
        # an unhandled KeyError.
        video_url = video_data.get('hd_src') or video_data.get('sd_src')
        if not video_url:
            raise ExtractorError('Cannot find video URL')

        video_title = self._html_search_regex(
            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title')

        # Duration and thumbnail are optional metadata: their absence must
        # not abort an otherwise successful extraction.
        duration = video_data.get('video_duration')

        return {
            'id': video_id,
            'title': video_title,
            'url': video_url,
            'duration': int(duration) if duration is not None else None,
            'thumbnail': video_data.get('thumbnail_src'),
        }