]>
Commit | Line | Data |
---|---|---|
1 | from __future__ import unicode_literals | |
2 | ||
3 | import json | |
4 | import re | |
5 | import socket | |
6 | ||
7 | from .common import InfoExtractor | |
8 | from ..utils import ( | |
9 | compat_http_client, | |
10 | compat_str, | |
11 | compat_urllib_error, | |
12 | compat_urllib_parse, | |
13 | compat_urllib_request, | |
14 | urlencode_postdata, | |
15 | ||
16 | ExtractorError, | |
17 | ) | |
18 | ||
19 | ||
class FacebookIE(InfoExtractor):
    """Information extractor for Facebook video pages.

    Matches ``video/video.php``, ``photo.php`` and ``video/embed`` URLs that
    carry a numeric ``v`` or ``video_id`` query parameter.  A login is
    attempted during initialization (see ``_real_initialize``).
    """

    # Verbose-mode pattern: whitespace is insignificant and an unescaped '#'
    # outside a character class would start a comment, hence the '\#'.
    _VALID_URL = r'''(?x)
        https?://(?:\w+\.)?facebook\.com/
        (?:[^#]*?\#!/)?
        (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
        (?:v|video_id)=(?P<id>[0-9]+)
        (?:.*)'''
    _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
    _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
    _NETRC_MACHINE = 'facebook'
    IE_NAME = 'facebook'
    _TEST = {
        'url': 'https://www.facebook.com/photo.php?v=120708114770723',
        'md5': '48975a41ccc4b7a581abd68651c1a5a8',
        'info_dict': {
            'id': '120708114770723',
            'ext': 'mp4',
            'duration': 279,
            'title': 'PEOPLE ARE AWESOME 2013',
        }
    }

    def _login(self):
        """Log in to Facebook when credentials are available.

        Does nothing when ``_get_login_info()`` yields no user email.  A
        rejected login, an unconfirmed checkpoint and network errors raised
        while posting the forms are reported as warnings through the
        downloader instead of being raised.
        """
        useremail, password = self._get_login_info()
        if useremail is None:
            return

        # Fetch the login form first: it carries the hidden ``lsd`` and
        # ``lgnrnd`` values that are posted back along with the credentials.
        login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
        login_page_req.add_header('Cookie', 'locale=en_US')
        login_page = self._download_webpage(
            login_page_req, None,
            note='Downloading login page',
            errnote='Unable to download login page')
        lsd = self._search_regex(
            r'<input type="hidden" name="lsd" value="([^"]*)"',
            login_page, 'lsd')
        lgnrnd = self._search_regex(
            r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')

        login_form = {
            'email': useremail,
            'pass': password,
            'lsd': lsd,
            'lgnrnd': lgnrnd,
            'next': 'http://facebook.com/home.php',
            'default_persistent': '0',
            'legacy_return': '1',
            'timezone': '-60',
            'trynum': '1',
        }
        request = compat_urllib_request.Request(
            self._LOGIN_URL, urlencode_postdata(login_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        try:
            login_results = self._download_webpage(
                request, None,
                note='Logging in', errnote='unable to fetch login page')
            # Being served the login form again means the attempt was rejected.
            if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
                self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                return

            # Answer the checkpoint form with the tokens found in the login
            # response, selecting the 'dont_save' action.
            check_form = {
                'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
                'h': self._search_regex(r'name="h" value="(\w*?)"', login_results, 'h'),
                'name_action_selected': 'dont_save',
            }
            check_req = compat_urllib_request.Request(
                self._CHECKPOINT_URL, urlencode_postdata(check_form))
            check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
            check_response = self._download_webpage(
                check_req, None, note='Confirming login')
            # A submit button still present means the checkpoint was not passed.
            if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
                self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning('unable to log in: %s' % compat_str(err))

    def _real_initialize(self):
        """Attempt the login before any extraction takes place."""
        self._login()

    def _real_extract(self, url):
        """Extract the info dict for the video addressed by *url*.

        Returns a dict with the keys ``id``, ``title``, ``url``, ``duration``
        and ``thumbnail``.

        Raises ExtractorError when the embedded player data cannot be located
        (quoting Facebook's own message when the page shows one) or when
        neither an HD nor an SD source URL is present.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Always fetch the canonical video page, whatever URL form was given.
        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
        webpage = self._download_webpage(url, video_id)

        # The player variables sit as a JSON literal between these two
        # JavaScript fragments.
        BEFORE = '{swf.addParam(param[0], param[1]);});\n'
        AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
        m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
        if not m:
            m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
            if m_msg is not None:
                raise ExtractorError(
                    'The video is not available, Facebook said: "%s"' % m_msg.group(1),
                    expected=True)
            raise ExtractorError('Cannot parse data')

        # presumably a list of [name, value] pairs, hence the dict() call
        data = dict(json.loads(m.group(1)))
        # 'params' is itself URL-quoted JSON.
        params_raw = compat_urllib_parse.unquote(data['params'])
        params = json.loads(params_raw)
        video_data = params['video_data'][0]

        # Prefer the HD source and fall back to SD.  Both lookups use .get()
        # so that a missing key leads to the explicit error below rather than
        # a bare KeyError.
        video_url = video_data.get('hd_src') or video_data.get('sd_src')
        if not video_url:
            raise ExtractorError('Cannot find video URL')

        video_title = self._html_search_regex(
            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title')

        return {
            'id': video_id,
            'title': video_title,
            'url': video_url,
            'duration': int(video_data['video_duration']),
            'thumbnail': video_data['thumbnail_src'],
        }