]>
Commit | Line | Data |
---|---|---|
3c4eebf7 | 1 | # coding: utf-8 |
2 | from .common import InfoExtractor | |
3 | from ..utils import int_or_none | |
4 | ||
5 | ||
class AmazonStoreIE(InfoExtractor):
    """Extractor for the videos attached to an Amazon product page.

    A product URL is turned into a playlist: the playlist id is the product
    id captured by ``_VALID_URL`` and each entry is one item of the
    ``videos`` list found in the JSON blob that the page hands to
    ``jQuery.parseJSON``.
    """

    # Matches ".../dp/<id>" and ".../gp/product/<id>", optionally preceded by
    # one path segment, on amazon.<tld> and amazon.<sld>.<cc> hosts.
    _VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P<id>[^/&#$?]+)'

    _TESTS = [{
        'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/',
        'info_dict': {
            'id': 'B098XNCHLD',
            'title': 'md5:5f3194dbf75a8dcfc83079bd63a2abed',
        },
        'playlist_mincount': 1,
        'playlist': [{
            'info_dict': {
                'id': 'A1F83G8C2ARO7P',
                'ext': 'mp4',
                'title': 'mcdodo usb c cable 100W 5a',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        }]
    }, {
        'url': 'https://www.amazon.in/Sony-WH-1000XM4-Cancelling-Headphones-Bluetooth/dp/B0863TXGM3',
        'info_dict': {
            'id': 'B0863TXGM3',
            'title': 'md5:b0bde4881d3cfd40d63af19f7898b8ff',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'https://www.amazon.com/dp/B0845NXCXF/',
        'info_dict': {
            'id': 'B0845NXCXF',
            'title': 'md5:2145cd4e3c7782f1ee73649a3cff1171',
        },
        # Was spelled 'playlist-mincount', unlike the two cases above.
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Build a playlist from the videos embedded in the page at *url*.

        The page is downloaded, the string argument of
        ``var obj = jQuery.parseJSON('...')`` is located and parsed as JSON,
        and every item of its ``videos`` list that has truthy ``isVideo`` and
        ``url`` values becomes one playlist entry.

        Returns the value of ``self.playlist_result(...)`` for those entries,
        using the id matched from *url* as the playlist id and the JSON
        ``title`` (``None`` when absent) as the playlist title.
        """
        # Named product_id rather than id to avoid shadowing the builtin.
        product_id = self._match_id(url)
        webpage = self._download_webpage(url, product_id)

        raw_json = self._html_search_regex(
            r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'(.*)\'\)', webpage, 'data')
        data_json = self._parse_json(raw_json, product_id)

        entries = [{
            # NOTE(review): 'marketPlaceID' looks like a marketplace-level
            # identifier rather than a per-video one - confirm that entries
            # of the same page actually get distinct ids.
            'id': video['marketPlaceID'],
            'url': video['url'],
            'title': video.get('title'),
            'thumbnail': video.get('thumbUrl') or video.get('thumb'),
            'duration': video.get('durationSeconds'),
            'height': int_or_none(video.get('videoHeight')),
            'width': int_or_none(video.get('videoWidth')),
        } for video in (data_json.get('videos') or [])
            if video.get('isVideo') and video.get('url')]

        # The title is optional metadata: a page without it should still
        # produce its playlist instead of failing with KeyError.
        return self.playlist_result(
            entries, playlist_id=product_id,
            playlist_title=data_json.get('title'))