]>
Commit | Line | Data |
---|---|---|
10fd9e6e | 1 | import json |
10fd9e6e H |
2 | |
3 | from .common import InfoExtractor | |
3d2623a8 | 4 | from ..networking.exceptions import HTTPError |
10fd9e6e H |
5 | from ..utils import ( |
6 | ExtractorError, | |
7 | GeoRestrictedError, | |
8 | float_or_none, | |
9 | traverse_obj, | |
10 | try_call | |
11 | ) | |
12 | ||
13 | ||
class OnDemandChinaEpisodeIE(InfoExtractor):
    """Extractor for single episodes of a series on ondemandchina.com.

    Episode metadata is fetched from the site's GraphQL endpoint, and the
    playable sources come from the odkmedia playback API. Only HLS sources
    are turned into formats; other source types are reported with a warning.
    """

    _VALID_URL = r'https?://www\.ondemandchina\.com/\w+/watch/(?P<series>[\w-]+)/(?P<id>ep-(?P<ep>\d+))'
    _TESTS = [{
        'url': 'https://www.ondemandchina.com/en/watch/together-against-covid-19/ep-1',
        'info_dict': {
            'id': '264394',
            'ext': 'mp4',
            'duration': 3256.88,
            'title': 'EP 1 The Calling',
            'alt_title': '第1集 令出如山',
            'thumbnail': 'https://d2y2efdi5wgkcl.cloudfront.net/fit-in/256x256/media-io/2020/9/11/image.d9816e81.jpg',
            'description': '疫情严峻,党政军民学、东西南北中协同应考',
            'tags': ['Social Humanities', 'Documentary', 'Medical', 'Social'],
        }
    }]

    # GraphQL document sent to the metadata endpoint; the variables are the
    # series slug and the integer episode number parsed from the URL.
    _QUERY = '''
    query Episode($programSlug: String!, $episodeNumber: Int!) {
        episode(
            programSlug: $programSlug
            episodeNumber: $episodeNumber
            kind: "series"
            part: null
        ) {
            id
            title
            titleEn
            titleKo
            titleZhHans
            titleZhHant
            synopsis
            synopsisEn
            synopsisKo
            synopsisZhHans
            synopsisZhHant
            videoDuration
            images {
                thumbnail
            }
        }
    }'''

    def _real_extract(self, url):
        """Build the info dict for the episode addressed by *url*.

        Raises:
            GeoRestrictedError: when the playback API answers with an HTTP
                error; the message is the ``detail`` field of its JSON body.
            ExtractorError: for any other download failure (re-raised as-is).
        """
        program_slug, display_id, ep_number = self._match_valid_url(url).group('series', 'id', 'ep')
        # The page itself is only used as a fallback source for metadata.
        webpage = self._download_webpage(url, display_id)

        video_info = self._download_json(
            'https://odc-graphql.odkmedia.io/graphql', display_id,
            headers={'Content-type': 'application/json'},
            data=json.dumps({
                'operationName': 'Episode',
                'query': self._QUERY,
                'variables': {
                    'programSlug': program_slug,
                    'episodeNumber': int(ep_number),
                },
            }).encode())['data']['episode']

        try:
            source_json = self._download_json(
                f'https://odkmedia.io/odc/api/v2/playback/{video_info["id"]}/', display_id,
                headers={'Authorization': '', 'service-name': 'odc'})
        except ExtractorError as e:
            # Only HTTP errors carry a response body to inspect. Anything else
            # must propagate; otherwise it would be swallowed and the code
            # below would fail with a NameError on the unbound source_json.
            if not isinstance(e.cause, HTTPError):
                raise
            error_data = self._parse_json(e.cause.response.read(), display_id)['detail']
            raise GeoRestrictedError(error_data)

        formats, subtitles = [], {}
        for source in traverse_obj(source_json, ('sources', ...)):
            if source.get('type') == 'hls':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(source.get('url'), display_id)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                self.report_warning(f'Unsupported format {source.get("type")}', display_id)

        return {
            'id': str(video_info['id']),
            # videoDuration is scaled down by 1000 (presumably milliseconds -> seconds).
            'duration': float_or_none(video_info.get('videoDuration'), 1000),
            'thumbnail': (traverse_obj(video_info, ('images', 'thumbnail'))
                          or self._html_search_meta(['og:image', 'twitter:image'], webpage)),
            'title': (traverse_obj(video_info, 'title', 'titleEn')
                      or self._html_search_meta(['og:title', 'twitter:title'], webpage)
                      or self._html_extract_title(webpage)),
            'alt_title': traverse_obj(video_info, 'titleKo', 'titleZhHans', 'titleZhHant'),
            # The last key is the generic 'synopsis' field requested in _QUERY.
            'description': (traverse_obj(
                video_info, 'synopsisEn', 'synopsisKo', 'synopsisZhHans', 'synopsisZhHant', 'synopsis')
                or self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage)),
            'formats': formats,
            'subtitles': subtitles,
            # try_call guards the .split() for pages without a keywords meta tag.
            'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', ')),
        }