]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/funk.py
[facebook] fix tahoe request(closes #17171)
[yt-dlp.git] / youtube_dl / extractor / funk.py
CommitLineData
ff3f1a62
S
1# coding: utf-8
2from __future__ import unicode_literals
3
af322eb8 4import itertools
690404a6
S
5import re
6
ff3f1a62
S
7from .common import InfoExtractor
8from .nexx import NexxIE
47421507 9from ..compat import compat_str
c84eae4f
S
from ..utils import (
    ExtractorError,
    int_or_none,
    try_get,
)
690404a6
S
14
15
class FunkBaseIE(InfoExtractor):
    """Base class holding request headers and result building shared by
    the funk.net extractors in this module."""

    # Token sent as the ``authorization`` header of funk.net API requests.
    _AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
    # Default headers; ``Referer`` is added per request by _make_headers().
    _HEADERS = {
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
        'authorization': _AUTH,
    }

    @staticmethod
    def _make_headers(referer):
        """Return a fresh copy of the default headers with ``Referer`` set
        to *referer*; the class-level ``_HEADERS`` dict is not modified."""
        request_headers = dict(FunkBaseIE._HEADERS)
        request_headers.update({'Referer': referer})
        return request_headers

    def _make_url_result(self, video):
        """Build a ``url_transparent`` result that hands *video* over to
        NexxIE.

        *video* is a metadata dict that must contain ``sourceId`` (a
        missing key raises KeyError); ``title``, ``description``,
        ``duration``, ``seasonNr`` and ``episodeNr`` are optional.
        """
        source_id = video['sourceId']
        result = {
            '_type': 'url_transparent',
            'url': 'nexx:741:%s' % source_id,
            'ie_key': NexxIE.ie_key(),
            'id': source_id,
        }
        # Plain text fields are passed through as they are.
        result['title'] = video.get('title')
        result['description'] = video.get('description')
        # Numeric fields are normalised with int_or_none.
        result['duration'] = int_or_none(video.get('duration'))
        result['season_number'] = int_or_none(video.get('seasonNr'))
        result['episode_number'] = int_or_none(video.get('episodeNr'))
        return result
42
43
class FunkMixIE(FunkBaseIE):
    """Extractor for funk.net ``/mix/<id>/<alias>`` URLs."""

    _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
        'md5': '8edf617c2f2b7c9847dfda313f199009',
        'info_dict': {
            'id': '123748',
            'ext': 'mp4',
            'title': '"Die realste Kifferdoku aller Zeiten"',
            'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
            'timestamp': 1490274721,
            'upload_date': '20170323',
        },
    }]

    def _real_extract(self, url):
        """Resolve a mix URL to a ``url_transparent`` Nexx result.

        Downloads the curated lists, picks the one whose ``entityId`` or
        ``alias`` equals the mix id from the URL, then picks the video in
        that list whose alias equals the alias from the URL.

        Raises ExtractorError when either the mix or the video cannot be
        found (previously a bare StopIteration escaped in those cases).
        """
        mobj = re.match(self._VALID_URL, url)
        mix_id = mobj.group('id')
        alias = mobj.group('alias')

        lists = self._download_json(
            'https://www.funk.net/api/v3.1/curation/curatedLists/',
            mix_id, headers=self._make_headers(url), query={
                'size': 100,
            })['_embedded']['curatedListList']

        # The id part of the URL may match either field of a curated list.
        curated_list = next(
            (curated for curated in lists
             if mix_id in (curated.get('entityId'), curated.get('alias'))),
            None)
        if curated_list is None:
            raise ExtractorError('Unable to find mix %s' % mix_id)

        video = next(
            (meta['videoDataDelegate']
             for meta in curated_list['videoMetas']
             if try_get(
                 meta, lambda x: x['videoDataDelegate']['alias'],
                 compat_str) == alias),
            None)
        if video is None:
            raise ExtractorError(
                'Unable to find video %s in mix %s' % (alias, mix_id))

        return self._make_url_result(video)
ff3f1a62
S
81
82
690404a6
S
class FunkChannelIE(FunkBaseIE):
    """Extractor for funk.net ``/channel/<id>/<alias>`` URLs."""

    _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
        'info_dict': {
            'id': '1155821',
            'ext': 'mp4',
            'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
            'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
            'timestamp': 1514507395,
            'upload_date': '20171229',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # only available via byIdList API
        'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu',
        'info_dict': {
            'id': '205067',
            'ext': 'mp4',
            'title': 'Martin Sonneborn erklärt die EU',
            'description': 'md5:050f74626e4ed87edf4626d2024210c0',
            'timestamp': 1494424042,
            'upload_date': '20170510',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a channel URL to a ``url_transparent`` Nexx result.

        Three lookups are tried in order until one yields the video:

        1. the paginated ``byChannelAlias`` listing (non-fatal),
        2. the ``byIdList`` endpoint queried with the alias (non-fatal),
        3. the ``filter`` endpoint queried with the channel id (fatal).

        Raises ExtractorError when none of them contains a video whose
        alias equals the alias from the URL (previously a bare
        StopIteration escaped in that case).
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        alias = mobj.group('alias')

        headers = self._make_headers(url)

        video = None

        # Id-based channels are currently broken on their side: webplayer
        # tries to process them via byChannelAlias endpoint and fails
        # predictably.
        for page_num in itertools.count():
            # channel_id is passed as the video id so the progress note is
            # no longer taken for it, matching the other API calls below.
            by_channel_alias = self._download_json(
                'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
                % channel_id, channel_id,
                'Downloading byChannelAlias JSON page %d' % (page_num + 1),
                headers=headers, query={
                    'filterFsk': 'false',
                    'sort': 'creationDate,desc',
                    'size': 100,
                    'page': page_num,
                }, fatal=False)
            if not by_channel_alias:
                break
            video_list = try_get(
                by_channel_alias, lambda x: x['_embedded']['videoList'], list)
            if not video_list:
                break
            video = next(
                (r for r in video_list if r.get('alias') == alias), None)
            if video:
                break
            # Stop paging once the API no longer advertises a next page.
            if not try_get(
                    by_channel_alias, lambda x: x['_links']['next']):
                break

        if not video:
            by_id_list = self._download_json(
                'https://www.funk.net/api/v3.0/content/videos/byIdList',
                channel_id, 'Downloading byIdList JSON', headers=headers,
                query={
                    'ids': alias,
                }, fatal=False)
            if by_id_list:
                video = try_get(by_id_list, lambda x: x['result'][0], dict)

        if not video:
            # Last resort: this request is fatal, so download errors
            # propagate from here.
            results = self._download_json(
                'https://www.funk.net/api/v3.0/content/videos/filter',
                channel_id, 'Downloading filter JSON', headers=headers, query={
                    'channelId': channel_id,
                    'size': 100,
                })['result']
            video = next(
                (r for r in results if r.get('alias') == alias), None)

        if not video:
            raise ExtractorError(
                'Unable to find video %s in channel %s' % (alias, channel_id))

        return self._make_url_result(video)