]> jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/iqiyi.py
[iqiyi] Remove format selection codes
[yt-dlp.git] / youtube_dl / extractor / iqiyi.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 from .common import InfoExtractor
6
7 from ..compat import compat_urllib_parse
8
9 from ..utils import ExtractorError
10
11 import re
12 import time
13 import uuid
14 import math
15 import random
16 import zlib
17 import hashlib
18
19
class IqiyiIE(InfoExtractor):
    """Information extractor for video pages on iqiyi.com.

    The extraction flow implemented here is:

    1. download the video page and read ``tvId``, ``videoId`` and the URL
       of the Flash player (``MainPlayer*.swf``) from it;
    2. download the player and pull the request-signing key out of it;
    3. query the VMS API for the stream description;
    4. resolve every segment of every known format to its final URL.
    """

    IE_NAME = 'iqiyi'

    _VALID_URL = r'http://(?:www\.)iqiyi\.com/.+?\.html'

    _TEST = {
        'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
        'md5': '2cb594dc2781e6c941a110d8f358118b',
        'info_dict': {
            'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
            'title': '美国德州空中惊现奇异云团 酷似UFO',
            'ext': 'f4v',
        }
    }

    # ``bid`` value reported by the API -> format identifier used here.
    _BID_TO_FORMAT = {
        '1': 'h6',
        '2': 'h5',
        '3': 'h4',
        '4': 'h3',
        '5': 'h2',
        '10': 'h1',
    }

    # Reverse lookup of the table above.  The 'best' entry is kept so that
    # get_bid() keeps answering exactly as it did before.
    _FORMAT_TO_BID = {
        'h6': '1',
        'h5': '2',
        'h4': '3',
        'h3': '4',
        'h2': '5',
        'h1': '10',
        'best': 'best',
    }

    def construct_video_urls(self, data, video_id, _uuid):
        """Resolve the final URL of every segment of every known format.

        Args:
            data: The ``data`` member of the VMS API response.  This method
                reads ``data['vp']['tkl'][0]['vs']`` (the stream list) and
                ``data['vp']['du']`` (the base URL).
            video_id: Identifier passed to the download helpers.
            _uuid: Value sent as the ``su`` query parameter.

        Returns:
            A dict mapping a format identifier to a list of
            ``(url, filesize)`` tuples, one tuple per segment.  Streams
            whose ``bid`` has no known format identifier are left out.
        """
        def do_xor(x, y):
            # The XOR constant is selected by the position modulo 3.
            a = y % 3
            if a == 1:
                return x ^ 121
            if a == 2:
                return x ^ 72
            return x ^ 103

        def get_encode_code(l):
            # ``l`` is a '-'-separated list of hexadecimal numbers.  The
            # last number yields the first output character (position 0),
            # the one before it the second character, and so on.
            return ''.join(
                chr(do_xor(int(code, 16), position))
                for position, code in enumerate(reversed(l.split('-'))))

        def get_path_key(x):
            mg = ')(*&^flash@#$%a'
            tm = self._download_json(
                'http://data.video.qiyi.com/t?tn=' + str(random.random()),
                video_id)['t']
            # The server time is reduced to a 600-unit bucket before hashing
            # (presumably seconds, i.e. 10 minutes -- not confirmed here).
            t = str(int(tm) // 600)
            return hashlib.md5((t + mg + x).encode('utf8')).hexdigest()

        video_urls_dict = {}
        for stream in data['vp']['tkl'][0]['vs']:
            format_id = self.get_format(stream['bid'])
            if format_id is None:
                # Unknown bid: without a format identifier the stream
                # cannot be ranked later on, so skip it.
                continue

            video_urls_info = stream['fs']
            first_path = stream['fs'][0]['l']
            # When the first path is encoded and decodes to an mp4 file,
            # the segment list is taken from 'flvs' instead of 'fs'.
            if not first_path.startswith('/'):
                if get_encode_code(first_path).endswith('mp4'):
                    video_urls_info = stream['flvs']

            video_urls = []
            for segment in video_urls_info:
                vl = segment['l']
                if not vl.startswith('/'):
                    vl = get_encode_code(vl)
                # The key is derived from the file name without extension.
                key = get_path_key(vl.split('/')[-1].split('.')[0])
                filesize = segment['b']

                # The key is inserted just before the last path component
                # of the base URL.
                base_url = data['vp']['du'].split('/')
                base_url.insert(-1, key)
                base_url = '/'.join(base_url)

                param = {
                    'su': _uuid,
                    'qyid': uuid.uuid4().hex,
                    'client': '',
                    'z': '',
                    'bt': '',
                    'ct': '',
                    'tn': str(int(time.time())),
                }
                api_video_url = (
                    base_url + vl + '?' + compat_urllib_parse.urlencode(param))
                js = self._download_json(api_video_url, video_id)
                video_urls.append((js['l'], filesize))

            video_urls_dict[format_id] = video_urls
        return video_urls_dict

    def get_format(self, bid):
        """Return the format identifier for ``bid``, or None if unknown."""
        return self._BID_TO_FORMAT.get(str(bid), None)

    def get_bid(self, format_id):
        """Return the bid string for ``format_id``, or None if unknown."""
        return self._FORMAT_TO_BID.get(format_id, None)

    def get_raw_data(self, tvid, video_id, enc_key, _uuid):
        """Query the VMS API and return its parsed JSON response.

        Args:
            tvid: The ``tvId`` value found on the video page (a string).
            video_id: The ``videoId`` value found on the video page.
            enc_key: Key extracted from the player, used for the ``enc``
                signature.
            _uuid: Value sent as the ``qyid`` query parameter.
        """
        tm = str(int(time.time()))
        param = {
            'key': 'fvip',
            'src': hashlib.md5(b'youtube-dl').hexdigest(),
            'tvId': tvid,
            'vid': video_id,
            'vinfo': 1,
            'tm': tm,
            'enc': hashlib.md5(
                (enc_key + tm + tvid).encode('utf8')).hexdigest(),
            'qyid': _uuid,
            'tn': random.random(),
            'um': 0,
            'authkey': hashlib.md5(
                (tm + tvid).encode('utf8')).hexdigest(),
        }

        api_url = (
            'http://cache.video.qiyi.com/vms' + '?' +
            compat_urllib_parse.urlencode(param))
        return self._download_json(api_url, video_id)

    def get_enc_key(self, swf_url, video_id):
        """Download the player at ``swf_url`` and extract the signing key.

        The first 8 bytes of the file are skipped and the remainder is
        zlib-decompressed before the key is searched for.
        """
        req = self._request_webpage(
            swf_url, video_id, note='download swf content')
        cn = zlib.decompress(req.read()[8:])
        # Raw bytes literal: the regex engine itself interprets \x08 and
        # \$, so no invalid string escape is involved.
        pt = re.compile(br'MixerRemote\x08(?P<enc_key>.+?)\$&vv')
        return self._search_regex(pt, cn, 'enc_key').decode('utf8')

    def _real_extract(self, url):
        """Extract the info dict (or a ``multi_video`` result) for ``url``.

        Raises:
            ExtractorError: if the API reports a failure code, the video
                has no stream list (VIP video), or no known format is
                available.
        """
        webpage = self._download_webpage(
            url, 'temp_id', note='download video page')
        tvid = self._search_regex(
            r'tvId ?= ?(\'|\")(?P<tvid>\d+)', webpage, 'tvid',
            flags=re.I, group='tvid')
        video_id = self._search_regex(
            r'videoId ?= ?(\'|\")(?P<video_id>[a-z\d]+)',
            webpage, 'video_id', flags=re.I, group='video_id')
        swf_url = self._search_regex(
            r'(?P<swf>http://.+?MainPlayer.+?\.swf)', webpage, 'swf')
        _uuid = uuid.uuid4().hex

        enc_key = self.get_enc_key(swf_url, video_id)

        raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
        # An explicit check instead of ``assert``: asserts are removed when
        # Python runs with -O, which would let bad responses through.
        if raw_data['code'] != 'A000000':
            raise ExtractorError(
                'Unable to load data. Error code: %s' % raw_data['code'])
        if not raw_data['data']['vp']['tkl']:
            raise ExtractorError('No support for iQiyi VIP video')

        data = raw_data['data']
        title = data['vi']['vn']

        video_urls_dict = self.construct_video_urls(data, video_id, _uuid)

        # Regroup the per-format segment lists into one entry per segment
        # index, each entry carrying that segment in every format.
        entries = []
        for format_id, video_urls in video_urls_dict.items():
            for i, video_url_info in enumerate(video_urls):
                if len(entries) < i + 1:
                    entries.append({'formats': []})
                entries[i]['formats'].append({
                    'url': video_url_info[0],
                    'filesize': video_url_info[-1],
                    'format_id': format_id,
                    'preference': int(self.get_bid(format_id)),
                })

        if not entries:
            raise ExtractorError('No video formats found')

        for i, entry in enumerate(entries):
            self._sort_formats(entry['formats'])
            entry.update({
                # Prefix with the video id so that parts of different
                # videos do not share the same identifier.
                'id': '%s_part%d' % (video_id, i + 1),
                'title': title,
            })

        if len(entries) > 1:
            info = {
                '_type': 'multi_video',
                'id': video_id,
                'title': title,
                'entries': entries,
            }
        else:
            info = entries[0]
            info['id'] = video_id
            info['title'] = title

        return info