2 from __future__
import unicode_literals
13 from .common
import InfoExtractor
14 from ..compat
import (
17 compat_urllib_parse_urlencode
,
18 compat_urllib_parse_urlparse
,
32 return hashlib
.md5(text
.encode('utf-8')).hexdigest()
35 class IqiyiSDK(object):
36 def __init__(self
, target
, ip
, timestamp
):
39 self
.timestamp
= timestamp
43 return compat_str(sum(map(lambda p
: int(p
, 16), list(data
))))
47 if isinstance(num
, int):
49 return compat_str(sum(map(int, num
)))
52 even
= self
.digit_sum(compat_str(self
.timestamp
)[::2])
53 odd
= self
.digit_sum(compat_str(self
.timestamp
)[1::2])
56 def preprocess(self
, chunksize
):
57 self
.target
= md5_text(self
.target
)
59 for i
in range(32 // chunksize
):
60 chunks
.append(self
.target
[chunksize
* i
:chunksize
* (i
+ 1)])
62 chunks
.append(self
.target
[32 - 32 % chunksize
:])
63 return chunks
, list(map(int, self
.ip
.split('.')))
def mod(self, modulus):
    """Rebuild ``self.target`` from its first chunk and the reduced IP parts.

    Calls ``self.preprocess(32)`` and stores, as the new ``self.target``,
    the first chunk it returns followed by every value of the returned IP
    list taken modulo ``modulus`` and rendered as text.
    """
    pieces, octets = self.preprocess(32)
    head = pieces[0]
    # Each IP component is reduced independently, then glued on in order.
    reduced = [compat_str(octet % modulus) for octet in octets]
    self.target = head + ''.join(reduced)
69 def split(self
, chunksize
):
76 chunks
, ip
= self
.preprocess(chunksize
)
78 for i
in range(len(chunks
)):
79 ip_part
= compat_str(ip
[i
] % modulus_map
[chunksize
]) if i
< 4 else ''
81 ret
+= ip_part
+ chunks
[i
]
83 ret
+= chunks
[i
] + ip_part
def handle_input16(self):
    """Hash ``self.target`` and surround the hash with two ``split_sum`` values.

    ``self.target`` is first replaced by ``md5_text(self.target)``. The final
    value is ``split_sum`` of the first 16 characters, then the hash itself,
    then ``split_sum`` of everything after the first 16 characters.
    """
    digest = md5_text(self.target)
    # Keep the attribute in sync before calling helpers, as the original did.
    self.target = digest
    front_sum = self.split_sum(digest[:16])
    back_sum = self.split_sum(digest[16:])
    self.target = front_sum + digest + back_sum
90 def handle_input8(self
):
91 self
.target
= md5_text(self
.target
)
94 part
= self
.target
[8 * i
:8 * (i
+ 1)]
95 ret
+= self
.split_sum(part
) + part
99 self
.target
= md5_text(self
.target
)
100 self
.target
= self
.split_sum(self
.target
) + self
.target
102 def date(self
, scheme
):
103 self
.target
= md5_text(self
.target
)
104 d
= time
.localtime(self
.timestamp
)
106 'y': compat_str(d
.tm_year
),
107 'm': '%02d' % d
.tm_mon
,
108 'd': '%02d' % d
.tm_mday
,
110 self
.target
+= ''.join(map(lambda c
: strings
[c
], list(scheme
)))
def split_time_even_odd(self):
    """Wrap the hashed target with the two values from ``even_odd()``.

    ``even_odd()`` is unpacked as ``(even, odd)``; the new ``self.target`` is
    the *odd* value, then ``md5_text(self.target)``, then the *even* value.
    """
    even_part, odd_part = self.even_odd()
    digest = md5_text(self.target)
    self.target = odd_part + digest + even_part
def split_time_odd_even(self):
    """Wrap the hashed target with the two values from ``even_odd()``.

    ``even_odd()`` is unpacked as ``(even, odd)``; the new ``self.target`` is
    the *even* value, then ``md5_text(self.target)``, then the *odd* value.
    """
    even_part, odd_part = self.even_odd()
    digest = md5_text(self.target)
    self.target = even_part + digest + odd_part
def split_ip_time_sum(self):
    """Set ``self.target`` to IP sum + first chunk + timestamp digit sum.

    Uses the chunk list and IP list returned by ``self.preprocess(32)``: the
    textual sum of the IP values comes first, then the first chunk, then
    ``self.digit_sum(self.timestamp)``.
    """
    pieces, octets = self.preprocess(32)
    ip_total = compat_str(sum(octets))
    head = pieces[0]
    time_total = self.digit_sum(self.timestamp)
    self.target = ip_total + head + time_total
def split_time_ip_sum(self):
    """Set ``self.target`` to timestamp digit sum + first chunk + IP sum.

    Uses the chunk list and IP list returned by ``self.preprocess(32)``:
    ``self.digit_sum(self.timestamp)`` comes first, then the first chunk,
    then the textual sum of the IP values.
    """
    pieces, octets = self.preprocess(32)
    time_total = self.digit_sum(self.timestamp)
    head = pieces[0]
    ip_total = compat_str(sum(octets))
    self.target = time_total + head + ip_total
129 class IqiyiSDKInterpreter(object):
130 def __init__(self
, sdk_code
):
131 self
.sdk_code
= sdk_code
133 def run(self
, target
, ip
, timestamp
):
134 self
.sdk_code
= decode_packed_codes(self
.sdk_code
)
136 functions
= re
.findall(r
'input=([a-zA-Z0-9]+)\(input', self
.sdk_code
)
138 sdk
= IqiyiSDK(target
, ip
, timestamp
)
141 'handleSum': sdk
.handleSum
,
142 'handleInput8': sdk
.handle_input8
,
143 'handleInput16': sdk
.handle_input16
,
144 'splitTimeEvenOdd': sdk
.split_time_even_odd
,
145 'splitTimeOddEven': sdk
.split_time_odd_even
,
146 'splitIpTimeSum': sdk
.split_ip_time_sum
,
147 'splitTimeIpSum': sdk
.split_time_ip_sum
,
149 for function
in functions
:
150 if re
.match(r
'mod\d+', function
):
151 sdk
.mod(int(function
[3:]))
152 elif re
.match(r
'date[ymd]{3}', function
):
153 sdk
.date(function
[4:])
154 elif re
.match(r
'split\d+', function
):
155 sdk
.split(int(function
[5:]))
156 elif function
in other_functions
:
157 other_functions
[function
]()
159 raise ExtractorError('Unknown funcion %s' % function
)
164 class IqiyiIE(InfoExtractor
):
168 _VALID_URL
= r
'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'
170 _NETRC_MACHINE
= 'iqiyi'
173 'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
174 'md5': '2cb594dc2781e6c941a110d8f358118b',
176 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
177 'title': '美国德州空中惊现奇异云团 酷似UFO',
181 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
183 'id': 'e3f585b550a280af23c98b6cb2be19fb',
184 'title': '名侦探柯南第752集',
188 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
190 'title': '名侦探柯南第752集',
194 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
196 'title': '名侦探柯南第752集',
200 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
202 'title': '名侦探柯南第752集',
206 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
208 'title': '名侦探柯南第752集',
212 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
214 'title': '名侦探柯南第752集',
218 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
220 'title': '名侦探柯南第752集',
224 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
226 'title': '名侦探柯南第752集',
230 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
232 'title': '名侦探柯南第752集',
236 'skip_download': True,
239 'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
240 'only_matching': True,
242 'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html',
243 'only_matching': True,
245 'url': 'http://yule.iqiyi.com/pcb.html',
246 'only_matching': True,
248 # VIP-only video. The first 2 parts (6 minutes) are available without login
249 # MD5 sums omitted as values are different on Travis CI and my machine
250 'url': 'http://www.iqiyi.com/v_19rrny4w8w.html',
252 'id': 'f3cf468b39dddb30d676f89a91200dc1',
257 'id': 'f3cf468b39dddb30d676f89a91200dc1_part1',
263 'id': 'f3cf468b39dddb30d676f89a91200dc1_part2',
268 'expected_warnings': ['Needs a VIP account for full video'],
270 'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html',
275 'playlist_count': 101,
277 'url': 'http://www.pps.tv/w_19rrbav0ph.html',
278 'only_matching': True,
290 def _real_initialize(self
):
295 # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js
296 N
= 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
299 return ohdave_rsa_encrypt(data
, e
, N
)
302 (username
, password
) = self
._get
_login
_info
()
304 # No authentication to be performed
308 data
= self
._download
_json
(
309 'http://kylin.iqiyi.com/get_token', None,
310 note
='Get token for logging', errnote
='Unable to get token for logging')
312 timestamp
= int(time
.time())
313 target
= '/apis/reglogin/login.action?lang=zh_TW&area_code=null&email=%s&passwd=%s&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1' % (
314 username
, self
._rsa
_fun
(password
.encode('utf-8')))
316 interp
= IqiyiSDKInterpreter(sdk
)
317 sign
= interp
.run(target
, data
['ip'], timestamp
)
319 validation_params
= {
321 'server': 'BEA3AA1908656AABCCFF76582C4C6660',
322 'token': data
['token'],
323 'bird_src': 'f8d91d57af224da7893dd397d52d811a',
327 validation_result
= self
._download
_json
(
328 'http://kylin.iqiyi.com/validate?' + compat_urllib_parse_urlencode(validation_params
), None,
329 note
='Validate credentials', errnote
='Unable to validate credentials')
332 'P00107': 'please login via the web interface and enter the CAPTCHA code',
333 'P00117': 'bad username or password',
336 code
= validation_result
['code']
338 msg
= MSG_MAP
.get(code
)
340 msg
= 'error %s' % code
341 if validation_result
.get('msg'):
342 msg
+= ': ' + validation_result
['msg']
343 self
._downloader
.report_warning('unable to log in: ' + msg
)
348 def _authenticate_vip_video(self
, api_video_url
, video_id
, tvid
, _uuid
, do_report_warning
):
350 # version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as
352 'platform': 'b6c13e26323c537d',
357 'playType': 'main', # XXX: always main?
358 'filename': os
.path
.splitext(url_basename(api_video_url
))[0],
361 qd_items
= compat_parse_qs(compat_urllib_parse_urlparse(api_video_url
).query
)
362 for key
, val
in qd_items
.items():
363 auth_params
[key
] = val
[0]
365 auth_req
= sanitized_Request(
366 'http://api.vip.iqiyi.com/services/ckn.action',
367 urlencode_postdata(auth_params
))
368 # iQiyi server throws HTTP 405 error without the following header
369 auth_req
.add_header('Content-Type', 'application/x-www-form-urlencoded')
370 auth_result
= self
._download
_json
(
372 note
='Downloading video authentication JSON',
373 errnote
='Unable to download video authentication JSON')
375 if auth_result
['code'] == 'Q00505': # No preview available (不允许试看鉴权失败)
376 raise ExtractorError('This video requires a VIP account', expected
=True)
377 if auth_result
['code'] == 'Q00506': # End of preview time (试看结束鉴权失败)
378 if do_report_warning
:
379 self
.report_warning('Needs a VIP account for full video')
384 def construct_video_urls(self
, data
, video_id
, _uuid
, tvid
):
393 def get_encode_code(l
):
398 for i
in range(c
- 1, -1, -1):
399 a
= do_xor(int(b
[c
- i
- 1], 16), i
)
403 def get_path_key(x
, format_id
, segment_index
):
404 mg
= ')(*&^flash@#$%a'
405 tm
= self
._download
_json
(
406 'http://data.video.qiyi.com/t?tn=' + str(random
.random()), video_id
,
407 note
='Download path key of segment %d for format %s' % (segment_index
+ 1, format_id
)
409 t
= str(int(math
.floor(int(tm
) / (600.0))))
410 return md5_text(t
+ mg
+ x
)
413 need_vip_warning_report
= True
414 for format_item
in data
['vp']['tkl'][0]['vs']:
415 if 0 < int(format_item
['bid']) <= 10:
416 format_id
= self
.get_format(format_item
['bid'])
422 video_urls_info
= format_item
['fs']
423 if not format_item
['fs'][0]['l'].startswith('/'):
424 t
= get_encode_code(format_item
['fs'][0]['l'])
425 if t
.endswith('mp4'):
426 video_urls_info
= format_item
['flvs']
428 for segment_index
, segment
in enumerate(video_urls_info
):
430 if not vl
.startswith('/'):
431 vl
= get_encode_code(vl
)
432 is_vip_video
= '/vip/' in vl
433 filesize
= segment
['b']
434 base_url
= data
['vp']['du'].split('/')
437 vl
.split('/')[-1].split('.')[0], format_id
, segment_index
)
438 base_url
.insert(-1, key
)
439 base_url
= '/'.join(base_url
)
442 'qyid': uuid
.uuid4().hex,
447 'tn': str(int(time
.time()))
449 api_video_url
= base_url
+ vl
451 api_video_url
= api_video_url
.replace('.f4v', '.hml')
452 auth_result
= self
._authenticate
_vip
_video
(
453 api_video_url
, video_id
, tvid
, _uuid
, need_vip_warning_report
)
454 if auth_result
is False:
455 need_vip_warning_report
= False
458 't': auth_result
['data']['t'],
459 # cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as
460 'cid': 'afbe8fd3d73448c9',
462 'QY00001': auth_result
['data']['u'],
464 api_video_url
+= '?' if '?' not in api_video_url
else '&'
465 api_video_url
+= compat_urllib_parse_urlencode(param
)
466 js
= self
._download
_json
(
467 api_video_url
, video_id
,
468 note
='Download video info of segment %d for format %s' % (segment_index
+ 1, format_id
))
471 (video_url
, filesize
))
473 video_urls_dict
[format_id
] = video_urls
474 return video_urls_dict
def get_format(self, bid):
    """Look up the format id paired with ``bid`` in ``self._FORMATS_MAP``.

    ``bid`` is compared in its ``str`` form against the first element of each
    ``(bid, format_id)`` pair. Returns the format id of the first matching
    pair, or ``None`` when no pair matches.
    """
    wanted_bid = str(bid)
    for known_bid, known_format_id in self._FORMATS_MAP:
        if known_bid == wanted_bid:
            return known_format_id
    return None
def get_bid(self, format_id):
    """Look up the bid paired with ``format_id`` in ``self._FORMATS_MAP``.

    Scans the ``(bid, format_id)`` pairs in order and returns the bid of the
    first pair whose format id equals ``format_id``, or ``None`` when no pair
    matches.
    """
    for known_bid, known_format_id in self._FORMATS_MAP:
        if known_format_id == format_id:
            return known_bid
    return None
484 def get_raw_data(self
, tvid
, video_id
, enc_key
, _uuid
):
485 tm
= str(int(time
.time()))
489 'src': md5_text('youtube-dl'),
494 'enc': md5_text(enc_key
+ tail
),
496 'tn': random
.random(),
498 'authkey': md5_text(md5_text('') + tail
),
502 api_url
= 'http://cache.video.qiyi.com/vms' + '?' + \
503 compat_urllib_parse_urlencode(param
)
504 raw_data
= self
._download
_json
(api_url
, video_id
)
507 def get_enc_key(self
, video_id
):
508 # TODO: automatic key extraction
509 # last update at 2016-01-22 for Zombie::bite
510 enc_key
= '4a1caba4b4465345366f28da7c117d20'
513 def _extract_playlist(self
, webpage
):
517 r
'<a[^>]+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"',
522 album_id
= self
._search
_regex
(
523 r
'albumId\s*:\s*(\d+),', webpage
, 'album ID')
524 album_title
= self
._search
_regex
(
525 r
'data-share-title="([^"]+)"', webpage
, 'album title', fatal
=False)
527 entries
= list(map(self
.url_result
, links
))
529 # Start from 2 because links in the first page are already on webpage
530 for page_num
in itertools
.count(2):
531 pagelist_page
= self
._download
_webpage
(
532 'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id
, page_num
, PAGE_SIZE
),
534 note
='Download playlist page %d' % page_num
,
535 errnote
='Failed to download playlist page %d' % page_num
)
536 pagelist
= self
._parse
_json
(
537 remove_start(pagelist_page
, 'var tvInfoJs='), album_id
)
538 vlist
= pagelist
['data']['vlist']
540 entries
.append(self
.url_result(item
['vurl']))
541 if len(vlist
) < PAGE_SIZE
:
544 return self
.playlist_result(entries
, album_id
, album_title
)
546 def _real_extract(self
, url
):
547 webpage
= self
._download
_webpage
(
548 url
, 'temp_id', note
='download video page')
550 # There's no simple way to determine whether a URL is a playlist or not
552 playlist_result
= self
._extract
_playlist
(webpage
)
554 return playlist_result
556 tvid
= self
._search
_regex
(
557 r
'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
558 video_id = self._search_regex(
559 r'data-player-videoid\s*=\s*[\'"]([a
-f\d
]+)', webpage, 'video_id
')
560 _uuid = uuid.uuid4().hex
562 enc_key = self.get_enc_key(video_id)
564 raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
566 if raw_data['code
'] != 'A000000
':
567 raise ExtractorError('Unable to load data
. Error code
: ' + raw_data['code
'])
569 data = raw_data['data
']
571 title = data['vi
']['vn
']
573 # generate video_urls_dict
574 video_urls_dict = self.construct_video_urls(
575 data, video_id, _uuid, tvid)
579 for format_id in video_urls_dict:
580 video_urls = video_urls_dict[format_id]
581 for i, video_url_info in enumerate(video_urls):
582 if len(entries) < i + 1:
583 entries.append({'formats': []})
584 entries[i]['formats
'].append(
586 'url
': video_url_info[0],
587 'filesize
': video_url_info[-1],
588 'format_id
': format_id,
589 'preference
': int(self.get_bid(format_id))
593 for i in range(len(entries)):
594 self._sort_formats(entries[i]['formats
'])
597 'id': '%s_part
%d' % (video_id, i + 1),
604 '_type
': 'multi_video
',
611 info['id'] = video_id
612 info['title
'] = title