[yt-dlp.git] / youtube_dl / extractor / c56.py

# coding: utf-8
from __future__ import unicode_literals

import re
import json

from .common import InfoExtractor


class C56IE(InfoExtractor):
    """Information extractor for videos hosted on 56.com.

    The text id captured from the page/player URL is used to request a JSON
    metadata document from ``vxml.56.com``, from which the title, thumbnail
    and the list of downloadable formats are read.
    """

    _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
    IE_NAME = '56.com'
    _TEST = {
        'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
        'file': '93440716.flv',
        'md5': 'e59995ac63d0457783ea05f93f12a866',
        'info_dict': {
            'title': '网事知多少 第32期：车怒',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Returns a dict with the keys ``id``, ``title``, ``formats`` and
        ``thumbnail``.

        Raises ``AttributeError`` if *url* does not match ``_VALID_URL`` and
        ``KeyError`` if the metadata document lacks ``info``, ``rfiles``,
        ``vid`` or ``Subject``, or if an ``rfiles`` entry lacks ``type`` or
        ``url``.
        """
        # _VALID_URL is a compact pattern, not one written in verbose syntax,
        # so it is matched without re.VERBOSE: under that flag any literal
        # whitespace or '#' later added to the pattern would be ignored.
        mobj = re.match(self._VALID_URL, url)
        text_id = mobj.group('textid')

        info_page = self._download_webpage(
            'http://vxml.56.com/json/%s/' % text_id,
            text_id, 'Downloading video info')
        info = json.loads(info_page)['info']

        formats = []
        for f in info['rfiles']:
            fmt = {
                'format_id': f['type'],
                'url': f['url'],
            }
            try:
                fmt['filesize'] = int(f['filesize'])
            except (KeyError, TypeError, ValueError):
                # The size is auxiliary metadata: a missing or malformed
                # value should not abort extraction, so the key is simply
                # left out for this format.
                pass
            formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': info['vid'],
            'title': info['Subject'],
            'formats': formats,
            # Prefer 'bimg' and fall back to 'img' when it is absent or empty.
            'thumbnail': info.get('bimg') or info.get('img'),
        }
Commit	Line	Data
e1f6e61e	1	# coding: utf-8
84c92dc0	2	from __future__ import unicode_literals
e1f6e61e JMF	3
	4	import re
	5	import json
	6
	7	from .common import InfoExtractor
84c92dc0	8
e1f6e61e JMF	9
	10	class C56IE(InfoExtractor):
	11	_VALID_URL = r'https?://((www\|player)\.)?56\.com/(.+?/)?(v_\|(play_album.+-))(?P<textid>.+?)\.(html\|swf)'
84c92dc0 PH	12	IE_NAME = '56.com'
	13	_TEST = {
	14	'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
	15	'file': '93440716.flv',
	16	'md5': 'e59995ac63d0457783ea05f93f12a866',
	17	'info_dict': {
	18	'title': '网事知多少第32期：车怒',
e1f6e61e JMF	19	},
	20	}
	21
	22	def _real_extract(self, url):
	23	mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
	24	text_id = mobj.group('textid')
	25	info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
84c92dc0	26	text_id, 'Downloading video info')
e1f6e61e	27	info = json.loads(info_page)['info']
84c92dc0 PH	28	formats = [{
	29	'format_id': f['type'],
	30	'filesize': int(f['filesize']),
	31	'url': f['url']
	32	} for f in info['rfiles']]
	33	self._sort_formats(formats)
e1f6e61e	34
84c92dc0 PH	35	return {
	36	'id': info['vid'],
	37	'title': info['Subject'],
	38	'formats': formats,
	39	'thumbnail': info.get('bimg') or info.get('img'),
	40	}