[yt-dlp.git] / youtube_dl / extractor / c56.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class C56IE(InfoExtractor):
    """Information extractor for videos hosted on 56.com.

    Matches ``v_<textid>.html`` / ``play_album...-<textid>.html`` (and
    ``.swf``) URLs on 56.com, optionally under the ``www.`` or ``player.``
    subdomain, and resolves them through the ``vxml.56.com`` JSON endpoint.
    """

    _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
    IE_NAME = '56.com'
    _TEST = {
        'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
        'md5': 'e59995ac63d0457783ea05f93f12a866',
        'info_dict': {
            'id': '93440716',
            'ext': 'flv',
            'title': '网事知多少 第32期：车怒',
            'duration': 283.813,
        },
    }

    def _real_extract(self, url):
        """Extract the video description for a 56.com URL.

        Args:
            url: A URL matching ``_VALID_URL``.

        Returns:
            A dict with the keys ``id``, ``title``, ``duration``,
            ``formats`` and ``thumbnail``. ``duration`` and each format's
            ``filesize`` are ``None`` when the service omits them or sends
            a value that cannot be parsed as an integer.

        Raises:
            KeyError: If the JSON response lacks a mandatory field
                (``info``, ``vid``, ``Subject``, ``rfiles``, or a format's
                ``type``/``url``).
        """
        def int_or_none(value):
            # Optional metadata must never abort an otherwise successful
            # extraction, so unparsable or missing values become None.
            try:
                return int(value)
            except (TypeError, ValueError):
                return None

        # The pattern contains no whitespace or '#', so no regex flags are
        # needed to match it.
        mobj = re.match(self._VALID_URL, url)
        text_id = mobj.group('textid')

        page = self._download_json(
            'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')

        info = page['info']

        formats = [
            {
                'format_id': f['type'],
                # filesize is optional: tolerate its absence.
                'filesize': int_or_none(f.get('filesize')),
                'url': f['url'],
            } for f in info['rfiles']
        ]
        self._sort_formats(formats)

        # The raw value is divided by 1000 before being reported
        # (presumably milliseconds -> seconds; the test case expects
        # 283.813).
        raw_duration = int_or_none(info.get('duration'))
        duration = raw_duration / 1000.0 if raw_duration is not None else None

        return {
            'id': info['vid'],
            'title': info['Subject'],
            'duration': duration,
            'formats': formats,
            # Fall back to 'img' when 'bimg' is missing or empty.
            'thumbnail': info.get('bimg') or info.get('img'),
        }
Commit	Line	Data
e1f6e61e	1	# coding: utf-8
84c92dc0	2	from __future__ import unicode_literals
e1f6e61e JMF	3
e1f6e61e JMF	4	import re
e1f6e61e JMF	5
e1f6e61e JMF	6	from .common import InfoExtractor
84c92dc0	7
e1f6e61e JMF	8
e1f6e61e JMF	9	class C56IE(InfoExtractor):
4a419b88	10	_VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
84c92dc0 PH	11	IE_NAME = '56.com'
	12	_TEST = {
	13	'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
84c92dc0 PH	14	'md5': 'e59995ac63d0457783ea05f93f12a866',
84c92dc0 PH	15	'info_dict': {
4a419b88 S	16	'id': '93440716',
4a419b88 S	17	'ext': 'flv',
84c92dc0	18	'title': '网事知多少 第32期：车怒',
4a419b88	19	'duration': 283.813,
e1f6e61e JMF	20	},
	21	}
	22
	23	def _real_extract(self, url):
	24	mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
	25	text_id = mobj.group('textid')
4a419b88 S	26
	27	page = self._download_json(
	28	'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
	29
	30	info = page['info']
	31
	32	formats = [
	33	{
	34	'format_id': f['type'],
	35	'filesize': int(f['filesize']),
	36	'url': f['url']
	37	} for f in info['rfiles']
	38	]
84c92dc0	39	self._sort_formats(formats)
e1f6e61e	40
84c92dc0 PH	41	return {
	42	'id': info['vid'],
	43	'title': info['Subject'],
4a419b88	44	'duration': int(info['duration']) / 1000.0,
84c92dc0 PH	45	'formats': formats,
	46	'thumbnail': info.get('bimg') or info.get('img'),
	47	}