[yt-dlp.git] / youtube_dl / extractor / vevo.py

import re
import json
import xml.etree.ElementTree
import datetime

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    ExtractorError,
)


class VevoIE(InfoExtractor):
    """
    Information extractor for Vevo videos.

    Accepts urls from vevo.com or in the format 'vevo:{id}'
    (currently used by MTVIE)
    """
    # The dots of the host name are escaped so that they only match a literal
    # '.' instead of any character.
    _VALID_URL = r'((http://www\.vevo\.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
    _TEST = {
        u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        u'file': u'GB1101300280.mp4',
        u'info_dict': {
            u"upload_date": u"20130624",
            u"uploader": u"Hurts",
            u"title": u"Somebody to Die For",
            u'duration': 230,
        }
    }

    def _real_extract(self, url):
        """
        Build the info dictionary for the video referenced by url.

        The video id is taken from the 'id' group of _VALID_URL and used to
        query the AuthenticateVideo service; the JSON answer provides the
        metadata and an XML document listing the available renditions.

        Returns a dictionary with the keys 'id', 'title', 'formats',
        'thumbnail', 'upload_date' (YYYYMMDD, computed in UTC), 'uploader'
        and 'duration'.

        Raises ExtractorError if the answer contains no HTTP download version.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
        info_json = self._download_webpage(json_url, video_id, u'Downloading json info')

        self.report_extraction(video_id)
        video_info = json.loads(info_json)['video']

        # Keep the HTTP download entry with the highest version number; the
        # placeholder's version of -1 marks "nothing found yet".
        last_version = {'version': -1}
        for version in video_info['videoVersions']:
            # These are the HTTP downloads, other types are for different manifests
            if version['sourceType'] != 2:
                continue
            if version['version'] > last_version['version']:
                last_version = version
        if last_version['version'] == -1:
            raise ExtractorError(u'Unable to extract last version of the video')

        # The 'data' field of the selected version is an XML document with
        # one <rendition> element per available format.
        renditions = xml.etree.ElementTree.fromstring(last_version['data'])
        formats = []
        # Already sorted from worst to best quality
        for rend in renditions.findall('rendition'):
            attr = rend.attrib
            format_note = '%(videoCodec)s@%(videoBitrate)4sK, %(audioCodec)s@%(audioBitrate)3sK' % attr
            formats.append({
                'url': attr['url'],
                'format_id': attr['name'],
                'format_note': format_note,
                'height': int(attr['frameheight']),
                'width': int(attr['frameWidth']),
            })

        # launchDate has the form '/Date(<digits>)/'; the digits are handled
        # as milliseconds since the epoch.
        timestamp_ms = int(self._search_regex(
            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
        # Compute the date in UTC: datetime.fromtimestamp() would apply the
        # local timezone of the machine, so the resulting day could differ
        # from one computer to another.
        upload_date = (datetime.datetime(1970, 1, 1) +
                       datetime.timedelta(seconds=timestamp_ms // 1000))

        return {
            'id': video_id,
            'title': video_info['title'],
            'formats': formats,
            'thumbnail': video_info['imageUrl'],
            'upload_date': upload_date.strftime('%Y%m%d'),
            'uploader': video_info['mainArtists'][0]['artistName'],
            'duration': video_info['duration'],
        }
Commit	Line	Data
70d1924f JMF	1	import re
70d1924f JMF	2	import json
88bd97e3 JMF	3	import xml.etree.ElementTree
88bd97e3 JMF	4	import datetime
70d1924f JMF	5
	6	from .common import InfoExtractor
	7	from ..utils import (
88bd97e3	8	determine_ext,
70d1924f JMF	9	ExtractorError,
	10	)
	11
88bd97e3	12
70d1924f	13	class VevoIE(InfoExtractor):
1c251cd9	14	"""
0577177e	15	Accepts urls from vevo.com or in the format 'vevo:{id}'
1c251cd9 JMF	16	(currently used by MTVIE)
1c251cd9 JMF	17	"""
75340ee3	18	_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
6f5ac90c PH	19	_TEST = {
	20	u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
	21	u'file': u'GB1101300280.mp4',
6f5ac90c	22	u'info_dict': {
75340ee3 JMF	23	u"upload_date": u"20130624",
75340ee3 JMF	24	u"uploader": u"Hurts",
88bd97e3 JMF	25	u"title": u"Somebody to Die For",
88bd97e3 JMF	26	u'duration': 230,
6f5ac90c PH	27	}
6f5ac90c PH	28	}
70d1924f JMF	29
	30	def _real_extract(self, url):
	31	mobj = re.match(self._VALID_URL, url)
	32	video_id = mobj.group('id')
	33
88bd97e3	34	json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
70d1924f	35	info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
70d1924f JMF	36
70d1924f JMF	37	self.report_extraction(video_id)
88bd97e3 JMF	38	video_info = json.loads(info_json)['video']
	39	last_version = {'version': -1}
	40	for version in video_info['videoVersions']:
	41	# These are the HTTP downloads, other types are for different manifests
	42	if version['sourceType'] == 2:
	43	if version['version'] > last_version['version']:
	44	last_version = version
	45	if last_version['version'] == -1:
	46	raise ExtractorError(u'Unable to extract last version of the video')
	47
	48	renditions = xml.etree.ElementTree.fromstring(last_version['data'])
	49	formats = []
	50	# Already sorted from worst to best quality
	51	for rend in renditions.findall('rendition'):
	52	attr = rend.attrib
e54fd4b2	53	format_note = '%(videoCodec)s@%(videoBitrate)4sK, %(audioCodec)s@%(audioBitrate)3sK' % attr
88bd97e3	54	formats.append({
e54fd4b2 PH	55	'url': attr['url'],
	56	'format_id': attr['name'],
	57	'format_note': format_note,
88bd97e3 JMF	58	'height': int(attr['frameheight']),
	59	'width': int(attr['frameWidth']),
	60	})
	61
912cbf5d PH	62	timestamp_ms = int(self._search_regex(
	63	r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
	64	upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
88bd97e3 JMF	65	info = {
	66	'id': video_id,
	67	'title': video_info['title'],
	68	'formats': formats,
	69	'thumbnail': video_info['imageUrl'],
	70	'upload_date': upload_date.strftime('%Y%m%d'),
	71	'uploader': video_info['mainArtists'][0]['artistName'],
	72	'duration': video_info['duration'],
	73	}
	74
88bd97e3	75	return info