[yt-dlp.git] / youtube_dl / extractor / bambuser.py

from __future__ import unicode_literals

import re
import json
import itertools

from .common import InfoExtractor
from ..utils import (
    compat_urllib_request,
)


class BambuserIE(InfoExtractor):
    """Information extractor for single bambuser.com broadcasts (``/v/<id>``).

    The broadcast metadata is requested as JSON from the ``getVideo.json``
    endpoint, authenticated with the key stored in ``_API_KEY``.
    """

    IE_NAME = 'bambuser'
    _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
    _API_KEY = '005f64509e19a868399060af746a00aa'

    _TEST = {
        'url': 'http://bambuser.com/v/4050584',
        # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
        # 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
        'info_dict': {
            'id': '4050584',
            'ext': 'flv',
            'title': 'Education engineering days - lightning talks',
            'duration': 3741,
            'uploader': 'pixelversity',
            'uploader_id': '344706',
        },
        'params': {
            # It doesn't respect the 'Range' header, it would download the whole video
            # caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
            'skip_download': True,
        },
    }

    @staticmethod
    def _optional_int(value):
        """Return ``int(value)``, or None if value is None or not numeric."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    def _real_extract(self, url):
        """Build the info dictionary for the broadcast addressed by *url*.

        *url* is expected to match ``_VALID_URL``; the numeric id captured
        from it is used both as the API ``vid`` parameter and as the
        resulting ``id``.

        ``title`` and ``url`` are mandatory: a response lacking them (or
        lacking the top-level ``result`` entry) raises ``KeyError``.  All
        other fields are optional and become None when the API omits them,
        so missing secondary metadata does not abort the extraction.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
            '&api_key=%s&vid=%s' % (self._API_KEY, video_id))
        # Use the shared JSON helper (as BambuserChannelIE does) instead of
        # downloading the page and parsing it by hand.
        info = self._download_json(info_url, video_id)['result']

        return {
            'id': video_id,
            'title': info['title'],
            'url': info['url'],
            'thumbnail': info.get('preview'),
            'duration': self._optional_int(info.get('length')),
            'view_count': self._optional_int(info.get('views_total')),
            'uploader': info.get('username'),
            'uploader_id': info.get('uid'),
        }


class BambuserChannelIE(InfoExtractor):
    """Information extractor that lists every broadcast of a bambuser.com channel."""

    IE_NAME = 'bambuser:channel'
    _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
    # The maximum number we can get with each request
    _STEP = 50
    _TEST = {
        'url': 'http://bambuser.com/channel/pixelversity',
        'info_dict': {
            'title': 'pixelversity',
        },
        'playlist_mincount': 60,
    }

    def _real_extract(self, url):
        """Return a playlist dictionary with one entry per channel broadcast.

        The channel listing is fetched page by page (``_STEP`` items per
        request).  Each following request passes the ``vid`` of the last
        item received so far as ``vid_older_than``; paging stops at the
        first page whose ``result`` is empty.  The playlist title is the
        user name captured from *url*.
        """
        user = re.match(self._VALID_URL, url).group('user')
        # Without setting this header, we wouldn't get any result
        referer = 'http://bambuser.com/channel/%s' % user

        entries = []
        oldest_vid = ''
        for page_num in itertools.count(1):
            page_url = (
                'http://bambuser.com/xhr-api/index.php?username={user}'
                '&sort=created&access_mode=0%2C1%2C2&limit={count}'
                '&method=broadcast&format=json&vid_older_than={last}'
            ).format(user=user, count=self._STEP, last=oldest_vid)
            request = compat_urllib_request.Request(page_url)
            request.add_header('Referer', referer)

            page = self._download_json(
                request, user, 'Downloading page %d' % page_num)
            broadcasts = page['result']
            if not broadcasts:
                # An empty page marks the end of the channel listing.
                return {
                    '_type': 'playlist',
                    'title': user,
                    'entries': entries,
                }

            # Remember where the next page has to continue from.
            oldest_vid = broadcasts[-1]['vid']
            for broadcast in broadcasts:
                entries.append(self.url_result(broadcast['page'], 'Bambuser'))
Commit	Line	Data
3798eadc PH	1	from __future__ import unicode_literals
3798eadc PH	2
72a5b4f7 JMF	3	import re
72a5b4f7 JMF	4	import json
165e3bb6	5	import itertools
72a5b4f7 JMF	6
72a5b4f7 JMF	7	from .common import InfoExtractor
165e3bb6 JMF	8	from ..utils import (
	9	compat_urllib_request,
	10	)
72a5b4f7 JMF	11
	12
	13	class BambuserIE(InfoExtractor):
3798eadc	14	IE_NAME = 'bambuser'
72a5b4f7 JMF	15	_VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
	16	_API_KEY = '005f64509e19a868399060af746a00aa'
	17
	18	_TEST = {
3798eadc	19	'url': 'http://bambuser.com/v/4050584',
ce152341	20	# MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
3798eadc PH	21	#u'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
	22	'info_dict': {
	23	'id': '4050584',
	24	'ext': 'flv',
	25	'title': 'Education engineering days - lightning talks',
	26	'duration': 3741,
	27	'uploader': 'pixelversity',
	28	'uploader_id': '344706',
72a5b4f7	29	},
3798eadc	30	'params': {
1a62c18f JMF	31	# It doesn't respect the 'Range' header, it would download the whole video
1a62c18f JMF	32	# caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
3798eadc	33	'skip_download': True,
1a62c18f	34	},
72a5b4f7 JMF	35	}
	36
	37	def _real_extract(self, url):
	38	mobj = re.match(self._VALID_URL, url)
	39	video_id = mobj.group('id')
	40	info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
	41	'&api_key=%s&vid=%s' % (self._API_KEY, video_id))
	42	info_json = self._download_webpage(info_url, video_id)
	43	info = json.loads(info_json)['result']
	44
	45	return {
	46	'id': video_id,
	47	'title': info['title'],
	48	'url': info['url'],
165e3bb6	49	'thumbnail': info.get('preview'),
72a5b4f7 JMF	50	'duration': int(info['length']),
	51	'view_count': int(info['views_total']),
	52	'uploader': info['username'],
	53	'uploader_id': info['uid'],
	54	}
	55
165e3bb6 JMF	56
165e3bb6 JMF	57	class BambuserChannelIE(InfoExtractor):
3798eadc	58	IE_NAME = 'bambuser:channel'
c0ade33e	59	_VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/\|#\|\?\|$)'
165e3bb6 JMF	60	# The maximum number we can get with each request
165e3bb6 JMF	61	_STEP = 50
22a6f150 PH	62	_TEST = {
	63	'url': 'http://bambuser.com/channel/pixelversity',
	64	'info_dict': {
	65	'title': 'pixelversity',
	66	},
	67	'playlist_mincount': 60,
	68	}
165e3bb6 JMF	69
	70	def _real_extract(self, url):
	71	mobj = re.match(self._VALID_URL, url)
	72	user = mobj.group('user')
	73	urls = []
	74	last_id = ''
	75	for i in itertools.count(1):
	76	req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
	77	'&sort=created&access_mode=0%2C1%2C2&limit={count}'
	78	'&method=broadcast&format=json&vid_older_than={last}'
	79	).format(user=user, count=self._STEP, last=last_id)
	80	req = compat_urllib_request.Request(req_url)
	81	# Without setting this header, we wouldn't get any result
	82	req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
22a6f150 PH	83	data = self._download_json(
	84	req, user, 'Downloading page %d' % i)
	85	results = data['result']
	86	if not results:
165e3bb6 JMF	87	break
	88	last_id = results[-1]['vid']
	89	urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
	90
	91	return {
	92	'_type': 'playlist',
	93	'title': user,
	94	'entries': urls,
	95	}