]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/channel9.py
Fix "invalid escape sequences" error on Python 3.6
[yt-dlp.git] / youtube_dl / extractor / channel9.py
CommitLineData
adc267ee 1from __future__ import unicode_literals
df537474 2
3import re
4
5from .common import InfoExtractor
1db82381
S
6from ..utils import (
7 ExtractorError,
8 parse_filesize,
9 qualities,
10)
df537474 11
5f6a1245 12
class Channel9IE(InfoExtractor):
    '''
    Common extractor for channel9.msdn.com.

    The type of provided URL (video or playlist) is determined according to
    meta Search.PageType from web page HTML rather than URL itself, as it is
    not always possible to do.
    '''
    IE_DESC = 'Channel 9'
    IE_NAME = 'channel9'
    _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'

    _TESTS = [{
        'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
        'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
        'info_dict': {
            'id': 'Events/TechEd/Australia/2013/KOS002',
            'ext': 'mp4',
            'title': 'Developer Kick-Off Session: Stuff We Love',
            'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
            'duration': 4576,
            'thumbnail': r're:http://.*\.jpg',
            'session_code': 'KOS002',
            'session_day': 'Day 1',
            'session_room': 'Arena 1A',
            'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug',
                                 'Mads Kristensen'],
        },
    }, {
        'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
        'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
        'info_dict': {
            'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
            'ext': 'mp4',
            'title': 'Self-service BI with Power BI - nuclear testing',
            'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
            'duration': 1540,
            'thumbnail': r're:http://.*\.jpg',
            'authors': ['Mike Wilmot'],
        },
    }, {
        # low quality mp4 is best
        'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
        'info_dict': {
            'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
            'ext': 'mp4',
            'title': 'Ranges for the Standard Library',
            'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
            'duration': 5646,
            'thumbnail': r're:http://.*\.jpg',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
        'info_dict': {
            'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
            'title': 'Channel 9',
        },
        'playlist_count': 2,
    }, {
        'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
        'only_matching': True,
    }, {
        'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
        'only_matching': True,
    }]

    _RSS_URL = 'http://channel9.msdn.com/%s/RSS'

    def _formats_from_html(self, html):
        """Scrape the download links from the page and return sorted formats."""
        FORMAT_REGEX = r'''
            (?x)
            <a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s*
            <span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s*
            (?:<div\s+class="popup\s+rounded">\s*
            <h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
            </div>)?                                # File size part may be missing
        '''
        quality = qualities((
            'MP3', 'MP4',
            'Low Quality WMV', 'Low Quality MP4',
            'Mid Quality WMV', 'Mid Quality MP4',
            'High Quality WMV', 'High Quality MP4'))
        # No need to materialize the finditer iterator before iterating it
        formats = [{
            'url': x.group('url'),
            'format_id': x.group('quality'),
            'format_note': x.group('note'),
            'format': '%s (%s)' % (x.group('quality'), x.group('note')),
            'filesize_approx': parse_filesize(x.group('filesize')),
            'quality': quality(x.group('quality')),
            'vcodec': 'none' if x.group('note') == 'Audio only' else None,
        } for x in re.finditer(FORMAT_REGEX, html)]

        self._sort_formats(formats)

        return formats

    def _extract_title(self, html):
        """Return the page title, preferring the meta tag, minus the site suffix."""
        title = self._html_search_meta('title', html, 'title')
        if title is None:
            title = self._og_search_title(html)
        TITLE_SUFFIX = ' (Channel 9)'
        if title is not None and title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)]
        return title

    def _extract_description(self, html):
        """Return the entry body HTML, falling back to the meta description."""
        DESCRIPTION_REGEX = r'''(?sx)
            <div\s+class="entry-content">\s*
            <div\s+id="entry-body">\s*
            (?P<description>.+?)\s*
            </div>\s*
            </div>
        '''
        m = re.search(DESCRIPTION_REGEX, html)
        if m is not None:
            return m.group('description')
        return self._html_search_meta('description', html, 'description')

    def _extract_duration(self, html):
        """Return the duration in seconds parsed from the "length" JSON field, or None."""
        m = re.search(r'"length": *"(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
        return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None

    def _extract_slides(self, html):
        """Return the slides download URL, or None if not present."""
        m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html)
        return m.group('slidesurl') if m is not None else None

    def _extract_zip(self, html):
        """Return the zip download URL, or None if not present."""
        m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html)
        return m.group('zipurl') if m is not None else None

    def _extract_avg_rating(self, html):
        """Return the average rating as a float, or 0 if not present."""
        m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html)
        return float(m.group('avgrating')) if m is not None else 0

    def _extract_rating_count(self, html):
        """Return the number of ratings, or 0 if not present."""
        m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html)
        # _fix_count already returns an int for a non-None argument
        return self._fix_count(m.group('ratingcount')) if m is not None else 0

    def _extract_view_count(self, html):
        """Return the view count, or 0 if not present."""
        m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html)
        return self._fix_count(m.group('viewcount')) if m is not None else 0

    def _extract_comment_count(self, html):
        """Return the comment count, or 0 if not present."""
        m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html)
        return self._fix_count(m.group('commentcount')) if m is not None else 0

    def _fix_count(self, count):
        """Parse a count string like "1,234" into an int; None stays None."""
        return int(str(count).replace(',', '')) if count is not None else None

    def _extract_authors(self, html):
        """Return the list of author names, or None if the author block is absent."""
        m = re.search(r'(?s)<li class="author">(.*?)</li>', html)
        if m is None:
            return None
        return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', m.group(1))

    def _extract_session_code(self, html):
        """Return the session code (e.g. "KOS002"), or None."""
        m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html)
        return m.group('code') if m is not None else None

    def _extract_session_day(self, html):
        """Return the session day label (e.g. "Day 1"), or None."""
        m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
        return m.group('day').strip() if m is not None else None

    def _extract_session_room(self, html):
        """Return the session room name, or None."""
        m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)
        return m.group('room') if m is not None else None

    def _extract_session_speakers(self, html):
        """Return the list of session speaker names (possibly empty)."""
        return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html)

    def _extract_content(self, html, content_path):
        """Extract downloadable content (recording, slides, zip) and shared metadata.

        Returns a list of info dicts, or None if nothing is downloadable.
        """
        # Look for downloadable content
        formats = self._formats_from_html(html)
        slides = self._extract_slides(html)
        zip_ = self._extract_zip(html)

        # Nothing to download
        if len(formats) == 0 and slides is None and zip_ is None:
            self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path)
            return

        # Extract meta
        title = self._extract_title(html)
        description = self._extract_description(html)
        thumbnail = self._og_search_thumbnail(html)
        duration = self._extract_duration(html)
        avg_rating = self._extract_avg_rating(html)
        rating_count = self._extract_rating_count(html)
        view_count = self._extract_view_count(html)
        comment_count = self._extract_comment_count(html)

        common = {
            '_type': 'video',
            'id': content_path,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'avg_rating': avg_rating,
            'rating_count': rating_count,
            'view_count': view_count,
            'comment_count': comment_count,
        }

        result = []

        if slides is not None:
            d = common.copy()
            d.update({'title': title + '-Slides', 'url': slides})
            result.append(d)

        if zip_ is not None:
            d = common.copy()
            d.update({'title': title + '-Zip', 'url': zip_})
            result.append(d)

        if len(formats) > 0:
            d = common.copy()
            d.update({'title': title, 'formats': formats})
            result.append(d)

        return result

    def _extract_entry_item(self, html, content_path):
        """Extract a single 'item'-like page; raises if it yields multiple entries."""
        contents = self._extract_content(html, content_path)
        if contents is None:
            return contents

        if len(contents) > 1:
            raise ExtractorError('Got more than one entry')
        result = contents[0]
        result['authors'] = self._extract_authors(html)

        return result

    def _extract_session(self, html, content_path):
        """Extract an event session page as a playlist annotated with session metadata."""
        contents = self._extract_content(html, content_path)
        if contents is None:
            return contents

        session_meta = {
            'session_code': self._extract_session_code(html),
            'session_day': self._extract_session_day(html),
            'session_room': self._extract_session_room(html),
            'session_speakers': self._extract_session_speakers(html),
        }

        for content in contents:
            content.update(session_meta)

        return self.playlist_result(contents)

    def _extract_list(self, video_id, rss_url=None):
        """Extract a playlist from an RSS feed (derived from video_id unless given)."""
        if not rss_url:
            rss_url = self._RSS_URL % video_id
        rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
        entries = [self.url_result(session_url.text, 'Channel9')
                   for session_url in rss.findall('./channel/item/link')]
        title_text = rss.find('./channel/title').text
        return self.playlist_result(entries, video_id, title_text)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        content_path = mobj.group('contentpath')
        rss = mobj.group('rss')

        if rss:
            return self._extract_list(content_path, url)

        webpage = self._download_webpage(
            url, content_path, 'Downloading web page')

        page_type = self._search_regex(
            r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2',
            webpage, 'page type', default=None, group='pagetype')
        if page_type:
            if page_type == 'Entry':      # Any 'item'-like page, may contain downloadable content
                return self._extract_entry_item(webpage, content_path)
            elif page_type == 'Session':  # Event session page, may contain downloadable content
                return self._extract_session(webpage, content_path)
            elif page_type == 'Event':
                return self._extract_list(content_path)
            else:
                raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
        else:  # Assuming list
            return self._extract_list(content_path)