]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/bbc.py
[extractor/generic] Remove HEAD request
[yt-dlp.git] / yt_dlp / extractor / bbc.py
index 8231557300866ff071b7219ffda0be9186334daf..5ddeef7b5dce2a3ac17dd65e60694a677a530ca4 100644 (file)
@@ -1,19 +1,12 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import functools
 import itertools
 import json
 import re
+import urllib.error
+import xml.etree.ElementTree
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_etree_Element,
-    compat_HTTPError,
-    compat_str,
-    compat_urllib_error,
-    compat_urlparse,
-)
+from ..compat import compat_HTTPError, compat_str, compat_urlparse
 from ..utils import (
     ExtractorError,
     OnDemandPagedList,
@@ -318,7 +311,7 @@ def _get_subtitles(self, media, programme_id):
                 continue
             captions = self._download_xml(
                 cc_url, programme_id, 'Downloading captions', fatal=False)
-            if not isinstance(captions, compat_etree_Element):
+            if not isinstance(captions, xml.etree.ElementTree.Element):
                 continue
             subtitles['en'] = [
                 {
@@ -394,7 +387,7 @@ def _process_media_selector(self, media_selection, programme_id):
                                 href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                                 m3u8_id=format_id, fatal=False)
                         except ExtractorError as e:
-                            if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError)
+                            if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
                                     and e.exc_info[1].code in (403, 404)):
                                 raise
                             fmts = []
@@ -906,9 +899,8 @@ def _real_extract(self, url):
 
         playlist_title = json_ld_info.get('title')
         if not playlist_title:
-            playlist_title = self._og_search_title(
-                webpage, default=None) or self._html_search_regex(
-                r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
+            playlist_title = (self._og_search_title(webpage, default=None)
+                              or self._html_extract_title(webpage, 'playlist title', default=None))
             if playlist_title:
                 playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()