jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/common.py
[cleanup] Minor fixes
[yt-dlp.git] / yt_dlp / extractor / common.py
index ebeca4395166f934104a48c942d6d1acb981302d..6a451c20bba2c8e65ae9f483d7a8ff16ea5e1342 100644 (file)
@@ -1343,7 +1343,7 @@ def _og_search_url(self, html, **kargs):
         return self._og_search_property('url', html, **kargs)
 
     def _html_extract_title(self, html, name='title', *, fatal=False, **kwargs):
-        return self._html_search_regex(r'(?s)<title>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
+        return self._html_search_regex(r'(?s)<title\b[^>]*>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
 
     def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
         name = variadic(name)
@@ -1509,8 +1509,9 @@ def extract_video_object(e):
                 'url': url_or_none(e.get('contentUrl')),
                 'title': unescapeHTML(e.get('name')),
                 'description': unescapeHTML(e.get('description')),
-                'thumbnails': [{'url': url_or_none(url)}
-                               for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))],
+                'thumbnails': [{'url': url}
+                               for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))
+                               if url_or_none(url)],
                 'duration': parse_duration(e.get('duration')),
                 'timestamp': unified_timestamp(e.get('uploadDate')),
                 # author can be an instance of 'Organization' or 'Person' types.
@@ -2803,13 +2804,18 @@ def extract_Initialization(source):
                     mime_type = representation_attrib['mimeType']
                     content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
 
-                    codecs = parse_codecs(representation_attrib.get('codecs', ''))
+                    codec_str = representation_attrib.get('codecs', '')
+                    # Some kind of binary subtitle found in some youtube livestreams
+                    if mime_type == 'application/x-rawcc':
+                        codecs = {'scodec': codec_str}
+                    else:
+                        codecs = parse_codecs(codec_str)
                     if content_type not in ('video', 'audio', 'text'):
                         if mime_type == 'image/jpeg':
                             content_type = mime_type
-                        elif codecs['vcodec'] != 'none':
+                        elif codecs.get('vcodec', 'none') != 'none':
                             content_type = 'video'
-                        elif codecs['acodec'] != 'none':
+                        elif codecs.get('acodec', 'none') != 'none':
                             content_type = 'audio'
                         elif codecs.get('scodec', 'none') != 'none':
                             content_type = 'text'