]> jfr.im git - yt-dlp.git/blame - devscripts/make_lazy_extractors.py
[extractor] Framework for embed detection (#4307)
[yt-dlp.git] / devscripts / make_lazy_extractors.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
54007a45 2
3# Allow direct execution
21633673 4import os
779822d9
JMF
5import sys
6
e5a998f3 7sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
779822d9 8
779822d9 9
54007a45 10import optparse
11from inspect import getsource
12
# Unique sentinel: extra_ie_code() compares attribute values against it when
# there is no base class to compare with.
NO_ATTR = object()
# Class attributes whose repr() is written into every generated lazy class.
STATIC_CLASS_PROPERTIES = ['IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_NETRC_MACHINE', 'age_limit']
# Methods whose source text is copied into the generated classes.
# extra_ie_code() reads ``__func__`` on each of them, so they must be bound
# methods when looked up on the class.
CLASS_METHODS = [
    'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id', 'is_suitable'
]
# Header of one generated lazy class. The body line uses a four-space indent
# because extra_ie_code() appends inspect.getsource() output to the same class
# body, and that output keeps the indentation of the original definitions
# (assumed to be the conventional four spaces -- confirm against the extractor
# sources).
IE_TEMPLATE = '''
class {name}({bases}):
    _module = {module!r}
'''
# NOTE: the path is relative to the current working directory.
with open('devscripts/lazy_load_template.py', encoding='utf-8') as f:
    MODULE_TEMPLATE = f.read()
24
25
def main():
    """Generate the lazy-extractor module and write it to the output file.

    The output path is the single optional positional command-line argument;
    it defaults to ``yt_dlp/extractor/lazy_extractors.py``. Passing more than
    one positional argument is reported through ``parser.error``.
    """
    parser = optparse.OptionParser(usage='%prog [OUTFILE.py]')
    args = parser.parse_args()[1] or ['yt_dlp/extractor/lazy_extractors.py']
    if len(args) != 1:
        parser.error('Expected only an output filename')

    lazy_extractors_filename = args[0]
    # Remove any previously generated file before the extractors are imported.
    # EAFP: a missing file is the normal case and is simply ignored.
    # NOTE(review): presumably this keeps the import below from picking up a
    # stale generated module -- confirm.
    try:
        os.remove(lazy_extractors_filename)
    except FileNotFoundError:
        pass

    _ALL_CLASSES = get_all_ies()  # Must be before import

    from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor

    # Stand-in for the base class. IE_NAME is set to the NO_ATTR sentinel so
    # that extra_ie_code(), which compares against NO_ATTR when called without
    # a base, emits no IE_NAME line for it.
    DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
    module_src = '\n'.join((
        MODULE_TEMPLATE,
        # The following fragments continue the last class of the template, so
        # they are indented as class-body lines (four spaces, matching the
        # inspect.getsource() output yielded by extra_ie_code()).
        '    _module = None',
        *extra_ie_code(DummyInfoExtractor),
        '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n',
        *build_ies(_ALL_CLASSES, (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor),
    ))

    with open(lazy_extractors_filename, 'w', encoding='utf-8') as f:
        f.write(f'{module_src}\n')
51
52
def get_all_ies():
    """Import and return ``_ALL_CLASSES`` with the plugin directory hidden.

    If a ``ytdlp_plugins`` directory exists in the current working directory,
    it is renamed to ``ytdlp_plugins_blocked`` before the import. Whether or
    not the import succeeds, an existing ``ytdlp_plugins_blocked`` directory
    is renamed back to ``ytdlp_plugins`` afterwards.
    """
    plugins_dir = 'ytdlp_plugins'
    blocked_dir = f'{plugins_dir}_blocked'

    if os.path.exists(plugins_dir):
        os.rename(plugins_dir, blocked_dir)

    try:
        from yt_dlp.extractor.extractors import _ALL_CLASSES
        return _ALL_CLASSES
    finally:
        # Runs on success and on failure, so the directory is always restored.
        if os.path.exists(blocked_dir):
            os.rename(blocked_dir, plugins_dir)
64
65
def extra_ie_code(ie, base=None):
    """Yield the class-body source fragments for the lazy version of *ie*.

    Yields, in order:

    * one ``NAME = <repr>`` line for every name in STATIC_CLASS_PROPERTIES
      whose value on *ie* differs from the value on *base* (or from the
      NO_ATTR sentinel when *base* is not given);
    * a single empty string;
    * the ``inspect.getsource()`` text of every method in CLASS_METHODS whose
      underlying function differs from the one on *base* (every method when
      *base* is not given).
    """
    for var in STATIC_CLASS_PROPERTIES:
        val = getattr(ie, var)
        inherited = getattr(base, var) if base else NO_ATTR
        if val != inherited:
            # Four-space indent: this line shares a class body with the
            # getsource() output below, which keeps the indentation of the
            # original method definitions (assumed to be four spaces --
            # confirm against the extractor sources).
            yield f'    {var} = {val!r}'
    yield ''

    for name in CLASS_METHODS:
        f = getattr(ie, name)
        # Compare the underlying functions; the bound-method objects themselves
        # are bound to different classes.
        if not base or f.__func__ != getattr(base, name).__func__:
            yield getsource(f)
77
78
def build_ies(ies, bases, attr_base):
    """Yield the source of every lazy class, then the ``_ALL_CLASSES`` list.

    Classes are emitted in the order produced by ``sort_ies(ies, bases)``,
    which may include base classes that are not in *ies*. Only classes that
    are members of *ies* are named in the final ``_ALL_CLASSES = [...]`` line.

    *attr_base* is passed through to ``build_lazy_ie`` unchanged.
    """
    # Build the membership set once instead of scanning the list of extractors
    # for every emitted class.
    requested = set(ies)
    names = []
    for ie in sort_ies(ies, bases):
        yield build_lazy_ie(ie, ie.__name__, attr_base)
        if ie in requested:
            names.append(ie.__name__)

    yield f'\n_ALL_CLASSES = [{", ".join(names)}]'
87
88
def sort_ies(ies, ignored_bases):
    """Yield the classes of *ies* ordered so that bases precede subclasses.

    Base classes that are required but missing from *ies* are added to the
    output, except ``object`` and the classes in *ignored_bases*. The last
    element of *ies* must be named ``GenericIE``; it is always yielded last.

    Args:
        ies: non-empty sequence of classes (list, tuple, ...) ending with
            ``GenericIE``.
        ignored_bases: iterable of base classes that are never emitted.

    Raises:
        AssertionError: if *ies* is empty, if its last class is not named
            ``GenericIE``, or if a class inherits from ``GenericIE``. The
            checks are explicit raises so they still run under ``python -O``.
    """
    if not ies or ies[-1].__name__ != 'GenericIE':
        raise AssertionError('Last IE must be GenericIE')

    # Work on a private list so any sequence type is accepted and the caller's
    # data is never mutated.
    classes, returned_classes = list(ies[:-1]), set()
    ignored = {object, *ignored_bases}
    while classes:
        # ``classes`` is only mutated immediately before a ``break``, so it is
        # safe to iterate it directly; every pass restarts from the front.
        for c in classes:
            bases = set(c.__bases__) - ignored
            restart = False
            # Sorted by name so the output order is deterministic.
            for b in sorted(bases, key=lambda x: x.__name__):
                if b not in classes and b not in returned_classes:
                    if b.__name__ == 'GenericIE':
                        raise AssertionError('Cannot inherit from GenericIE')
                    # Schedule the missing base ahead of everything pending.
                    classes.insert(0, b)
                    restart = True
            if restart:
                break
            if bases <= returned_classes:
                # Every relevant base has been emitted already.
                yield c
                returned_classes.add(c)
                classes.remove(c)
                break
    yield ies[-1]
110
111
def build_lazy_ie(ie, name, attr_base):
    """Return the source text of the lazy class generated for *ie*.

    The class header comes from IE_TEMPLATE, filled with *name*, the module of
    *ie* and its base-class names; ``InfoExtractor`` and
    ``SearchInfoExtractor`` are replaced by their lazy counterparts. The
    fragments from ``extra_ie_code(ie, attr_base)`` are joined with newlines
    and appended to the header.
    """
    lazy_names = {
        'InfoExtractor': 'LazyLoadExtractor',
        'SearchInfoExtractor': 'LazyLoadSearchExtractor',
    }
    base_names = [lazy_names.get(parent.__name__, parent.__name__) for parent in ie.__bases__]

    header = IE_TEMPLATE.format(name=name, module=ie.__module__, bases=', '.join(base_names))
    body = '\n'.join(extra_ie_code(ie, attr_base))
    return header + body
120
121
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()