]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/mhtml.py
[fd/dash] Force native downloader for `--live-from-start` (#8339)
[yt-dlp.git] / yt_dlp / downloader / mhtml.py
CommitLineData
cdb19aa4 1import io
2import quopri
3import re
4import uuid
5
6from .fragment import FragmentFD
b4daacb4 7from ..compat import imghdr
f8271158 8from ..utils import escapeHTML, formatSeconds, srt_subtitles_timecode, urljoin
cdb19aa4 9from ..version import __version__ as YT_DLP_VERSION
10
11
class MhtmlFD(FragmentFD):
    """Fragment downloader that bundles image fragments into one MHTML file.

    The output is a ``multipart/related`` MIME document. Its first part is a
    generated HTML page with one ``<figure>`` per fragment; every following
    part is one downloaded fragment, referenced from the page by Content-ID.
    """

    # Stylesheet embedded in the generated HTML page. It is minified once at
    # class-creation time by the two substitutions below.
    _STYLESHEET = """\
html, body {
    margin: 0;
    padding: 0;
    height: 100vh;
}

html {
    overflow-y: scroll;
    scroll-snap-type: y mandatory;
}

body {
    scroll-snap-type: y mandatory;
    display: flex;
    flex-flow: column;
}

body > figure {
    max-width: 100vw;
    max-height: 100vh;
    scroll-snap-align: center;
}

body > figure > figcaption {
    text-align: center;
    height: 2.5em;
}

body > figure > img {
    display: block;
    margin: auto;
    max-width: 100%;
    max-height: calc(100vh - 5em);
}
"""
    # Collapse every whitespace run to a single space ...
    _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
    # ... then drop the spaces that do not separate two word-like tokens.
    _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)

    @staticmethod
    def _escape_mime(s):
        """Return *s* wrapped as a UTF-8 'Q' encoded-word (``=?utf-8?Q?...?=``).

        The text is quoted-printable encoded in header mode; any byte below
        0x20 that is still present afterwards is additionally written as
        ``=XX`` so that no raw control character ends up in the header.
        """
        return '=?utf-8?Q?' + (b''.join(
            bytes((b,)) if b >= 0x20 else b'=%02X' % b
            for b in quopri.encodestring(s.encode(), header=True)
        )).decode('us-ascii') + '?='

    def _gen_cid(self, i, fragment, frag_boundary):
        """Return the Content-ID for fragment number *i* (0-based).

        *fragment* is accepted but not used by this implementation.
        """
        return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary)

    def _gen_stub(self, *, fragments, frag_boundary, title):
        """Build the HTML page that displays every fragment as a captioned figure.

        @param fragments      Iterable of fragment dicts; 'duration' is optional
        @param frag_boundary  MIME boundary string, used to derive Content-IDs
        @param title          Page title (HTML-escaped here)
        @returns              The HTML document as a str
        """
        output = io.StringIO()

        output.write((
            '<!DOCTYPE html>'
            '<html>'
            '<head>'
            ''  '<meta name="generator" content="yt-dlp {version}">'
            ''  '<title>{title}</title>'
            ''  '<style>{styles}</style>'
            '<body>'
        ).format(
            version=escapeHTML(YT_DLP_VERSION),
            styles=self._STYLESHEET,
            title=escapeHTML(title),
        ))

        t0 = 0
        for i, frag in enumerate(fragments):
            output.write('<figure>')
            try:
                t1 = t0 + frag['duration']
                output.write((
                    '<figcaption>Slide #{num}: {t0} – {t1} (duration: {duration})</figcaption>'
                ).format(
                    num=i + 1,
                    t0=srt_subtitles_timecode(t0),
                    t1=srt_subtitles_timecode(t1),
                    duration=formatSeconds(frag['duration'], msec=True),
                ))
            except (KeyError, ValueError, TypeError):
                # No usable duration: emit a caption without timing. Setting
                # t1 to None makes t0 None for the next fragment, so the
                # addition above fails and later captions are untimed as well.
                t1 = None
                output.write((
                    '<figcaption>Slide #{num}</figcaption>'
                ).format(num=i + 1))
            output.write('<img src="cid:{cid}">'.format(
                cid=self._gen_cid(i, frag, frag_boundary)))
            output.write('</figure>')
            t0 = t1

        return output.getvalue()

    def real_download(self, filename, info_dict):
        """Download all fragments of *info_dict* and write them to *filename* as MHTML.

        @param filename   Destination file name
        @param info_dict  Format dict; reads 'fragments', 'format_id', 'url' and
                          optionally 'fragment_base_url', 'title', 'webpage_url'
        @returns          The result of _finish_frag_download()
        """
        fragment_base_url = info_dict.get('fragment_base_url')
        # In test mode only the first fragment is processed
        fragments = info_dict['fragments'][:1] if self.params.get(
            'test', False) else info_dict['fragments']
        title = info_dict.get('title', info_dict['format_id'])
        origin = info_dict.get('webpage_url', info_dict['url'])

        ctx = {
            'filename': filename,
            'total_frags': len(fragments),
        }

        self._prepare_and_start_frag_download(ctx, info_dict)

        # NOTE(review): 'extra_state' is presumably restored by the call above
        # when resuming, so that the same boundary is reused - confirm in FragmentFD
        extra_state = ctx.setdefault('extra_state', {
            'header_written': False,
            'mime_boundary': str(uuid.uuid4()).replace('-', ''),
        })

        frag_boundary = extra_state['mime_boundary']

        if not extra_state['header_written']:
            stub = self._gen_stub(
                fragments=fragments,
                frag_boundary=frag_boundary,
                title=title,
            )

            ctx['dest_stream'].write((
                'MIME-Version: 1.0\r\n'
                'From: <nowhere@yt-dlp.github.io.invalid>\r\n'
                'To: <nowhere@yt-dlp.github.io.invalid>\r\n'
                'Subject: {title}\r\n'
                'Content-type: multipart/related; '
                ''  'boundary="{boundary}"; '
                ''  'type="text/html"\r\n'
                'X.yt-dlp.Origin: {origin}\r\n'
                '\r\n'
                '--{boundary}\r\n'
                'Content-Type: text/html; charset=utf-8\r\n'
                'Content-Length: {length}\r\n'
                '\r\n'
                '{stub}\r\n'
            ).format(
                origin=origin,
                boundary=frag_boundary,
                # The stub is written UTF-8 encoded, so the length must count
                # encoded bytes, not characters (captions contain an en dash)
                length=len(stub.encode()),
                title=self._escape_mime(title),
                stub=stub,
            ).encode())
            extra_state['header_written'] = True

        for i, fragment in enumerate(fragments):
            # Skip fragments at or below the index already recorded in ctx
            if (i + 1) <= ctx['fragment_index']:
                continue

            fragment_url = fragment.get('url')
            if not fragment_url:
                assert fragment_base_url
                fragment_url = urljoin(fragment_base_url, fragment['path'])

            success = self._download_fragment(ctx, fragment_url, info_dict)
            if not success:
                continue
            frag_content = self._read_fragment(ctx)

            frag_header = io.BytesIO()
            frag_header.write(
                b'--%b\r\n' % frag_boundary.encode('us-ascii'))
            frag_header.write(
                b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii'))
            # Fall back to JPEG when the image type cannot be detected
            frag_header.write(
                b'Content-type: %b\r\n' % f'image/{imghdr.what(h=frag_content) or "jpeg"}'.encode())
            frag_header.write(
                b'Content-length: %u\r\n' % len(frag_content))
            frag_header.write(
                b'Content-location: %b\r\n' % fragment_url.encode('us-ascii'))
            # The duration is optional (_gen_stub tolerates its absence too),
            # so only emit the header when a value is available
            duration = fragment.get('duration')
            if duration is not None:
                frag_header.write(
                    b'X.yt-dlp.Duration: %f\r\n' % duration)
            frag_header.write(b'\r\n')
            self._append_fragment(
                ctx, frag_header.getvalue() + frag_content + b'\r\n')

        # Closing delimiter of the multipart document
        ctx['dest_stream'].write(
            b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii'))
        return self._finish_frag_download(ctx, info_dict)