]>
jfr.im git - yt-dlp.git/blob - yt_dlp/downloader/mhtml.py
6 from .fragment
import FragmentFD
7 from ..utils
import escapeHTML
, formatSeconds
, srt_subtitles_timecode
, urljoin
8 from ..version
import __version__
as YT_DLP_VERSION
11 class MhtmlFD(FragmentFD
):
21 scroll-snap-type: y mandatory;
25 scroll-snap-type: y mandatory;
33 scroll-snap-align: center;
36 body > figure > figcaption {
45 max-height: calc(100vh - 5em);
# Minify the stylesheet in two passes. First collapse every run of whitespace
# (newlines and indentation included) into a single space ...
_STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
# ... then drop the spaces that are still adjacent to punctuation: a space is
# removed when both of its neighbours are non-word characters (or a string
# edge), or when exactly one neighbour is a word character or hyphen and the
# other is not. Spaces between two word/hyphen characters are kept.
_STYLESHEET = re.sub(
    r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)
53 return '=?utf-8?Q?' + (b
''.join(
54 bytes((b
,)) if b
>= 0x20 else b
'=%02X' % b
55 for b
in quopri
.encodestring(s
.encode(), header
=True)
56 )).decode('us-ascii') + '?='
def _gen_cid(self, i, fragment, frag_boundary):
    """Return the MIME Content-ID string for the fragment at index ``i``.

    The ID is ``<index>.<boundary>@yt-dlp.github.io.invalid``. The same value
    is embedded in the HTML stub as the ``cid:`` image source and written as
    the ``Content-ID`` header of the matching MIME part, so both sides must
    be produced by this one method.

    Args:
        i: Zero-based fragment index (formatted with ``%u``).
        fragment: The fragment dict; accepted but not used here.
        frag_boundary: The MIME boundary string of the document.

    Returns:
        The Content-ID as a ``str``, without surrounding angle brackets.
    """
    return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary)
61 def _gen_stub(self
, *, fragments
, frag_boundary
, title
):
62 output
= io
.StringIO()
68 '' '<meta name="generator" content="yt-dlp {version}">'
69 '' '<title>{title}</title>'
70 '' '<style>{styles}</style>'
73 version
=escapeHTML(YT_DLP_VERSION
),
74 styles
=self
._STYLESHEET
,
75 title
=escapeHTML(title
)
79 for i
, frag
in enumerate(fragments
):
80 output
.write('<figure>')
82 t1
= t0
+ frag
['duration']
84 '<figcaption>Slide #{num}: {t0} – {t1} (duration: {duration})</figcaption>'
87 t0
=srt_subtitles_timecode(t0
),
88 t1
=srt_subtitles_timecode(t1
),
89 duration
=formatSeconds(frag
['duration'], msec
=True)
91 except (KeyError, ValueError, TypeError):
94 '<figcaption>Slide #{num}</figcaption>'
96 output
.write('<img src="cid:{cid}">'.format(
97 cid
=self
._gen
_cid
(i
, frag
, frag_boundary
)))
98 output
.write('</figure>')
101 return output
.getvalue()
103 def real_download(self
, filename
, info_dict
):
104 fragment_base_url
= info_dict
.get('fragment_base_url')
105 fragments
= info_dict
['fragments'][:1] if self
.params
.get(
106 'test', False) else info_dict
['fragments']
107 title
= info_dict
.get('title', info_dict
['format_id'])
108 origin
= info_dict
.get('webpage_url', info_dict
['url'])
111 'filename': filename
,
112 'total_frags': len(fragments
),
115 self
._prepare
_and
_start
_frag
_download
(ctx
, info_dict
)
117 extra_state
= ctx
.setdefault('extra_state', {
118 'header_written': False,
119 'mime_boundary': str(uuid
.uuid4()).replace('-', ''),
122 frag_boundary
= extra_state
['mime_boundary']
124 if not extra_state
['header_written']:
125 stub
= self
._gen
_stub
(
127 frag_boundary
=frag_boundary
,
131 ctx
['dest_stream'].write((
132 'MIME-Version: 1.0\r\n'
133 'From: <nowhere@yt-dlp.github.io.invalid>\r\n'
134 'To: <nowhere@yt-dlp.github.io.invalid>\r\n'
135 'Subject: {title}\r\n'
136 'Content-type: multipart/related; '
137 '' 'boundary="{boundary}"; '
138 '' 'type="text/html"\r\n'
139 'X.yt-dlp.Origin: {origin}\r\n'
142 'Content-Type: text/html; charset=utf-8\r\n'
143 'Content-Length: {length}\r\n'
148 boundary
=frag_boundary
,
150 title
=self
._escape
_mime
(title
),
153 extra_state
['header_written'] = True
155 for i
, fragment
in enumerate(fragments
):
156 if (i
+ 1) <= ctx
['fragment_index']:
159 fragment_url
= fragment
.get('url')
161 assert fragment_base_url
162 fragment_url
= urljoin(fragment_base_url
, fragment
['path'])
164 success
= self
._download
_fragment
(ctx
, fragment_url
, info_dict
)
167 frag_content
= self
._read
_fragment
(ctx
)
169 mime_type
= b
'image/jpeg'
170 if frag_content
.startswith(b
'\x89PNG\r\n\x1a\n'):
171 mime_type
= b
'image/png'
172 if frag_content
.startswith((b
'GIF87a', b
'GIF89a')):
173 mime_type
= b
'image/gif'
174 if frag_content
.startswith(b
'RIFF') and frag_content
[8:12] == b
'WEBP':
175 mime_type
= b
'image/webp'
177 frag_header
= io
.BytesIO()
179 b
'--%b\r\n' % frag_boundary
.encode('us-ascii'))
181 b
'Content-ID: <%b>\r\n' % self
._gen
_cid
(i
, fragment
, frag_boundary
).encode('us-ascii'))
183 b
'Content-type: %b\r\n' % mime_type
)
185 b
'Content-length: %u\r\n' % len(frag_content
))
187 b
'Content-location: %b\r\n' % fragment_url
.encode('us-ascii'))
189 b
'X.yt-dlp.Duration: %f\r\n' % fragment
['duration'])
190 frag_header
.write(b
'\r\n')
191 self
._append
_fragment
(
192 ctx
, frag_header
.getvalue() + frag_content
+ b
'\r\n')
194 ctx
['dest_stream'].write(
195 b
'--%b--\r\n\r\n' % frag_boundary
.encode('us-ascii'))
196 self
._finish
_frag
_download
(ctx
, info_dict
)