]>
jfr.im git - yt-dlp.git/blob - yt_dlp/downloader/mhtml.py
6 from .fragment
import FragmentFD
7 from ..utils
import escapeHTML
, formatSeconds
, srt_subtitles_timecode
, urljoin
8 from ..version
import __version__
as YT_DLP_VERSION
11 class MhtmlFD(FragmentFD
):
23 scroll-snap-type: y mandatory;
27 scroll-snap-type: y mandatory;
35 scroll-snap-align: center;
38 body > figure > figcaption {
47 max-height: calc(100vh - 5em);
# Minify the stylesheet in two passes: the inner call collapses every run of
# whitespace to a single space, the outer call then deletes the spaces that do
# not separate two identifier-like ([\w-]) characters.
_STYLESHEET = re.sub(
    r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])',
    '',
    re.sub(r'\s+', ' ', _STYLESHEET))
# NOTE(review): the enclosing `def` line is not part of this view; this is
# presumably the body of `_escape_mime`, which is called further down as
# `self._escape_mime(title)` -- confirm against the full file.
#
# Returns `s` as a MIME encoded-word ('=?utf-8?Q?' ... '?='): the text is
# UTF-8 encoded and passed through quopri.encodestring(..., header=True);
# every resulting byte below 0x20 is additionally rewritten as '=XX', the
# other bytes are kept as they are, and the result is decoded as US-ASCII.
55 return '=?utf-8?Q?' + (b
''.join(
56 bytes((b
,)) if b
>= 0x20 else b
'=%02X' % b
57 for b
in quopri
.encodestring(s
.encode('utf-8'), header
=True)
58 )).decode('us-ascii') + '?='
60 def _gen_cid(self
, i
, fragment
, frag_boundary
):
61 return '%u.%s@yt-dlp.github.io.invalid' % (i
, frag_boundary
)
# _gen_stub: builds the HTML index document for the slides in an io.StringIO
# buffer and returns its contents as a str.
#   fragments     -- iterated with enumerate(); each item's 'duration' is read
#   frag_boundary -- forwarded unchanged to _gen_cid for the <img> references
#   title         -- HTML-escaped into the <title> element
# All three are keyword-only (bare `*` in the signature).
# NOTE(review): the embedded original line numbers jump (64 -> 70, 72 -> 75,
# 77 -> 81, ...), so several statements of this method are missing from this
# view; only what is visible is described here.
63 def _gen_stub(self
, *, fragments
, frag_boundary
, title
):
64 output
= io
.StringIO()
70 '' '<meta name="generator" content="yt-dlp {version}">'
71 '' '<title>{title}</title>'
72 '' '<style>{styles}</style>'
75 version
=escapeHTML(YT_DLP_VERSION
),
76 styles
=self
._STYLESHEET
,
77 title
=escapeHTML(title
)
# One <figure> element is emitted per fragment, in input order.
81 for i
, frag
in enumerate(fragments
):
82 output
.write('<figure>')
# t1 is t0 plus this fragment's duration; where t0 is initialised/advanced is
# not visible in this view.
84 t1
= t0
+ frag
['duration']
86 '<figcaption>Slide #{num}: {t0} – {t1} (duration: {duration})</figcaption>'
89 t0
=srt_subtitles_timecode(t0
),
90 t1
=srt_subtitles_timecode(t1
),
91 duration
=formatSeconds(frag
['duration'], msec
=True)
# On KeyError/ValueError/TypeError (presumably a missing or unusable
# 'duration' -- the matching `try:` is not visible) the caption carries only
# the slide number.
93 except (KeyError, ValueError, TypeError):
96 '<figcaption>Slide #{num}</figcaption>'
# The image is referenced by Content-ID ("cid:" URL) produced by _gen_cid.
98 output
.write('<img src="cid:{cid}">'.format(
99 cid
=self
._gen
_cid
(i
, frag
, frag_boundary
)))
100 output
.write('</figure>')
103 return output
.getvalue()
# real_download (first part): reads its inputs from info_dict, prepares the
# fragment-download context, writes the multipart header once, then starts
# iterating over the fragments and fetching each one.
# NOTE(review): the embedded original line numbers jump (110 -> 113,
# 114 -> 117, 141 -> 144, ...), so several statements are missing from this
# view; only what is visible is described here.
105 def real_download(self
, filename
, info_dict
):
106 fragment_base_url
= info_dict
.get('fragment_base_url')
# Only the first fragment is kept when the 'test' param is truthy.
107 fragments
= info_dict
['fragments'][:1] if self
.params
.get(
108 'test', False) else info_dict
['fragments']
# title falls back to 'format_id', origin falls back to 'url'. Both fallbacks
# are evaluated eagerly, so 'format_id' and 'url' must exist in info_dict.
109 title
= info_dict
.get('title', info_dict
['format_id'])
110 origin
= info_dict
.get('webpage_url', info_dict
['url'])
113 'filename': filename
,
114 'total_frags': len(fragments
),
117 self
._prepare
_and
_start
_frag
_download
(ctx
, info_dict
)
# setdefault: the defaults below apply only when ctx has no 'extra_state' yet.
# The boundary is a random UUID4 with its dashes removed.
119 extra_state
= ctx
.setdefault('extra_state', {
120 'header_written': False,
121 'mime_boundary': str(uuid
.uuid4()).replace('-', ''),
124 frag_boundary
= extra_state
['mime_boundary']
# The top-level MIME header and the HTML stub part are written to the
# destination stream only while 'header_written' is still False.
126 if not extra_state
['header_written']:
127 stub
= self
._gen
_stub
(
129 frag_boundary
=frag_boundary
,
133 ctx
['dest_stream'].write((
134 'MIME-Version: 1.0\r\n'
135 'From: <nowhere@yt-dlp.github.io.invalid>\r\n'
136 'To: <nowhere@yt-dlp.github.io.invalid>\r\n'
137 'Subject: {title}\r\n'
138 'Content-type: multipart/related; '
139 '' 'boundary="{boundary}"; '
140 '' 'type="text/html"\r\n'
141 'X.yt-dlp.Origin: {origin}\r\n'
144 'Content-Type: text/html; charset=utf-8\r\n'
145 'Content-Length: {length}\r\n'
150 boundary
=frag_boundary
,
# The Subject value goes through _escape_mime before being formatted in.
152 title
=self
._escape
_mime
(title
),
155 extra_state
['header_written'] = True
# The branch body of the index check below is not visible; presumably it skips
# fragments at or before ctx['fragment_index'] -- confirm against the full file.
157 for i
, fragment
in enumerate(fragments
):
158 if (i
+ 1) <= ctx
['fragment_index']:
161 fragment_url
= fragment
.get('url')
# fragment['path'] is resolved against fragment_base_url.
# NOTE(review): `assert` is stripped under `python -O`, so it does not
# validate fragment_base_url in optimised runs.
163 assert fragment_base_url
164 fragment_url
= urljoin(fragment_base_url
, fragment
['path'])
166 success
= self
._download
_fragment
(ctx
, fragment_url
, info_dict
)
169 frag_content
= self
._read
_fragment
(ctx
)
# Sniff the image type from the payload's magic bytes; JPEG is the fallback
# when no known signature matches.
mime_type = b'image/jpeg'
if frag_content.startswith(b'\x89PNG\r\n\x1a\n'):
    mime_type = b'image/png'
elif frag_content.startswith((b'GIF87a', b'GIF89a')):
    mime_type = b'image/gif'
elif frag_content.startswith(b'RIFF') and frag_content[8:12] == b'WEBP':
    # frag_content is bytes, so the FourCC at offset 8 must be compared with a
    # bytes literal: bytes == str is always False in Python 3, which made this
    # branch unreachable and labelled WebP slides as image/jpeg.
    mime_type = b'image/webp'
# Per-fragment MIME part: the part header is assembled as bytes in a BytesIO
# buffer, then header + raw fragment content + CRLF is handed to
# _append_fragment.
# NOTE(review): the embedded original line numbers jump (179 -> 181 -> 183 ...),
# so the statements wrapping these byte-format expressions (and any condition
# guarding the duration line) are missing from this view.
179 frag_header
= io
.BytesIO()
181 b
'--%b\r\n' % frag_boundary
.encode('us-ascii'))
# Content-ID comes from _gen_cid with the same (i, fragment, frag_boundary)
# arguments.
183 b
'Content-ID: <%b>\r\n' % self
._gen
_cid
(i
, fragment
, frag_boundary
).encode('us-ascii'))
185 b
'Content-type: %b\r\n' % mime_type
)
187 b
'Content-length: %u\r\n' % len(frag_content
))
# NOTE(review): .encode('us-ascii') raises UnicodeEncodeError if fragment_url
# contains non-ASCII characters.
189 b
'Content-location: %b\r\n' % fragment_url
.encode('us-ascii'))
191 b
'X.yt-dlp.Duration: %f\r\n' % fragment
['duration'])
# An empty line terminates the part header.
192 frag_header
.write(b
'\r\n')
193 self
._append
_fragment
(
194 ctx
, frag_header
.getvalue() + frag_content
+ b
'\r\n')
# Closing delimiter of the multipart body ("--<boundary>--"), written straight
# to the destination stream before the download is finalised.
196 ctx
['dest_stream'].write(
197 b
'--%b--\r\n\r\n' % frag_boundary
.encode('us-ascii'))
198 self
._finish
_frag
_download
(ctx
, info_dict
)