bpo-32248 - Implement ResourceReader
and get_resource_reader()
for zipimport · python/cpython@6f6eb35 (original) (raw)
`@@ -12,7 +12,7 @@
`
12
12
`from typing import Iterator, Optional, Set, Union # noqa: F401
`
13
13
`from typing import cast
`
14
14
`from typing.io import BinaryIO, TextIO
`
15
``
`-
from zipfile import ZipFile
`
``
15
`+
from zipimport import ZipImportError
`
16
16
``
17
17
``
18
18
`Package = Union[str, ModuleType]
`
`@@ -216,38 +216,7 @@ def is_resource(package: Package, name: str) -> bool:
`
216
216
`# contents doesn't necessarily mean it's a resource. Directories are not
`
217
217
`# resources, so let's try to find out if it's a directory or not.
`
218
218
`path = Path(package.spec.origin).parent / name
`
219
``
`-
if path.is_file():
`
220
``
`-
return True
`
221
``
`-
if path.is_dir():
`
222
``
`-
return False
`
223
``
`-
If it's not a file and it's not a directory, what is it? Well, this
`
224
``
`-
means the file doesn't exist on the file system, so it probably lives
`
225
``
`-
inside a zip file. We have to crack open the zip, look at its table of
`
226
``
`-
contents, and make sure that this entry doesn't have sub-entries.
`
227
``
`-
archive_path = package.spec.loader.archive # type: ignore
`
228
``
`-
package_directory = Path(package.spec.origin).parent
`
229
``
`-
with ZipFile(archive_path) as zf:
`
230
``
`-
toc = zf.namelist()
`
231
``
`-
relpath = package_directory.relative_to(archive_path)
`
232
``
`-
candidate_path = relpath / name
`
233
``
`-
for entry in toc:
`
234
``
`-
try:
`
235
``
`-
relative_to_candidate = Path(entry).relative_to(candidate_path)
`
236
``
`-
except ValueError:
`
237
``
`-
The two paths aren't relative to each other so we can ignore it.
`
238
``
`-
continue
`
239
``
`-
Since directories aren't explicitly listed in the zip file, we must
`
240
``
`-
infer their 'directory-ness' by looking at the number of path
`
241
``
`-
components in the path relative to the package resource we're
`
242
``
`-
looking up. If there are zero additional parts, it's a file, i.e. a
`
243
``
`-
resource. If there are more than zero it's a directory, i.e. not a
`
244
``
`-
resource. It has to be one of these two cases.
`
245
``
`-
return len(relative_to_candidate.parts) == 0
`
246
``
`-
I think it's impossible to get here. It would mean that we are looking
`
247
``
`-
for a resource in a zip file, there's an entry matching it in the return
`
248
``
`-
value of contents(), but we never actually found it in the zip's table of
`
249
``
`-
contents.
`
250
``
`-
raise AssertionError('Impossible situation')
`
``
219
`+
return path.is_file()
`
251
220
``
252
221
``
253
222
`def contents(package: Package) -> Iterator[str]:
`
`@@ -268,38 +237,85 @@ def contents(package: Package) -> Iterator[str]:
`
268
237
`not package.spec.has_location):
`
269
238
`return []
`
270
239
`package_directory = Path(package.spec.origin).parent
`
271
``
`-
try:
`
272
``
`-
yield from os.listdir(str(package_directory))
`
273
``
`-
except (NotADirectoryError, FileNotFoundError):
`
274
``
`-
The package is probably in a zip file.
`
275
``
`-
archive_path = getattr(package.spec.loader, 'archive', None)
`
276
``
`-
if archive_path is None:
`
277
``
`-
raise
`
278
``
`-
relpath = package_directory.relative_to(archive_path)
`
279
``
`-
with ZipFile(archive_path) as zf:
`
280
``
`-
toc = zf.namelist()
`
281
``
`-
subdirs_seen = set() # type: Set
`
282
``
`-
for filename in toc:
`
283
``
`-
path = Path(filename)
`
284
``
`-
Strip off any path component parts that are in common with the
`
285
``
`-
package directory, relative to the zip archive's file system
`
286
``
`-
path. This gives us all the parts that live under the named
`
287
``
`-
package inside the zip file. If the length of these subparts is
`
288
``
`-
exactly 1, then it is situated inside the package. The resulting
`
289
``
`-
length will be 0 if it's above the package, and it will be
`
290
``
`-
greater than 1 if it lives in a subdirectory of the package
`
291
``
`-
directory.
`
292
``
`-
`
293
``
`-
However, since directories themselves don't appear in the zip
`
294
``
`-
archive as a separate entry, we need to return the first path
`
295
``
`-
component for any case that has > 1 subparts -- but only once!
`
296
``
`-
if path.parts[:len(relpath.parts)] != relpath.parts:
`
``
240
`+
yield from os.listdir(str(package_directory))
`
``
241
+
``
242
+
``
243
`+
Private implementation of ResourceReader and get_resource_reader() for
`
``
244
`+
zipimport. Don't use these directly! We're implementing these in Python
`
``
245
`+
because 1) it's easier, 2) zipimport will likely get rewritten in Python
`
``
246
`+
itself at some point, so doing this all in C would just be a waste of
`
``
247
`+
effort.
`
``
248
+
``
249
`+
class _ZipImportResourceReader(resources_abc.ResourceReader):
    """Private class used to support ZipImport.get_resource_reader().

    This class is allowed to reference all the innards and private parts of
    the zipimporter.
    """

    def __init__(self, zipimporter, fullname):
        """Remember the zipimporter and the dotted name of the package."""
        self.zipimporter = zipimporter
        self.fullname = fullname

    def _archive_path(self, name):
        """Return the '/'-separated path of *name* inside this package.

        fullname is a dotted module path, whereas the zipimporter is asked
        for data by file path, so the dots have to become path separators
        before the resource name is appended.
        """
        package_dir = self.fullname.replace('.', '/')
        return f'{package_dir}/{name}'

    def open_resource(self, resource):
        """Return a BytesIO wrapping the bytes of *resource*.

        Raises FileNotFoundError (carrying the attempted path) when
        zipimporter.get_data() fails with OSError.
        """
        path = self._archive_path(resource)
        try:
            return BytesIO(self.zipimporter.get_data(path))
        except OSError as error:
            raise FileNotFoundError(path) from error

    def resource_path(self, resource):
        """Always raise FileNotFoundError."""
        # All resources are in the zip file, so there is no path to the file.
        # Raising FileNotFoundError tells the higher level API to extract the
        # binary data and create a temporary file.
        raise FileNotFoundError

    def is_resource(self, name):
        """Return True if the data for *name* can be read, else False."""
        # Maybe we could do better, but if we can get the data, it's a
        # resource.  Otherwise it isn't.
        path = self._archive_path(name)
        try:
            self.zipimporter.get_data(path)
        except OSError:
            return False
        return True

    def contents(self):
        """Yield the names of the direct children of the package.

        Both files and sub-directories are reported, each name exactly once.
        """
        # This is a bit convoluted, because fullname will be a module path,
        # but _files holds file names relative to the top of the archive's
        # namespace.  We want to compare file paths to find all the names of
        # things inside the module represented by fullname.  So we turn the
        # module path of fullname into a file path relative to the top of
        # the archive, and then we iterate through _files looking for names
        # inside that "directory".
        fullname_path = Path(self.zipimporter.get_filename(self.fullname))
        relative_path = fullname_path.relative_to(self.zipimporter.archive)
        # Don't forget that fullname names a package, so its path will
        # include __init__.py, which we want to ignore.
        assert relative_path.name == '__init__.py'
        package_path = relative_path.parent
        names_seen = set()
        for filename in self.zipimporter._files:
            try:
                relative = Path(filename).relative_to(package_path)
            except ValueError:
                # Not located under the package directory at all.
                continue
            parts = relative.parts
            if not parts:
                # The entry is the package directory itself, not a child.
                continue
            # The first component is the direct child of the package: the
            # file itself for a one-part path, otherwise the sub-directory
            # that (possibly several levels down) contains the file.
            child = parts[0]
            if child not in names_seen:
                names_seen.add(child)
                yield child
`
``
313
+
``
314
+
``
315
`+
def _zipimport_get_resource_reader(zipimporter, fullname):
    """Return a resource reader for *fullname*, or None if it isn't a package.

    A ZipImportError raised by zipimporter.is_package() is treated the same
    as "not a package": None is returned.  Otherwise a
    _ZipImportResourceReader bound to *zipimporter* and *fullname* is
    returned.
    """
    try:
        # Keep the try body down to the single call that can raise.
        is_package = zipimporter.is_package(fullname)
    except ZipImportError:
        return None
    if not is_package:
        return None
    return _ZipImportResourceReader(zipimporter, fullname)
`