bpo-32248 - Implement ResourceReader and get_resource_reader() f… · python/cpython@6f6eb35 (original) (raw)

`@@ -12,7 +12,7 @@

`

12

12

`from typing import Iterator, Optional, Set, Union # noqa: F401

`

13

13

`from typing import cast

`

14

14

`from typing.io import BinaryIO, TextIO

`

15

``

`-

from zipfile import ZipFile

`

``

15

`+

from zipimport import ZipImportError

`

16

16

``

17

17

``

18

18

`Package = Union[str, ModuleType]

`

`@@ -216,38 +216,7 @@ def is_resource(package: Package, name: str) -> bool:

`

216

216

`# contents doesn't necessarily mean it's a resource. Directories are not

`

217

217

`# resources, so let's try to find out if it's a directory or not.

`

218

218

`path = Path(package.spec.origin).parent / name

`

219

``

`-

if path.is_file():

`

220

``

`-

return True

`

221

``

`-

if path.is_dir():

`

222

``

`-

return False

`

223

``

`-

If it's not a file and it's not a directory, what is it? Well, this

`

224

``

`-

means the file doesn't exist on the file system, so it probably lives

`

225

``

`-

inside a zip file. We have to crack open the zip, look at its table of

`

226

``

`-

contents, and make sure that this entry doesn't have sub-entries.

`

227

``

`-

archive_path = package.spec.loader.archive # type: ignore

`

228

``

`-

package_directory = Path(package.spec.origin).parent

`

229

``

`-

with ZipFile(archive_path) as zf:

`

230

``

`-

toc = zf.namelist()

`

231

``

`-

relpath = package_directory.relative_to(archive_path)

`

232

``

`-

candidate_path = relpath / name

`

233

``

`-

for entry in toc:

`

234

``

`-

try:

`

235

``

`-

relative_to_candidate = Path(entry).relative_to(candidate_path)

`

236

``

`-

except ValueError:

`

237

``

`-

The two paths aren't relative to each other so we can ignore it.

`

238

``

`-

continue

`

239

``

`-

Since directories aren't explicitly listed in the zip file, we must

`

240

``

`-

infer their 'directory-ness' by looking at the number of path

`

241

``

`-

components in the path relative to the package resource we're

`

242

``

`-

looking up. If there are zero additional parts, it's a file, i.e. a

`

243

``

`-

resource. If there are more than zero it's a directory, i.e. not a

`

244

``

`-

resource. It has to be one of these two cases.

`

245

``

`-

return len(relative_to_candidate.parts) == 0

`

246

``

`-

I think it's impossible to get here. It would mean that we are looking

`

247

``

`-

for a resource in a zip file, there's an entry matching it in the return

`

248

``

`-

value of contents(), but we never actually found it in the zip's table of

`

249

``

`-

contents.

`

250

``

`-

raise AssertionError('Impossible situation')

`

``

219

`+

return path.is_file()

`

251

220

``

252

221

``

253

222

`def contents(package: Package) -> Iterator[str]:

`

`@@ -268,38 +237,85 @@ def contents(package: Package) -> Iterator[str]:

`

268

237

`not package.spec.has_location):

`

269

238

`return []

`

270

239

`package_directory = Path(package.spec.origin).parent

`

271

``

`-

try:

`

272

``

`-

yield from os.listdir(str(package_directory))

`

273

``

`-

except (NotADirectoryError, FileNotFoundError):

`

274

``

`-

The package is probably in a zip file.

`

275

``

`-

archive_path = getattr(package.spec.loader, 'archive', None)

`

276

``

`-

if archive_path is None:

`

277

``

`-

raise

`

278

``

`-

relpath = package_directory.relative_to(archive_path)

`

279

``

`-

with ZipFile(archive_path) as zf:

`

280

``

`-

toc = zf.namelist()

`

281

``

`-

subdirs_seen = set() # type: Set

`

282

``

`-

for filename in toc:

`

283

``

`-

path = Path(filename)

`

284

``

`-

Strip off any path component parts that are in common with the

`

285

``

`-

package directory, relative to the zip archive's file system

`

286

``

`-

path. This gives us all the parts that live under the named

`

287

``

`-

package inside the zip file. If the length of these subparts is

`

288

``

`-

exactly 1, then it is situated inside the package. The resulting

`

289

``

`-

length will be 0 if it's above the package, and it will be

`

290

``

`-

greater than 1 if it lives in a subdirectory of the package

`

291

``

`-

directory.

`

292

``

`-

`

293

``

`-

However, since directories themselves don't appear in the zip

`

294

``

`-

archive as a separate entry, we need to return the first path

`

295

``

`-

component for any case that has > 1 subparts -- but only once!

`

296

``

`-

if path.parts[:len(relpath.parts)] != relpath.parts:

`

``

240

`+

yield from os.listdir(str(package_directory))

`

``

241

+

``

242

+

``

243

`+

# Private implementation of ResourceReader and get_resource_reader() for

`

``

244

`+

# zipimport. Don't use these directly! We're implementing these in Python

`

``

245

`+

# because 1) it's easier, 2) zipimport will likely get rewritten in Python

`

``

246

`+

# itself at some point, so doing this all in C would just be a waste of

`

``

247

`+

# effort.

`

``

248

+

``

249

`+

class _ZipImportResourceReader(resources_abc.ResourceReader):
    """Private class used to support ZipImport.get_resource_reader().

    This class is allowed to reference all the innards and private parts of
    the zipimporter.
    """

    def __init__(self, zipimporter, fullname):
        """Remember the importer and the package this reader serves.

        zipimporter -- object providing get_data(), get_filename(), archive
                       and _files, as used by the methods below.
        fullname    -- module path of the package (may be dotted).
        """
        self.zipimporter = zipimporter
        self.fullname = fullname

    def _archive_path(self, name):
        """Return the '/'-separated path handed to get_data() for *name*.

        fullname is a module path, so a sub-package arrives as 'pkg.sub';
        the dots must become path separators before the resource name is
        appended, otherwise only top-level packages can be looked up.
        """
        package_dir = self.fullname.replace('.', '/')
        return f'{package_dir}/{name}'

    def open_resource(self, resource):
        """Return a binary file-like object holding the bytes of *resource*.

        Raises FileNotFoundError (carrying the looked-up path) when the
        importer cannot supply the data.
        """
        path = self._archive_path(resource)
        try:
            return BytesIO(self.zipimporter.get_data(path))
        except OSError as exc:
            raise FileNotFoundError(path) from exc

    def resource_path(self, resource):
        """Always raise FileNotFoundError.

        All resources are in the zip file, so there is no path to the file.
        Raising FileNotFoundError tells the higher level API to extract the
        binary data and create a temporary file.
        """
        raise FileNotFoundError(resource)

    def is_resource(self, name):
        """Return True if the importer can return data for *name*."""
        # Maybe we could do better, but if we can get the data, it's a
        # resource. Otherwise it isn't.
        path = self._archive_path(name)
        try:
            self.zipimporter.get_data(path)
        except OSError:
            return False
        return True

    def contents(self):
        """Yield each name directly inside the package, exactly once.

        Both files and the first component of deeper entries (i.e. the
        immediate sub-directories) are produced.
        """
        # This is a bit convoluted, because fullname will be a module path,
        # but _files is a list of file names relative to the top of the
        # archive's namespace. We want to compare file paths to find all the
        # names of things inside the module represented by fullname. So we
        # turn the module path of fullname into a file path relative to the
        # top of the archive, and then we iterate through _files looking for
        # names inside that "directory".
        fullname_path = Path(self.zipimporter.get_filename(self.fullname))
        relative_path = fullname_path.relative_to(self.zipimporter.archive)
        # Don't forget that fullname names a package, so its path will
        # include __init__.py, which we want to ignore.
        assert relative_path.name == '__init__.py'
        package_path = relative_path.parent
        names_seen = set()
        for filename in self.zipimporter._files:
            try:
                relative = Path(filename).relative_to(package_path)
            except ValueError:
                # Not located under this package at all.
                continue
            parts = relative.parts
            if not parts:
                # The entry is the package directory itself; it has no name
                # relative to the package, so there is nothing to report.
                continue
            # The first component is always the direct child: the file
            # itself for a one-part path, otherwise the top-most
            # sub-directory no matter how deeply the entry is nested.
            child = parts[0]
            if child not in names_seen:
                names_seen.add(child)
                yield child

`

``

313

+

``

314

+

``

315

`+

def _zipimport_get_resource_reader(zipimporter, fullname):
    """Return a resource reader for *fullname*, or None if it is not a package.

    None is also returned when the importer raises ZipImportError while
    being asked whether *fullname* is a package.
    """
    try:
        names_a_package = zipimporter.is_package(fullname)
    except ZipImportError:
        return None
    if not names_a_package:
        return None
    return _ZipImportResourceReader(zipimporter, fullname)

`