[chore]: Pypdfium2 compatibility fix (#1239) · mindee/doctr@4e1985f (original) (raw)

| Original file line number | Diff line number | Diff line change |
@@ -3,7 +3,6 @@
3 3 # This program is licensed under the Apache License 2.0.
4 4 # See LICENSE or go to https://opensource.org/licenses/Apache-2.0 for full license details.
5 5
6 -from pathlib import Path
7 6 from typing import Any, List, Optional
8 7
9 8 import numpy as np
@@ -31,16 +30,12 @@ def read_pdf(
31 30 scale: rendering scale (1 corresponds to 72dpi)
32 31 rgb_mode: if True, the output will be RGB, otherwise BGR
33 32 password: a password to unlock the document, if encrypted
34 - kwargs: additional parameters to :meth:`pypdfium2.PdfDocument.render_to`
33 + kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
35 34
36 35 Returns:
37 36 the list of pages decoded as numpy ndarray of shape H x W x C
38 37 """
39 38
40 -if isinstance(file, Path):
41 -file = str(file)
42 -
43 39 # Rasterise pages to numpy ndarrays with pypdfium2
44 -pdf = pdfium.PdfDocument(file, password=password)
45 -renderer = pdf.render_to(pdfium.BitmapConv.numpy_ndarray, scale=scale, rev_byteorder=rgb_mode, **kwargs)
46 -return [img for img, _ in renderer]
40 +pdf = pdfium.PdfDocument(file, password=password, autoclose=True)
41 +return [page.render(scale=scale, rev_byteorder=rgb_mode, **kwargs).to_numpy() for page in pdf]