| @@ -3,7 +3,6 @@ |
|
|
| 3 |
3 |
# This program is licensed under the Apache License 2.0. |
| 4 |
4 |
# See LICENSE or go to https://opensource.org/licenses/Apache-2.0 for full license details. |
| 5 |
5 |
|
| 6 |
|
-from pathlib import Path |
| 7 |
6 |
from typing import Any, List, Optional |
| 8 |
7 |
|
| 9 |
8 |
import numpy as np |
| @@ -31,16 +30,12 @@ def read_pdf( |
|
|
| 31 |
30 |
scale: rendering scale (1 corresponds to 72dpi) |
| 32 |
31 |
rgb_mode: if True, the output will be RGB, otherwise BGR |
| 33 |
32 |
password: a password to unlock the document, if encrypted |
| 34 |
|
- kwargs: additional parameters to :meth:`pypdfium2.PdfDocument.render_to` |
|
33 |
+ kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render` |
| 35 |
34 |
|
| 36 |
35 |
Returns: |
| 37 |
36 |
the list of pages decoded as numpy ndarray of shape H x W x C |
| 38 |
37 |
""" |
| 39 |
38 |
|
| 40 |
|
-if isinstance(file, Path): |
| 41 |
|
-file = str(file) |
| 42 |
|
- |
| 43 |
39 |
# Rasterise pages to numpy ndarrays with pypdfium2 |
| 44 |
|
-pdf = pdfium.PdfDocument(file, password=password) |
| 45 |
|
-renderer = pdf.render_to(pdfium.BitmapConv.numpy_ndarray, scale=scale, rev_byteorder=rgb_mode, **kwargs) |
| 46 |
|
-return [img for img, _ in renderer] |
|
40 |
+pdf = pdfium.PdfDocument(file, password=password, autoclose=True) |
|
41 |
+return [page.render(scale=scale, rev_byteorder=rgb_mode, **kwargs).to_numpy() for page in pdf] |