from pdfminer.high_level import extract_pages
from pdfminer.layout import LTContainer, LTTextContainer, LTChar
def mostrar_estructura(pagina):
    """Walk a pdfminer layout page, turning every "O" glyph into "X" and printing it.

    The layout tree is traversed depth-first. Each ``LTChar`` whose text is
    exactly ``"O"`` has its text overwritten with ``"X"`` and is then printed.
    Elements that are neither ``LTChar`` nor ``LTContainer`` are ignored.

    The replacement is applied only to the in-memory layout object; this
    function does not write anything back to the PDF file.

    Args:
        pagina: An iterable of pdfminer layout elements, such as the pages
            produced by ``extract_pages``.
    """

    def buscar_ltchar(elemento):
        """Recursively visit ``elemento`` and process any ``LTChar`` found."""
        if isinstance(elemento, LTChar):
            if elemento.get_text() == "O":
                # The glyph text is stored on the private ``_text`` attribute,
                # so it is overwritten directly.
                elemento._text = "X"
                # NOTE(review): the original indentation was lost; the print is
                # assumed to belong to the replacement branch - confirm.
                print(elemento)
        elif isinstance(elemento, LTContainer):
            # Containers (text boxes, lines, figures, ...) hold child elements.
            for subelemento in elemento:
                buscar_ltchar(subelemento)

    for elemento in pagina:
        buscar_ltchar(elemento)
# Open the PDF file in binary read mode
with open("estructura.pdf", "rb") as archivo_pdf:
    # Iterate over the layout pages produced by extract_pages; enumerate is
    # zero-based, so 1 is added when printing the page number.
    for pagina_num, pagina in enumerate(extract_pages(archivo_pdf)):
        print(f"Estructura de la página {pagina_num + 1}:")
        mostrar_estructura(pagina)
When I try to save it, whether I use fitz or PyPDF2's writer, I always get this error:
File "C:\Users\David\AppData\Local\Programs\Python\Python312\Lib\site-packages\PyPDF2\_writer.py", line 258, in _add_page
assert cast(str, page[PA.TYPE]) == CO.PAGE
~~~~^^^^^^^^^
The page object is an LTPage, but I don't know how to convert it into a format that lets me save the changes.