For loop and crop pdfs in reticulate does not work

212 Views Asked by At

I tried to run this Python script in RStudio (as a Python script, via reticulate). Somehow it only takes the second file, "path2.pdf", and crops that one, but not the first. If I change the order of the entries in the `files` variable, it crops "path1.pdf" instead. In Jupyter Notebook everything works as expected. Can anyone help?


import io

import PyPDF2
from PyPDF2 import PdfFileReader, PdfFileWriter


files = ["path1.pdf" , "path2.pdf"]

# Crop the first page of every listed PDF and overwrite the file in place.
# Only page 0 is added to the writer, so each output file holds one page.
#
# The loop body is deliberately kept free of blank lines: in an interactive
# interpreter a blank line ends a compound statement, so a loop that is fed
# line-by-line to a REPL would otherwise be cut short at the first gap.
for filepath in files:
    with open(filepath, 'rb') as pdfFileObj:
        # read the pdf object
        pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
        # create pdf writer object
        writerObj = PdfFileWriter()
        page = pdfReader.getPage(0)
        # Report the crop box corners before they are changed.
        print(page.cropBox.getLowerLeft())
        print(page.cropBox.getLowerRight())
        print(page.cropBox.getUpperLeft())
        print(page.cropBox.getUpperRight())
        # New crop box: x from 31 to 190, y from 20 to 95.
        page.cropBox.setLowerLeft((31, 20))
        page.cropBox.setLowerRight((190, 20))
        page.cropBox.setUpperLeft((31, 95))
        page.cropBox.setUpperRight((190, 95))
        # Write the new page
        writerObj.addPage(page)
        # Serialize into memory while the input is still open: opening the
        # same path with 'wb' truncates it, and the reader may still need to
        # pull page data from that file while the writer serializes.
        buffer = io.BytesIO()
        writerObj.write(buffer)
    # The input handle is closed now, so the file can be replaced safely.
    with open(filepath, 'wb') as outstream:
        outstream.write(buffer.getvalue())
1

There is 1 best solution below

0
On

I don't know exactly why, but it seems to work when I create a Python function and then run it from R as below:

# python script


# python_function
def crop_top5(files):
    """Crop the first page of each PDF in *files* and overwrite it in place.

    For every path, the first page is read, its current crop box corners are
    printed, the crop box is set to x in [31, 190] and y in [20, 95], and the
    file is rewritten containing only that single cropped page.

    Args:
        files: An iterable of PDF file paths, or a single path string. A
            lone string is accepted because a length-1 R character vector
            is handed over by reticulate as a plain ``str``; iterating it
            directly would walk over its characters instead of paths.
    """
    import io
    import PyPDF2
    from PyPDF2 import PdfFileReader, PdfFileWriter

    if isinstance(files, str):
        files = [files]

    for filepath in files:
        with open(filepath, 'rb') as pdfFileObj:
            # read the pdf object
            pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
            # create pdf writer object
            writerObj = PdfFileWriter()
            page = pdfReader.getPage(0)
            # Report the crop box corners before they are changed.
            print(page.cropBox.getLowerLeft())
            print(page.cropBox.getLowerRight())
            print(page.cropBox.getUpperLeft())
            print(page.cropBox.getUpperRight())
            page.cropBox.setLowerLeft((31, 20))
            page.cropBox.setLowerRight((190, 20))
            page.cropBox.setUpperLeft((31, 95))
            page.cropBox.setUpperRight((190, 95))
            # Write the new page
            writerObj.addPage(page)
            # Serialize into memory while the input is still open: opening
            # the same path with 'wb' truncates it, and the reader may still
            # need to pull page data from that file during serialization.
            buffer = io.BytesIO()
            writerObj.write(buffer)
        # The input handle is closed now, so the file can be replaced safely.
        with open(filepath, 'wb') as outstream:
            outstream.write(buffer.getvalue())

Now I run this in R:

# Attach reticulate so that source_python() is available in this session.
library(reticulate)

files <- c("path1.pdf", "path2.pdf")

# Run the Python file as a whole and expose crop_top5() as an R function.
source_python("python_function.py")

crop_top5(files)