PyPDF2 returns empty PDF after copying
def EncryptPDFFiles(password, directory):
pdfFiles = []
success = 0
# Get all PDF files from a directory
for folderName, subFolders, fileNames in os.walk(directory):
for fileName in fileNames:
if (fileName.endswith(".pdf")):
pdfFiles.append(os.path.join(folderName, fileName))
print("%s PDF documents found." % str(len(pdfFiles)))
# Create an encrypted version for each document
for pdf in pdfFiles:
# Copy old PDF into a new PDF object
pdfFile = open(pdf,"rb")
pdfReader = PyPDF2.PdfFileReader(pdfFile)
pdfWriter = PyPDF2.PdfFileWriter()
for pageNum in range(pdfReader.numPages):
pdfWriter.addPage(pdfReader.getPage(pageNum))
pdfFile.close()
# Encrypt the new PDF and save it
saveName = pdf.replace(".pdf",ENCRYPTION_TAG)
pdfWriter.encrypt(password)
newFile = open(saveName, "wb")
pdfWriter.write(newFile)
newFile.close()
print("%s saved to: %s" % (pdf, saveName))
# Verify the the encrypted PDF encrypted properly
encryptedPdfFile = open(saveName,"rb")
encryptedPdfReader = PyPDF2.PdfFileReader(encryptedPdfFile)
canDecrypt = encryptedPdfReader.decrypt(password)
encryptedPdfFile.close()
if (canDecrypt):
print("%s successfully encrypted." % (pdf))
send2trash.send2trash(pdf)
success += 1
print("%s of %s successfully encrypted." % (str(success),str(len(pdfFiles))))
I follow along with Pythons Automate the Boring Stuff. I'm having trouble copying a PDF document, but as of now, every time I run the program, my copied PDF is all blank pages. My encrypted PDF has the correct number of pages, but they are all blank (no content on the pages). I have happened before, but have not been able to recreate. I tried to quit in my sleep before closing my files. I'm not sure if the best practice for opening and closing files is in Python. I am using Python3 for reference.
+3
source to share
1 answer
Try moving your pdfFile.close to the very end of the for loop.
for pdf in pdfFiles:
#
# {stuff}
#
if (canDecrypt):
print("%s successfully encrypted." % (pdf))
send2trash.send2trash(pdf)
success += 1
pdfFile.close()
Thought that pdfFile should be available and open when pdfWriter finally writes out, otherwise it won't be able to access pages to write new file.
+3
source to share