Pythonanywhere PyPDF2 module PdfReader won't work

344 Views Asked by At

I have currently developed a PDF reader app where users can upload PDF files and add their own keywords. These keywords are extracted from the uploaded text, allowing the uploaders to identify and count the occurrence of specific keywords. However, I am encountering an issue with the app. I keep receiving the error message

AttributeError: module 'PyPDF2' has no attribute 'PdfReader'

I have installed PyPDF2 using the bash command in the app, but unfortunately, nothing seems to be working. Can you please help me identify what I might be doing wrong?

I have tried to restart the app completely from scratch. I have also referred to the following forum. The PyPDF2 library is present in the Lib directory. Also tried:

from PyPDF2 import PdfReader

I am running this version of PyPDF2:

PyPDF2==3.0.1

The code I am using:

from flask import Flask, request
import PyPDF2
import difflib

# Flask application object; the route below is registered on it.
app = Flask(__name__)

# Module-level keyword list read by analyze_pdf.
# NOTE(review): it starts with a single empty string, and '' is a substring
# of every string, so a plain `'' in text` test on it always succeeds.
keywords = ['']

def _page_mentions(keyword, page_text, page_words):
    """Return True if *keyword* occurs on a page, exactly or approximately.

    ``keyword`` and ``page_text`` must already be lower-cased. The cheap
    exact substring test runs first; only when it fails is the keyword
    fuzzy-matched against the individual words of the page.
    """
    if keyword in page_text:
        return True
    # cutoff=0.8 tolerates small typos and text-extraction glitches.
    return bool(difflib.get_close_matches(keyword, page_words, n=1, cutoff=0.8))


def analyze_pdf(file_path, search_keywords):
    """Count, per keyword, the pages of a PDF on which the keyword appears.

    Args:
        file_path: Path of the PDF file to read.
        search_keywords: Comma-separated keywords; surrounding whitespace is
            stripped and empty entries are ignored. ``None`` is treated as
            an empty string.

    Returns:
        A dict with two keys: ``'keyword_counts'`` maps every searched
        keyword to the number of pages on which it matched (case-insensitive
        substring match, or a close match against a single word), and
        ``'author'`` holds the ``/Author`` metadata entry, or ``None`` when
        the document has no metadata or no author.
    """
    requested = [part.strip() for part in (search_keywords or '').split(',')]

    # The module-level ``keywords`` are searched too, but the list is no
    # longer mutated, so terms from one call do not leak into the next.
    # Empty strings are dropped because '' is a substring of every page;
    # dict.fromkeys de-duplicates while keeping first-seen order.
    keyword_counts = dict.fromkeys(
        (keyword for keyword in [*keywords, *requested] if keyword), 0
    )

    with open(file_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        for page in pdf_reader.pages:
            # Guard against a page that yields no text at all.
            text = (page.extract_text() or '').lower()
            words = set(text.split())
            for keyword in keyword_counts:
                if _page_mentions(keyword.lower(), text, words):
                    keyword_counts[keyword] += 1
        # A PDF is not required to carry a document-information dictionary.
        metadata = pdf_reader.metadata
        author = metadata.get('/Author') if metadata is not None else None
    return {'keyword_counts': keyword_counts, 'author': author}

@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the upload form (GET) or the keyword report for an upload (POST).

    On POST the uploaded file is saved, passed to ``analyze_pdf`` together
    with the submitted keywords, and an HTML summary is returned. A request
    whose file has no usable name is answered with HTTP 400.
    """
    # Function-scope imports keep this view self-contained; both are stdlib.
    import os
    from html import escape

    if request.method == 'POST':
        file = request.files['file']
        search_keywords = request.form.get('keywords', '')

        # The filename is chosen by the client: keep only its last path
        # component so it cannot point outside the upload directory.
        filename = os.path.basename((file.filename or '').replace('\\', '/'))
        if filename in ('', '.', '..'):
            return 'No file selected.', 400
        # NOTE(review): the upload directory is a hard-coded Windows path;
        # confirm it matches the deployment host.
        file_path = fr"C:\uploads\{filename}"
        file.save(file_path)

        result = analyze_pdf(file_path, search_keywords)
        # Author (from the PDF) and keywords (from the form) are untrusted,
        # so both are HTML-escaped before being placed in the page.
        author = escape(str(result['author']))
        matches = {k: v for k, v in result['keyword_counts'].items() if v > 0}

        heading = 'Matches found!' if matches else 'No matches found.'
        rows = '<br>'.join(f'{escape(str(k))}: {v}' for k, v in matches.items())
        details = f'<div>{rows}</div>' if matches else ''
        return f'''
            <div style='display: flex; justify-content: center; align-items: center; height: 100vh; font-family: Arial, sans-serif;'>
                <div class="flex-container">
                    <div><h1 style="font-size: 24px; font-weight: bold;">{heading}</h1></div>
                    <div><h3 style="font-size: 16px; font-weight: bold;">Author: {author}</h3></div>
                    {details}
                </div>
            </div>
        '''

    return '''
        <div style='display: flex; justify-content: center; align-items: center; height: 100vh;'>
            <form method="POST" enctype="multipart/form-data">
                <input type="file" name="file">
                <label for="keywords">Keywords:</label>
                <input type="text" name="keywords" id="keywords">
                <input type="submit" value="Upload">
            </form>
        </div>
    '''

# Start Flask's built-in server only when this file is executed directly.
# debug=True is passed to app.run; it is meant for local development only.
if __name__ == "__main__":
    app.run(debug=True)
2

There are 2 best solutions below

0
Kars On

This is solved. Following the comment from @JensV, I switched from PyPDF2 to pypdf and then made one further change in the code.

The change suggested by @JensV is to use pypdf instead of PyPDF2:

pip install pypdf 

the new code:

from flask import Flask, request
import pypdf
import difflib
from pypdf import PdfReader


# Flask application object; the route below is registered on it.
app = Flask(__name__)

# Module-level keyword list read by analyze_pdf.
# NOTE(review): it starts with a single empty string, and '' is a substring
# of every string, so a plain `'' in text` test on it always succeeds.
keywords = ['']

def _page_mentions(keyword, page_text, page_words):
    """Return True if *keyword* occurs on a page, exactly or approximately.

    ``keyword`` and ``page_text`` must already be lower-cased. The cheap
    exact substring test runs first; only when it fails is the keyword
    fuzzy-matched against the individual words of the page.
    """
    if keyword in page_text:
        return True
    # cutoff=0.8 tolerates small typos and text-extraction glitches.
    return bool(difflib.get_close_matches(keyword, page_words, n=1, cutoff=0.8))


def analyze_pdf(file_path, search_keywords):
    """Count, per keyword, the pages of a PDF on which the keyword appears.

    Args:
        file_path: Path of the PDF file to read.
        search_keywords: Comma-separated keywords; surrounding whitespace is
            stripped and empty entries are ignored. ``None`` is treated as
            an empty string.

    Returns:
        A dict with two keys: ``'keyword_counts'`` maps every searched
        keyword to the number of pages on which it matched (case-insensitive
        substring match, or a close match against a single word), and
        ``'author'`` holds the ``/Author`` metadata entry, or ``None`` when
        the document has no metadata or no author.
    """
    requested = [part.strip() for part in (search_keywords or '').split(',')]

    # The module-level ``keywords`` are searched too, but the list is no
    # longer mutated, so terms from one call do not leak into the next.
    # Empty strings are dropped because '' is a substring of every page;
    # dict.fromkeys de-duplicates while keeping first-seen order.
    keyword_counts = dict.fromkeys(
        (keyword for keyword in [*keywords, *requested] if keyword), 0
    )

    with open(file_path, 'rb') as pdf_file:
        pdf_reader = pypdf.PdfReader(pdf_file)
        for page in pdf_reader.pages:
            # Guard against a page that yields no text at all.
            text = (page.extract_text() or '').lower()
            words = set(text.split())
            for keyword in keyword_counts:
                if _page_mentions(keyword.lower(), text, words):
                    keyword_counts[keyword] += 1
        # A PDF is not required to carry a document-information dictionary.
        metadata = pdf_reader.metadata
        author = metadata.get('/Author') if metadata is not None else None
    return {'keyword_counts': keyword_counts, 'author': author}

@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the upload form (GET) or the keyword report for an upload (POST).

    On POST the uploaded file is saved, passed to ``analyze_pdf`` together
    with the submitted keywords, and an HTML summary is returned. A request
    whose file has no usable name is answered with HTTP 400.
    """
    # Function-scope imports keep this view self-contained; both are stdlib.
    import os
    from html import escape

    if request.method == 'POST':
        file = request.files['file']
        search_keywords = request.form.get('keywords', '')

        # The filename is chosen by the client: keep only its last path
        # component so it cannot point outside the upload directory.
        filename = os.path.basename((file.filename or '').replace('\\', '/'))
        if filename in ('', '.', '..'):
            return 'No file selected.', 400
        # NOTE(review): the upload directory is a hard-coded Windows path;
        # confirm it matches the deployment host.
        file_path = fr"C:\uploads\{filename}"
        file.save(file_path)

        result = analyze_pdf(file_path, search_keywords)
        # Author (from the PDF) and keywords (from the form) are untrusted,
        # so both are HTML-escaped before being placed in the page.
        author = escape(str(result['author']))
        matches = {k: v for k, v in result['keyword_counts'].items() if v > 0}

        # Both outcomes share one f-string template so the author is always
        # interpolated rather than shown as a literal placeholder.
        heading = 'Matches found!' if matches else 'No matches found.'
        rows = '<br>'.join(f'{escape(str(k))}: {v}' for k, v in matches.items())
        details = f'<div>{rows}</div>' if matches else ''
        return f'''
            <div style='display: flex; justify-content: center; align-items: center; height: 100vh; font-family: Arial, sans-serif;'>
                <div class="flex-container">
                    <div><h1 style="font-size: 24px; font-weight: bold;">{heading}</h1></div>
                    <div><h3 style="font-size: 16px; font-weight: bold;">Author: {author}</h3></div>
                    {details}
                </div>
            </div>
        '''

    return '''
        <div style='display: flex; justify-content: center; align-items: center; height: 100vh;'>
            <form method="POST" enctype="multipart/form-data">
                <input type="file" name="file">
                <label for="keywords">Keywords:</label>
                <input type="text" name="keywords" id="keywords">
                <input type="submit" value="Upload">
            </form>
        </div>
    '''

# Start Flask's built-in server only when this file is executed directly.
# debug=True is passed to app.run; it is meant for local development only.
if __name__ == "__main__":
    app.run(debug=True)
0
Glenn On

You may have another library or module on your Python path that is called PyPDF2 and that library does not have the PdfReader. After importing PyPDF2, check what PyPDF2.__file__ is and make sure it's not some other module.