AAIapp / utils.py
mrsk1883's picture
Create utils.py
97b49ea
raw
history blame
1.65 kB
from PyPDF2 import PdfReader
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from gtts import gTTS
import os
# Load the summarization model and its tokenizer.
# NOTE: these statements sit at module top level, so they run when the module
# is imported; `from_pretrained` fetches the checkpoint from the Hugging Face
# Hub if it is not already in the local cache, which can be slow on first use.
# Checkpoint identifier shared by both `from_pretrained` calls below.
model_name = "ArtifactAI/led_large_16384_arxiv_summarization"
# Sequence-to-sequence model instance loaded from the checkpoint.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Tokenizer loaded from the same checkpoint name as the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)
def summarize_and_speak_pdf_abstract(
    pdf_path, *, language="en", audio_file_name="summary.mp3"
):
    """
    Summarize the abstract of a PDF and save the summary as spoken audio.

    The summary text is obtained from ``summarize_pdf_abstract`` and handed to
    gTTS; the generated speech is written to ``audio_file_name`` and a
    confirmation line is printed.

    Args:
        pdf_path: Path to the PDF file.
        language: Language code passed to gTTS as ``lang``. Defaults to
            ``"en"``, the value that was previously hard-coded.
        audio_file_name: Path the audio is saved to. Defaults to
            ``"summary.mp3"``, the name that was previously hard-coded.
            gTTS produces MP3 data, so an ``.mp3`` extension is appropriate.
    """
    # Summarize the abstract
    summary = summarize_pdf_abstract(pdf_path)

    # Create the text-to-speech object for the requested language
    tts = gTTS(text=summary, lang=language)

    # Generate the audio file; callers can pass a different name so that
    # successive calls do not overwrite each other's output.
    tts.save(audio_file_name)
    print(f"Audio file created: {audio_file_name}")
def summarize_pdf_abstract(pdf_path):
"""
Reads a PDF file, extracts the abstract, and summarizes it in one sentence.
Args:
pdf_path: Path to the PDF file.
Returns:
A string containing the one-sentence summary of the abstract.
"""
# Read the PDF file
reader = PdfReader(open(pdf_path, "rb"))
# Extract the abstract
abstract_text = ""
for page in reader.pages:
# Search for keywords like "Abstract" or "Introduction"
if (
"Abstract" in page.extract_text