|
|
|
|
|
|
|
import re
import tempfile

import gradio as gr
import PyPDF2
import soundfile as sf
import torch
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
from transformers import pipeline
|
|
|
|
|
|
|
|
|
def extract_and_clean_abstract(uploaded_file):
    """Extract the abstract from an uploaded PDF and flatten it to one line.

    The text of every page is concatenated and searched for the span between
    an "Abstract" heading and the next "Keywords" or "Introduction" heading
    (the latter optionally preceded by the section number "1" / "1.").

    Args:
        uploaded_file: The PDF to read. Either an object exposing the
            underlying stream as ``.file``, or anything ``PyPDF2.PdfReader``
            accepts directly (such as a path string or a file-like object).

    Returns:
        The abstract with newlines replaced by spaces and ``"- "`` sequences
        removed, or the literal string ``"Abstract not found."`` when no
        abstract span is located.
    """
    # Use the wrapped stream when there is one; otherwise hand the object
    # itself to PyPDF2.
    source = getattr(uploaded_file, "file", uploaded_file)
    reader = PyPDF2.PdfReader(source)

    # extract_text() may yield nothing for a page; treat that as empty text.
    text = "".join(page.extract_text() or "" for page in reader.pages)

    # A bare "1" must not act as an end marker: it would cut the abstract at
    # the first digit 1 anywhere in it ("2019", "10%", ...). The digit is only
    # consumed when it directly numbers the Introduction heading.
    pattern = (
        r"(Abstract|ABSTRACT|abstract)"
        r"(.*?)"
        r"((?:(?<!\d)1\.?\s*)?(?:Introduction|INTRODUCTION|introduction)"
        r"|Keywords|KEYWORDS|keywords)"
    )
    match = re.search(pattern, text, re.DOTALL)
    abstract = match.group(2).strip() if match else "Abstract not found."

    # Newlines become spaces first, so a word hyphenated across a line break
    # ("exam-\nple" -> "exam- ple") is re-joined by dropping "- ".
    return abstract.replace("\n", " ").replace("- ", "")
|
|
|
def summarize_text(hf_model_name, text):
    """Summarize ``text`` with a Hugging Face summarization pipeline.

    Args:
        hf_model_name: Model identifier handed to ``pipeline``. A blank or
            whitespace-only string (e.g. an empty textbox) is replaced by
            ``None`` so the pipeline falls back to its default model.
        text: The text to summarize.

    Returns:
        The ``summary_text`` field of the first result returned by the
        pipeline.
    """
    model_name = hf_model_name
    if isinstance(model_name, str):
        # An empty identifier cannot be resolved; let the pipeline choose.
        model_name = model_name.strip() or None

    summarizer = pipeline("summarization", model=model_name)

    # truncation=True clips input that exceeds the model's maximum sequence
    # length instead of failing on it.
    results = summarizer(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return results[0]["summary_text"]
|
|
|
def text_to_speech(text):
    """Synthesize speech for ``text`` with a fairseq text-to-speech model.

    Loads the ``facebook/fastspeech2-en-ljspeech`` checkpoint from the
    Hugging Face hub (with the ``hifigan`` vocoder override and fp16 turned
    off), builds a generator for it, and runs a single prediction.

    Args:
        text: The text to speak.

    Returns:
        A ``(wav, rate)`` pair as produced by
        ``TTSHubInterface.get_prediction``.
    """
    ensemble, config, tts_task = load_model_ensemble_and_task_from_hf_hub(
        "facebook/fastspeech2-en-ljspeech",
        arg_overrides={"vocoder": "hifigan", "fp16": False},
    )
    tts_model = ensemble[0]

    # Merge the task's data configuration into the config before the
    # generator is built from it.
    TTSHubInterface.update_cfg_with_data_cfg(config, tts_task.data_cfg)
    synthesizer = tts_task.build_generator([tts_model], config)

    model_input = TTSHubInterface.get_model_input(tts_task, text)
    audio, sample_rate = TTSHubInterface.get_prediction(
        tts_task, tts_model, synthesizer, model_input
    )
    return audio, sample_rate
|
|
|
def process_pdf(uploaded_file, hf_model_name):
    """Turn an uploaded PDF into a spoken summary of its abstract.

    Runs the full pipeline: extract and clean the abstract, summarize it,
    synthesize speech from the summary, and write the audio to a WAV file.

    Args:
        uploaded_file: The uploaded PDF, passed through to
            ``extract_and_clean_abstract``.
        hf_model_name: Summarization model identifier, passed through to
            ``summarize_text``.

    Returns:
        Path of the WAV file that was written.
    """
    abstract = extract_and_clean_abstract(uploaded_file)
    summary = summarize_text(hf_model_name, abstract)
    wav, rate = text_to_speech(summary)

    # NOTE(review): the waveform is presumably a torch tensor, possibly on an
    # accelerator; confirm against fairseq. soundfile needs array data in
    # host memory, so tensor-like objects are converted first.
    if hasattr(wav, "detach"):
        wav = wav.detach().cpu().numpy()

    # A unique file per call: a fixed path would be overwritten by concurrent
    # requests, and "/tmp" does not exist on every platform. delete=False
    # keeps the file around so the caller can read it after we return.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name

    sf.write(output_path, wav, rate)
    return output_path
|
|
|
# Gradio UI: a PDF upload plus a model-name textbox in, an audio player out.
iface = gr.Interface(
    fn=process_pdf,
    inputs=[
        # `type` controls how the upload is handed to `fn` and has no "pdf"
        # option; limiting uploads to PDFs is done with `file_types`.
        gr.File(label="Upload PDF", file_types=[".pdf"]),
        gr.Textbox(label="Hugging Face Model Name for Summarization"),
    ],
    outputs=gr.Audio(label="Audio Summary"),
    title="PDF Abstract to Speech",
    description="Extracts and summarizes the abstract from a PDF file and converts it to speech.",
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()
|
|
|
|