Spaces:

gianb
/

PDF_Summarizer_and_TTS

Runtime error

App Files Files Community

gianb commited on Dec 3, 2023

Commit

6439089

1 Parent(s): 8f760c8

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -97

app.py DELETED Viewed

@@ -1,97 +0,0 @@
-import warnings
-import pdfplumber
-import torch
-from transformers import pipeline, AutoProcessor, AutoModel
-import numpy as np
-import gradio as gr
-from io import BytesIO
-from scipy.io.wavfile import write as write_wav
-warnings.filterwarnings("ignore")
-# Here is the code
-def extract_abstract(uploaded_file):
-    pdf_bytes = BytesIO(uploaded_file)
-    with pdfplumber.open(pdf_bytes) as pdf:
-        abstract = ""
-        # Iterate through each page
-        for page in pdf.pages:
-            text = page.extract_text(x_tolerance = 1, y_tolerance = 1) # these parameters are set 1 in order to get spaces between words and lines
-            # Search for the "Abstract" keyword
-            if "abstract" in text.lower():
-                # Found the "Abstract" keyword
-                start_index = text.lower().find("abstract") # find the "abstract" title as starter index
-                end_index = text.lower().find("introduction") # find the "introduction" title as end index
-                abstract = text[start_index:end_index]
-                break
-    print(abstract)
-    return abstract
-def process_summary(summary):
-    # Split the summary by the first period
-    summary = summary[0]["summary_text"]
-    sentences = summary.split('.', 1)
-    if len(sentences) > 0:
-        # Retrieve the first part before the period
-        processed_summary = sentences[0].strip() + "."
-        # Replace "-" with an empty string
-        processed_summary = processed_summary.replace("-", "")
-        return processed_summary
-# Function for summarization and audio conversion
-def summarize_and_convert_to_audio(pdf_file):
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    print(device)
-    # Move models and related tensors to CUDA device if available
-    processor = AutoProcessor.from_pretrained("suno/bark-small")
-    model = AutoModel.from_pretrained("suno/bark-small").to(device)
-    # Extract abstract
-    abstract_text = extract_abstract(pdf_file)
-    if not abstract_text:
-        return "No 'abstract' section found in the uploaded PDF. Please upload a different PDF."
-    # Summarize the abstract
-    summarization_pipeline = pipeline(task='summarization', model='knkarthick/MEETING_SUMMARY', min_length=15, max_length=30)
-    summarized_text = summarization_pipeline(abstract_text)
-    one_sentence_summary = process_summary(summarized_text)
-    print(one_sentence_summary)
-    # Text-to-audio conversion
-    inputs = processor(
-        text=[one_sentence_summary],
-        return_tensors="pt",
-    )
-    inputs = inputs.to(device)
-    speech_values = model.generate(**inputs, do_sample=True)
-    sampling_rate = model.generation_config.sample_rate
-    # Convert speech values to audio data
-    audio_data = speech_values.cpu().numpy().squeeze()
-    # Convert audio data to bytes
-    with BytesIO() as buffer:
-        write_wav(buffer, sampling_rate, audio_data.astype(np.float32))
-        audio_bytes = buffer.getvalue()
-    return audio_bytes, one_sentence_summary
-# Create a Gradio interface
-iface = gr.Interface(
-    fn=summarize_and_convert_to_audio,
-    inputs=gr.UploadButton(label="Upload PDF", type="binary", file_types=["pdf"]),  # Set to accept only PDF files
-    outputs=[gr.Audio(label="Audio"), gr.Textbox(label="Message")],
-    title="PDF Abstract Summarizer",
-    description="""
-    This application is supposed to summarize the 'abstract' section of a PDF file and convert the summarization into a speech.
-    Please make sure you upload a PDF file with the 'abstract' section for application to work.
-    Note: If you get an error while processing the file please refresh your browser and try again.
-    """
-)
-iface.launch()