Spaces:

mrsk1883
/

AAIapp

Sleeping

App Files Files Community

mrsk1883 committed on Dec 8, 2023

Commit

c98d7c6

1 Parent(s): b2410fa

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -19

app.py CHANGED Viewed

@@ -1,29 +1,82 @@
-# app/main.py
 from PyPDF2 import PdfReader
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 from gtts import gTTS
-import gradio as gr
-# Load the pre-trained model and tokenizer
 model_name = "ArtifactAI/led_large_16384_arxiv_summarization"
 model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 def summarize_pdf_abstract(pdf_path):
-    # Implement the function to summarize PDF abstracts (similar to your previous code)
-    # ...
-# Gradio Interface
-iface = gr.Interface(
-    fn=summarize_pdf_abstract,
-    inputs=gr.File(type="file", label="Upload a PDF file"),
-    outputs="text",
-    live=True,
-    interpretation="default",
-    title="PDF Abstract Summarizer",
-    description="This app accepts PDFs with abstracts and generates a summary.",
-)
-# Launch the Gradio interface
-iface.launch()

 from PyPDF2 import PdfReader
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 from gtts import gTTS
+import os
+# Load the checkpoint named by model_name and its matching tokenizer once, at import time.
+# Both objects are module-level globals used by summarize_pdf_abstract below.
+# NOTE(review): presumably from_pretrained fetches the weights on first use and caches them — confirm for the deployment environment.
 model_name = "ArtifactAI/led_large_16384_arxiv_summarization"
 model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+def summarize_and_speak_pdf_abstract(pdf_path):
+    """
+    Reads a PDF file, extracts the abstract, summarizes it in one sentence, and generates an audio file of the summary.
+    Args:
+        pdf_path: Path to the PDF file.
+    """
+    # Summarize the abstract
+    summary = summarize_pdf_abstract(pdf_path)
+    # Define language and audio format
+    language = "en"  # Change this to your desired language
+    audio_format = "mp3"
+    # Create the text-to-speech object
+    tts = gTTS(text=summary, lang=language)
+    # Generate the audio file
+    audio_file_name = f"summary.{audio_format}"
+    tts.save(audio_file_name)
+    print(f"Audio file created: {audio_file_name}")
+    # Play the audio file (optional)
+    # os.system(f"play {audio_file_name}")
+# Define the function to summarize the abstract
 def summarize_pdf_abstract(pdf_path):
+    """
+    Reads a PDF file, extracts the abstract, and summarizes it in one sentence.
+    Args:
+        pdf_path: Path to the PDF file.
+    Returns:
+        A string containing the one-sentence summary of the abstract.
+    """
+    # Read the PDF file
+    reader = PdfReader(open(pdf_path, "rb"))
+    # Extract the abstract
+    abstract_text = ""
+    for page in reader.pages:
+        # Search for keywords like "Abstract" or "Introduction"
+        if (
+            "Abstract" in page.extract_text()
+            or "Introduction" in page.extract_text()
+        ):
+            # Extract the text following the keyword
+            abstract_text = page.extract_text()
+            break
+    # Encode the abstract text
+    inputs = tokenizer(abstract_text, return_tensors="pt")
+    # Generate the summary
+    outputs = model.generate(**inputs)
+    # Decode the summary
+    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return summary
+# Example usage: summarize one hard-coded PDF and write its spoken summary to disk.
+# NOTE(review): these statements run whenever the module is executed or imported.
+# NOTE(review): the "/content/..." path looks like a notebook-local path — confirm the file exists where this app runs.
+pdf_path = "/content/Article 11 Hidden Technical Debt in Machine Learning Systems.pdf"
+summarize_and_speak_pdf_abstract(pdf_path)