Spaces:

mobenta
/

pdf_audio

Build error

App Files Community

mobenta committed on Sep 25, 2024

Commit

7fafbc5

verified ·

1 Parent(s): 1c95b8f

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -23

app.py CHANGED Viewed

@@ -7,15 +7,15 @@ from pathlib import Path
 from tempfile import NamedTemporaryFile
 from typing import List, Literal
 import re
 from transformers import pipeline
 from pydantic import BaseModel
-import gradio as gr
-# Initialize Hugging Face models
 text_generator = pipeline('text-generation', model='EleutherAI/gpt-neo-2.7B')
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-# Instruction templates (unchanged from your original code)
 INSTRUCTION_TEMPLATES = {
     "podcast": {
         "intro": """Your task is to take the input text provided and turn it into a lively, engaging, informative podcast dialogue, in the style of NPR...""",
@@ -95,7 +95,7 @@ def get_mp3(text: str, voice: str, audio_model: str) -> bytes:
 # Main audio generation function (adapted for Hugging Face text generation)
 def generate_audio(
-    files: list,
     text_model: str = "EleutherAI/gpt-neo-2.7B",
     audio_model: str = "tts-1",
     speaker_1_voice: str = "alloy",
@@ -111,13 +111,12 @@ def generate_audio(
     debug = False,
 ) -> tuple:
-    # Combine input text from files
     combined_text = original_text or ""
     if not combined_text:
-        for file in files:
-            with Path(file).open("rb") as f:
-                text = f.read().decode('utf-8')  # Assuming the PDF text is extracted as UTF-8
-                combined_text += text + "\n\n"
     # Generate the dialogue using Hugging Face
     llm_output = generate_dialogue(
@@ -158,40 +157,48 @@ def generate_audio(
     return temporary_file.name, transcript, combined_text
-# Gradio Interface
 def interface():
     with gr.Blocks() as demo:
-        gr.Markdown("# Podcast Generator from PDF Files")
-        gr.Markdown("Upload a PDF file and generate a podcast dialogue using Hugging Face's text generation model.")
         with gr.Row():
-            # Upload PDF file
-            pdf_input = gr.File(label="Upload PDF(s)", file_types=[".pdf"], type="file", multiple=True)
         with gr.Row():
-            # Instructions input
             intro_input = gr.Textbox(label="Intro Instructions", value=INSTRUCTION_TEMPLATES["podcast"]["intro"], lines=5)
             text_instructions_input = gr.Textbox(label="Text Instructions", value=INSTRUCTION_TEMPLATES["podcast"]["text_instructions"], lines=5)
-            scratch_pad_input = gr.Textbox(label="Scratch Pad", value=INSTRUCTION_TEMPLATES["podcast"]["scratch_pad"], lines=5)
-            prelude_input = gr.Textbox(label="Prelude Dialog", value=INSTRUCTION_TEMPLATES["podcast"]["prelude"], lines=5)
             dialog_input = gr.Textbox(label="Podcast Dialogue Instructions", value=INSTRUCTION_TEMPLATES["podcast"]["dialog"], lines=5)
-        # Generate button
         generate_btn = gr.Button("Generate Podcast Dialogue")
-        # Output
         output_text = gr.Textbox(label="Generated Podcast Dialogue", lines=10)
-        # Generate button action
         generate_btn.click(
-            fn=generate_audio,
             inputs=[pdf_input, intro_input, text_instructions_input, scratch_pad_input, prelude_input, dialog_input],
-            outputs=[output_text]
         )
     return demo
-# Launch the Gradio interface
 if __name__ == "__main__":
     demo = interface()
     demo.launch()

 from tempfile import NamedTemporaryFile
 from typing import List, Literal
 import re
+import gradio as gr
 from transformers import pipeline
 from pydantic import BaseModel
+# Initialize Hugging Face text generation model
 text_generator = pipeline('text-generation', model='EleutherAI/gpt-neo-2.7B')
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+# Instruction templates
 INSTRUCTION_TEMPLATES = {
     "podcast": {
         "intro": """Your task is to take the input text provided and turn it into a lively, engaging, informative podcast dialogue, in the style of NPR...""",
 # Main audio generation function (adapted for Hugging Face text generation)
 def generate_audio(
+    file: str,
     text_model: str = "EleutherAI/gpt-neo-2.7B",
     audio_model: str = "tts-1",
     speaker_1_voice: str = "alloy",
     debug = False,
 ) -> tuple:
+    # Combine input text from the single file
     combined_text = original_text or ""
     if not combined_text:
+        with Path(file).open("rb") as f:
+            text = f.read().decode('utf-8')  # Assuming the PDF text is extracted as UTF-8
+            combined_text += text + "\n\n"
     # Generate the dialogue using Hugging Face
     llm_output = generate_dialogue(
     return temporary_file.name, transcript, combined_text
+# Gradio Interface (for single file upload)
+def generate_podcast_interface(file, intro_instructions, text_instructions, scratch_pad_instructions, prelude_dialog, podcast_dialog_instructions):
+    # Handle a single PDF file
+    combined_text = ""
+    file_path = file.name
+    with open(file_path, "rb") as f:
+        reader = PyPDF2.PdfReader(f)
+        for page in reader.pages:
+            combined_text += page.extract_text() + "\n\n"
+    # Generate podcast dialogue
+    podcast_dialogue = generate_dialogue(combined_text, intro_instructions, text_instructions, scratch_pad_instructions, prelude_dialog, podcast_dialog_instructions)
+    return podcast_dialogue
 def interface():
     with gr.Blocks() as demo:
+        gr.Markdown("# Podcast Generator from PDF File")
+        gr.Markdown("Upload a PDF file, input instructions, and generate podcast dialogues using Hugging Face models.")
         with gr.Row():
+            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
         with gr.Row():
             intro_input = gr.Textbox(label="Intro Instructions", value=INSTRUCTION_TEMPLATES["podcast"]["intro"], lines=5)
             text_instructions_input = gr.Textbox(label="Text Instructions", value=INSTRUCTION_TEMPLATES["podcast"]["text_instructions"], lines=5)
+            scratch_pad_input = gr.Textbox(label="Scratch Pad Instructions", value=INSTRUCTION_TEMPLATES["podcast"]["scratch_pad"], lines=5)
+            prelude_input = gr.Textbox(label="Prelude", value=INSTRUCTION_TEMPLATES["podcast"]["prelude"], lines=5)
             dialog_input = gr.Textbox(label="Podcast Dialogue Instructions", value=INSTRUCTION_TEMPLATES["podcast"]["dialog"], lines=5)
         generate_btn = gr.Button("Generate Podcast Dialogue")
         output_text = gr.Textbox(label="Generated Podcast Dialogue", lines=10)
         generate_btn.click(
+            fn=generate_podcast_interface,
             inputs=[pdf_input, intro_input, text_instructions_input, scratch_pad_input, prelude_input, dialog_input],
+            outputs=output_text
         )
     return demo
 if __name__ == "__main__":
     demo = interface()
     demo.launch()