Spaces:

mobenta
/

pdf_audio

Sleeping

App Files Community

mobenta committed on Sep 27, 2024

Commit

b3226f0

verified ·

1 Parent(s): f76166d

Update app.py

Browse files

Files changed (1) hide show

app.py +94 -17

app.py CHANGED Viewed

@@ -1,45 +1,122 @@
 import cohere
 import gradio as gr
 from pypdf import PdfReader
 import os
 from loguru import logger
-import promptic
-# Initialize Cohere client with your API key
-cohere_client = cohere.Client(os.getenv("vSS2Z6Jw3R73yh7XJpnZFttq1oTE0U94iFWdw6wG"))
 # Function to extract text from PDF
 def extract_text_from_pdf(pdf_file):
     reader = PdfReader(pdf_file)
     text = ""
     for page in reader.pages:
-        text += page.extract_text()
     return text
-# Function to convert PDF text to audio via Cohere
-def pdf_to_audio(pdf_file):
     try:
         text = extract_text_from_pdf(pdf_file)
-        # Generate response using Cohere
         response = cohere_client.generate(
-            model='xlarge',  # Change the model if necessary
             prompt=text,
             max_tokens=500  # Adjust based on your needs
         )
-        generated_text = response.generations[0].text.strip()
-        # You could add audio generation code here or use text-to-speech libraries
-        return generated_text  # Returning text for now
     except Exception as e:
         logger.error(f"Error during PDF to audio conversion: {e}")
-        return "An error occurred while processing the PDF."
 # Gradio interface
-def gradio_interface(pdf_file):
-    return pdf_to_audio(pdf_file)
-# Launch the Gradio interface
-gr.Interface(fn=gradio_interface, inputs="file", outputs="text", title="PDF to Audio using Cohere").launch()

 import cohere
 import gradio as gr
 from pypdf import PdfReader
+from gtts import gTTS  # Import Google Text-to-Speech
+from io import BytesIO  # To handle audio in memory
 import os
 from loguru import logger
+import tempfile  # To create temporary files
+from dotenv import load_dotenv  # To load environment variables from a .env file
+# Load environment variables from .env file (if you're using one)
+load_dotenv()
+# Read the Cohere API key from an environment variable
+COHERE_API_KEY = os.getenv('COHERE_API_KEY')
+# Check if the API key is available
+if not COHERE_API_KEY:
+    raise ValueError("Cohere API key not found. Please set the COHERE_API_KEY environment variable.")
+cohere_client = cohere.Client(COHERE_API_KEY)
+# Correct language codes for gTTS
+language_options = [
+    ("English", "en"),
+    ("Spanish", "es"),
+    ("French", "fr"),
+    ("German", "de"),
+    ("Italian", "it"),
+    ("Chinese", "zh-CN"),
+    ("Japanese", "ja"),
+    ("Hindi", "hi")
+]
 # Function to extract text from PDF
 def extract_text_from_pdf(pdf_file):
     reader = PdfReader(pdf_file)
     text = ""
     for page in reader.pages:
+        page_text = page.extract_text()
+        if page_text:
+            text += page_text
     return text
+# Function to convert text to speech using gTTS
+def text_to_speech(text, language_code):
+    if not text or not isinstance(text, str):
+        logger.error("No valid text available for speech conversion.")
+        return None
+    try:
+        tts = gTTS(text, lang=language_code)
+        audio_fp = BytesIO()  # In-memory file to store audio
+        tts.write_to_fp(audio_fp)  # Write audio data to the in-memory file
+        audio_fp.seek(0)  # Reset file pointer to the start
+        # Create a temporary file to save the audio data for Gradio
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
+            temp_audio_file.write(audio_fp.read())  # Write the audio data to the temp file
+            temp_audio_path = temp_audio_file.name  # Store the path of the temporary file
+        return temp_audio_path  # Return the file path
+    except Exception as e:
+        logger.error(f"Error during text-to-speech conversion: {e}")
+        return None
+# Function to convert PDF text to audio via Cohere and gTTS
+def pdf_to_audio(pdf_file, language_code):
     try:
         text = extract_text_from_pdf(pdf_file)
+        # Check if the extracted text is empty
+        if not text.strip():
+            logger.error("The PDF contains no extractable text.")
+            return "The PDF contains no extractable text. Please try a different file.", None
+        # Process the text with Cohere before audio generation
         response = cohere_client.generate(
+            model='c4ai-aya-23',  # Using your specified model
             prompt=text,
             max_tokens=500  # Adjust based on your needs
         )
+        # Check if the response is valid
+        if not response or not response.generations:
+            logger.error("Cohere API did not return a valid response.")
+            return "Error: Cohere API did not return a valid response.", None
+        processed_text = response.generations[0].text.strip()
+        # Check if processed_text is valid
+        if not processed_text:
+            logger.error("Cohere generated an empty response.")
+            return "Error: Cohere generated an empty response.", None
+        # Convert the processed text to speech and return the file path
+        audio_file_path = text_to_speech(processed_text, language_code)
+        if audio_file_path is None:
+            return "Error: Failed to generate speech from the provided text.", None
+        return processed_text, audio_file_path  # Return the text and the path to the audio file
     except Exception as e:
         logger.error(f"Error during PDF to audio conversion: {e}")
+        return "An error occurred while processing the PDF.", None
 # Gradio interface
+def gradio_interface(pdf_file, language_code):
+    return pdf_to_audio(pdf_file, language_code)
+# Launch the Gradio interface with file input, language dropdown, text output, and audio output
+gr.Interface(
+    fn=gradio_interface,
+    inputs=[
+        "file",
+        gr.Dropdown(choices=language_options, label="Select Language")
+    ],
+    outputs=[
+        "text",
+        "audio"
+    ],
+    title="PDF to Audio using Cohere (Multi-language)"
+).launch(debug=True)