mobenta committed on
Commit
b3226f0
·
verified ·
1 Parent(s): f76166d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -17
app.py CHANGED
@@ -1,45 +1,122 @@
1
-
2
  import cohere
3
  import gradio as gr
4
  from pypdf import PdfReader
 
 
5
  import os
6
  from loguru import logger
7
- import promptic
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- # Initialize Cohere client with your API key
10
- cohere_client = cohere.Client(os.getenv("vSS2Z6Jw3R73yh7XJpnZFttq1oTE0U94iFWdw6wG"))
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  # Function to extract text from PDF
13
  def extract_text_from_pdf(pdf_file):
14
  reader = PdfReader(pdf_file)
15
  text = ""
16
  for page in reader.pages:
17
- text += page.extract_text()
 
 
18
  return text
19
 
20
- # Function to convert PDF text to audio via Cohere
21
- def pdf_to_audio(pdf_file):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  try:
23
  text = extract_text_from_pdf(pdf_file)
24
 
25
- # Generate response using Cohere
 
 
 
 
 
26
  response = cohere_client.generate(
27
- model='xlarge', # Change the model if necessary
28
  prompt=text,
29
  max_tokens=500 # Adjust based on your needs
30
  )
31
- generated_text = response.generations[0].text.strip()
32
 
33
- # You could add audio generation code here or use text-to-speech libraries
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- return generated_text # Returning text for now
36
  except Exception as e:
37
  logger.error(f"Error during PDF to audio conversion: {e}")
38
- return "An error occurred while processing the PDF."
39
 
40
  # Gradio interface
41
- def gradio_interface(pdf_file):
42
- return pdf_to_audio(pdf_file)
43
 
44
- # Launch the Gradio interface
45
- gr.Interface(fn=gradio_interface, inputs="file", outputs="text", title="PDF to Audio using Cohere").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import cohere
2
  import gradio as gr
3
  from pypdf import PdfReader
4
+ from gtts import gTTS # Import Google Text-to-Speech
5
+ from io import BytesIO # To handle audio in memory
6
  import os
7
  from loguru import logger
8
+ import tempfile # To create temporary files
9
+ from dotenv import load_dotenv # To load environment variables from a .env file
10
+
11
+ # Load environment variables from .env file (if you're using one)
12
+ load_dotenv()
13
+
14
+ # Read the Cohere API key from an environment variable
15
+ COHERE_API_KEY = os.getenv('COHERE_API_KEY')
16
+
17
+ # Check if the API key is available
18
+ if not COHERE_API_KEY:
19
+ raise ValueError("Cohere API key not found. Please set the COHERE_API_KEY environment variable.")
20
 
21
+ cohere_client = cohere.Client(COHERE_API_KEY)
22
+
23
+ # Correct language codes for gTTS
24
+ language_options = [
25
+ ("English", "en"),
26
+ ("Spanish", "es"),
27
+ ("French", "fr"),
28
+ ("German", "de"),
29
+ ("Italian", "it"),
30
+ ("Chinese", "zh-CN"),
31
+ ("Japanese", "ja"),
32
+ ("Hindi", "hi")
33
+ ]
34
 
35
  # Function to extract text from PDF
36
  def extract_text_from_pdf(pdf_file):
37
  reader = PdfReader(pdf_file)
38
  text = ""
39
  for page in reader.pages:
40
+ page_text = page.extract_text()
41
+ if page_text:
42
+ text += page_text
43
  return text
44
 
45
+ # Function to convert text to speech using gTTS
46
+ def text_to_speech(text, language_code):
47
+ if not text or not isinstance(text, str):
48
+ logger.error("No valid text available for speech conversion.")
49
+ return None
50
+
51
+ try:
52
+ tts = gTTS(text, lang=language_code)
53
+ audio_fp = BytesIO() # In-memory file to store audio
54
+ tts.write_to_fp(audio_fp) # Write audio data to the in-memory file
55
+ audio_fp.seek(0) # Reset file pointer to the start
56
+
57
+ # Create a temporary file to save the audio data for Gradio
58
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
59
+ temp_audio_file.write(audio_fp.read()) # Write the audio data to the temp file
60
+ temp_audio_path = temp_audio_file.name # Store the path of the temporary file
61
+ return temp_audio_path # Return the file path
62
+ except Exception as e:
63
+ logger.error(f"Error during text-to-speech conversion: {e}")
64
+ return None
65
+
66
+ # Function to convert PDF text to audio via Cohere and gTTS
67
+ def pdf_to_audio(pdf_file, language_code):
68
  try:
69
  text = extract_text_from_pdf(pdf_file)
70
 
71
+ # Check if the extracted text is empty
72
+ if not text.strip():
73
+ logger.error("The PDF contains no extractable text.")
74
+ return "The PDF contains no extractable text. Please try a different file.", None
75
+
76
+ # Process the text with Cohere before audio generation
77
  response = cohere_client.generate(
78
+ model='c4ai-aya-23', # Using your specified model
79
  prompt=text,
80
  max_tokens=500 # Adjust based on your needs
81
  )
 
82
 
83
+ # Check if the response is valid
84
+ if not response or not response.generations:
85
+ logger.error("Cohere API did not return a valid response.")
86
+ return "Error: Cohere API did not return a valid response.", None
87
+
88
+ processed_text = response.generations[0].text.strip()
89
+
90
+ # Check if processed_text is valid
91
+ if not processed_text:
92
+ logger.error("Cohere generated an empty response.")
93
+ return "Error: Cohere generated an empty response.", None
94
+
95
+ # Convert the processed text to speech and return the file path
96
+ audio_file_path = text_to_speech(processed_text, language_code)
97
+
98
+ if audio_file_path is None:
99
+ return "Error: Failed to generate speech from the provided text.", None
100
 
101
+ return processed_text, audio_file_path # Return the text and the path to the audio file
102
  except Exception as e:
103
  logger.error(f"Error during PDF to audio conversion: {e}")
104
+ return "An error occurred while processing the PDF.", None
105
 
106
  # Gradio interface
107
+ def gradio_interface(pdf_file, language_code):
108
+ return pdf_to_audio(pdf_file, language_code)
109
 
110
+ # Launch the Gradio interface with file input, language dropdown, text output, and audio output
111
+ gr.Interface(
112
+ fn=gradio_interface,
113
+ inputs=[
114
+ "file",
115
+ gr.Dropdown(choices=language_options, label="Select Language")
116
+ ],
117
+ outputs=[
118
+ "text",
119
+ "audio"
120
+ ],
121
+ title="PDF to Audio using Cohere (Multi-language)"
122
+ ).launch(debug=True)