arjunanand13 committed on
Commit
a1e5f87
·
verified ·
1 Parent(s): e9fc3af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -10
app.py CHANGED
@@ -63,22 +63,60 @@
63
 
64
  import gradio as gr
65
  import google.generativeai as genai
66
- import whisper
 
67
  import os
68
  import tempfile
 
69
 
70
-
71
  genai.configure(api_key="AIzaSyBPQF0g5EfEPzEiGRzA3iNzJZK4jDukMvE")
72
 
 
73
  model = genai.GenerativeModel('gemini-pro')
74
 
75
- whisper_model = whisper.load_model("base")
76
-
77
  def transcribe_video(video_path):
78
  """Transcribe the audio from a video file."""
79
  try:
80
- result = whisper_model.transcribe(video_path)
81
- return result["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  except Exception as e:
83
  return f"Error transcribing video: {str(e)}"
84
 
@@ -119,20 +157,17 @@ def process_video(video, num_questions):
119
  with gr.Row():
120
  gr.Markdown("Processing video and generating summary and quiz...")
121
 
122
-
123
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
124
  video.save(temp_video.name)
125
  video_path = temp_video.name
126
-
127
- transcript = transcribe_video(video_path)
128
 
 
129
  result = generate_summary_and_quiz(transcript, num_questions)
130
 
131
  os.unlink(video_path)
132
 
133
  return transcript, result
134
 
135
-
136
  with gr.Blocks() as demo:
137
  gr.Markdown("# Video Lecture Summarizer and Quiz Generator")
138
 
 
63
 
64
  import gradio as gr
65
  import google.generativeai as genai
66
+ import speech_recognition as sr
67
+ from pydub import AudioSegment
68
  import os
69
  import tempfile
70
+ import moviepy.editor as mp
71
 
72
+ # Initialize Gemini API
73
  genai.configure(api_key="AIzaSyBPQF0g5EfEPzEiGRzA3iNzJZK4jDukMvE")
74
 
75
+ # Initialize the Gemini model
76
  model = genai.GenerativeModel('gemini-pro')
77
 
 
 
78
  def transcribe_video(video_path):
79
  """Transcribe the audio from a video file."""
80
  try:
81
+ # Extract audio from video
82
+ video = mp.VideoFileClip(video_path)
83
+ audio_path = tempfile.mktemp(suffix=".wav")
84
+ video.audio.write_audiofile(audio_path, codec='pcm_s16le')
85
+
86
+ # Load audio file
87
+ audio = AudioSegment.from_wav(audio_path)
88
+
89
+ # Initialize recognizer
90
+ r = sr.Recognizer()
91
+
92
+ # Split audio into chunks to handle long audio
93
+ chunk_length_ms = 30000 # 30 seconds
94
+ chunks = [audio[i:i+chunk_length_ms] for i in range(0, len(audio), chunk_length_ms)]
95
+
96
+ transcript = ""
97
+ for i, chunk in enumerate(chunks):
98
+ # Export chunk to a temporary file
99
+ chunk_path = tempfile.mktemp(suffix=".wav")
100
+ chunk.export(chunk_path, format="wav")
101
+
102
+ # Recognize speech in the chunk
103
+ with sr.AudioFile(chunk_path) as source:
104
+ audio_listened = r.record(source)
105
+ try:
106
+ text = r.recognize_google(audio_listened)
107
+ transcript += text + " "
108
+ except sr.UnknownValueError:
109
+ print(f"Could not understand audio in chunk {i+1}")
110
+ except sr.RequestError:
111
+ print(f"Could not request results from Google Speech Recognition service for chunk {i+1}")
112
+
113
+ # Clean up temporary chunk file
114
+ os.remove(chunk_path)
115
+
116
+ # Clean up temporary audio file
117
+ os.remove(audio_path)
118
+
119
+ return transcript.strip()
120
  except Exception as e:
121
  return f"Error transcribing video: {str(e)}"
122
 
 
157
  with gr.Row():
158
  gr.Markdown("Processing video and generating summary and quiz...")
159
 
 
160
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
161
  video.save(temp_video.name)
162
  video_path = temp_video.name
 
 
163
 
164
+ transcript = transcribe_video(video_path)
165
  result = generate_summary_and_quiz(transcript, num_questions)
166
 
167
  os.unlink(video_path)
168
 
169
  return transcript, result
170
 
 
171
  with gr.Blocks() as demo:
172
  gr.Markdown("# Video Lecture Summarizer and Quiz Generator")
173