MrSimple01 committed on
Commit
e039dbd
·
verified ·
1 Parent(s): 9a51efc

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +83 -0
  2. video_processing.py +78 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os
3
+ import gradio as gr
4
+ import tempfile
5
+ from video_processing import (
6
+ process_video_file,
7
+ )
8
+ from quiz_processing import process_text
9
+
10
# Optional API keys taken from the process environment; each is None when
# the corresponding variable is not set.
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
12
+
13
def combined_process_video_file(video_file, output_format, elevenlabs_api_key, model_id, google_api_key):
    """Run the video pipeline and, when a transcript exists, build a quiz from it.

    The video is handed to ``process_video_file``; its transcript text, if
    non-empty, is passed to ``process_text`` together with ``google_api_key``.

    Returns:
        A 7-tuple ``(audio_path, audio_msg, transcript_file, transcript_msg,
        quiz_text, quiz_file, json_file)``. When there is no transcript text
        the last three items are ``"No transcript text to analyze"``,
        ``None`` and ``None``.
    """
    pipeline_result = process_video_file(
        video_file, output_format, elevenlabs_api_key, model_id
    )
    # The sixth value returned by process_video_file is not used here.
    audio_path, audio_msg, transcript_file, transcript_msg, transcript_text, _ = pipeline_result
    media_outputs = (audio_path, audio_msg, transcript_file, transcript_msg)

    if transcript_text:
        formatted_quiz, quiz_file, json_file = process_text(transcript_text, google_api_key)
        return (*media_outputs, formatted_quiz, quiz_file, json_file)

    return (*media_outputs, "No transcript text to analyze", None, None)
24
+
25
def _run_with_server_keys(video_file, output_format, elevenlabs_api_key, model_id, google_api_key):
    """Click handler that substitutes the server-side API keys for blank fields.

    The environment keys are deliberately NOT used as the textboxes' initial
    ``value``: a component's initial value is delivered to every visitor's
    browser, and ``type="password"`` only masks how it is displayed. The
    fallback is applied here, on the server, instead.
    """
    # Strip so a stray space/newline pasted with the key is not sent along.
    user_elevenlabs_key = (elevenlabs_api_key or "").strip()
    user_google_key = (google_api_key or "").strip()
    return combined_process_video_file(
        video_file,
        output_format,
        user_elevenlabs_key or ELEVENLABS_API_KEY,
        model_id,
        user_google_key or GOOGLE_API_KEY,
    )


# Create the Gradio interface
with gr.Blocks(title="Video to Quiz Generator") as app:
    gr.Markdown("Video => Quiz")
    gr.Markdown("Upload a video or provide a URL to extract audio, transcribe, and automatically generate a quiz with topics, key concepts, summaries, and questions.")

    # Credentials and model selection.
    with gr.Row():
        with gr.Column():
            elevenlabs_api_key = gr.Textbox(
                placeholder=(
                    "Leave blank to use the server-configured key"
                    if ELEVENLABS_API_KEY
                    else "Enter your ElevenLabs API key"
                ),
                label="ElevenLabs API Key (for transcription)",
                type="password",
            )

            model_id = gr.Dropdown(
                choices=["scribe_v1"],
                value="scribe_v1",
                label="Transcription Model",
            )

            google_api_key = gr.Textbox(
                placeholder=(
                    "Leave blank to use the server-configured key"
                    if GOOGLE_API_KEY
                    else "Enter your Google Gemini API key"
                ),
                label="Google API Key (for quiz generation)",
                type="password",
            )

    with gr.Tabs():
        with gr.TabItem("Upload Video"):
            # Left column: inputs. Right column: extraction/transcription results.
            with gr.Row():
                with gr.Column():
                    video_input = gr.Video(label="Upload Video")
                    format_choice_file = gr.Radio(["mp3", "wav"], value="mp3", label="Audio Format")
                    extract_button_file = gr.Button("Process Video & Generate Quiz")

                with gr.Column():
                    audio_output_file = gr.Audio(label="Extracted Audio", type="filepath")
                    status_output_file = gr.Textbox(label="Audio Extraction Status")
                    transcript_file_output = gr.File(label="Transcription Text File")
                    transcript_status_output = gr.Textbox(label="Transcription Status")

            # Quiz text plus its downloadable text/JSON files.
            with gr.Row():
                with gr.Column():
                    quiz_output_file = gr.Textbox(
                        label="Generated Quiz",
                        lines=15,
                    )
                    with gr.Row():
                        quiz_file_output_file = gr.File(label="Download Quiz Text")
                        json_file_output_file = gr.File(label="Download Quiz JSON")

            # The output order must match the 7-tuple returned by
            # combined_process_video_file.
            extract_button_file.click(
                fn=_run_with_server_keys,
                inputs=[video_input, format_choice_file, elevenlabs_api_key, model_id, google_api_key],
                outputs=[audio_output_file, status_output_file, transcript_file_output,
                         transcript_status_output, quiz_output_file, quiz_file_output_file, json_file_output_file],
            )

if __name__ == "__main__":
    # NOTE(review): share=True requests a public link; confirm that is intended.
    app.launch(share=True, debug=True)
video_processing.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import json
4
+ import uuid
5
+ from moviepy import VideoFileClip
6
+
7
def extract_audio(video_path, output_format="mp3"):
    """Extract the audio track of a video into a new file in the working directory.

    Args:
        video_path: Path of the video to read. Falsy values (``None``, ``""``)
            are rejected before anything is opened.
        output_format: Extension used for the generated file name, e.g.
            ``"mp3"`` or ``"wav"``.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the generated audio file
        name on success and ``None`` on failure; ``message`` is a
        human-readable status string. Errors are reported through the
        message rather than raised.
    """
    if not video_path:
        return None, "No video provided"

    # Random suffix keeps concurrent/repeated runs from overwriting each other.
    output_path = f"extracted_audio_{uuid.uuid4().hex[:8]}.{output_format}"

    try:
        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                # Give a clear message instead of an AttributeError on None.
                return None, "Error extracting audio: video has no audio track"
            video.audio.write_audiofile(output_path)
            return output_path, "Audio extracted successfully"
        finally:
            # Always release the clip, including when writing the audio fails.
            video.close()
    except Exception as e:
        return None, f"Error extracting audio: {str(e)}"
20
+
21
def transcribe_audio(audio_file, api_key, model_id="scribe_v1", timeout=None):
    """Send an audio file to the ElevenLabs speech-to-text endpoint.

    Args:
        audio_file: Path of the audio file to upload.
        api_key: Value sent in the ``xi-api-key`` header. A falsy key is
            rejected without opening the file or contacting the API.
        model_id: Value sent as the ``model_id`` form field.
        timeout: Passed to ``requests.post``. The default ``None`` keeps the
            previous behaviour of waiting indefinitely.

    Returns:
        The decoded JSON response on success, otherwise a dict with a single
        ``"error"`` key describing what went wrong. Nothing is raised for a
        missing file, request failure or unparsable response.
    """
    if not api_key:
        return {"error": "Please provide an API key"}

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {"xi-api-key": api_key}

    # Opened separately from the request so a missing/unreadable file is
    # reported as such instead of escaping as an exception.
    try:
        audio_handle = open(audio_file, "rb")
    except OSError as e:
        return {"error": f"Could not open audio file: {str(e)}"}

    with audio_handle:
        files = {
            "file": audio_handle,
            # (None, value) sends a plain form field rather than a file part.
            "model_id": (None, model_id),
        }
        try:
            response = requests.post(url, headers=headers, files=files, timeout=timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            return {"error": f"API request failed: {str(e)}"}
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            return {"error": "Failed to parse API response"}
45
+
46
def save_transcription(transcription):
    """Write a transcription result's text to a new ``.txt`` file.

    Args:
        transcription: Dict produced by the transcription step. If it has an
            ``"error"`` key, that message is returned and nothing is written.
            Otherwise its ``"text"`` value is written; a missing or
            non-string value is replaced by ``"No text found"``.

    Returns:
        A ``(filename, message)`` tuple. ``filename`` is ``None`` when
        nothing was saved.
    """
    if not isinstance(transcription, dict):
        # e.g. None or a JSON list: there is no "text" field to read.
        return None, "No transcription result to save"
    if "error" in transcription:
        return None, transcription["error"]

    text = transcription.get("text")
    if not isinstance(text, str):
        text = "No text found"

    transcript_filename = f"transcription_{uuid.uuid4().hex[:8]}.txt"

    try:
        with open(transcript_filename, "w", encoding="utf-8") as f:
            f.write(text)
    except (OSError, UnicodeError) as e:
        return None, f"Error saving transcription: {str(e)}"

    return transcript_filename, "Transcription saved as text file"
59
+
60
def process_video_file(video_file, output_format, elevenlabs_api_key, model_id):
    """Extract audio from a video, transcribe it and save the transcript.

    Chains ``extract_audio``, ``transcribe_audio`` and ``save_transcription``.

    Returns:
        A 6-tuple ``(audio_path, audio_message, transcript_file,
        transcript_message, transcript_text, None)``. The last item is always
        ``None``. When no video is given or audio extraction fails, the path
        fields and ``transcript_text`` are ``None``; when transcription fails
        after a successful extraction, ``transcript_text`` is ``""``.
    """
    if video_file is None:
        return None, "Please upload a video file", None, "No video provided", None, None

    audio_path, message = extract_audio(video_file, output_format)
    if not (audio_path and os.path.exists(audio_path)):
        return None, message, None, "Audio extraction failed, cannot transcribe", None, None

    transcription = transcribe_audio(audio_path, elevenlabs_api_key, model_id)
    transcript_file, transcript_message = save_transcription(transcription)

    transcript_text = ""
    if transcript_file:
        try:
            with open(transcript_file, "r", encoding="utf-8") as f:
                transcript_text = f.read()
        except (OSError, UnicodeError) as e:
            # Report the failure instead of letting it escape to the caller;
            # the unreadable file is not offered as a result either.
            transcript_file = None
            transcript_message = f"Error reading transcription: {str(e)}"

    return audio_path, message, transcript_file, transcript_message, transcript_text, None
78
+