SameerArz committed on
Commit
d2c597b
·
verified ·
1 Parent(s): 52a55fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +197 -151
app.py CHANGED
@@ -1,39 +1,22 @@
1
- #!/usr/bin/env python3
2
- import streamlit as st
3
- from gradio_client import Client
4
  from groq import Groq
5
- from PIL import Image
6
- import moviepy.editor as mp
7
- from natsort import natsorted
8
  import os
9
- from dotenv import load_dotenv
10
- import json
11
-
12
- # Load environment variables
13
- load_dotenv()
14
-
15
- # Constants
16
- HF_TOKEN = os.getenv("HF_TOKEN")
17
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
18
- # Switching to HF Inference API for stability
19
- IMAGE_GENERATION_API = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1"
20
-
21
- # Initialize Groq client
22
- try:
23
- groq_client = Groq(api_key=GROQ_API_KEY)
24
- except Exception as e:
25
- st.error(f"Failed to initialize Groq client: {e}")
26
- groq_client = None
27
-
28
- # LLM Models
29
- LLM_MODELS = {
30
- "Mixtral 8x7B (Groq)": "mixtral-8x7b-32768",
31
- "Mistral 7B (HF)": "mistralai/Mixtral-7B-Instruct-v0.1",
32
- "LLaMA 13B (HF)": "meta-llama/Llama-13b-hf"
33
- }
34
-
35
- # Utility Functions
36
- def generate_tutor_output(subject, difficulty, student_input, model):
37
  prompt = f"""
38
  You are an expert tutor in {subject} at the {difficulty} level.
39
  The student has provided the following input: "{student_input}"
@@ -46,129 +29,192 @@ def generate_tutor_output(subject, difficulty, student_input, model):
46
  Format your response as a JSON object with keys: "lesson", "question", "feedback"
47
  """
48
 
49
- if model.startswith("mixtral") and groq_client:
50
- try:
51
- completion = groq_client.chat.completions.create(
52
- messages=[{
53
- "role": "system",
54
- "content": f"You are the world's best AI tutor for {subject}, renowned for clear, engaging explanations."
55
- }, {
56
- "role": "user",
57
- "content": prompt
58
- }],
59
- model=model,
60
- max_tokens=1000
61
- )
62
- return json.loads(completion.choices[0].message.content)
63
- except Exception as e:
64
- st.error(f"Groq error: {e}")
65
- return {"lesson": "Sorry, unable to generate lesson due to API issue.", "question": "N/A", "feedback": "Please try again or check your input."}
 
 
 
 
 
66
  else:
67
- try:
68
- client = Client("https://api-inference.huggingface.co/models/" + model, hf_token=HF_TOKEN)
69
- response = client.predict(prompt, api_name="/generate")
70
- return json.loads(response)
71
- except:
72
- st.warning(f"HF model {model} failed, falling back to Mixtral.")
73
- if groq_client:
74
- return generate_tutor_output(subject, difficulty, student_input, "mixtral-8x7b-32768")
75
- return {"lesson": "Sorry, unable to generate lesson.", "question": "N/A", "feedback": "N/A"}
76
 
77
- def generate_image(prompt, path='temp_image.png'):
78
- try:
79
- client = Client(IMAGE_GENERATION_API, hf_token=HF_TOKEN)
80
- result = client.predict(prompt, api_name="/predict")
81
- if isinstance(result, str): # Handle file path or binary data
82
- image = Image.open(result)
83
- else:
84
- image = Image.open(result)
85
- image.save(path)
86
- return path
87
- except Exception as e:
88
- st.error(f"Error generating image: {e}")
89
- return None
90
 
91
- def generate_video(images, audio_text, language, speaker, path='temp_video.mp4'):
 
92
  try:
93
- if not images or all(img is None for img in images):
94
- st.error("No valid images to create video.")
95
- return None
96
- audio_client = Client("habib926653/Multilingual-TTS")
97
- audio_result = audio_client.predict(
98
- text=audio_text,
99
- language_code=language,
100
- speaker=speaker,
101
- api_name="/text_to_speech_edge"
 
 
 
102
  )
103
- audio_file = audio_result[1]
104
- with open(audio_file, 'rb') as f:
105
- audio_bytes = f.read()
106
- audio_path = "temp_audio.mp3"
107
- with open(audio_path, 'wb') as f:
108
- f.write(audio_bytes)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
- audio_clip = mp.AudioFileClip(audio_path)
111
- duration_per_image = audio_clip.duration / len([img for img in images if img])
112
- image_clips = [mp.ImageClip(img).set_duration(duration_per_image) for img in images if img]
113
- if not image_clips:
114
- st.error("No image clips generated.")
115
- return None
116
- video = mp.concatenate_videoclips(image_clips, method="compose").set_audio(audio_clip)
117
- video.write_videofile(path, fps=24, codec='libx264')
118
- return path
 
 
 
 
119
  except Exception as e:
120
- st.error(f"Error generating video: {e}")
121
- return None
122
-
123
- # Streamlit App
124
- def main():
125
- st.markdown("<h1 style='text-align: center;'>EduAI: Your Interactive Tutor</h1>", unsafe_allow_html=True)
126
- st.markdown("<p style='text-align: center;'>Learn, Ask, Visualize! ❤️</p>", unsafe_allow_html=True)
127
-
128
- subject = st.selectbox("Choose Subject:", ["Math", "Science", "History", "Literature", "Code", "AI"])
129
- difficulty = st.selectbox("Difficulty Level:", ["Beginner", "Intermediate", "Advanced"])
130
- model = st.selectbox("Choose LLM Model:", list(LLM_MODELS.keys()))
131
- student_input = st.text_area("Your Question/Input (max 1500 chars):", max_chars=1500)
132
-
133
- if 'tutor_response' not in st.session_state:
134
- st.session_state.tutor_response = None
135
-
136
- if st.button("Generate Answer & Question"):
137
- if student_input:
138
- with st.spinner("Generating your lesson..."):
139
- response = generate_tutor_output(subject, difficulty, student_input, LLM_MODELS[model])
140
- st.session_state.tutor_response = response
141
- else:
142
- st.warning("Please provide an input!")
143
-
144
- if st.session_state.tutor_response:
145
- st.markdown("### Lesson")
146
- st.write(st.session_state.tutor_response["lesson"])
147
- st.markdown("### Comprehension Question")
148
- st.write(st.session_state.tutor_response["question"])
149
- st.markdown("### Feedback")
150
- st.write(st.session_state.tutor_response["feedback"])
151
-
152
- col1, col2 = st.columns(2)
153
- with col1:
154
- if st.button("Generate Image"):
155
- with st.spinner("Creating image..."):
156
- image_path = generate_image(st.session_state.tutor_response["lesson"])
157
- if image_path:
158
- st.image(image_path, caption="Visual of your lesson")
159
- with col2:
160
- if st.button("Generate Video"):
161
- with st.spinner("Creating video..."):
162
- audio_client = Client("habib926653/Multilingual-TTS")
163
- speakers_response = audio_client.predict(language="English", api_name="/get_speakers")
164
- speaker = speakers_response["choices"][0][0]
165
- images = [generate_image(st.session_state.tutor_response["lesson"])]
166
- video_path = generate_video(images, st.session_state.tutor_response["lesson"], "English", speaker)
167
- if video_path:
168
- st.video(video_path)
169
-
170
- st.markdown("---")
171
- st.markdown("<p style='text-align: center;'>Built for learning, powered by AI!</p>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
  if __name__ == "__main__":
174
- main()
 
1
+ import gradio as gr
 
 
2
  from groq import Groq
 
 
 
3
  import os
4
+ import threading
5
+ from moviepy.editor import TextClip, concatenate_videoclips, AudioFileClip, ColorClip
6
+ import tempfile
7
+
8
+ # Initialize Groq client with your API key
9
+ client = Groq(api_key=os.environ["GROQ_API_KEY"])
10
+
11
+ # Load Text-to-Image Models
12
+ model1 = gr.load("models/prithivMLmods/SD3.5-Turbo-Realism-2.0-LoRA")
13
+ model2 = gr.load("models/Purz/face-projection")
14
+
15
+ # Stop event for threading (image generation)
16
+ stop_event = threading.Event()
17
+
18
+ # Function to generate tutor output (lesson, question, feedback)
19
+ def generate_tutor_output(subject, difficulty, student_input):
 
 
 
 
 
 
 
 
 
 
 
 
20
  prompt = f"""
21
  You are an expert tutor in {subject} at the {difficulty} level.
22
  The student has provided the following input: "{student_input}"
 
29
  Format your response as a JSON object with keys: "lesson", "question", "feedback"
30
  """
31
 
32
+ completion = client.chat.completions.create(
33
+ messages=[{
34
+ "role": "system",
35
+ "content": f"You are the world's best AI tutor, renowned for your ability to explain complex concepts in an engaging, clear, and memorable way and giving math examples. Your expertise in {subject} is unparalleled, and you're adept at tailoring your teaching to {difficulty} level students."
36
+ }, {
37
+ "role": "user",
38
+ "content": prompt,
39
+ }],
40
+ model="mixtral-8x7b-32768",
41
+ max_tokens=1000,
42
+ )
43
+
44
+ return completion.choices[0].message.content
45
+
46
+ # Function to generate images based on model selection
47
def generate_images(text, selected_model):
    """Produce three image variations of ``text`` using the chosen model.

    Args:
        text: Prompt text; each call appends " variation N" (N = 1..3).
        selected_model: Label picked in the UI. Only
            "Model 1 (Turbo Realism)" and "Model 2 (Face Projection)"
            are recognised.

    Returns:
        A list with three entries: the three model results on success, or
        three copies of a message string when the label is unknown or the
        module-level ``stop_event`` was set during the run.
    """
    # Every run starts with the stop flag lowered.
    stop_event.clear()

    wants_realism = selected_model == "Model 1 (Turbo Realism)"
    wants_face = selected_model == "Model 2 (Face Projection)"
    if not (wants_realism or wants_face):
        return ["Invalid model selection."] * 3
    backend = model1 if wants_realism else model2

    images = []
    for variation in range(1, 4):
        # The flag is re-checked before each generation so a stop request
        # discards any partial results.
        if stop_event.is_set():
            return ["Image generation stopped by user."] * 3
        images.append(backend(f"{text} variation {variation}"))

    return images
 
 
 
67
 
68
+ # New function to generate text-to-video with voice
69
def _write_silent_wav(path, duration, sample_rate=44100):
    """Write ``duration`` seconds of 16-bit mono silence to ``path`` as WAV."""
    import wave

    remaining = max(1, int(duration * sample_rate))
    one_second = b"\x00\x00" * sample_rate
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
        wav_file.setframerate(sample_rate)
        # Written one second at a time so long narrations do not build a
        # single huge bytes object.
        while remaining > 0:
            frames = min(remaining, sample_rate)
            wav_file.writeframes(one_second[: 2 * frames])
            remaining -= frames


def generate_text_to_video(text):
    """Render ``text`` as a captioned video and return the file path.

    The text is first rewritten as narration by the Groq chat model, then
    shown ten words at a time as white-on-black 1280x720 captions.

    NOTE: no text-to-speech service is wired in yet. The audio track is a
    silent placeholder whose length is estimated at two words per second;
    replace ``_write_silent_wav`` with a real TTS call to get a voice.

    Args:
        text: The text to narrate.

    Returns:
        Path to a temporary ``.mp4`` file on success. On any failure the
        function does not raise; it returns a string starting with
        "Error generating video: " (callers must check for this).
    """
    audio_path = None
    video_path = None
    try:
        narration_prompt = f"Convert this text to a natural-sounding narration: {text}"
        narration_response = client.chat.completions.create(
            messages=[{
                "role": "system",
                "content": "You are an AI voice generator that produces natural, human-like speech."
            }, {
                "role": "user",
                "content": narration_prompt,
            }],
            model="mixtral-8x7b-32768",
            max_tokens=500,
        )
        narration_text = narration_response.choices[0].message.content

        words = (narration_text or "").split()
        if not words:
            # Guards the divisions below and concatenating zero clips.
            raise ValueError("narration text is empty")

        audio_duration = len(words) / 2  # Rough estimate: 2 words per second

        chunk_size = 10  # Display 10 words at a time
        chunks = [
            " ".join(words[start:start + chunk_size])
            for start in range(0, len(words), chunk_size)
        ]
        # Divide by the real number of chunks (a ceiling division of the
        # word count) so the clips add up to exactly the audio duration.
        clip_duration = audio_duration / len(chunks)

        # mkstemp + close: the path is only written after our handle is
        # released, which also works on platforms that lock open files.
        audio_fd, audio_path = tempfile.mkstemp(suffix=".wav")
        os.close(audio_fd)
        _write_silent_wav(audio_path, audio_duration)

        clips = [
            TextClip(
                chunk, fontsize=50, color='white', size=(1280, 720), bg_color='black'
            ).set_duration(clip_duration)
            for chunk in chunks
        ]
        final_video = concatenate_videoclips(clips)

        audio_clip = AudioFileClip(audio_path)
        try:
            final_video = final_video.set_audio(audio_clip)

            video_fd, video_path = tempfile.mkstemp(suffix=".mp4")
            os.close(video_fd)
            final_video.write_videofile(video_path, fps=24, logger=None)
        finally:
            # Release the reader so the temp audio file can be deleted.
            audio_clip.close()

        return video_path
    except Exception as e:
        # Do not leave a half-written video behind.
        if video_path and os.path.exists(video_path):
            os.unlink(video_path)
        return f"Error generating video: {str(e)}"
    finally:
        # The placeholder audio is never needed after rendering.
        if audio_path and os.path.exists(audio_path):
            os.unlink(audio_path)
122
+
123
+ # Set up the Gradio interface
124
+ with gr.Blocks() as demo:
125
+ gr.Markdown("# 🎓 Your AI Tutor with Visuals & Images")
126
+
127
+ # Section for generating Text-based output (lesson, question, feedback)
128
+ with gr.Row():
129
+ with gr.Column(scale=2):
130
+ subject = gr.Dropdown(
131
+ ["Math", "Science", "History", "Literature", "Code", "AI"],
132
+ label="Subject",
133
+ info="Choose the subject of your lesson"
134
+ )
135
+ difficulty = gr.Radio(
136
+ ["Beginner", "Intermediate", "Advanced"],
137
+ label="Difficulty Level",
138
+ info="Select your proficiency level"
139
+ )
140
+ student_input = gr.Textbox(
141
+ placeholder="Type your query here...",
142
+ label="Your Input",
143
+ info="Enter the topic you want to learn"
144
+ )
145
+ submit_button_text = gr.Button("Generate Lesson & Question", variant="primary")
146
+
147
+ with gr.Column(scale=3):
148
+ lesson_output = gr.Markdown(label="Lesson")
149
+ question_output = gr.Markdown(label="Comprehension Question")
150
+ feedback_output = gr.Markdown(label="Feedback")
151
+
152
+ # Section for generating Visual output
153
+ with gr.Row():
154
+ with gr.Column(scale=2):
155
+ model_selector = gr.Radio(
156
+ ["Model 1 (Turbo Realism)", "Model 2 (Face Projection)"],
157
+ label="Select Image Generation Model",
158
+ value="Model 1 (Turbo Realism)"
159
+ )
160
+ submit_button_visual = gr.Button("Generate Visuals", variant="primary")
161
+ submit_button_video = gr.Button("Generate Video with Voice", variant="primary") # New button
162
+
163
+ with gr.Column(scale=3):
164
+ output1 = gr.Image(label="Generated Image 1")
165
+ output2 = gr.Image(label="Generated Image 2")
166
+ output3 = gr.Image(label="Generated Image 3")
167
+ video_output = gr.Video(label="Generated Video with Voice") # New video output
168
+
169
+ gr.Markdown("""
170
+ ### How to Use
171
+ 1. **Text Section**: Select a subject and difficulty, type your query, and click 'Generate Lesson & Question' to get your personalized lesson, comprehension question, and feedback.
172
+ 2. **Visual Section**: Select the model for image generation, then click 'Generate Visuals' to receive 3 variations of an image based on your topic. Click 'Generate Video with Voice' to create a video with narration.
173
+ 3. Review the AI-generated content to enhance your learning experience!
174
+ """)
175
+
176
def process_output_text(subject, difficulty, student_input):
    """Generate a lesson and split the model's JSON reply into three fields.

    Args:
        subject: Subject chosen in the UI.
        difficulty: Difficulty level chosen in the UI.
        student_input: The student's free-text query.

    Returns:
        A ``(lesson, question, feedback)`` tuple. If generation or parsing
        fails, three fixed placeholder strings are returned instead and the
        failure is logged.
    """
    import json
    import logging

    try:
        tutor_output = generate_tutor_output(subject, difficulty, student_input)

        # The reply is untrusted model text: parse it as JSON, never eval().
        # Models often wrap the object in prose or a ```json fence, so keep
        # only the outermost {...} span.
        start = tutor_output.find("{")
        end = tutor_output.rfind("}")
        if start == -1 or end <= start:
            raise ValueError("no JSON object found in model output")

        # strict=False tolerates raw newlines inside string values, which
        # long lesson texts frequently contain.
        parsed = json.loads(tutor_output[start:end + 1], strict=False)
        return parsed["lesson"], parsed["question"], parsed["feedback"]
    except Exception:
        # Best-effort UI handler: keep the placeholders, but record why.
        logging.getLogger(__name__).exception("Failed to produce tutor output")
        return "Error parsing output", "No question available", "No feedback available"
183
+
184
def process_output_visual(text, selected_model):
    """Generate three images for ``text`` and unpack them for the UI.

    Args:
        text: Prompt text passed to ``generate_images``.
        selected_model: Model label passed to ``generate_images``.

    Returns:
        A 3-tuple holding the first three entries returned by
        ``generate_images``, or ``(None, None, None)`` if generation fails.
    """
    import logging

    try:
        images = generate_images(text, selected_model)
        return images[0], images[1], images[2]
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit still propagate; the failure is logged, not hidden.
        logging.getLogger(__name__).exception("Image generation failed")
        return None, None, None
190
+
191
def process_output_video(text):
    """Return a playable video path for ``text``, or None on failure.

    ``generate_text_to_video`` reports failures by returning an
    "Error generating video: ..." message instead of raising. That message
    must not reach the video component as if it were a file path, so the
    result is forwarded only when it names an existing file.

    Args:
        text: The text to turn into a video.

    Returns:
        The path of the generated video file, or ``None`` if generation
        failed.
    """
    import logging

    log = logging.getLogger(__name__)
    try:
        video_path = generate_text_to_video(text)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit still propagate.
        log.exception("Video generation raised")
        return None

    if isinstance(video_path, str) and os.path.isfile(video_path):
        return video_path

    log.warning("Video generation did not produce a file: %r", video_path)
    return None
197
+
198
+ # Generate Text-based Output
199
+ submit_button_text.click(
200
+ fn=process_output_text,
201
+ inputs=[subject, difficulty, student_input],
202
+ outputs=[lesson_output, question_output, feedback_output]
203
+ )
204
+
205
+ # Generate Visual Output
206
+ submit_button_visual.click(
207
+ fn=process_output_visual,
208
+ inputs=[student_input, model_selector],
209
+ outputs=[output1, output2, output3]
210
+ )
211
+
212
+ # Generate Video Output
213
+ submit_button_video.click(
214
+ fn=process_output_video,
215
+ inputs=[student_input],
216
+ outputs=[video_output]
217
+ )
218
 
219
  if __name__ == "__main__":
220
+ demo.launch(server_name="0.0.0.0", server_port=7860)