Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -4,15 +4,10 @@ import os
|
|
4 |
import threading
|
5 |
import base64
|
6 |
from io import BytesIO
|
7 |
-
from mistralai import Mistral # Pixtral-12B integration
|
8 |
|
9 |
-
# Initialize Groq client
|
10 |
client = Groq(api_key=os.environ["GROQ_API_KEY"])
|
11 |
|
12 |
-
# Initialize Mistral AI client (Pixtral-12B-2409 for VQA)
|
13 |
-
mistral_client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
|
14 |
-
pixtral_model = "pixtral-12b-2409"
|
15 |
-
|
16 |
# Load Text-to-Image Models
|
17 |
model1 = gr.load("models/prithivMLmods/SD3.5-Turbo-Realism-2.0-LoRA")
|
18 |
model2 = gr.load("models/Purz/face-projection")
|
@@ -20,7 +15,6 @@ model2 = gr.load("models/Purz/face-projection")
|
|
20 |
# Stop event for threading (image generation)
|
21 |
stop_event = threading.Event()
|
22 |
|
23 |
-
|
24 |
# Convert PIL image to Base64
|
25 |
def pil_to_base64(pil_image, image_format='jpeg'):
|
26 |
buffered = BytesIO()
|
@@ -28,57 +22,7 @@ def pil_to_base64(pil_image, image_format='jpeg'):
|
|
28 |
base64_string = base64.b64encode(buffered.getvalue()).decode('utf-8')
|
29 |
return base64_string, image_format
|
30 |
|
31 |
-
|
32 |
-
# Function to generate tutor output (lesson, question, feedback)
|
33 |
-
def generate_tutor_output(subject, difficulty, student_input):
|
34 |
-
prompt = f"""
|
35 |
-
You are an expert tutor in {subject} at the {difficulty} level.
|
36 |
-
The student has provided the following input: "{student_input}"
|
37 |
-
|
38 |
-
Please generate:
|
39 |
-
1. A brief, engaging lesson on the topic (2-3 paragraphs)
|
40 |
-
2. A thought-provoking question to check understanding
|
41 |
-
3. Constructive feedback on the student's input
|
42 |
-
|
43 |
-
Format your response as a JSON object with keys: "lesson", "question", "feedback"
|
44 |
-
"""
|
45 |
-
|
46 |
-
completion = client.chat.completions.create(
|
47 |
-
messages=[{
|
48 |
-
"role": "system",
|
49 |
-
"content": f"You are the world's best AI tutor, renowned for explaining complex concepts with clarity and examples. Your expertise in {subject} is unparalleled, and you're adept at tailoring your teaching to {difficulty} level students."
|
50 |
-
}, {
|
51 |
-
"role": "user",
|
52 |
-
"content": prompt,
|
53 |
-
}],
|
54 |
-
model="mixtral-8x7b-32768",
|
55 |
-
max_tokens=1000,
|
56 |
-
)
|
57 |
-
|
58 |
-
return completion.choices[0].message.content
|
59 |
-
|
60 |
-
|
61 |
-
# Function to generate images based on model selection
|
62 |
-
def generate_images(text, selected_model):
|
63 |
-
stop_event.clear()
|
64 |
-
|
65 |
-
model = model1 if selected_model == "Model 1 (Turbo Realism)" else model2 if selected_model == "Model 2 (Face Projection)" else None
|
66 |
-
if not model:
|
67 |
-
return ["Invalid model selection."] * 3
|
68 |
-
|
69 |
-
results = []
|
70 |
-
for i in range(3):
|
71 |
-
if stop_event.is_set():
|
72 |
-
return ["Image generation stopped by user."] * 3
|
73 |
-
|
74 |
-
modified_text = f"{text} variation {i+1}"
|
75 |
-
result = model(modified_text)
|
76 |
-
results.append(result)
|
77 |
-
|
78 |
-
return results
|
79 |
-
|
80 |
-
|
81 |
-
# Function for Visual Question Answering (Pixtral-12B)
|
82 |
def answer_question(text, image, temperature=0.0, max_tokens=1024):
|
83 |
base64_string, file_format = pil_to_base64(image)
|
84 |
|
@@ -92,8 +36,8 @@ def answer_question(text, image, temperature=0.0, max_tokens=1024):
|
|
92 |
}
|
93 |
]
|
94 |
|
95 |
-
chat_response = mistral_client.chat.complete(
|
96 |
-
model=pixtral_model,
|
97 |
messages=messages,
|
98 |
temperature=temperature,
|
99 |
max_tokens=max_tokens
|
@@ -111,36 +55,8 @@ def clear_all():
|
|
111 |
with gr.Blocks() as demo:
|
112 |
gr.Markdown("# 🎓 AI Tutor & Visual Learning Assistant")
|
113 |
|
114 |
-
# Section
|
115 |
-
|
116 |
-
with gr.Column(scale=2):
|
117 |
-
subject = gr.Dropdown(["Math", "Science", "History", "Literature", "Code", "AI"], label="Subject")
|
118 |
-
difficulty = gr.Radio(["Beginner", "Intermediate", "Advanced"], label="Difficulty Level")
|
119 |
-
student_input = gr.Textbox(placeholder="Type your query here...", label="Your Input")
|
120 |
-
submit_button_text = gr.Button("Generate Lesson & Question", variant="primary")
|
121 |
-
|
122 |
-
with gr.Column(scale=3):
|
123 |
-
lesson_output = gr.Markdown(label="Lesson")
|
124 |
-
question_output = gr.Markdown(label="Comprehension Question")
|
125 |
-
feedback_output = gr.Markdown(label="Feedback")
|
126 |
-
|
127 |
-
# Section 2: Image Generation
|
128 |
-
with gr.Row():
|
129 |
-
with gr.Column(scale=2):
|
130 |
-
model_selector = gr.Radio(
|
131 |
-
["Model 1 (Turbo Realism)", "Model 2 (Face Projection)"],
|
132 |
-
label="Select Image Generation Model",
|
133 |
-
value="Model 1 (Turbo Realism)"
|
134 |
-
)
|
135 |
-
submit_button_visual = gr.Button("Generate Visuals", variant="primary")
|
136 |
-
|
137 |
-
with gr.Column(scale=3):
|
138 |
-
output1 = gr.Image(label="Generated Image 1")
|
139 |
-
output2 = gr.Image(label="Generated Image 2")
|
140 |
-
output3 = gr.Image(label="Generated Image 3")
|
141 |
-
|
142 |
-
# Section 3: Visual Question Answering (Pixtral-12B)
|
143 |
-
gr.Markdown("## 🖼️ Visual Question Answering (Pixtral-12B)")
|
144 |
with gr.Row():
|
145 |
with gr.Column(scale=2):
|
146 |
question = gr.Textbox(placeholder="Ask about the image...", lines=2)
|
@@ -150,26 +66,12 @@ with gr.Blocks() as demo:
|
|
150 |
max_tokens = gr.Slider(label="Max Tokens", minimum=128, maximum=2048, value=1024, step=128)
|
151 |
|
152 |
with gr.Column(scale=3):
|
153 |
-
output_text = gr.Textbox(lines=10, label="
|
154 |
|
155 |
with gr.Row():
|
156 |
clear_btn = gr.Button("Clear", variant="secondary")
|
157 |
submit_btn_vqa = gr.Button("Submit", variant="primary")
|
158 |
|
159 |
-
# Generate Text-based Output
|
160 |
-
submit_button_text.click(
|
161 |
-
fn=lambda subject, difficulty, student_input: eval(generate_tutor_output(subject, difficulty, student_input)),
|
162 |
-
inputs=[subject, difficulty, student_input],
|
163 |
-
outputs=[lesson_output, question_output, feedback_output]
|
164 |
-
)
|
165 |
-
|
166 |
-
# Generate Visual Output
|
167 |
-
submit_button_visual.click(
|
168 |
-
fn=generate_images,
|
169 |
-
inputs=[student_input, model_selector],
|
170 |
-
outputs=[output1, output2, output3]
|
171 |
-
)
|
172 |
-
|
173 |
# VQA Processing
|
174 |
submit_btn_vqa.click(
|
175 |
fn=answer_question,
|
|
|
4 |
import threading
|
5 |
import base64
|
6 |
from io import BytesIO
|
|
|
7 |
|
8 |
+
# Initialize Groq client (No need for Mistral API)
|
9 |
client = Groq(api_key=os.environ["GROQ_API_KEY"])
|
10 |
|
|
|
|
|
|
|
|
|
11 |
# Load Text-to-Image Models
|
12 |
model1 = gr.load("models/prithivMLmods/SD3.5-Turbo-Realism-2.0-LoRA")
|
13 |
model2 = gr.load("models/Purz/face-projection")
|
|
|
15 |
# Stop event for threading (image generation)
|
16 |
stop_event = threading.Event()
|
17 |
|
|
|
18 |
# Convert PIL image to Base64
|
19 |
def pil_to_base64(pil_image, image_format='jpeg'):
|
20 |
buffered = BytesIO()
|
|
|
22 |
base64_string = base64.b64encode(buffered.getvalue()).decode('utf-8')
|
23 |
return base64_string, image_format
|
24 |
|
25 |
+
# Function for Visual Question Answering (Groq)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
def answer_question(text, image, temperature=0.0, max_tokens=1024):
|
27 |
base64_string, file_format = pil_to_base64(image)
|
28 |
|
|
|
36 |
}
|
37 |
]
|
38 |
|
39 |
+
chat_response = client.chat.completions.create(
|
40 |
+
model="gemma2-9b-it", # Groq model for vision tasks
|
41 |
messages=messages,
|
42 |
temperature=temperature,
|
43 |
max_tokens=max_tokens
|
|
|
55 |
with gr.Blocks() as demo:
|
56 |
gr.Markdown("# 🎓 AI Tutor & Visual Learning Assistant")
|
57 |
|
58 |
+
# Section 3: Visual Question Answering (Groq)
|
59 |
+
gr.Markdown("## 🖼️ Visual Question Answering (Groq)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
with gr.Row():
|
61 |
with gr.Column(scale=2):
|
62 |
question = gr.Textbox(placeholder="Ask about the image...", lines=2)
|
|
|
66 |
max_tokens = gr.Slider(label="Max Tokens", minimum=128, maximum=2048, value=1024, step=128)
|
67 |
|
68 |
with gr.Column(scale=3):
|
69 |
+
output_text = gr.Textbox(lines=10, label="Groq VQA Response")
|
70 |
|
71 |
with gr.Row():
|
72 |
clear_btn = gr.Button("Clear", variant="secondary")
|
73 |
submit_btn_vqa = gr.Button("Submit", variant="primary")
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
# VQA Processing
|
76 |
submit_btn_vqa.click(
|
77 |
fn=answer_question,
|