SameerArz committed on
Commit
60ed045
·
verified ·
1 Parent(s): 68a1d11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -105
app.py CHANGED
@@ -4,15 +4,10 @@ import os
4
  import threading
5
  import base64
6
  from io import BytesIO
7
- from mistralai import Mistral # Pixtral-12B integration
8
 
9
- # Initialize Groq client
10
  client = Groq(api_key=os.environ["GROQ_API_KEY"])
11
 
12
- # Initialize Mistral AI client (Pixtral-12B-2409 for VQA)
13
- mistral_client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
14
- pixtral_model = "pixtral-12b-2409"
15
-
16
  # Load Text-to-Image Models
17
  model1 = gr.load("models/prithivMLmods/SD3.5-Turbo-Realism-2.0-LoRA")
18
  model2 = gr.load("models/Purz/face-projection")
@@ -20,7 +15,6 @@ model2 = gr.load("models/Purz/face-projection")
20
  # Stop event for threading (image generation)
21
  stop_event = threading.Event()
22
 
23
-
24
  # Convert PIL image to Base64
25
  def pil_to_base64(pil_image, image_format='jpeg'):
26
  buffered = BytesIO()
@@ -28,57 +22,7 @@ def pil_to_base64(pil_image, image_format='jpeg'):
28
  base64_string = base64.b64encode(buffered.getvalue()).decode('utf-8')
29
  return base64_string, image_format
30
 
31
-
32
- # Function to generate tutor output (lesson, question, feedback)
33
- def generate_tutor_output(subject, difficulty, student_input):
34
- prompt = f"""
35
- You are an expert tutor in {subject} at the {difficulty} level.
36
- The student has provided the following input: "{student_input}"
37
-
38
- Please generate:
39
- 1. A brief, engaging lesson on the topic (2-3 paragraphs)
40
- 2. A thought-provoking question to check understanding
41
- 3. Constructive feedback on the student's input
42
-
43
- Format your response as a JSON object with keys: "lesson", "question", "feedback"
44
- """
45
-
46
- completion = client.chat.completions.create(
47
- messages=[{
48
- "role": "system",
49
- "content": f"You are the world's best AI tutor, renowned for explaining complex concepts with clarity and examples. Your expertise in {subject} is unparalleled, and you're adept at tailoring your teaching to {difficulty} level students."
50
- }, {
51
- "role": "user",
52
- "content": prompt,
53
- }],
54
- model="mixtral-8x7b-32768",
55
- max_tokens=1000,
56
- )
57
-
58
- return completion.choices[0].message.content
59
-
60
-
61
- # Function to generate images based on model selection
62
- def generate_images(text, selected_model):
63
- stop_event.clear()
64
-
65
- model = model1 if selected_model == "Model 1 (Turbo Realism)" else model2 if selected_model == "Model 2 (Face Projection)" else None
66
- if not model:
67
- return ["Invalid model selection."] * 3
68
-
69
- results = []
70
- for i in range(3):
71
- if stop_event.is_set():
72
- return ["Image generation stopped by user."] * 3
73
-
74
- modified_text = f"{text} variation {i+1}"
75
- result = model(modified_text)
76
- results.append(result)
77
-
78
- return results
79
-
80
-
81
- # Function for Visual Question Answering (Pixtral-12B)
82
  def answer_question(text, image, temperature=0.0, max_tokens=1024):
83
  base64_string, file_format = pil_to_base64(image)
84
 
@@ -92,8 +36,8 @@ def answer_question(text, image, temperature=0.0, max_tokens=1024):
92
  }
93
  ]
94
 
95
- chat_response = mistral_client.chat.complete(
96
- model=pixtral_model,
97
  messages=messages,
98
  temperature=temperature,
99
  max_tokens=max_tokens
@@ -111,36 +55,8 @@ def clear_all():
111
  with gr.Blocks() as demo:
112
  gr.Markdown("# 🎓 AI Tutor & Visual Learning Assistant")
113
 
114
- # Section 1: Text-based Learning
115
- with gr.Row():
116
- with gr.Column(scale=2):
117
- subject = gr.Dropdown(["Math", "Science", "History", "Literature", "Code", "AI"], label="Subject")
118
- difficulty = gr.Radio(["Beginner", "Intermediate", "Advanced"], label="Difficulty Level")
119
- student_input = gr.Textbox(placeholder="Type your query here...", label="Your Input")
120
- submit_button_text = gr.Button("Generate Lesson & Question", variant="primary")
121
-
122
- with gr.Column(scale=3):
123
- lesson_output = gr.Markdown(label="Lesson")
124
- question_output = gr.Markdown(label="Comprehension Question")
125
- feedback_output = gr.Markdown(label="Feedback")
126
-
127
- # Section 2: Image Generation
128
- with gr.Row():
129
- with gr.Column(scale=2):
130
- model_selector = gr.Radio(
131
- ["Model 1 (Turbo Realism)", "Model 2 (Face Projection)"],
132
- label="Select Image Generation Model",
133
- value="Model 1 (Turbo Realism)"
134
- )
135
- submit_button_visual = gr.Button("Generate Visuals", variant="primary")
136
-
137
- with gr.Column(scale=3):
138
- output1 = gr.Image(label="Generated Image 1")
139
- output2 = gr.Image(label="Generated Image 2")
140
- output3 = gr.Image(label="Generated Image 3")
141
-
142
- # Section 3: Visual Question Answering (Pixtral-12B)
143
- gr.Markdown("## 🖼️ Visual Question Answering (Pixtral-12B)")
144
  with gr.Row():
145
  with gr.Column(scale=2):
146
  question = gr.Textbox(placeholder="Ask about the image...", lines=2)
@@ -150,26 +66,12 @@ with gr.Blocks() as demo:
150
  max_tokens = gr.Slider(label="Max Tokens", minimum=128, maximum=2048, value=1024, step=128)
151
 
152
  with gr.Column(scale=3):
153
- output_text = gr.Textbox(lines=10, label="Pixtral 12B Response")
154
 
155
  with gr.Row():
156
  clear_btn = gr.Button("Clear", variant="secondary")
157
  submit_btn_vqa = gr.Button("Submit", variant="primary")
158
 
159
- # Generate Text-based Output
160
- submit_button_text.click(
161
- fn=lambda subject, difficulty, student_input: eval(generate_tutor_output(subject, difficulty, student_input)),
162
- inputs=[subject, difficulty, student_input],
163
- outputs=[lesson_output, question_output, feedback_output]
164
- )
165
-
166
- # Generate Visual Output
167
- submit_button_visual.click(
168
- fn=generate_images,
169
- inputs=[student_input, model_selector],
170
- outputs=[output1, output2, output3]
171
- )
172
-
173
  # VQA Processing
174
  submit_btn_vqa.click(
175
  fn=answer_question,
 
4
  import threading
5
  import base64
6
  from io import BytesIO
 
7
 
8
+ # Initialize Groq client (No need for Mistral API)
9
  client = Groq(api_key=os.environ["GROQ_API_KEY"])
10
 
 
 
 
 
11
  # Load Text-to-Image Models
12
  model1 = gr.load("models/prithivMLmods/SD3.5-Turbo-Realism-2.0-LoRA")
13
  model2 = gr.load("models/Purz/face-projection")
 
15
  # Stop event for threading (image generation)
16
  stop_event = threading.Event()
17
 
 
18
  # Convert PIL image to Base64
19
  def pil_to_base64(pil_image, image_format='jpeg'):
20
  buffered = BytesIO()
 
22
  base64_string = base64.b64encode(buffered.getvalue()).decode('utf-8')
23
  return base64_string, image_format
24
 
25
+ # Function for Visual Question Answering (Groq)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  def answer_question(text, image, temperature=0.0, max_tokens=1024):
27
  base64_string, file_format = pil_to_base64(image)
28
 
 
36
  }
37
  ]
38
 
39
+ chat_response = client.chat.completions.create(
40
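+ model="gemma2-9b-it", # TODO(review): gemma2-9b-it appears to be a text-only model; confirm it accepts image input or switch to a vision-capable Groq model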
41
  messages=messages,
42
  temperature=temperature,
43
  max_tokens=max_tokens
 
55
  with gr.Blocks() as demo:
56
  gr.Markdown("# 🎓 AI Tutor & Visual Learning Assistant")
57
 
58
+ # Visual Question Answering (Groq)
59
+ gr.Markdown("## 🖼️ Visual Question Answering (Groq)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  with gr.Row():
61
  with gr.Column(scale=2):
62
  question = gr.Textbox(placeholder="Ask about the image...", lines=2)
 
66
  max_tokens = gr.Slider(label="Max Tokens", minimum=128, maximum=2048, value=1024, step=128)
67
 
68
  with gr.Column(scale=3):
69
+ output_text = gr.Textbox(lines=10, label="Groq VQA Response")
70
 
71
  with gr.Row():
72
  clear_btn = gr.Button("Clear", variant="secondary")
73
  submit_btn_vqa = gr.Button("Submit", variant="primary")
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  # VQA Processing
76
  submit_btn_vqa.click(
77
  fn=answer_question,