shukdevdatta123 committed
Commit 90df5a7 · verified · 1 Parent(s): eddb24e

Delete app.py

Files changed (1):
  app.py +0 -522
app.py DELETED
@@ -1,522 +0,0 @@
-import gradio as gr
-import openai
-import base64
-from PIL import Image
-import io
-import os
-import tempfile
-import fitz  # PyMuPDF for PDF handling
-
-# Function to extract text from PDF files
-def extract_text_from_pdf(pdf_file):
-    try:
-        text = ""
-        pdf_document = fitz.open(pdf_file)
-
-        for page_num in range(len(pdf_document)):
-            page = pdf_document[page_num]
-            text += page.get_text()
-
-        pdf_document.close()
-        return text
-    except Exception as e:
-        return f"Error extracting text from PDF: {str(e)}"
-
-# Function to generate an MCQ quiz from PDF content
-def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice):
-    if not openai_api_key:
-        return "Error: No API key provided."
-
-    openai.api_key = openai_api_key
-
-    # Limit content length to avoid token limits
-    limited_content = pdf_content[:8000] if len(pdf_content) > 8000 else pdf_content
-
-    prompt = f"""Based on the following document content, generate {num_questions} multiple-choice quiz questions.
-For each question:
-1. Create a clear question based on key concepts in the document
-2. Provide 4 possible answers (A, B, C, D)
-3. Indicate the correct answer
-4. Briefly explain why the answer is correct
-
-Format the output clearly with each question numbered and separated.
-
-Document content:
-{limited_content}
-"""
-
-    try:
-        messages = [
-            {"role": "user", "content": prompt}
-        ]
-
-        response = openai.ChatCompletion.create(
-            model=model_choice,
-            messages=messages
-        )
-
-        return response.choices[0].message.content
-    except Exception as e:
-        return f"Error generating quiz: {str(e)}"
-
-# Function to send the request to the OpenAI API with text, image, or PDF input
-def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_effort="medium", model_choice="o1"):
-    if not openai_api_key:
-        return "Error: No API key provided."
-
-    openai.api_key = openai_api_key
-
-    # Process the input depending on whether it's text, an image, or a PDF-related query
-    if pdf_content and input_text:
-        # For PDF queries, combine the PDF content with the user's question
-        prompt = f"Based on the following document content, please answer this question: '{input_text}'\n\nDocument content:\n{pdf_content}"
-        input_content = prompt
-    elif image:
-        # Convert the image to a base64 string
-        image_info = get_base64_string_from_image(image)
-        input_content = f"data:image/png;base64,{image_info}"
-    else:
-        # Plain text input
-        input_content = input_text
-
-    # Prepare the messages for the OpenAI API
-    if model_choice == "o1":
-        if image and not pdf_content:
-            messages = [
-                {"role": "user", "content": [{"type": "image_url", "image_url": {"url": input_content}}]}
-            ]
-        else:
-            messages = [
-                {"role": "user", "content": input_content}
-            ]
-    elif model_choice == "o3-mini":
-        messages = [
-            {"role": "user", "content": input_content}
-        ]
-
-    try:
-        # Call the OpenAI API with the selected model
-        response = openai.ChatCompletion.create(
-            model=model_choice,
-            messages=messages,
-            max_completion_tokens=2000
-        )
-
-        return response.choices[0].message.content
-    except Exception as e:
-        return f"Error calling OpenAI API: {str(e)}"
-
-# Function to convert an uploaded image to a base64 string
-def get_base64_string_from_image(pil_image):
-    # Convert the PIL Image to bytes
-    buffered = io.BytesIO()
-    pil_image.save(buffered, format="PNG")
-    img_bytes = buffered.getvalue()
-    base64_str = base64.b64encode(img_bytes).decode("utf-8")
-    return base64_str
-
-# Function to transcribe audio to text using the OpenAI Whisper API
-def transcribe_audio(audio, openai_api_key):
-    if not openai_api_key:
-        return "Error: No API key provided."
-
-    openai.api_key = openai_api_key
-
-    try:
-        # Open the audio file and read its contents
-        with open(audio, 'rb') as audio_file:
-            audio_file_content = audio_file.read()
-
-        # Wrap the bytes in a file-like object
-        audio_file_obj = io.BytesIO(audio_file_content)
-        audio_file_obj.name = 'audio.wav'  # Set a name for the file object (OpenAI expects one)
-
-        # Transcribe the audio to text using OpenAI's Whisper model
-        audio_file_transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
-        return audio_file_transcription.text
-    except Exception as e:
-        return f"Error transcribing audio: {str(e)}"
-
-# The function used by the Gradio interface
-def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort, model_choice, pdf_content, num_quiz_questions, pdf_quiz_mode, history):
-    if history is None:
-        history = []
-
-    # If there's audio, transcribe it to text
-    if audio:
-        input_text = transcribe_audio(audio, openai_api_key)
-
-    # If a new PDF is uploaded, extract its text
-    new_pdf_content = pdf_content
-    if pdf_file is not None:
-        new_pdf_content = extract_text_from_pdf(pdf_file)
-
-    # Check if we're in PDF quiz mode
-    if pdf_quiz_mode:
-        if new_pdf_content:
-            # Generate MCQ quiz questions
-            quiz_response = generate_mcq_quiz(new_pdf_content, int(num_quiz_questions), openai_api_key, model_choice)
-            history.append((f"👤: [Uploaded PDF for Quiz - {int(num_quiz_questions)} questions]", f"🤖: {quiz_response}"))
-        else:
-            history.append(("👤: [Attempted to generate quiz without PDF]", "🤖: Please upload a PDF file to generate quiz questions."))
-    else:
-        # Regular chat mode - generate the response
-        response = generate_response(input_text, image, new_pdf_content, openai_api_key, reasoning_effort, model_choice)
-
-        # Append the response to the history
-        if input_text:
-            history.append((f"👤: {input_text}", f"🤖: {response}"))
-        elif image is not None:
-            history.append(("👤: [Uploaded image]", f"🤖: {response}"))
-        elif pdf_file is not None:
-            history.append(("👤: [Uploaded PDF]", f"🤖: {response}"))
-        else:
-            history.append(("👤: [No input provided]", "🤖: Please provide some input (text, image, or PDF) for me to respond to."))
-
-    return "", None, None, None, new_pdf_content, history
-
-# Function to clear the chat history and PDF content
-def clear_history():
-    return "", None, None, None, "", []
-
-# Function to process a newly uploaded PDF
-def process_pdf(pdf_file):
-    if pdf_file is None:
-        return ""
-    return extract_text_from_pdf(pdf_file)
-
-# Function to update visible components based on the input type selection
-def update_input_type(choice):
-    if choice == "Text":
-        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
-    elif choice == "Image":
-        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
-    elif choice == "Voice":
-        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
-    elif choice == "PDF":
-        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(value=False)
-    elif choice == "PDF(QUIZ)":
-        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True), gr.update(value=True)
-
-# Custom CSS styles with animations and button colors
-custom_css = """
-/* General body styles */
-.gradio-container {
-    font-family: 'Arial', sans-serif;
-    background-color: #f0f4f8; /* Lighter blue-gray background */
-    color: #2d3748;
-}
-/* Header styles */
-.gradio-header {
-    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
-    color: white;
-    padding: 20px;
-    text-align: center;
-    border-radius: 8px;
-    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
-    animation: fadeIn 1s ease-out;
-}
-.gradio-header h1 {
-    font-size: 2.5rem;
-}
-.gradio-header h3 {
-    font-size: 1.2rem;
-    margin-top: 10px;
-}
-/* Chatbot container styles */
-.gradio-chatbot {
-    background-color: #fff;
-    border-radius: 10px;
-    padding: 20px;
-    box-shadow: 0 6px 18px rgba(0, 0, 0, 0.1);
-    border-left: 4px solid #4a00e0; /* Accent border */
-}
-/* Input field styles */
-.gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file, .gradio-slider {
-    border-radius: 8px;
-    border: 2px solid #e2e8f0;
-    background-color: #f8fafc;
-}
-.gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus, .gradio-audio:focus, .gradio-file:focus, .gradio-slider:focus {
-    border-color: #8e2de2;
-    box-shadow: 0 0 0 3px rgba(142, 45, 226, 0.2);
-}
-/* Button styles */
-/* Send button: purple gradient */
-#submit-btn {
-    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
-    color: white;
-    border: none;
-    border-radius: 8px;
-    padding: 10px 19px;
-    font-size: 1.1rem;
-    cursor: pointer;
-    transition: all 0.3s ease;
-    margin-left: auto;
-    margin-right: auto;
-    display: block;
-    margin-top: 10px;
-}
-#submit-btn:hover {
-    background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%); /* Slightly lighter */
-    box-shadow: 0 6px 8px rgba(74, 0, 224, 0.4);
-}
-#submit-btn:active {
-    transform: scale(0.95);
-}
-#clear-history {
-    background: linear-gradient(135deg, #e53e3e 0%, #f56565 100%); /* Red gradient */
-    color: white;
-    border: none;
-    border-radius: 8px;
-    padding: 10px 13px;
-    font-size: 1.1rem;
-    cursor: pointer;
-    transition: all 0.3s ease;
-    margin-top: 10px;
-}
-#clear-history:hover {
-    background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%); /* Slightly darker red gradient on hover */
-    box-shadow: 0 6px 8px rgba(229, 62, 62, 0.4);
-}
-#clear-history:active {
-    transform: scale(0.95);
-}
-/* Input type selector buttons */
-#input-type-group {
-    display: flex;
-    justify-content: center;
-    gap: 10px;
-    margin-bottom: 20px;
-}
-.input-type-btn {
-    background-color: #718096; /* Slate gray */
-    color: white;
-    border: none;
-    border-radius: 8px;
-    padding: 10px 15px;
-    font-size: 1rem;
-    cursor: pointer;
-    transition: all 0.3s ease;
-}
-.input-type-btn.selected {
-    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
-}
-.input-type-btn:hover {
-    background-color: #4a5568; /* Darker slate */
-}
-/* Chat history styles */
-.gradio-chatbot .message {
-    margin-bottom: 10px;
-}
-.gradio-chatbot .user {
-    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
-    color: white;
-    padding: 10px;
-    border-radius: 12px;
-    max-width: 70%;
-    animation: slideInUser 0.5s ease-out;
-}
-.gradio-chatbot .assistant {
-    background-color: #f0f4f8; /* Light blue-gray */
-    color: #2d3748;
-    padding: 10px;
-    border-radius: 12px;
-    max-width: 70%;
-    margin-left: auto;
-    animation: slideInAssistant 0.5s ease-out;
-}
-/* Animation keyframes */
-@keyframes fadeIn {
-    0% { opacity: 0; }
-    100% { opacity: 1; }
-}
-@keyframes slideInUser {
-    0% { transform: translateX(-100%); }
-    100% { transform: translateX(0); }
-}
-@keyframes slideInAssistant {
-    0% { transform: translateX(100%); }
-    100% { transform: translateX(0); }
-}
-/* Mobile responsiveness */
-@media (max-width: 768px) {
-    .gradio-header h1 {
-        font-size: 1.8rem;
-    }
-    .gradio-header h3 {
-        font-size: 1rem;
-    }
-    .gradio-chatbot {
-        max-height: 400px;
-    }
-    .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file, .gradio-slider {
-        width: 100%;
-    }
-    #submit-btn, #clear-history {
-        width: 100%;
-        margin-left: 0;
-    }
-}
-"""
-
-# Gradio interface setup
-def create_interface():
-    with gr.Blocks(css=custom_css) as demo:
-        gr.Markdown("""
-        <div class="gradio-header">
-            <h1>Multimodal Chatbot (Text + Image + Voice + PDF + Quiz)</h1>
-            <h3>Interact with a chatbot using text, image, voice, or PDF inputs</h3>
-        </div>
-        """)
-
-        # Add a description with an expandable accordion
-        with gr.Accordion("Click to expand for details", open=False):
-            gr.Markdown("""
-            ### Description:
-            This is a multimodal chatbot that can handle text, image, voice, and PDF inputs, and generate quizzes from PDFs.
-            - You can ask questions or provide text, and the assistant will respond.
-            - You can upload an image, and the assistant will process it and answer questions about the image.
-            - Voice input is supported: you can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
-            - PDF support: upload a PDF and ask questions about its content.
-            - PDF Quiz: upload a PDF and specify how many MCQ questions you want generated from its content.
-            - Enter your OpenAI API key to start interacting with the model.
-            - You can use the 'Clear History' button to remove the conversation history.
-            - "o1" supports image, voice, PDF, and text chat; "o3-mini" supports text, PDF, and voice chat only.
-            ### Reasoning Effort:
-            The reasoning effort controls how complex or detailed the assistant's answers should be.
-            - **Low**: Provides quick, concise answers with minimal reasoning or detail.
-            - **Medium**: Offers a balanced response with a reasonable level of detail and thought.
-            - **High**: Produces more detailed, analytical, or thoughtful responses, requiring deeper reasoning.
-            """)
-
-        # Store PDF content as a state variable
-        pdf_content = gr.State("")
-
-        with gr.Row():
-            openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)
-
-        # Input type selector
-        with gr.Row():
-            input_type = gr.Radio(
-                ["Text", "Image", "Voice", "PDF", "PDF(QUIZ)"],
-                label="Choose Input Type",
-                value="Text"
-            )
-
-        # Create the input components (initially only text is visible)
-        with gr.Row():
-            # Text input
-            input_text = gr.Textbox(
-                label="Enter Text Question",
-                placeholder="Ask a question or provide text",
-                lines=2,
-                visible=True
-            )
-
-            # Image input
-            image_input = gr.Image(
-                label="Upload an Image",
-                type="pil",
-                visible=False
-            )
-
-            # Audio input
-            audio_input = gr.Audio(
-                label="Upload or Record Audio",
-                type="filepath",
-                visible=False
-            )
-
-            # PDF input
-            pdf_input = gr.File(
-                label="Upload your PDF",
-                file_types=[".pdf"],
-                visible=False
-            )
-
-            # Quiz-specific components
-            quiz_questions_slider = gr.Slider(
-                minimum=1,
-                maximum=20,
-                value=5,
-                step=1,
-                label="Number of Quiz Questions",
-                visible=False
-            )
-
-            # Hidden state for quiz mode
-            quiz_mode = gr.Checkbox(
-                label="Quiz Mode",
-                visible=False,
-                value=False
-            )
-
-        with gr.Row():
-            reasoning_effort = gr.Dropdown(
-                label="Reasoning Effort",
-                choices=["low", "medium", "high"],
-                value="medium"
-            )
-            model_choice = gr.Dropdown(
-                label="Select Model",
-                choices=["o1", "o3-mini"],
-                value="o1"  # Default to 'o1' for image-related tasks
-            )
-        submit_btn = gr.Button("Ask!", elem_id="submit-btn")
-        clear_btn = gr.Button("Clear History", elem_id="clear-history")
-
-        chat_history = gr.Chatbot()
-
-        # Connect the input type selector to the update function
-        input_type.change(
-            fn=update_input_type,
-            inputs=[input_type],
-            outputs=[input_text, image_input, audio_input, pdf_input, quiz_questions_slider, quiz_mode]
-        )
-
-        # Process the PDF when uploaded
-        pdf_input.change(
-            fn=process_pdf,
-            inputs=[pdf_input],
-            outputs=[pdf_content]
-        )
-
-        # Button interactions
-        submit_btn.click(
-            fn=chatbot,
-            inputs=[
-                input_text,
-                image_input,
-                audio_input,
-                pdf_input,
-                openai_api_key,
-                reasoning_effort,
-                model_choice,
-                pdf_content,
-                quiz_questions_slider,
-                quiz_mode,
-                chat_history
-            ],
-            outputs=[
-                input_text,
-                image_input,
-                audio_input,
-                pdf_input,
-                pdf_content,
-                chat_history
-            ]
-        )
-
-        clear_btn.click(
-            fn=clear_history,
-            inputs=[],
-            outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history]
-        )
-
-    return demo
-
-# Run the interface
-if __name__ == "__main__":
-    demo = create_interface()
-    demo.launch()
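
Note: the deleted file's PDF helper can be exercised on its own. A minimal sketch, assuming PyMuPDF is installed and a local sample.pdf exists (the file name is a stand-in, not part of the repo):

    import fitz  # PyMuPDF

    # Open the PDF and concatenate the text of every page, mirroring
    # extract_text_from_pdf above; the context manager closes the document.
    with fitz.open("sample.pdf") as doc:
        text = "".join(page.get_text() for page in doc)
    print(text[:500])  # preview the first 500 characters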
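
The image path builds a data: URL from base64-encoded PNG bytes before sending it to the model. A small self-contained sketch of that encoding (the 8x8 stand-in image is hypothetical):

    import base64
    import io
    from PIL import Image

    # Encode a PIL image as a base64 PNG data URL, combining
    # get_base64_string_from_image with the f-string in generate_response.
    img = Image.new("RGB", (8, 8), "purple")  # stand-in image
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    data_url = "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode("utf-8")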
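
The file targets the pre-1.0 openai SDK: openai.ChatCompletion.create and openai.Audio.transcribe were removed in openai>=1.0. A hedged sketch of the equivalent calls against the 1.x client, with placeholder key, prompt, and file name:

    from openai import OpenAI

    client = OpenAI(api_key="sk-...")  # placeholder key, entered via the UI above

    # Chat completion (replaces openai.ChatCompletion.create).
    response = client.chat.completions.create(
        model="o1",  # or "o3-mini", matching the app's model dropdown
        messages=[{"role": "user", "content": "Summarize this document."}],
        max_completion_tokens=2000,  # same cap the app uses
    )
    print(response.choices[0].message.content)

    # Whisper transcription (replaces openai.Audio.transcribe).
    with open("audio.wav", "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    print(transcript.text)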