shukdevdatta123 committed
Commit 90df5a7 · verified · 1 Parent(s): eddb24e

Delete app.py

Files changed (1):
  app.py +0 -522
app.py DELETED
@@ -1,522 +0,0 @@
-import gradio as gr
-import openai
-import base64
-from PIL import Image
-import io
-import os
-import tempfile
-import fitz  # PyMuPDF for PDF handling
-
-# Function to extract text from PDF files
-def extract_text_from_pdf(pdf_file):
-    try:
-        text = ""
-        pdf_document = fitz.open(pdf_file)
-
-        for page_num in range(len(pdf_document)):
-            page = pdf_document[page_num]
-            text += page.get_text()
-
-        pdf_document.close()
-        return text
-    except Exception as e:
-        return f"Error extracting text from PDF: {str(e)}"
-
-# Function to generate an MCQ quiz from PDF content
-def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice):
-    if not openai_api_key:
-        return "Error: No API key provided."
-
-    openai.api_key = openai_api_key
-
-    # Limit content length to avoid token limits
-    limited_content = pdf_content[:8000] if len(pdf_content) > 8000 else pdf_content
-
-    prompt = f"""Based on the following document content, generate {num_questions} multiple-choice quiz questions.
-For each question:
-1. Create a clear question based on key concepts in the document
-2. Provide 4 possible answers (A, B, C, D)
-3. Indicate the correct answer
-4. Briefly explain why the answer is correct
-
-Format the output clearly with each question numbered and separated.
-
-Document content:
-{limited_content}
-"""
-
-    try:
-        messages = [
-            {"role": "user", "content": prompt}
-        ]
-
-        response = openai.ChatCompletion.create(
-            model=model_choice,
-            messages=messages
-        )
-
-        return response.choices[0].message.content
-    except Exception as e:
-        return f"Error generating quiz: {str(e)}"
-
-# Function to send the request to the OpenAI API with text, image, or PDF input
-def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_effort="medium", model_choice="o1"):
-    if not openai_api_key:
-        return "Error: No API key provided."
-
-    openai.api_key = openai_api_key
-
-    # Process the input depending on whether it's text, an image, or a PDF-related query
-    if pdf_content and input_text:
-        # For PDF queries, combine the PDF content with the user's question
-        prompt = f"Based on the following document content, please answer this question: '{input_text}'\n\nDocument content:\n{pdf_content}"
-        input_content = prompt
-    elif image:
-        # Convert the image to a base64 string
-        image_info = get_base64_string_from_image(image)
-        input_content = f"data:image/png;base64,{image_info}"
-    else:
-        # Plain text input
-        input_content = input_text
-
-    # Prepare the messages for the OpenAI API
-    if model_choice == "o1":
-        if image and not pdf_content:
-            messages = [
-                {"role": "user", "content": [{"type": "image_url", "image_url": {"url": input_content}}]}
-            ]
-        else:
-            messages = [
-                {"role": "user", "content": input_content}
-            ]
-    elif model_choice == "o3-mini":
-        messages = [
-            {"role": "user", "content": input_content}
-        ]
-
-    try:
-        # Call the OpenAI API with the selected model
-        response = openai.ChatCompletion.create(
-            model=model_choice,
-            messages=messages,
-            max_completion_tokens=2000
-        )
-
-        return response.choices[0].message.content
-    except Exception as e:
-        return f"Error calling OpenAI API: {str(e)}"
-
-# Function to convert an uploaded image to a base64 string
-def get_base64_string_from_image(pil_image):
-    # Convert the PIL Image to bytes
-    buffered = io.BytesIO()
-    pil_image.save(buffered, format="PNG")
-    img_bytes = buffered.getvalue()
-    base64_str = base64.b64encode(img_bytes).decode("utf-8")
-    return base64_str
-
-# Function to transcribe audio to text using the OpenAI Whisper API
-def transcribe_audio(audio, openai_api_key):
-    if not openai_api_key:
-        return "Error: No API key provided."
-
-    openai.api_key = openai_api_key
-
-    try:
-        # Open the audio file and read its contents
-        with open(audio, 'rb') as audio_file:
-            audio_file_content = audio_file.read()
-
-        # Wrap the bytes in a file-like object
-        audio_file_obj = io.BytesIO(audio_file_content)
-        audio_file_obj.name = 'audio.wav'  # Set a name for the file object (OpenAI expects one)
-
-        # Transcribe the audio to text using OpenAI's Whisper model
-        audio_file_transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
-        return audio_file_transcription.text
-    except Exception as e:
-        return f"Error transcribing audio: {str(e)}"
-
-# The function used by the Gradio interface
-def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort, model_choice, pdf_content, num_quiz_questions, pdf_quiz_mode, history):
-    if history is None:
-        history = []
-
-    # If there's audio, transcribe it to text
-    if audio:
-        input_text = transcribe_audio(audio, openai_api_key)
-
-    # If a new PDF is uploaded, extract its text
-    new_pdf_content = pdf_content
-    if pdf_file is not None:
-        new_pdf_content = extract_text_from_pdf(pdf_file)
-
-    # Check if we're in PDF quiz mode
-    if pdf_quiz_mode:
-        if new_pdf_content:
-            # Generate MCQ quiz questions
-            quiz_response = generate_mcq_quiz(new_pdf_content, int(num_quiz_questions), openai_api_key, model_choice)
-            history.append((f"👤: [Uploaded PDF for Quiz - {int(num_quiz_questions)} questions]", f"🤖: {quiz_response}"))
-        else:
-            history.append(("👤: [Attempted to generate quiz without PDF]", "🤖: Please upload a PDF file to generate quiz questions."))
-    else:
-        # Regular chat mode - generate the response
-        response = generate_response(input_text, image, new_pdf_content, openai_api_key, reasoning_effort, model_choice)
-
-        # Append the response to the history
-        if input_text:
-            history.append((f"👤: {input_text}", f"🤖: {response}"))
-        elif image is not None:
-            history.append(("👤: [Uploaded image]", f"🤖: {response}"))
-        elif pdf_file is not None:
-            history.append(("👤: [Uploaded PDF]", f"🤖: {response}"))
-        else:
-            history.append(("👤: [No input provided]", "🤖: Please provide some input (text, image, or PDF) for me to respond to."))
-
-    return "", None, None, None, new_pdf_content, history
-
-# Function to clear the chat history and PDF content
-def clear_history():
-    return "", None, None, None, "", []
-
-# Function to process a newly uploaded PDF
-def process_pdf(pdf_file):
-    if pdf_file is None:
-        return ""
-    return extract_text_from_pdf(pdf_file)
-
-# Function to update visible components based on the input type selection
-def update_input_type(choice):
-    if choice == "Text":
-        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
-    elif choice == "Image":
-        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
-    elif choice == "Voice":
-        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
-    elif choice == "PDF":
-        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(value=False)
-    elif choice == "PDF(QUIZ)":
-        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True), gr.update(value=True)
-
-# Custom CSS styles with animations and button colors
-custom_css = """
-/* General body styles */
-.gradio-container {
-    font-family: 'Arial', sans-serif;
-    background-color: #f0f4f8; /* Lighter blue-gray background */
-    color: #2d3748;
-}
-/* Header styles */
-.gradio-header {
-    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
-    color: white;
-    padding: 20px;
-    text-align: center;
-    border-radius: 8px;
-    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
-    animation: fadeIn 1s ease-out;
-}
-.gradio-header h1 {
-    font-size: 2.5rem;
-}
-.gradio-header h3 {
-    font-size: 1.2rem;
-    margin-top: 10px;
-}
-/* Chatbot container styles */
-.gradio-chatbot {
-    background-color: #fff;
-    border-radius: 10px;
-    padding: 20px;
-    box-shadow: 0 6px 18px rgba(0, 0, 0, 0.1);
-    border-left: 4px solid #4a00e0; /* Accent border */
-}
-/* Input field styles */
-.gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file, .gradio-slider {
-    border-radius: 8px;
-    border: 2px solid #e2e8f0;
-    background-color: #f8fafc;
-}
-.gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus, .gradio-audio:focus, .gradio-file:focus, .gradio-slider:focus {
-    border-color: #8e2de2;
-    box-shadow: 0 0 0 3px rgba(142, 45, 226, 0.2);
-}
-/* Button styles */
-/* Send button: purple gradient */
-#submit-btn {
-    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
-    color: white;
-    border: none;
-    border-radius: 8px;
-    padding: 10px 19px;
-    font-size: 1.1rem;
-    cursor: pointer;
-    transition: all 0.3s ease;
-    margin-left: auto;
-    margin-right: auto;
-    display: block;
-    margin-top: 10px;
-}
-#submit-btn:hover {
-    background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%); /* Slightly lighter */
-    box-shadow: 0 6px 8px rgba(74, 0, 224, 0.4);
-}
-#submit-btn:active {
-    transform: scale(0.95);
-}
-#clear-history {
-    background: linear-gradient(135deg, #e53e3e 0%, #f56565 100%); /* Red gradient */
-    color: white;
-    border: none;
-    border-radius: 8px;
-    padding: 10px 13px;
-    font-size: 1.1rem;
-    cursor: pointer;
-    transition: all 0.3s ease;
-    margin-top: 10px;
-}
-#clear-history:hover {
-    background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%); /* Slightly darker red gradient on hover */
-    box-shadow: 0 6px 8px rgba(229, 62, 62, 0.4);
-}
-#clear-history:active {
-    transform: scale(0.95);
-}
-/* Input type selector buttons */
-#input-type-group {
-    display: flex;
-    justify-content: center;
-    gap: 10px;
-    margin-bottom: 20px;
-}
-.input-type-btn {
-    background-color: #718096; /* Slate gray */
-    color: white;
-    border: none;
-    border-radius: 8px;
-    padding: 10px 15px;
-    font-size: 1rem;
-    cursor: pointer;
-    transition: all 0.3s ease;
-}
-.input-type-btn.selected {
-    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
-}
-.input-type-btn:hover {
-    background-color: #4a5568; /* Darker slate */
-}
-/* Chat history styles */
-.gradio-chatbot .message {
-    margin-bottom: 10px;
-}
-.gradio-chatbot .user {
-    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
-    color: white;
-    padding: 10px;
-    border-radius: 12px;
-    max-width: 70%;
-    animation: slideInUser 0.5s ease-out;
-}
-.gradio-chatbot .assistant {
-    background-color: #f0f4f8; /* Light blue-gray */
-    color: #2d3748;
-    padding: 10px;
-    border-radius: 12px;
-    max-width: 70%;
-    margin-left: auto;
-    animation: slideInAssistant 0.5s ease-out;
-}
-/* Animation keyframes */
-@keyframes fadeIn {
-    0% { opacity: 0; }
-    100% { opacity: 1; }
-}
-@keyframes slideInUser {
-    0% { transform: translateX(-100%); }
-    100% { transform: translateX(0); }
-}
-@keyframes slideInAssistant {
-    0% { transform: translateX(100%); }
-    100% { transform: translateX(0); }
-}
-/* Mobile responsiveness */
-@media (max-width: 768px) {
-    .gradio-header h1 {
-        font-size: 1.8rem;
-    }
-    .gradio-header h3 {
-        font-size: 1rem;
-    }
-    .gradio-chatbot {
-        max-height: 400px;
-    }
-    .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file, .gradio-slider {
-        width: 100%;
-    }
-    #submit-btn, #clear-history {
-        width: 100%;
-        margin-left: 0;
-    }
-}
-"""
-
-# Gradio interface setup
-def create_interface():
-    with gr.Blocks(css=custom_css) as demo:
-        gr.Markdown("""
-        <div class="gradio-header">
-            <h1>Multimodal Chatbot (Text + Image + Voice + PDF + Quiz)</h1>
-            <h3>Interact with a chatbot using text, image, voice, or PDF inputs</h3>
-        </div>
-        """)
-
-        # Add a description with an expandable accordion
-        with gr.Accordion("Click to expand for details", open=False):
-            gr.Markdown("""
-            ### Description:
-            This is a multimodal chatbot that can handle text, image, voice, and PDF inputs, and generate quizzes from PDFs.
-            - You can ask questions or provide text, and the assistant will respond.
-            - You can upload an image, and the assistant will process it and answer questions about the image.
-            - Voice input is supported: you can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
-            - PDF support: upload a PDF and ask questions about its content.
-            - PDF Quiz: upload a PDF and specify how many MCQ questions you want generated from its content.
-            - Enter your OpenAI API key to start interacting with the model.
-            - You can use the 'Clear History' button to remove the conversation history.
-            - "o1" supports image, voice, PDF, and text chat; "o3-mini" supports text, PDF, and voice chat only.
-            ### Reasoning Effort:
-            The reasoning effort controls how complex or detailed the assistant's answers should be.
-            - **Low**: Provides quick, concise answers with minimal reasoning or detail.
-            - **Medium**: Offers a balanced response with a reasonable level of detail and thought.
-            - **High**: Produces more detailed, analytical, or thoughtful responses, requiring deeper reasoning.
-            """)
-
-        # Store PDF content as a state variable
-        pdf_content = gr.State("")
-
-        with gr.Row():
-            openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)
-
-        # Input type selector
-        with gr.Row():
-            input_type = gr.Radio(
-                ["Text", "Image", "Voice", "PDF", "PDF(QUIZ)"],
-                label="Choose Input Type",
-                value="Text"
-            )
-
-        # Create the input components (initially only text is visible)
-        with gr.Row():
-            # Text input
-            input_text = gr.Textbox(
-                label="Enter Text Question",
-                placeholder="Ask a question or provide text",
-                lines=2,
-                visible=True
-            )
-
-            # Image input
-            image_input = gr.Image(
-                label="Upload an Image",
-                type="pil",
-                visible=False
-            )
-
-            # Audio input
-            audio_input = gr.Audio(
-                label="Upload or Record Audio",
-                type="filepath",
-                visible=False
-            )
-
-            # PDF input
-            pdf_input = gr.File(
-                label="Upload your PDF",
-                file_types=[".pdf"],
-                visible=False
-            )
-
-            # Quiz-specific components
-            quiz_questions_slider = gr.Slider(
-                minimum=1,
-                maximum=20,
-                value=5,
-                step=1,
-                label="Number of Quiz Questions",
-                visible=False
-            )
-
-            # Hidden state for quiz mode
-            quiz_mode = gr.Checkbox(
-                label="Quiz Mode",
-                visible=False,
-                value=False
-            )
-
-        with gr.Row():
-            reasoning_effort = gr.Dropdown(
-                label="Reasoning Effort",
-                choices=["low", "medium", "high"],
-                value="medium"
-            )
-            model_choice = gr.Dropdown(
-                label="Select Model",
-                choices=["o1", "o3-mini"],
-                value="o1"  # Default to 'o1' for image-related tasks
-            )
-        submit_btn = gr.Button("Ask!", elem_id="submit-btn")
-        clear_btn = gr.Button("Clear History", elem_id="clear-history")
-
-        chat_history = gr.Chatbot()
-
-        # Connect the input type selector to the update function
-        input_type.change(
-            fn=update_input_type,
-            inputs=[input_type],
-            outputs=[input_text, image_input, audio_input, pdf_input, quiz_questions_slider, quiz_mode]
-        )
-
-        # Process the PDF when uploaded
-        pdf_input.change(
-            fn=process_pdf,
-            inputs=[pdf_input],
-            outputs=[pdf_content]
-        )
-
-        # Button interactions
-        submit_btn.click(
-            fn=chatbot,
-            inputs=[
-                input_text,
-                image_input,
-                audio_input,
-                pdf_input,
-                openai_api_key,
-                reasoning_effort,
-                model_choice,
-                pdf_content,
-                quiz_questions_slider,
-                quiz_mode,
-                chat_history
-            ],
-            outputs=[
-                input_text,
-                image_input,
-                audio_input,
-                pdf_input,
-                pdf_content,
-                chat_history
-            ]
-        )
-
-        clear_btn.click(
-            fn=clear_history,
-            inputs=[],
-            outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history]
-        )
-
-    return demo
-
-# Run the interface
-if __name__ == "__main__":
-    demo = create_interface()
-    demo.launch()
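
Note: the deleted file's PDF helper can be exercised on its own. A minimal sketch, assuming PyMuPDF is installed and a local sample.pdf exists (the file name is a stand-in, not part of the repo):

    import fitz  # PyMuPDF

    # Open the PDF and concatenate the text of every page, mirroring
    # extract_text_from_pdf above; the context manager closes the document.
    with fitz.open("sample.pdf") as doc:
        text = "".join(page.get_text() for page in doc)
    print(text[:500])  # preview the first 500 characters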
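
The image path builds a data: URL from base64-encoded PNG bytes before sending it to the model. A small self-contained sketch of that encoding (the 8x8 stand-in image is hypothetical):

    import base64
    import io
    from PIL import Image

    # Encode a PIL image as a base64 PNG data URL, combining
    # get_base64_string_from_image with the f-string in generate_response.
    img = Image.new("RGB", (8, 8), "purple")  # stand-in image
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    data_url = "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode("utf-8")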
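
The file targets the pre-1.0 openai SDK: openai.ChatCompletion.create and openai.Audio.transcribe were removed in openai>=1.0. A hedged sketch of the equivalent calls against the 1.x client, with placeholder key, prompt, and file name:

    from openai import OpenAI

    client = OpenAI(api_key="sk-...")  # placeholder key, entered via the UI above

    # Chat completion (replaces openai.ChatCompletion.create).
    response = client.chat.completions.create(
        model="o1",  # or "o3-mini", matching the app's model dropdown
        messages=[{"role": "user", "content": "Summarize this document."}],
        max_completion_tokens=2000,  # same cap the app uses
    )
    print(response.choices[0].message.content)

    # Whisper transcription (replaces openai.Audio.transcribe).
    with open("audio.wav", "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    print(transcript.text)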