import os
import time

import gradio as gr
from openai import OpenAI

# Initialize the OpenAI client (requires OPENAI_API_KEY in the environment)
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
client = OpenAI(api_key=OPENAI_API_KEY)

def chat_with_gpt(message, history, system_prompt):
    """Send the message with full conversation context to OpenAI and update history."""
    # Convert Gradio history format to OpenAI messages format
    messages = []

    # Add system prompt if provided
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})

    # Add conversation history
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})

    # Add current user message
    messages.append({"role": "user", "content": message})

    try:
        # Call OpenAI API with full conversation context
        response = client.chat.completions.create(
            model="gpt-4o-mini",  # or "gpt-4o" for better quality
            messages=messages,
            max_tokens=500,
            temperature=0.7,
            top_p=0.9
        )

        assistant_response = response.choices[0].message.content

        # Update history with new exchange
        history.append((message, assistant_response))

        return history, ""  # Return updated history and clear input

    except Exception as e:
        error_msg = f"Error: {str(e)}"
        history.append((message, error_msg))
        return history, ""

def evaluate_conversation(history, system_prompt, evaluation_metrics, progress=gr.Progress()):
    """Ask a stronger model to score the conversation against the given criteria."""
    if not history:
        return "❌ No conversation to evaluate. Please have a conversation first."

    # Initialize progress
    progress(0, desc="Starting evaluation...")
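    # (With queuing enabled, gr.Progress renders a progress bar over the
    #  output component while this handler runs; the fractions below are
    #  rough stage markers, not measured progress.)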

    # Prepare the conversation transcript
    progress(0.2, desc="Preparing conversation transcript...")
    conversation_text = ""
    if system_prompt.strip():
        conversation_text += f"System Prompt: {system_prompt}\n\n"

    conversation_text += "Conversation:\n"
    for i, (user_msg, assistant_msg) in enumerate(history, 1):
        conversation_text += f"Turn {i}:\n"
        conversation_text += f"User: {user_msg}\n"
        conversation_text += f"Assistant: {assistant_msg}\n\n"

    # Create evaluation prompt
    progress(0.4, desc="Crafting evaluation prompt...")
    evaluation_prompt = f"""Please evaluate the following conversation based on these specific criteria:

{evaluation_metrics}

CONVERSATION TO EVALUATE:
{conversation_text}

Please provide a detailed evaluation report that:
1. Scores each criterion on a scale of 1-10
2. Provides specific examples from the conversation to support your scores
3. Offers constructive feedback for improvement
4. Gives an overall assessment

Format your response clearly with headings for each evaluation criterion."""

    try:
        # Call OpenAI API for evaluation
        progress(0.6, desc="Sending request to OpenAI...")
        response = client.chat.completions.create(
            model="gpt-4o",  # Use better model for evaluation
            messages=[
                {"role": "system", "content": "You are an expert conversation analyst. Provide thorough, objective evaluations with specific examples and actionable feedback."},
                {"role": "user", "content": evaluation_prompt}
            ],
            max_tokens=1000,
            temperature=0.3  # Lower temperature for more consistent evaluation
        )

        progress(0.9, desc="Processing evaluation results...")

        # Format the response nicely
        evaluation_result = f"""# πŸ“Š Conversation Evaluation Report

{response.choices[0].message.content}

---
*Evaluation completed at {time.strftime('%Y-%m-%d %H:%M:%S')}*
*Conversation length: {len(history)} exchanges*
"""

        progress(1.0, desc="Evaluation complete!")
        return evaluation_result

    except Exception as e:
        progress(1.0, desc="Evaluation failed")
        return f"❌ **Error during evaluation:** {str(e)}"

def start_evaluation():
    """Return initial evaluation status"""
    return "πŸ”„ **Evaluating conversation...** \n\nPlease wait while we analyze your conversation. This may take 10-30 seconds depending on conversation length."

def reset_conversation():
    """Clear the chat history, the message input, and the evaluation report."""
    return [], "", "No evaluation yet. Have a conversation and click 'Evaluate' to see detailed feedback."

def load_preset_prompt(preset):
    """Load predefined system prompts"""
    presets = {
        "General Assistant": "You are a helpful, knowledgeable, and friendly AI assistant.",
        "Therapist": "You are a compassionate and professional therapist. Provide supportive, empathetic responses while maintaining appropriate boundaries. Ask thoughtful questions to help the user explore their feelings.",
        "Distressed Teen Jen": "You are a distressed 14 year old teenager dealing with typical teenage problems like school stress, peer pressure, and family issues. Respond with the emotional intensity and perspective of a troubled teen seeking help.",
        "Distressed Teen John": "You are a distressed 15 year old teenager dealing with typical teenage problems like school stress, peer pressure, and family issues. Respond with the emotional intensity and perspective of a troubled teen seeking help.",
        "Technical Expert": "You are a technical expert with deep knowledge in programming, engineering, and technology. Provide detailed, accurate technical explanations and solutions.",
        "Creative Writer": "You are a creative writing assistant. Help with storytelling, character development, plot ideas, and provide creative inspiration with vivid descriptions.",
        "Custom": ""
    }
    return presets.get(preset, "")

# Default evaluation metrics
default_evaluation = """Please evaluate the conversation according to:

1) **Coherence**: How logically consistent and well-structured are the responses? Do they flow naturally from one turn to the next?

2) **Relevance**: How well do the assistant's responses address the user's specific questions, needs, and context?

3) **Engagement**: How natural, conversational, and engaging is the interaction? Does it feel like a meaningful dialogue?

4) **Helpfulness**: How useful and actionable are the assistant's responses? Do they provide value to the user?

5) **Role Consistency**: How well does the assistant maintain its assigned role/persona throughout the conversation? Are there any character breaks?"""

# Create the Gradio interface
with gr.Blocks(title="OpenAI Chatbot with Evaluation") as demo:
    gr.Markdown("# OpenAI Chatbot with Conversation Evaluation")

    with gr.Row():
        # Left sidebar for system prompt and evaluation configuration
        with gr.Column(scale=1, min_width=350):
            gr.Markdown("## System Configuration")

            # Preset dropdown
            preset_dropdown = gr.Dropdown(
                choices=["General Assistant", "Therapist", "Distressed Teen Jen", "Distressed Teen John", "Technical Expert", "Creative Writer", "Custom"],
                value="General Assistant",
                label="Quick Presets",
                info="Select a preset or choose 'Custom' to write your own"
            )

            # System prompt textbox
            system_prompt = gr.Textbox(
                label="System Prompt",
                placeholder="Enter system instructions here...",
                value="You are a helpful, knowledgeable, and friendly AI assistant.",
                lines=4,
                info="This guides the AI's behavior and personality"
            )

            gr.Markdown("## Evaluation Configuration")

            # Evaluation metrics textbox
            evaluation_metrics = gr.Textbox(
                label="Evaluation Metrics",
                placeholder="Enter evaluation criteria here...",
                value=default_evaluation,
                lines=8,
                info="Customize how you want the conversation to be evaluated"
            )

            gr.Markdown("### Usage")
            gr.Markdown("β€’ Configure system prompt and evaluation criteria")
            gr.Markdown("β€’ Have a conversation with the AI")
            gr.Markdown("β€’ Click 'Evaluate' to get detailed feedback")
            gr.Markdown("β€’ Evaluation takes 10-30 seconds ⏱️")

        # Right side for chat interface
        with gr.Column(scale=2):
            gr.Markdown("**Chat with your configured AI assistant**")

            # Chatbot component to display conversation
            chatbot = gr.Chatbot(
                label="Conversation",
                value=[],
                height=500
            )

            # Input textbox
            msg_input = gr.Textbox(
                label="Your message",
                placeholder="Type your message here...",
                lines=2,
                scale=4
            )

            # Buttons row
            with gr.Row():
                send_btn = gr.Button("Send", variant="primary", scale=1)
                reset_btn = gr.Button("Reset Chat", variant="secondary", scale=1)
                evaluate_btn = gr.Button("πŸ” Evaluate", variant="huggingface", scale=1)

    # Evaluation results section (collapsible)
    with gr.Accordion("πŸ“Š Evaluation Report", open=False) as evaluation_accordion:
        evaluation_output = gr.Markdown(
            value="No evaluation yet. Have a conversation and click 'Evaluate' to see detailed feedback.",
            label="Evaluation Results"
        )

    # Event handlers

    # Load preset prompts
    preset_dropdown.change(
        fn=load_preset_prompt,
        inputs=[preset_dropdown],
        outputs=[system_prompt]
    )

    # Send message
    send_btn.click(
        fn=chat_with_gpt,
        inputs=[msg_input, chatbot, system_prompt],
        outputs=[chatbot, msg_input]
    )

    # Allow Enter key to send message
    msg_input.submit(
        fn=chat_with_gpt,
        inputs=[msg_input, chatbot, system_prompt],
        outputs=[chatbot, msg_input]
    )

    # Evaluate conversation with progress tracking
    evaluate_btn.click(
        fn=start_evaluation,
        inputs=[],
        outputs=[evaluation_output]
    ).then(
        fn=evaluate_conversation,
        inputs=[chatbot, system_prompt, evaluation_metrics],
        outputs=[evaluation_output]
    )
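    # (The .then() chain fires evaluate_conversation only after
    #  start_evaluation returns, so the interim status message appears
    #  in the report panel while the slower API call runs.)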

    # Reset button functionality
    reset_btn.click(
        fn=reset_conversation,
        inputs=[],
        outputs=[chatbot, msg_input, evaluation_output]
    )

# Launch the app (share=True creates a temporary public link).
# queue() lets gr.Progress stream updates to the UI; it is on by default
# in recent Gradio versions but is enabled explicitly here for older ones.
demo.queue()
demo.launch(share=True)