# app.py — OpenAI chatbot with per-conversation evaluation (Gradio / Hugging Face Space)
# Origin: eagle0504's Space "test2", commit bac2ab2 ("Update app.py")
import os
import gradio as gr
from openai import OpenAI
import time
# Initialize OpenAI client.
# The key is read eagerly, so the app fails fast with a KeyError at startup
# if OPENAI_API_KEY is not present in the environment.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
client = OpenAI(api_key = OPENAI_API_KEY)  # Module-level client shared by all handlers
def chat_with_gpt(message, history, system_prompt):
    """Send *message* plus the prior turns to gpt-4o-mini and append the reply.

    Args:
        message: The user's new message text.
        history: List of (user, assistant) tuples from the Chatbot component.
        system_prompt: Optional system instructions; ignored when blank.

    Returns:
        (updated_history, "") — the empty string clears the input textbox.
        API failures are surfaced as an "Error: ..." assistant turn rather
        than raised, so the UI never crashes mid-chat.
    """
    # Rebuild the full OpenAI-format message list from the Gradio history.
    msgs = []
    prompt = system_prompt.strip()
    if prompt:
        msgs.append({"role": "system", "content": prompt})
    for user_turn, bot_turn in history:
        msgs.append({"role": "user", "content": user_turn})
        msgs.append({"role": "assistant", "content": bot_turn})
    msgs.append({"role": "user", "content": message})

    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",  # or "gpt-4o" for better quality
            messages=msgs,
            max_tokens=500,
            temperature=0.7,
            top_p=0.9,
        )
        reply = completion.choices[0].message.content
    except Exception as exc:
        # Show the failure inline as the assistant's answer.
        reply = f"Error: {str(exc)}"

    history.append((message, reply))
    return history, ""  # second value clears the input box
def evaluate_conversation(history, system_prompt, evaluation_metrics, progress=gr.Progress()):
    """Have gpt-4o grade the current conversation against *evaluation_metrics*.

    Args:
        history: List of (user, assistant) tuples; empty history short-circuits.
        system_prompt: The system prompt used for the chat, included in the transcript.
        evaluation_metrics: Free-text rubric inserted into the judging prompt.
        progress: Gradio progress tracker (injected by Gradio at call time).

    Returns:
        A Markdown report string, or an error/usage message prefixed with ❌.
    """
    if not history:
        return "❌ No conversation to evaluate. Please have a conversation first."

    progress(0, desc="Starting evaluation...")

    # Flatten the chat into a numbered plain-text transcript.
    progress(0.2, desc="Preparing conversation transcript...")
    pieces = []
    if system_prompt.strip():
        pieces.append(f"System Prompt: {system_prompt}\n\n")
    pieces.append("Conversation:\n")
    for turn_no, (user_msg, assistant_msg) in enumerate(history, 1):
        pieces.append(f"Turn {turn_no}:\n")
        pieces.append(f"User: {user_msg}\n")
        pieces.append(f"Assistant: {assistant_msg}\n\n")
    conversation_text = "".join(pieces)

    progress(0.4, desc="Crafting evaluation prompt...")
    evaluation_prompt = f"""Please evaluate the following conversation based on these specific criteria:
{evaluation_metrics}
CONVERSATION TO EVALUATE:
{conversation_text}
Please provide a detailed evaluation report that:
1. Scores each criterion on a scale of 1-10
2. Provides specific examples from the conversation to support your scores
3. Offers constructive feedback for improvement
4. Gives an overall assessment
Format your response clearly with headings for each evaluation criterion."""

    try:
        progress(0.6, desc="Sending request to OpenAI...")
        response = client.chat.completions.create(
            model="gpt-4o",  # stronger model for the judging step
            messages=[
                {"role": "system", "content": "You are an expert conversation analyst. Provide thorough, objective evaluations with specific examples and actionable feedback."},
                {"role": "user", "content": evaluation_prompt},
            ],
            max_tokens=1000,
            temperature=0.3,  # low temperature keeps scoring consistent
        )
        progress(0.9, desc="Processing evaluation results...")
        # Wrap the model's report with a timestamped footer.
        evaluation_result = f"""# 📊 Conversation Evaluation Report
{response.choices[0].message.content}
---
*Evaluation completed at {time.strftime('%Y-%m-%d %H:%M:%S')}*
*Conversation length: {len(history)} exchanges*
"""
        progress(1.0, desc="Evaluation complete!")
        return evaluation_result
    except Exception as e:
        progress(1.0, desc="Evaluation failed")
        return f"❌ **Error during evaluation:** {str(e)}"
def start_evaluation():
    """Interim status message shown while the evaluation request is in flight."""
    waiting_notice = (
        "🔄 **Evaluating conversation...** \n\n"
        "Please wait while we analyze your conversation. "
        "This may take 10-30 seconds depending on conversation length."
    )
    return waiting_notice
def reset_conversation():
    """Clear the chat history, the input textbox, and the evaluation panel."""
    empty_history = []
    cleared_input = ""
    placeholder = "No evaluation yet. Have a conversation and click 'Evaluate' to see detailed feedback."
    return empty_history, cleared_input, placeholder
def load_preset_prompt(preset):
    """Map a dropdown preset name to its canned system prompt.

    Returns the empty string for "Custom" or any unrecognized name, which
    leaves the system-prompt textbox blank for the user to fill in.
    """
    prompt_by_preset = {
        "General Assistant": "You are a helpful, knowledgeable, and friendly AI assistant.",
        "Therapist": "You are a compassionate and professional therapist. Provide supportive, empathetic responses while maintaining appropriate boundaries. Ask thoughtful questions to help the user explore their feelings.",
        "Distressed Teen Jen": "You are a distressed 14 year old teenager dealing with typical teenage problems like school stress, peer pressure, and family issues. Respond with the emotional intensity and perspective of a troubled teen seeking help.",
        "Distressed Teen John": "You are a distressed 15 year old teenager dealing with typical teenage problems like school stress, peer pressure, and family issues. Respond with the emotional intensity and perspective of a troubled teen seeking help.",
        "Technical Expert": "You are a technical expert with deep knowledge in programming, engineering, and technology. Provide detailed, accurate technical explanations and solutions.",
        "Creative Writer": "You are a creative writing assistant. Help with storytelling, character development, plot ideas, and provide creative inspiration with vivid descriptions.",
        "Custom": "",
    }
    return prompt_by_preset.get(preset, "")
# Default evaluation metrics: the rubric pre-filled into the "Evaluation
# Metrics" textbox. Users can edit it before clicking Evaluate; the text is
# interpolated verbatim into the judging prompt.
default_evaluation = """Please evaluate the conversation according to:
1) **Coherence**: How logically consistent and well-structured are the responses? Do they flow naturally from one turn to the next?
2) **Relevance**: How well do the assistant's responses address the user's specific questions, needs, and context?
3) **Engagement**: How natural, conversational, and engaging is the interaction? Does it feel like a meaningful dialogue?
4) **Helpfulness**: How useful and actionable are the assistant's responses? Do they provide value to the user?
5) **Role Consistency**: How well does the assistant maintain its assigned role/persona throughout the conversation? Are there any character breaks?"""
# ---------------------------------------------------------------------------
# Gradio interface: left column = configuration, right column = chat + report.
# ---------------------------------------------------------------------------
with gr.Blocks(title="OpenAI Chatbot with Evaluation") as demo:
    gr.Markdown("# OpenAI Chatbot with Conversation Evaluation")
    with gr.Row():
        # Left sidebar: system prompt and evaluation configuration.
        with gr.Column(scale=1, min_width=350):
            gr.Markdown("## System Configuration")
            # Preset dropdown; selecting an entry overwrites the system prompt box.
            preset_dropdown = gr.Dropdown(
                choices=["General Assistant", "Therapist", "Distressed Teen Jen", "Distressed Teen John", "Technical Expert", "Creative Writer", "Custom"],
                value="General Assistant",
                label="Quick Presets",
                info="Select a preset or choose 'Custom' to write your own"
            )
            # Free-text system prompt; passed to chat_with_gpt on every send.
            system_prompt = gr.Textbox(
                label="System Prompt",
                placeholder="Enter system instructions here...",
                value="You are a helpful, knowledgeable, and friendly AI assistant.",
                lines=4,
                info="This guides the AI's behavior and personality"
            )
            gr.Markdown("## Evaluation Configuration")
            # Rubric fed verbatim into the judging prompt (see evaluate_conversation).
            evaluation_metrics = gr.Textbox(
                label="Evaluation Metrics",
                placeholder="Enter evaluation criteria here...",
                value=default_evaluation,
                lines=8,
                info="Customize how you want the conversation to be evaluated"
            )
            gr.Markdown("### Usage")
            gr.Markdown("• Configure system prompt and evaluation criteria")
            gr.Markdown("• Have a conversation with the AI")
            gr.Markdown("• Click 'Evaluate' to get detailed feedback")
            gr.Markdown("• Evaluation takes 10-30 seconds ⏱️")
        # Right side: the chat interface itself.
        with gr.Column(scale=2):
            gr.Markdown("**Chat with your configured AI assistant**")
            # Chatbot component holds the (user, assistant) tuple history.
            chatbot = gr.Chatbot(
                label="Conversation",
                value=[],
                height=500
            )
            # Message entry box; cleared after each send by the handlers' "" return.
            msg_input = gr.Textbox(
                label="Your message",
                placeholder="Type your message here...",
                lines=2,
                scale=4
            )
            # Action buttons.
            with gr.Row():
                send_btn = gr.Button("Send", variant="primary", scale=1)
                reset_btn = gr.Button("Reset Chat", variant="secondary", scale=1)
                evaluate_btn = gr.Button("🔍 Evaluate", variant="huggingface", scale=1)
            # Collapsible panel where the Markdown evaluation report is rendered.
            with gr.Accordion("📊 Evaluation Report", open=False) as evaluation_accordion:
                evaluation_output = gr.Markdown(
                    value="No evaluation yet. Have a conversation and click 'Evaluate' to see detailed feedback.",
                    label="Evaluation Results"
                )
    # ---- Event handlers ----
    # Preset selection fills the system-prompt textbox.
    preset_dropdown.change(
        fn=load_preset_prompt,
        inputs=[preset_dropdown],
        outputs=[system_prompt]
    )
    # Send button posts the message and clears the input.
    send_btn.click(
        fn=chat_with_gpt,
        inputs=[msg_input, chatbot, system_prompt],
        outputs=[chatbot, msg_input]
    )
    # Enter key in the textbox behaves like Send.
    msg_input.submit(
        fn=chat_with_gpt,
        inputs=[msg_input, chatbot, system_prompt],
        outputs=[chatbot, msg_input]
    )
    # Evaluate: show a "please wait" notice first, then replace it with the report.
    evaluate_btn.click(
        fn=start_evaluation,
        inputs=[],
        outputs=[evaluation_output]
    ).then(
        fn=evaluate_conversation,
        inputs=[chatbot, system_prompt, evaluation_metrics],
        outputs=[evaluation_output]
    )
    # Reset clears chat history, input box, and the evaluation panel.
    reset_btn.click(
        fn=reset_conversation,
        inputs=[],
        outputs=[chatbot, msg_input, evaluation_output]
    )
# Launch the app; share=True creates a public Gradio link.
demo.launch(share=True)