"""Gradio chatbot backed by the OpenAI Chat Completions API.

The app pairs a configurable chat assistant (system-prompt presets) with an
LLM-based evaluation step: after a conversation, the full transcript is sent
to a stronger model which scores it against user-editable criteria.
"""

import os
import time

import gradio as gr
from openai import OpenAI

# Initialize OpenAI client.  Reading with [] (not .get) fails fast at startup
# with a KeyError if the key is absent, rather than at the first API call.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
client = OpenAI(api_key=OPENAI_API_KEY)

# Single source of truth for the evaluation placeholder text; used both as the
# initial Markdown value and by reset_conversation so the two cannot drift.
NO_EVALUATION_MSG = (
    "No evaluation yet. Have a conversation and click 'Evaluate' "
    "to see detailed feedback."
)


def chat_with_gpt(message, history, system_prompt):
    """Send one user turn to the chat model and append the exchange.

    Args:
        message: The new user message.
        history: Gradio chat history as a list of (user, assistant) tuples.
        system_prompt: Optional system instructions; ignored if blank.

    Returns:
        (updated_history, "") — the empty string clears the input textbox.
    """
    # Skip the (paid) API round-trip entirely for blank input.
    if not message.strip():
        return history, ""

    # Convert Gradio history format to OpenAI messages format.
    messages = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    try:
        # Call OpenAI API with full conversation context.
        response = client.chat.completions.create(
            model="gpt-4o-mini",  # or "gpt-4o" for better quality
            messages=messages,
            max_tokens=500,
            temperature=0.7,
            top_p=0.9,
        )
        assistant_response = response.choices[0].message.content
        history.append((message, assistant_response))
        return history, ""  # Return updated history and clear input.
    except Exception as e:
        # Surface the failure in the chat window instead of crashing the UI.
        error_msg = f"Error: {str(e)}"
        history.append((message, error_msg))
        return history, ""


def evaluate_conversation(history, system_prompt, evaluation_metrics,
                          progress=gr.Progress()):
    """Score the finished conversation against the user-supplied criteria.

    Builds a plain-text transcript, asks gpt-4o to grade it, and returns a
    Markdown report (or an error string on failure).
    """
    if not history:
        return "❌ No conversation to evaluate. Please have a conversation first."

    progress(0, desc="Starting evaluation...")

    # Prepare the conversation transcript.
    progress(0.2, desc="Preparing conversation transcript...")
    conversation_text = ""
    if system_prompt.strip():
        conversation_text += f"System Prompt: {system_prompt}\n\n"
    conversation_text += "Conversation:\n"
    for i, (user_msg, assistant_msg) in enumerate(history, 1):
        conversation_text += f"Turn {i}:\n"
        conversation_text += f"User: {user_msg}\n"
        conversation_text += f"Assistant: {assistant_msg}\n\n"

    # Create evaluation prompt.
    progress(0.4, desc="Crafting evaluation prompt...")
    evaluation_prompt = f"""Please evaluate the following conversation based on these specific criteria:

{evaluation_metrics}

CONVERSATION TO EVALUATE:
{conversation_text}

Please provide a detailed evaluation report that:
1. Scores each criterion on a scale of 1-10
2. Provides specific examples from the conversation to support your scores
3. Offers constructive feedback for improvement
4. Gives an overall assessment

Format your response clearly with headings for each evaluation criterion."""

    try:
        # Call OpenAI API for evaluation.
        progress(0.6, desc="Sending request to OpenAI...")
        response = client.chat.completions.create(
            model="gpt-4o",  # Use better model for evaluation.
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an expert conversation analyst. Provide "
                        "thorough, objective evaluations with specific "
                        "examples and actionable feedback."
                    ),
                },
                {"role": "user", "content": evaluation_prompt},
            ],
            max_tokens=1000,
            temperature=0.3,  # Lower temperature for more consistent evaluation.
        )

        progress(0.9, desc="Processing evaluation results...")
        # Format the response nicely.
        evaluation_result = f"""# 📊 Conversation Evaluation Report

{response.choices[0].message.content}

---
*Evaluation completed at {time.strftime('%Y-%m-%d %H:%M:%S')}*
*Conversation length: {len(history)} exchanges*
"""
        progress(1.0, desc="Evaluation complete!")
        return evaluation_result
    except Exception as e:
        progress(1.0, desc="Evaluation failed")
        return f"❌ **Error during evaluation:** {str(e)}"


def start_evaluation():
    """Return initial evaluation status shown while the request is in flight."""
    return ("🔄 **Evaluating conversation...** \n\nPlease wait while we analyze "
            "your conversation. This may take 10-30 seconds depending on "
            "conversation length.")


def reset_conversation():
    """Clear history, the input box, and the evaluation report."""
    return [], "", NO_EVALUATION_MSG


def load_preset_prompt(preset):
    """Return the predefined system prompt for *preset* ("" if unknown/Custom)."""
    presets = {
        "General Assistant": "You are a helpful, knowledgeable, and friendly AI assistant.",
        "Therapist": "You are a compassionate and professional therapist. Provide supportive, empathetic responses while maintaining appropriate boundaries. Ask thoughtful questions to help the user explore their feelings.",
        "Distressed Teen Jen": "You are a distressed 14 year old teenager dealing with typical teenage problems like school stress, peer pressure, and family issues. Respond with the emotional intensity and perspective of a troubled teen seeking help.",
        "Distressed Teen John": "You are a distressed 15 year old teenager dealing with typical teenage problems like school stress, peer pressure, and family issues. Respond with the emotional intensity and perspective of a troubled teen seeking help.",
        "Technical Expert": "You are a technical expert with deep knowledge in programming, engineering, and technology. Provide detailed, accurate technical explanations and solutions.",
        "Creative Writer": "You are a creative writing assistant. Help with storytelling, character development, plot ideas, and provide creative inspiration with vivid descriptions.",
        "Custom": "",
    }
    return presets.get(preset, "")


# Default evaluation metrics shown in the configuration textbox.
default_evaluation = """Please evaluate the conversation according to:
1) **Coherence**: How logically consistent and well-structured are the responses? Do they flow naturally from one turn to the next?
2) **Relevance**: How well do the assistant's responses address the user's specific questions, needs, and context?
3) **Engagement**: How natural, conversational, and engaging is the interaction? Does it feel like a meaningful dialogue?
4) **Helpfulness**: How useful and actionable are the assistant's responses? Do they provide value to the user?
5) **Role Consistency**: How well does the assistant maintain its assigned role/persona throughout the conversation? Are there any character breaks?"""

# Create the Gradio interface.
with gr.Blocks(title="OpenAI Chatbot with Evaluation") as demo:
    gr.Markdown("# OpenAI Chatbot with Conversation Evaluation")

    with gr.Row():
        # Left sidebar for system prompt and evaluation configuration.
        with gr.Column(scale=1, min_width=350):
            gr.Markdown("## System Configuration")

            # Preset dropdown.
            preset_dropdown = gr.Dropdown(
                choices=["General Assistant", "Therapist", "Distressed Teen Jen",
                         "Distressed Teen John", "Technical Expert",
                         "Creative Writer", "Custom"],
                value="General Assistant",
                label="Quick Presets",
                info="Select a preset or choose 'Custom' to write your own",
            )

            # System prompt textbox.
            system_prompt = gr.Textbox(
                label="System Prompt",
                placeholder="Enter system instructions here...",
                value="You are a helpful, knowledgeable, and friendly AI assistant.",
                lines=4,
                info="This guides the AI's behavior and personality",
            )

            gr.Markdown("## Evaluation Configuration")

            # Evaluation metrics textbox.
            evaluation_metrics = gr.Textbox(
                label="Evaluation Metrics",
                placeholder="Enter evaluation criteria here...",
                value=default_evaluation,
                lines=8,
                info="Customize how you want the conversation to be evaluated",
            )

            gr.Markdown("### Usage")
            gr.Markdown("• Configure system prompt and evaluation criteria")
            gr.Markdown("• Have a conversation with the AI")
            gr.Markdown("• Click 'Evaluate' to get detailed feedback")
            gr.Markdown("• Evaluation takes 10-30 seconds ⏱️")

        # Right side for chat interface.
        with gr.Column(scale=2):
            gr.Markdown("**Chat with your configured AI assistant**")

            # Chatbot component to display conversation.
            chatbot = gr.Chatbot(
                label="Conversation",
                value=[],
                height=500,
            )

            # Input textbox.
            msg_input = gr.Textbox(
                label="Your message",
                placeholder="Type your message here...",
                lines=2,
                scale=4,
            )

            # Buttons row.
            with gr.Row():
                send_btn = gr.Button("Send", variant="primary", scale=1)
                reset_btn = gr.Button("Reset Chat", variant="secondary", scale=1)
                evaluate_btn = gr.Button("🔍 Evaluate", variant="huggingface",
                                         scale=1)

            # Evaluation results section (collapsible).
            with gr.Accordion("📊 Evaluation Report", open=False) as evaluation_accordion:
                evaluation_output = gr.Markdown(
                    value=NO_EVALUATION_MSG,
                    label="Evaluation Results",
                )

    # Event handlers.

    # Load preset prompts.
    preset_dropdown.change(
        fn=load_preset_prompt,
        inputs=[preset_dropdown],
        outputs=[system_prompt],
    )

    # Send message.
    send_btn.click(
        fn=chat_with_gpt,
        inputs=[msg_input, chatbot, system_prompt],
        outputs=[chatbot, msg_input],
    )

    # Allow Enter key to send message.
    msg_input.submit(
        fn=chat_with_gpt,
        inputs=[msg_input, chatbot, system_prompt],
        outputs=[chatbot, msg_input],
    )

    # Evaluate conversation: first show the "in progress" status, then run the
    # (slow) evaluation and replace it with the report.
    evaluate_btn.click(
        fn=start_evaluation,
        inputs=[],
        outputs=[evaluation_output],
    ).then(
        fn=evaluate_conversation,
        inputs=[chatbot, system_prompt, evaluation_metrics],
        outputs=[evaluation_output],
    )

    # Reset button functionality.
    reset_btn.click(
        fn=reset_conversation,
        inputs=[],
        outputs=[chatbot, msg_input, evaluation_output],
    )


if __name__ == "__main__":
    # Launch the app (share=True creates a public Gradio link).
    demo.launch(share=True)