|
import os |
|
|
|
import gradio as gr |
|
from openai import OpenAI |
|
import time |
|
|
|
|
|
# Read the OpenAI key from the environment at import time; a missing key
# raises KeyError immediately (fail fast) rather than on the first request.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# Shared OpenAI client used by both the chat and evaluation callbacks below.
client = OpenAI(api_key = OPENAI_API_KEY)
|
|
|
def chat_with_gpt(message, history, system_prompt):
    """Send *message* to the OpenAI chat API and append the exchange to *history*.

    Gradio callback for both the Send button and textbox submit.

    Parameters:
        message: The user's new message text.
        history: Prior (user, assistant) exchanges in Gradio tuple format.
        system_prompt: Optional system instructions; skipped when blank.

    Returns:
        (history, "") — the updated history and an empty string that clears
        the message input box.
    """
    # Ignore empty / whitespace-only submissions instead of wasting an API
    # call and appending a degenerate turn to the history.
    if not message or not message.strip():
        return history, ""

    messages = []

    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})

    # Replay prior turns so the model has the full conversational context.
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            max_tokens=500,
            temperature=0.7,
            top_p=0.9,
        )

        assistant_response = response.choices[0].message.content
        history.append((message, assistant_response))
        return history, ""

    except Exception as e:
        # Surface API failures inside the chat window rather than crashing
        # the UI; the error text becomes the assistant's reply for this turn.
        error_msg = f"Error: {str(e)}"
        history.append((message, error_msg))
        return history, ""
|
|
|
def _format_transcript(history, system_prompt):
    """Render the (user, assistant) history as a plain-text transcript.

    The system prompt (when non-blank) is included at the top so the judge
    model can score role consistency against it.
    """
    transcript = ""
    if system_prompt.strip():
        transcript += f"System Prompt: {system_prompt}\n\n"

    transcript += "Conversation:\n"
    for i, (user_msg, assistant_msg) in enumerate(history, 1):
        transcript += f"Turn {i}:\n"
        transcript += f"User: {user_msg}\n"
        transcript += f"Assistant: {assistant_msg}\n\n"
    return transcript


def evaluate_conversation(history, system_prompt, evaluation_metrics, progress=gr.Progress()):
    """Ask a stronger model (gpt-4o) to grade the conversation against the metrics.

    Parameters:
        history: (user, assistant) exchanges from the chatbot component.
        system_prompt: The system prompt the assistant was running under.
        evaluation_metrics: Free-text rubric the judge is asked to score against.
        progress: Injected by Gradio to drive the progress bar
            (the gr.Progress() default is Gradio's injection sentinel).

    Returns:
        A Markdown evaluation report, or an error message string on failure.
    """
    if not history:
        return "β No conversation to evaluate. Please have a conversation first."

    progress(0, desc="Starting evaluation...")

    progress(0.2, desc="Preparing conversation transcript...")
    conversation_text = _format_transcript(history, system_prompt)

    progress(0.4, desc="Crafting evaluation prompt...")
    evaluation_prompt = f"""Please evaluate the following conversation based on these specific criteria:

{evaluation_metrics}

CONVERSATION TO EVALUATE:
{conversation_text}

Please provide a detailed evaluation report that:
1. Scores each criterion on a scale of 1-10
2. Provides specific examples from the conversation to support your scores
3. Offers constructive feedback for improvement
4. Gives an overall assessment

Format your response clearly with headings for each evaluation criterion."""

    try:
        progress(0.6, desc="Sending request to OpenAI...")
        # Low temperature for reproducible grading; larger token budget than
        # the chat calls because the report is long-form.
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are an expert conversation analyst. Provide thorough, objective evaluations with specific examples and actionable feedback."},
                {"role": "user", "content": evaluation_prompt}
            ],
            max_tokens=1000,
            temperature=0.3
        )

        progress(0.9, desc="Processing evaluation results...")

        evaluation_result = f"""# π Conversation Evaluation Report

{response.choices[0].message.content}

---
*Evaluation completed at {time.strftime('%Y-%m-%d %H:%M:%S')}*
*Conversation length: {len(history)} exchanges*
"""

        progress(1.0, desc="Evaluation complete!")
        return evaluation_result

    except Exception as e:
        # Report the failure into the evaluation panel instead of raising.
        progress(1.0, desc="Evaluation failed")
        return f"β **Error during evaluation:** {str(e)}"
|
|
|
def start_evaluation():
    """Produce the interim status message shown while the evaluation runs."""
    status = (
        "π **Evaluating conversation...** \n\n"
        "Please wait while we analyze your conversation. "
        "This may take 10-30 seconds depending on conversation length."
    )
    return status
|
|
|
def reset_conversation():
    """Clear the chat history, the message input, and the evaluation panel."""
    empty_history = []
    cleared_input = ""
    placeholder = "No evaluation yet. Have a conversation and click 'Evaluate' to see detailed feedback."
    return empty_history, cleared_input, placeholder
|
|
|
def load_preset_prompt(preset):
    """Look up the canned system prompt for a preset name.

    Unknown names — and the "Custom" entry — map to an empty string so the
    user can type their own prompt into the textbox.
    """
    prompt_library = {
        "General Assistant": "You are a helpful, knowledgeable, and friendly AI assistant.",
        "Therapist": "You are a compassionate and professional therapist. Provide supportive, empathetic responses while maintaining appropriate boundaries. Ask thoughtful questions to help the user explore their feelings.",
        "Distressed Teen Jen": "You are a distressed 14 year old teenager dealing with typical teenage problems like school stress, peer pressure, and family issues. Respond with the emotional intensity and perspective of a troubled teen seeking help.",
        "Distressed Teen John": "You are a distressed 15 year old teenager dealing with typical teenage problems like school stress, peer pressure, and family issues. Respond with the emotional intensity and perspective of a troubled teen seeking help.",
        "Technical Expert": "You are a technical expert with deep knowledge in programming, engineering, and technology. Provide detailed, accurate technical explanations and solutions.",
        "Creative Writer": "You are a creative writing assistant. Help with storytelling, character development, plot ideas, and provide creative inspiration with vivid descriptions.",
        "Custom": "",
    }
    return prompt_library[preset] if preset in prompt_library else ""
|
|
|
|
|
# Default judge rubric pre-filled into the "Evaluation Metrics" textbox;
# the user can edit it in the UI before running an evaluation. This is a
# runtime string shown to (and sent on behalf of) the user — do not reword
# casually.
default_evaluation = """Please evaluate the conversation according to:

1) **Coherence**: How logically consistent and well-structured are the responses? Do they flow naturally from one turn to the next?

2) **Relevance**: How well do the assistant's responses address the user's specific questions, needs, and context?

3) **Engagement**: How natural, conversational, and engaging is the interaction? Does it feel like a meaningful dialogue?

4) **Helpfulness**: How useful and actionable are the assistant's responses? Do they provide value to the user?

5) **Role Consistency**: How well does the assistant maintain its assigned role/persona throughout the conversation? Are there any character breaks?"""
|
|
|
|
|
# Build the Gradio UI: a configuration column (left), a chat column (right),
# a collapsible evaluation report, and all of the event wiring.
with gr.Blocks(title="OpenAI Chatbot with Evaluation") as demo:
    gr.Markdown("# OpenAI Chatbot with Conversation Evaluation")

    with gr.Row():

        # Left column: preset picker, system prompt, and evaluation rubric.
        with gr.Column(scale=1, min_width=350):
            gr.Markdown("## System Configuration")

            preset_dropdown = gr.Dropdown(
                choices=["General Assistant", "Therapist", "Distressed Teen Jen", "Distressed Teen John", "Technical Expert", "Creative Writer", "Custom"],
                value="General Assistant",
                label="Quick Presets",
                info="Select a preset or choose 'Custom' to write your own"
            )

            # Default value matches the "General Assistant" preset selected above.
            system_prompt = gr.Textbox(
                label="System Prompt",
                placeholder="Enter system instructions here...",
                value="You are a helpful, knowledgeable, and friendly AI assistant.",
                lines=4,
                info="This guides the AI's behavior and personality"
            )

            gr.Markdown("## Evaluation Configuration")

            # Pre-filled with the module-level default_evaluation rubric.
            evaluation_metrics = gr.Textbox(
                label="Evaluation Metrics",
                placeholder="Enter evaluation criteria here...",
                value=default_evaluation,
                lines=8,
                info="Customize how you want the conversation to be evaluated"
            )

            gr.Markdown("### Usage")
            gr.Markdown("β’ Configure system prompt and evaluation criteria")
            gr.Markdown("β’ Have a conversation with the AI")
            gr.Markdown("β’ Click 'Evaluate' to get detailed feedback")
            gr.Markdown("β’ Evaluation takes 10-30 seconds β±οΈ")

        # Right column: chat display, message input, and the action buttons.
        with gr.Column(scale=2):
            gr.Markdown("**Chat with your configured AI assistant**")

            chatbot = gr.Chatbot(
                label="Conversation",
                value=[],
                height=500
            )

            msg_input = gr.Textbox(
                label="Your message",
                placeholder="Type your message here...",
                lines=2,
                scale=4
            )

            with gr.Row():
                send_btn = gr.Button("Send", variant="primary", scale=1)
                reset_btn = gr.Button("Reset Chat", variant="secondary", scale=1)
                evaluate_btn = gr.Button("π Evaluate", variant="huggingface", scale=1)

            # Collapsed by default; opened by the user to read the report.
            with gr.Accordion("π Evaluation Report", open=False) as evaluation_accordion:
                evaluation_output = gr.Markdown(
                    value="No evaluation yet. Have a conversation and click 'Evaluate' to see detailed feedback.",
                    label="Evaluation Results"
                )

    # Selecting a preset overwrites the system-prompt textbox.
    preset_dropdown.change(
        fn=load_preset_prompt,
        inputs=[preset_dropdown],
        outputs=[system_prompt]
    )

    # Sending via button click or pressing Enter in the textbox are wired to
    # the same handler: both update the chat history and clear the input.
    send_btn.click(
        fn=chat_with_gpt,
        inputs=[msg_input, chatbot, system_prompt],
        outputs=[chatbot, msg_input]
    )

    msg_input.submit(
        fn=chat_with_gpt,
        inputs=[msg_input, chatbot, system_prompt],
        outputs=[chatbot, msg_input]
    )

    # Two-step evaluation: show an interim "working" status immediately,
    # then run the slow evaluation and replace the status with the report.
    evaluate_btn.click(
        fn=start_evaluation,
        inputs=[],
        outputs=[evaluation_output]
    ).then(
        fn=evaluate_conversation,
        inputs=[chatbot, system_prompt, evaluation_metrics],
        outputs=[evaluation_output]
    )

    # Reset clears the chat, the input box, and the evaluation panel.
    reset_btn.click(
        fn=reset_conversation,
        inputs=[],
        outputs=[chatbot, msg_input, evaluation_output]
    )

# NOTE(review): share=True publishes a public Gradio link — anyone with the
# URL can chat on this API key's spend; confirm that exposure is intended.
demo.launch(share=True)