store-user-feedback

Runtime error

App Files Files Community

store-user-feedback / app.py

WillHeld

Update app.py

d951e6a verified 4 months ago

raw

history blame

13.1 kB

	import spaces
	from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
	import gradio as gr
	from threading import Thread
	import os
	import json
	import uuid
	from datasets import Dataset, load_dataset
	from huggingface_hub import HfApi, login
	import time

	# Third-party packages used below; they must already be installed (nothing is installed at runtime)
	from gradio_modal import Modal
	import huggingface_hub
	import datasets

	# Model setup: the tokenizer and causal LM are loaded once, at import time
	checkpoint = "WillHeld/soft-raccoon"  # model id handed to both from_pretrained calls
	device = "cuda"  # device the model is moved to; encoded prompts are moved to it as well
	tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

	# Constants for dataset
	DATASET_REPO = "WillHeld/model-feedback" # Hub dataset repo that feedback is synced from and pushed to (replace with your own username/repo)
	DATASET_PATH = "./feedback_data" # Local directory that holds the feedback file
	DATASET_FILENAME = "feedback.jsonl" # Feedback file name; one JSON object is written per line

	# Ensure feedback directory exists before any function opens the feedback file inside it
	os.makedirs(DATASET_PATH, exist_ok=True)

	# Sync existing dataset from Hub if available
	def sync_dataset_from_hub():
	    """Download the feedback dataset from the Hub and merge it into the local JSONL file.

	    Remote records (the ``train`` split of ``DATASET_REPO``) and records already
	    present in the local feedback file are merged by their ``id``; when both
	    sides contain the same ``id`` the local record wins. Records that carry no
	    ``id`` are keyed by their JSON content so they are kept once instead of
	    aborting the whole merge.

	    Returns:
	        bool: True when the merged file was written, False when the dataset
	        could not be loaded or any other error occurred (the error is printed).
	    """
	    try:
	        # Authenticate only when a token is configured; public access is tried otherwise
	        hf_token = os.environ.get("HF_TOKEN")
	        if hf_token:
	            login(token=hf_token)

	        api = HfApi()
	        try:
	            # Called only for its side effect: it raises when the repo is
	            # missing or inaccessible, which routes us to the handler below.
	            api.dataset_info(DATASET_REPO)
	            print(f"Syncing existing dataset from {DATASET_REPO}")
	            remote_dataset = load_dataset(DATASET_REPO)

	            # Convert to list of dictionaries
	            remote_data = list(remote_dataset['train'])

	            local_file = os.path.join(DATASET_PATH, DATASET_FILENAME)
	            local_data = []

	            if os.path.exists(local_file):
	                with open(local_file, 'r') as f:
	                    for line in f:
	                        try:
	                            local_data.append(json.loads(line))
	                        except json.JSONDecodeError:
	                            # Blank or corrupt lines are dropped rather than failing the sync
	                            continue

	            # Merge data (using IDs to avoid duplicates); local entries come
	            # last so they override remote entries with the same id.
	            all_items = {}
	            for item in remote_data + local_data:
	                key = item.get('id') or json.dumps(item, sort_keys=True, default=str)
	                all_items[key] = item

	            # Write to a temporary file first and swap it in, so a failure
	            # while serializing cannot truncate the existing local feedback.
	            tmp_file = local_file + ".tmp"
	            with open(tmp_file, 'w') as f:
	                f.writelines(json.dumps(item) + '\n' for item in all_items.values())
	            os.replace(tmp_file, local_file)

	            print(f"Synced {len(all_items)} feedback items")
	            return True

	        except Exception as e:
	            print(f"Dataset {DATASET_REPO} does not exist yet or could not be accessed: {e}")
	            return False

	    except Exception as e:
	        print(f"Error syncing dataset: {e}")
	        return False

	# Call sync on startup
	sync_dataset_from_hub()

	# Feedback storage functions
	def save_feedback_locally(conversation, satisfaction, feedback_text):
	    """Append one feedback record to the local JSONL feedback file.

	    Args:
	        conversation: Conversation history to store with the feedback.
	        satisfaction: Satisfaction rating chosen by the user.
	        feedback_text: Free-form feedback text.

	    Returns:
	        str: The freshly generated UUID4 identifier of the stored record.
	    """
	    record = {
	        # Unique ID for this feedback entry
	        "id": str(uuid.uuid4()),
	        # Local wall-clock time of the submission
	        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
	        "conversation": conversation,
	        "satisfaction": satisfaction,
	        "feedback": feedback_text
	    }

	    # One JSON object per line, appended so earlier feedback is preserved
	    target = os.path.join(DATASET_PATH, DATASET_FILENAME)
	    serialized = json.dumps(record)
	    with open(target, "a") as sink:
	        sink.write(serialized + "\n")

	    return record["id"]

	def push_feedback_to_hub(hf_token=None):
	    """Push the local feedback data to HuggingFace as a dataset.

	    Args:
	        hf_token: Hub access token. When missing or empty, the ``HF_TOKEN``
	            environment variable is used instead.

	    Returns:
	        bool: True when the dataset was pushed; False when no token is
	        available, there is no feedback to push, or the push failed (the
	        reason is printed).
	    """
	    # An empty string is as unusable as None, so both fall back to the environment
	    if not hf_token:
	        hf_token = os.environ.get("HF_TOKEN")
	    if not hf_token:
	        print("No HuggingFace token provided. Cannot push to Hub.")
	        return False

	    try:
	        # Login to HuggingFace
	        login(token=hf_token)

	        # Check if we have data to push
	        feedback_file = os.path.join(DATASET_PATH, DATASET_FILENAME)
	        if not os.path.exists(feedback_file):
	            print("No feedback data to push.")
	            return False

	        # Load data from the JSONL file. Blank or corrupt lines are skipped
	        # (as sync_dataset_from_hub does) so that a single bad line cannot
	        # block every later push.
	        feedback_data = []
	        with open(feedback_file, "r") as f:
	            for line in f:
	                if not line.strip():
	                    continue
	                try:
	                    feedback_data.append(json.loads(line))
	                except json.JSONDecodeError:
	                    print("Skipping malformed line in feedback file.")

	        # Nothing usable in the file: do not replace the Hub dataset with an empty one
	        if not feedback_data:
	            print("No feedback data to push.")
	            return False

	        # Create a dataset from the feedback data
	        dataset = Dataset.from_list(feedback_data)

	        # Push to Hub
	        dataset.push_to_hub(
	            DATASET_REPO,
	            private=True  # Set to False if you want the dataset to be public
	        )

	        print(f"Feedback data pushed to {DATASET_REPO} successfully.")
	        return True

	    except Exception as e:
	        print(f"Error pushing feedback data to Hub: {e}")
	        return False

	# Function to handle the research feedback submission
	def submit_research_feedback(conv_history, satisfaction, feedback_text):
	    """Save user feedback locally, then try to push it to the HuggingFace Hub.

	    Args:
	        conv_history: List of message dicts with ``role`` and ``content`` keys;
	            ``None`` is treated as an empty conversation.
	        satisfaction: Satisfaction rating chosen by the user.
	        feedback_text: Free-form feedback text.

	    Returns:
	        str: Status message to show to the user, depending on whether the
	        push to the Hub succeeded.
	    """
	    # A missing history must not crash the submission; store it as empty
	    conv_history = conv_history or []

	    # Print debug information
	    print(f"Saving feedback with conversation history containing {len(conv_history)} messages")
	    if conv_history:
	        first_msg, last_msg = conv_history[0], conv_history[-1]
	        # str() keeps the preview working when content is not a plain string
	        print(f"First message: {first_msg['role']}: {str(first_msg['content'])[:30]}...")
	        print(f"Last message: {last_msg['role']}: {str(last_msg['content'])[:30]}...")

	    # Save locally first so the feedback survives a failed push
	    save_feedback_locally(conv_history, satisfaction, feedback_text)

	    # Use environment token
	    push_success = push_feedback_to_hub(os.environ.get("HF_TOKEN"))

	    if push_success:
	        return "Thank you for your valuable feedback! Your insights have been saved to the dataset."
	    return "Thank you for your feedback! It has been saved locally, but couldn't be pushed to the dataset. Please check server logs."

	# Initial state - set up at app start
	def initialize_state():
	    """Return a new, empty conversation history.

	    A fresh list is created on every call, so callers never share one.
	    """
	    empty_history = list()
	    return empty_history

	# Create the Gradio blocks interface
	with gr.Blocks() as demo:
	    with gr.Row():
	        with gr.Column(scale=3):

	            def enhanced_predict(message, history, temperature, top_p, state=None):
	                """Stream the model's reply to ``message``.

	                Args:
	                    message: The new user message.
	                    history: Earlier messages as dicts with ``role``/``content``.
	                    temperature: Sampling temperature.
	                    top_p: Nucleus-sampling probability mass.
	                    state: Optional earlier conversation to use instead of
	                        ``history`` when it is non-empty.

	                Yields:
	                    str: The reply generated so far, growing with each chunk.
	                """
	                prior = state if state else (history or [])
	                # Build a fresh list with only the keys the chat template needs;
	                # the caller's history/state objects are never mutated.
	                messages = [
	                    {"role": turn["role"], "content": turn["content"]}
	                    for turn in prior
	                ]
	                messages.append({"role": "user", "content": message})

	                input_text = tokenizer.apply_chat_template(
	                    messages, tokenize=False, add_generation_prompt=True
	                )
	                # The rendered template already contains its special tokens, so
	                # the tokenizer must not add them a second time.
	                inputs = tokenizer.encode(
	                    input_text, return_tensors="pt", add_special_tokens=False
	                ).to(device)

	                # Create a streamer
	                streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

	                # Set up generation parameters
	                generation_kwargs = {
	                    "input_ids": inputs,
	                    "max_new_tokens": 1024,
	                    "temperature": float(temperature),
	                    "top_p": float(top_p),
	                    "do_sample": True,
	                    "streamer": streamer,
	                    "eos_token_id": 128009,
	                }

	                # Run generation in a separate thread so tokens can be consumed
	                # from the streamer while the model is still generating
	                thread = Thread(target=model.generate, kwargs=generation_kwargs)
	                thread.start()

	                response = ""
	                for new_text in streamer:
	                    response += new_text
	                    yield response

	                thread.join()

	            def chat_with_state(message, history, temperature, top_p):
	                """Adapter with the signature gr.ChatInterface calls; streams the reply."""
	                yield from enhanced_predict(message, history, temperature, top_p)

	            # Create ChatInterface
	            chatbot = gr.ChatInterface(
	                chat_with_state,
	                additional_inputs=[
	                    gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
	                    gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
	                ],
	                type="messages"
	            )

	        with gr.Column(scale=1):
	            report_button = gr.Button("Share Feedback", variant="primary")

	    # Create the modal with feedback form components
	    with Modal(visible=False) as feedback_modal:
	        with gr.Column():
	            gr.Markdown("## Research Preview Feedback")
	            gr.Markdown("Thank you for testing our research model. Your feedback (positive or negative) helps us improve!")

	            satisfaction = gr.Radio(
	                ["Very satisfied", "Satisfied", "Neutral", "Unsatisfied", "Very unsatisfied"],
	                label="How would you rate your experience with this research model?",
	                value="Neutral"
	            )

	            feedback_text = gr.Textbox(
	                lines=5,
	                label="Share your observations (strengths, weaknesses, suggestions):",
	                placeholder="We welcome both positive feedback and constructive criticism to help improve this research prototype..."
	            )

	            submit_button = gr.Button("Submit Research Feedback", variant="primary")
	            response_text = gr.Textbox(label="Status", interactive=False)

	    # Connect the "Share Feedback" button to show the modal
	    report_button.click(
	        lambda: Modal(visible=True),
	        None,
	        feedback_modal
	    )

	    def submit_feedback_for_session(history, satisfaction_choice, feedback_body):
	        """Submit feedback together with the conversation of the current session."""
	        conversation = [
	            {"role": turn["role"], "content": turn["content"]}
	            for turn in (history or [])
	        ]
	        return submit_research_feedback(conversation, satisfaction_choice, feedback_body)

	    # The conversation is read from the ChatInterface's own Chatbot component
	    # when the form is submitted. That value belongs to the current session,
	    # whereas assigning to a gr.State's ``.value`` from inside a handler only
	    # changes the default shared by all sessions.
	    submit_button.click(
	        submit_feedback_for_session,
	        inputs=[chatbot.chatbot, satisfaction, feedback_text],
	        outputs=response_text
	    )

	# Launch the demo
	demo.launch()