import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import gradio as gr
from threading import Thread
import os
import json
import uuid
from datasets import Dataset, load_dataset
from huggingface_hub import HfApi, login
import time
# Additional dependencies: modal UI component and Hub/dataset utilities
from gradio_modal import Modal
import huggingface_hub
import datasets
# Model setup
checkpoint = "WillHeld/soft-raccoon"
device = "cuda"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
# Constants for dataset
DATASET_REPO = "WillHeld/model-feedback" # Replace with your username
DATASET_PATH = "./feedback_data" # Local path to store feedback
DATASET_FILENAME = "feedback.jsonl" # Filename for feedback data
# Ensure feedback directory exists
os.makedirs(DATASET_PATH, exist_ok=True)
# Sync existing dataset from Hub if available
def sync_dataset_from_hub():
"""Download existing dataset from Hub and merge with local data"""
try:
# Try to get token from environment variable
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
login(token=hf_token)
# Check if the dataset exists on Hub
api = HfApi()
try:
dataset_info = api.dataset_info(DATASET_REPO)
# Dataset exists, download it
print(f"Syncing existing dataset from {DATASET_REPO}")
remote_dataset = load_dataset(DATASET_REPO)
# Convert to list of dictionaries
remote_data = [item for item in remote_dataset['train']]
# Check if local file exists
local_file = os.path.join(DATASET_PATH, DATASET_FILENAME)
local_data = []
if os.path.exists(local_file):
# Read local data
with open(local_file, 'r') as f:
for line in f:
try:
local_data.append(json.loads(line))
except json.JSONDecodeError:
continue
# Merge data (using IDs to avoid duplicates)
all_items = {}
for item in remote_data + local_data:
all_items[item['id']] = item
# Write back merged data
with open(local_file, 'w') as f:
for item in all_items.values():
f.write(json.dumps(item) + '\n')
print(f"Synced {len(all_items)} feedback items")
return True
except Exception as e:
print(f"Dataset {DATASET_REPO} does not exist yet or could not be accessed: {e}")
return False
except Exception as e:
print(f"Error syncing dataset: {e}")
return False
# Call sync on startup
sync_dataset_from_hub()
# Feedback storage functions
def save_feedback_locally(conversation, satisfaction, feedback_text):
"""Save feedback to a local JSONL file"""
# Create a unique ID for this feedback entry
feedback_id = str(uuid.uuid4())
# Create a timestamp
timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# Prepare the feedback data
feedback_data = {
"id": feedback_id,
"timestamp": timestamp,
"conversation": conversation,
"satisfaction": satisfaction,
"feedback": feedback_text
}
# Save to local file
feedback_file = os.path.join(DATASET_PATH, DATASET_FILENAME)
with open(feedback_file, "a") as f:
f.write(json.dumps(feedback_data) + "\n")
return feedback_id
def push_feedback_to_hub(hf_token=None):
"""Push the local feedback data to HuggingFace as a dataset"""
# Check if we have a token
if hf_token is None:
# Try to get token from environment variable
hf_token = os.environ.get("HF_TOKEN")
if hf_token is None:
print("No HuggingFace token provided. Cannot push to Hub.")
return False
try:
# Login to HuggingFace
login(token=hf_token)
# Check if we have data to push
feedback_file = os.path.join(DATASET_PATH, DATASET_FILENAME)
if not os.path.exists(feedback_file):
print("No feedback data to push.")
return False
# Load data from the JSONL file
with open(feedback_file, "r") as f:
feedback_data = [json.loads(line) for line in f]
# Create a dataset from the feedback data
dataset = Dataset.from_list(feedback_data)
# Push to Hub
dataset.push_to_hub(
DATASET_REPO,
private=True # Set to False if you want the dataset to be public
)
print(f"Feedback data pushed to {DATASET_REPO} successfully.")
return True
except Exception as e:
print(f"Error pushing feedback data to Hub: {e}")
return False
# Function to handle the research feedback submission
def submit_research_feedback(conv_history, satisfaction, feedback_text):
"""Save user feedback both locally and to HuggingFace Hub"""
# Print debug information
print(f"Saving feedback with conversation history containing {len(conv_history)} messages")
if conv_history and len(conv_history) > 0:
print(f"First message: {conv_history[0]['role']}: {conv_history[0]['content'][:30]}...")
print(f"Last message: {conv_history[-1]['role']}: {conv_history[-1]['content'][:30]}...")
# Save locally first
feedback_id = save_feedback_locally(conv_history, satisfaction, feedback_text)
# Get token from environment variable
env_token = os.environ.get("HF_TOKEN")
# Use environment token
push_success = push_feedback_to_hub(env_token)
if push_success:
status_msg = "Thank you for your valuable feedback! Your insights have been saved to the dataset."
else:
status_msg = "Thank you for your feedback! It has been saved locally, but couldn't be pushed to the dataset. Please check server logs."
return status_msg
# Initial state - set up at app start
def initialize_state():
"""Initialize the conversation state - this could load previous sessions or start fresh"""
return [] # Start with empty conversation history
# Create the Gradio blocks interface
with gr.Blocks() as demo:
    # Create state to store full conversation history with proper initialization
    conv_state = gr.State(initialize_state)

    with gr.Row():
        with gr.Column(scale=3):
            # Create a custom predict function that updates our state
            def enhanced_predict(message, history, temperature, top_p, state):
                # Initialize state if needed
                if state is None:
                    state = []
                    print("Initializing empty state")

                # Copy history to state if state is empty but history exists
                if len(state) == 0 and len(history) > 0:
                    state = history.copy()
                    print(f"Copied {len(history)} messages from history to state")

                # Add user message to state
                state.append({"role": "user", "content": message})

                # Process with the model (this doesn't modify the original history)
                input_text = tokenizer.apply_chat_template(state, tokenize=False, add_generation_prompt=True)
                inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)

                # Create a streamer
                streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

                # Set up generation parameters
                generation_kwargs = {
                    "input_ids": inputs,
                    "max_new_tokens": 1024,
                    "temperature": float(temperature),
                    "top_p": float(top_p),
                    "do_sample": True,
                    "streamer": streamer,
                    "eos_token_id": 128009,
                }

                # Run generation in a separate thread
                thread = Thread(target=model.generate, kwargs=generation_kwargs)
                thread.start()

                # Yield from the streamer as tokens are generated
                response = ""
                for new_text in streamer:
                    response += new_text
                    # For each partial response, yield the text only
                    # We'll update the state after generation is complete
                    yield response

                # After generation completes, update our state with the final response
                state.append({"role": "assistant", "content": response})

                # Return the updated state
                return state
            # Create a wrapper that connects to ChatInterface but also updates our state
            def chat_with_state(message, history, temperature, top_p):
                # This function is what interfaces with the ChatInterface.
                # conv_state is the gr.State created above at module level; only its
                # .value attribute is mutated here, so no global/nonlocal declaration
                # is needed (a `nonlocal` at this scope would raise a SyntaxError).

                # Access the current state
                current_state = conv_state.value if conv_state.value else []

                # Call the main function that generates responses and updates state
                # This is a generator function, so we need to iterate through its outputs
                response_gen = enhanced_predict(message, history, temperature, top_p, current_state)

                # For each response, yield it and also update our state at the end
                last_response = None
                for response in response_gen:
                    last_response = response
                    yield response

                # After generation is complete, update our state
                if last_response is not None:
                    # Create a full copy of the history plus the new exchange
                    updated_state = []

                    # Add all previous history
                    for msg in history:
                        updated_state.append(msg.copy())

                    # Add new exchange
                    updated_state.append({"role": "user", "content": message})
                    updated_state.append({"role": "assistant", "content": last_response})

                    # Store in our state
                    conv_state.value = updated_state

                    # Debug
                    print(f"Updated conversation state with {len(updated_state)} messages")
                    if updated_state:
                        last_msg = updated_state[-1]
                        print(f"Last message: {last_msg['role']}: {last_msg['content'][:30]}...")
            # Create ChatInterface
            chatbot = gr.ChatInterface(
                chat_with_state,
                additional_inputs=[
                    gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
                    gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
                ],
                type="messages"
            )

        with gr.Column(scale=1):
            report_button = gr.Button("Share Feedback", variant="primary")

    # Create the modal with feedback form components
    with Modal(visible=False) as feedback_modal:
        with gr.Column():
            gr.Markdown("## Research Preview Feedback")
            gr.Markdown("Thank you for testing our research model. Your feedback (positive or negative) helps us improve!")

            satisfaction = gr.Radio(
                ["Very satisfied", "Satisfied", "Neutral", "Unsatisfied", "Very unsatisfied"],
                label="How would you rate your experience with this research model?",
                value="Neutral"
            )

            feedback_text = gr.Textbox(
                lines=5,
                label="Share your observations (strengths, weaknesses, suggestions):",
                placeholder="We welcome both positive feedback and constructive criticism to help improve this research prototype..."
            )

            submit_button = gr.Button("Submit Research Feedback", variant="primary")
            response_text = gr.Textbox(label="Status", interactive=False)

    # Connect the "Share Feedback" button to show the modal
    report_button.click(
        lambda: Modal(visible=True),
        None,
        feedback_modal
    )

    # Connect the submit button to the submit_research_feedback function
    submit_button.click(
        submit_research_feedback,
        inputs=[conv_state, satisfaction, feedback_text],
        outputs=response_text
    )
# Launch the demo
demo.launch()