import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import gradio as gr
from threading import Thread
import os
import json
import uuid
from datasets import Dataset, load_dataset
from huggingface_hub import HfApi, login
import time

# Additional dependency for the feedback modal (install gradio_modal via requirements.txt)
from gradio_modal import Modal

# Model setup
checkpoint = "WillHeld/soft-raccoon"
device = "cuda"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

# Constants for dataset
DATASET_REPO = "WillHeld/model-feedback"  # Replace with your username
DATASET_PATH = "./feedback_data"  # Local path to store feedback
DATASET_FILENAME = "feedback.jsonl"  # Filename for feedback data

# Ensure feedback directory exists
os.makedirs(DATASET_PATH, exist_ok=True)

# Sync existing dataset from Hub if available
def sync_dataset_from_hub():
    """Download existing dataset from Hub and merge with local data"""
    try:
        # Try to get token from environment variable
        hf_token = os.environ.get("HF_TOKEN")
        if hf_token:
            login(token=hf_token)
        
        # Check if the dataset exists on Hub
        api = HfApi()
        try:
            api.dataset_info(DATASET_REPO)  # Raises if the dataset does not exist
            # Dataset exists; download it
            print(f"Syncing existing dataset from {DATASET_REPO}")
            remote_dataset = load_dataset(DATASET_REPO)
            
            # Convert to list of dictionaries
            remote_data = list(remote_dataset['train'])
            
            # Check if local file exists
            local_file = os.path.join(DATASET_PATH, DATASET_FILENAME)
            local_data = []
            
            if os.path.exists(local_file):
                # Read local data
                with open(local_file, 'r') as f:
                    for line in f:
                        try:
                            local_data.append(json.loads(line))
                        except json.JSONDecodeError:
                            continue
            
            # Merge data (using IDs to avoid duplicates)
            all_items = {}
            for item in remote_data + local_data:
                all_items[item['id']] = item
            
            # Write back merged data
            with open(local_file, 'w') as f:
                for item in all_items.values():
                    f.write(json.dumps(item) + '\n')
            
            print(f"Synced {len(all_items)} feedback items")
            return True
        
        except Exception as e:
            print(f"Dataset {DATASET_REPO} does not exist yet or could not be accessed: {e}")
            return False
            
    except Exception as e:
        print(f"Error syncing dataset: {e}")
        return False

# Call sync on startup
sync_dataset_from_hub()

# Feedback storage functions
def save_feedback_locally(conversation, satisfaction, feedback_text):
    """Save feedback to a local JSONL file"""
    # Create a unique ID for this feedback entry
    feedback_id = str(uuid.uuid4())
    
    # Create a timestamp
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    
    # Prepare the feedback data
    feedback_data = {
        "id": feedback_id,
        "timestamp": timestamp,
        "conversation": conversation,
        "satisfaction": satisfaction,
        "feedback": feedback_text
    }
    
    # Save to local file
    feedback_file = os.path.join(DATASET_PATH, DATASET_FILENAME)
    with open(feedback_file, "a") as f:
        f.write(json.dumps(feedback_data) + "\n")
    
    return feedback_id
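
# Example of a stored JSONL record (illustrative values only):
# {"id": "4f1c9b2e-...", "timestamp": "2024-01-01 12:00:00",
#  "conversation": [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}],
#  "satisfaction": "Neutral", "feedback": "Helpful, but responses were slow."}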

def push_feedback_to_hub(hf_token=None):
    """Push the local feedback data to HuggingFace as a dataset"""
    # Check if we have a token
    if hf_token is None:
        # Try to get token from environment variable
        hf_token = os.environ.get("HF_TOKEN")
        if hf_token is None:
            print("No HuggingFace token provided. Cannot push to Hub.")
            return False
    
    try:
        # Login to HuggingFace
        login(token=hf_token)
        
        # Check if we have data to push
        feedback_file = os.path.join(DATASET_PATH, DATASET_FILENAME)
        if not os.path.exists(feedback_file):
            print("No feedback data to push.")
            return False
        
        # Load data from the JSONL file
        with open(feedback_file, "r") as f:
            feedback_data = [json.loads(line) for line in f]
        
        # Create a dataset from the feedback data
        dataset = Dataset.from_list(feedback_data)
        
        # Push to Hub
        dataset.push_to_hub(
            DATASET_REPO,
            private=True  # Set to False if you want the dataset to be public
        )
        
        print(f"Feedback data pushed to {DATASET_REPO} successfully.")
        return True
    
    except Exception as e:
        print(f"Error pushing feedback data to Hub: {e}")
        return False
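
# Note: Dataset.push_to_hub replaces the remote data with the local JSONL contents,
# which is why sync_dataset_from_hub() merges remote rows into the local file first.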

# Function to handle the research feedback submission
def submit_research_feedback(conv_history, satisfaction, feedback_text):
    """Save user feedback both locally and to HuggingFace Hub"""
    # Print debug information
    print(f"Saving feedback with conversation history containing {len(conv_history)} messages")
    if conv_history:
        print(f"First message: {conv_history[0]['role']}: {conv_history[0]['content'][:30]}...")
        print(f"Last message: {conv_history[-1]['role']}: {conv_history[-1]['content'][:30]}...")
    
    # Save locally first
    feedback_id = save_feedback_locally(conv_history, satisfaction, feedback_text)
    
    # Get token from environment variable
    env_token = os.environ.get("HF_TOKEN")
    
    # Use environment token
    push_success = push_feedback_to_hub(env_token)
    
    if push_success:
        status_msg = "Thank you for your valuable feedback! Your insights have been saved to the dataset."
    else:
        status_msg = "Thank you for your feedback! It has been saved locally, but couldn't be pushed to the dataset. Please check server logs."
    
    return status_msg

# Initial state - set up at app start
def initialize_state():
    """Initialize the conversation state - this could load previous sessions or start fresh"""
    return []  # Start with empty conversation history

# Create the Gradio blocks interface
with gr.Blocks() as demo:
    # Create state to store full conversation history with proper initialization
    conv_state = gr.State(initialize_state)
    
    with gr.Row():
        with gr.Column(scale=3):
            # Custom predict function that streams a reply and updates our state.
            # @spaces.GPU requests a GPU on ZeroGPU Spaces (it is a no-op elsewhere).
            @spaces.GPU
            def enhanced_predict(message, history, temperature, top_p, state):
                # Initialize state if needed
                if state is None:
                    state = []
                    print("Initializing empty state")
                
                # Copy history to state if state is empty but history exists
                if len(state) == 0 and len(history) > 0:
                    state = history.copy()
                    print(f"Copied {len(history)} messages from history to state")
                
                # Add user message to state
                state.append({"role": "user", "content": message})
                
                # Process with the model (this doesn't modify the original history)
                input_text = tokenizer.apply_chat_template(state, tokenize=False, add_generation_prompt=True)
                inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
                
                # Create a streamer; generate() runs in a background thread below so
                # tokens can be consumed here as they are produced
                streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
                
                # Set up generation parameters
                generation_kwargs = {
                    "input_ids": inputs,
                    "max_new_tokens": 1024,
                    "temperature": float(temperature),
                    "top_p": float(top_p),
                    "do_sample": True,
                    "streamer": streamer,
                    "eos_token_id": 128009,
                }
                
                # Run generation in a separate thread
                thread = Thread(target=model.generate, kwargs=generation_kwargs)
                thread.start()
                
                # Yield from the streamer as tokens are generated
                response = ""
                for new_text in streamer:
                    response += new_text
                    # For each partial response, yield the text only
                    # We'll update the state after generation is complete
                    yield response
                
                # After generation completes, record the final response in our state.
                # (A generator's return value only surfaces via StopIteration and is
                # never read by the caller, so no explicit return is needed.)
                state.append({"role": "assistant", "content": response})
            
            # Wrapper that connects to ChatInterface and also updates our state
            def chat_with_state(message, history, temperature, top_p):
                # This function is what interfaces with the ChatInterface.
                # (A `nonlocal conv_state` declaration here would be a SyntaxError at
                # module scope and is unnecessary: we only mutate conv_state.value.)

                # Access the current state
                current_state = conv_state.value if conv_state.value else []
                
                # Call the main function that generates responses and updates state
                # This is a generator function, so we need to iterate through its outputs
                response_gen = enhanced_predict(message, history, temperature, top_p, current_state)
                
                # For each response, yield it and also update our state at the end
                last_response = None
                for response in response_gen:
                    last_response = response
                    yield response
                
                # After generation is complete, update our state
                if last_response is not None:
                    # Create a full copy of the history plus the new exchange
                    updated_state = []
                    # Add all previous history
                    for msg in history:
                        updated_state.append(msg.copy())
                    # Add new exchange
                    updated_state.append({"role": "user", "content": message})
                    updated_state.append({"role": "assistant", "content": last_response})
                    
                    # Store in our state (caveat: assigning to State.value changes the
                    # component's default value, so this history is shared across sessions)
                    conv_state.value = updated_state
                    
                    # Debug
                    print(f"Updated conversation state with {len(updated_state)} messages")
                    if updated_state:
                        last_msg = updated_state[-1]
                        print(f"Last message: {last_msg['role']}: {last_msg['content'][:30]}...")
            
            # ChatInterface with type="messages" supplies history as a list of
            # {"role": ..., "content": ...} dicts, matching the state format above
            chatbot = gr.ChatInterface(
                chat_with_state,
                additional_inputs=[
                    gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
                    gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
                ],
                type="messages"
            )
        
        with gr.Column(scale=1):
            report_button = gr.Button("Share Feedback", variant="primary")
    
    # Create the modal with feedback form components
    with Modal(visible=False) as feedback_modal:
        with gr.Column():
            gr.Markdown("## Research Preview Feedback")
            gr.Markdown("Thank you for testing our research model. Your feedback (positive or negative) helps us improve!")
            
            satisfaction = gr.Radio(
                ["Very satisfied", "Satisfied", "Neutral", "Unsatisfied", "Very unsatisfied"],
                label="How would you rate your experience with this research model?",
                value="Neutral"
            )
            
            feedback_text = gr.Textbox(
                lines=5,
                label="Share your observations (strengths, weaknesses, suggestions):",
                placeholder="We welcome both positive feedback and constructive criticism to help improve this research prototype..."
            )
            
            submit_button = gr.Button("Submit Research Feedback", variant="primary")
            response_text = gr.Textbox(label="Status", interactive=False)
    
    # Connect the "Share Feedback" button to show the modal
    report_button.click(
        lambda: Modal(visible=True),
        None,
        feedback_modal
    )
    
    # Connect the submit button to the submit_research_feedback function
    submit_button.click(
        submit_research_feedback,
        inputs=[conv_state, satisfaction, feedback_text],
        outputs=response_text
    )

# Launch the demo
demo.launch()
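
# To run locally (assuming this file is saved as app.py; set HF_TOKEN if you
# want feedback pushed to the Hub):
#   pip install gradio gradio_modal transformers datasets huggingface_hub spaces torch
#   python app.py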