Update app.py
app.py (CHANGED)
@@ -3,7 +3,7 @@
 Multimodal chat front-end for Amazon Bedrock Nova Premier v1
 (text and/or image → assistant text) with per-session JSONL logging.
 Logs FULL conversations to HF Dataset and images to HF repo via Git LFS.
-
+Uses append-only logging to prevent data loss from concurrent users.
 
 Prereqs:
     pip install gradio==5.38 boto3 pillow datasets huggingface_hub
@@ -17,9 +17,10 @@ Update REPO_ID to your HF dataset repository.
 """
 
 from __future__ import annotations
-import base64, datetime, io, json, pathlib, uuid, os, threading
+import base64, datetime, io, json, pathlib, uuid, os, threading, time
 from typing import Dict, List, Optional, Tuple
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor
+import queue
 
 import boto3
 from botocore.config import Config
@@ -45,7 +46,7 @@ Never mention Amazon or Nova.
 
 # HuggingFace Configuration
 HF_TOKEN = os.getenv("HF_TOKEN")  # Set this in your Space's secrets
-REPO_ID = "collinear-ai/nova-premier-redteaming-external"
+REPO_ID = "collinear-ai/nova-premier-redteaming-external"
 HF_API = HfApi()
 
 # Local directories (for temporary storage)
@@ -56,7 +57,11 @@ IMG_DIR.mkdir(exist_ok=True)
 
 # Thread pool for background operations
 executor = ThreadPoolExecutor(max_workers=4)
-
+
+# Queue-based logging system to prevent race conditions
+log_queue = queue.Queue()
+dataset_worker_running = False
+dataset_worker_lock = threading.Lock()
 
 # ====== Bedrock client ====== #
 bedrock = boto3.client(
@@ -115,10 +120,12 @@ def upload_image_to_hf_repo_sync(session_id: str, pil_img: Image.Image, message_
     try:
         # Create unique filename with message index and timestamp
         ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
-
+        # Add a random component to ensure uniqueness even with concurrent uploads
+        random_suffix = str(uuid.uuid4())[:8]
+        filename = f"images/{session_id}_{message_index:03d}_{ts}_{random_suffix}.png"
 
         # Save locally first
-        local_path = IMG_DIR / f"{session_id}_{message_index:03d}_{ts}.png"
+        local_path = IMG_DIR / f"{session_id}_{message_index:03d}_{ts}_{random_suffix}.png"
         pil_img.save(local_path, format="PNG")
 
         # Upload to HF repo and wait for completion
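The hunk above makes filename collisions practically impossible by combining a per-second timestamp with a short UUID slice. A standalone sketch of the same scheme, using only the standard library (the function name `unique_image_name` is illustrative, not part of app.py):

    # Sketch of the naming scheme above; a collision would need the same
    # session, message index, second, and 8-char random UUID prefix.
    import datetime
    import uuid

    def unique_image_name(session_id: str, message_index: int) -> str:
        ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
        random_suffix = str(uuid.uuid4())[:8]
        return f"images/{session_id}_{message_index:03d}_{ts}_{random_suffix}.png"

    print(unique_image_name("d8771c05", 1))
    # e.g. images/d8771c05_001_20240123T143022_3fa85f64.png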
@@ -140,66 +147,74 @@ def upload_image_to_hf_repo_sync(session_id: str, pil_img: Image.Image, message_
         print(f"❌ Failed to upload image to HF repo: {e}")
         return None
 
-    def background_save():
+def dataset_worker():
+    """Background worker that processes dataset updates sequentially to prevent race conditions."""
+    print("🔧 Dataset worker started")
+
+    while True:
         try:
+            # Get next log entry from queue (blocks until available)
+            log_entry = log_queue.get(timeout=30)  # 30 second timeout
+
+            if log_entry is None:  # Shutdown signal
+                print("🛑 Dataset worker shutting down")
+                break
+
+            # Process the log entry
+            session_id = log_entry["session_id"]
+            chat_history = log_entry["chat_history"]
+            br_history = log_entry["br_history"]
+            image_urls = log_entry["image_urls"]
+
+            try:
+                # Create individual conversation record (append-only approach)
                 conversation_record = {
                     "session_id": session_id,
                     "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
                     "message_count": len(chat_history),
+                    "conversation_messages": [],
+                    "bedrock_history": br_history,
+                    "images_count": len(image_urls),
+                    "record_type": "full_conversation"  # For filtering
                 }
 
+                # Convert chat history to structured format
+                for i, (user_msg, assistant_msg) in enumerate(chat_history):
+                    conversation_record["conversation_messages"].extend([
+                        {
+                            "message_index": i,
+                            "role": "user",
+                            "content": user_msg,
+                            "image_url": image_urls.get(i, ""),
+                            "has_image": i in image_urls
+                        },
+                        {
+                            "message_index": i,
+                            "role": "assistant",
+                            "content": assistant_msg,
+                            "image_url": "",
+                            "has_image": False
+                        }
+                    ])
+
+                # Append to dataset (safer than overwrite)
                 try:
+                    # Load existing dataset
                    existing_dataset = Dataset.load_dataset(REPO_ID, token=HF_TOKEN, split="train")
                    records = existing_dataset.to_list()
 
-                    for idx, record in enumerate(records):
-                        if record.get("session_id") == session_id:
-                            records[idx] = conversation_record
-                            session_exists = True
-                            break
+                    # Remove any existing record with same session_id to avoid duplicates
+                    records = [r for r in records if r.get("session_id") != session_id]
+
+                    # Add new record
+                    records.append(conversation_record)
                    updated_dataset = Dataset.from_list(records)
 
                 except Exception as load_error:
+                    print(f"Creating new dataset (load failed): {load_error}")
                    updated_dataset = Dataset.from_list([conversation_record])
 
-                # Push
+                # Push to hub
                 updated_dataset.push_to_hub(
                     REPO_ID,
                     token=HF_TOKEN,
@@ -207,13 +222,47 @@ def save_full_conversation_to_hf_dataset_async(session_id: str, chat_history: Li
                     commit_message=f"Update conversation {session_id[:8]} ({len(chat_history)} exchanges)"
                 )
 
+                print(f"✅ Conversation {session_id[:8]} saved to dataset (queue size: {log_queue.qsize()})")
 
+            except Exception as e:
+                print(f"❌ Failed to save conversation {session_id[:8]} to dataset: {e}")
+
+            # Mark task as done
+            log_queue.task_done()
+
+        except queue.Empty:
+            # Timeout occurred, continue loop
+            continue
         except Exception as e:
+            print(f"❌ Dataset worker error: {e}")
+            time.sleep(5)  # Wait before retrying
+
+def start_dataset_worker():
+    """Start the dataset worker thread if not already running."""
+    global dataset_worker_running
 
+    with dataset_worker_lock:
+        if not dataset_worker_running:
+            dataset_worker_running = True
+            worker_thread = threading.Thread(target=dataset_worker, daemon=True)
+            worker_thread.start()
+            print("🚀 Dataset worker thread started")
+
+def queue_conversation_save(session_id: str, chat_history: List[Tuple], br_history: List[Dict], image_urls: Dict[int, str]):
+    """Queue a conversation for saving to prevent race conditions."""
+    if not HF_TOKEN:
+        return
+
+    log_entry = {
+        "session_id": session_id,
+        "chat_history": chat_history.copy(),  # Copy to avoid reference issues
+        "br_history": br_history.copy(),
+        "image_urls": image_urls.copy(),
+        "queued_at": datetime.datetime.utcnow().isoformat()
+    }
+
+    log_queue.put(log_entry)
+    print(f"📝 Queued conversation {session_id[:8]} for saving (queue size: {log_queue.qsize()})")
 
 def save_local_conversation_log(session_id: str, chat_history: List[Tuple], image_urls: Dict[int, str]):
     """Save full conversation to local JSONL file."""
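The two hunks above are a single-consumer pattern: every request thread enqueues, and exactly one worker drains the queue, so the load/modify/push cycle on the dataset can never interleave. A minimal self-contained sketch of why that serializes writes (all names below are illustrative, not part of app.py):

    # One worker thread is the only writer to the shared store, so
    # concurrent producers cannot overwrite each other's updates.
    import queue
    import threading

    jobs: "queue.Queue[dict | None]" = queue.Queue()
    store: dict[str, dict] = {}  # stand-in for the HF dataset

    def worker() -> None:
        while True:
            job = jobs.get()
            if job is None:  # sentinel: shut down
                break
            store[job["session_id"]] = job  # sole writer, no races
            jobs.task_done()

    t = threading.Thread(target=worker, daemon=True)
    t.start()
    for i in range(3):
        jobs.put({"session_id": f"s{i}", "n": i})
    jobs.join()     # wait until everything queued so far is processed
    jobs.put(None)  # ask the worker to exit
    t.join()
    print(store)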
@@ -237,6 +286,10 @@ def save_local_conversation_log(session_id: str, chat_history: List[Tuple], imag
     with path.open("w", encoding="utf-8") as f:
         f.write(json.dumps(conversation_record, ensure_ascii=False) + "\n")
 
+# Start the dataset worker when module loads
+if HF_TOKEN:
+    start_dataset_worker()
+
 # ====== Gradio UI ====== #
 with gr.Blocks(title="Nova Premier Red Team Chat") as demo:
     gr.Markdown(
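One caveat: the worker runs as a daemon thread, so anything still sitting in `log_queue` when the process exits is dropped. A possible flush hook, reusing the `None` sentinel the worker already understands (a sketch only, not part of this commit):

    # Drain pending saves before interpreter shutdown; atexit handlers run
    # while daemon threads are still alive, so the worker can finish.
    import atexit

    def flush_and_stop_worker() -> None:
        log_queue.join()     # block until queued conversations are processed
        log_queue.put(None)  # the worker's shutdown signal

    atexit.register(flush_and_stop_worker)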
@@ -247,16 +300,17 @@ with gr.Blocks(title="Nova Premier Red Team Chat") as demo:
         **Multi-User Support:**
         - 👥 Each browser tab/session gets a unique conversation ID
         - 🔒 Conversations are isolated between users
+        - 🚫 **Race condition safe** - sequential dataset updates prevent data loss
         - ⚡ Real-time image upload with direct URLs
 
         **Logging Features:**
+        - 💾 **Queue-based logging** prevents concurrent write conflicts
         - 🔄 **Context preservation** - entire chat context maintained
         - 📸 **Image tracking** - direct links to uploaded images
         - 🏷️ **Session management** - unique session ID per conversation
 
         **Storage:**
-        - 🤗 HF Dataset: {"✅ Enabled" if HF_TOKEN else "❌ Disabled (set HF_TOKEN)"} - Repo: `{REPO_ID}`
+        - 🤗 HF Dataset: {"✅ Enabled with queue-based safety" if HF_TOKEN else "❌ Disabled (set HF_TOKEN)"} - Repo: `{REPO_ID}`
         - 🖼️ Images: {"✅ Uploaded with direct URLs" if HF_TOKEN else "❌ Local only"}
         """
     )
@@ -299,6 +353,15 @@ with gr.Blocks(title="Nova Premier Red Team Chat") as demo:
             max_lines=1
         )
 
+    # Queue status display
+    with gr.Row():
+        queue_status = gr.Textbox(
+            label="Logging Queue Status",
+            value=f"Queue-based logging {'enabled' if HF_TOKEN else 'disabled'} - prevents data loss",
+            interactive=False,
+            max_lines=1
+        )
+
     # ---- main handler ---- #
     def chat(chat_log, br_history, sess_id, img_urls_dict,
              image, text,
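The read-only `queue_status` box is refreshed the standard Gradio way: a handler lists the component in its `outputs` and returns its new value. A tiny self-contained illustration (component and variable names here are made up, not from app.py):

    # A non-interactive Textbox updated by an event handler's return value.
    import gradio as gr

    with gr.Blocks() as demo2:
        status = gr.Textbox(label="Status", interactive=False)
        btn = gr.Button("Ping")
        btn.click(lambda: "pong", inputs=None, outputs=[status])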
@@ -343,12 +406,14 @@ with gr.Blocks(title="Nova Premier Red Team Chat") as demo:
         display_user = text.strip() if text.strip() else "[image uploaded]"
         chat_log.append((display_user, reply))
 
-        # Save
+        # Save locally immediately
         save_local_conversation_log(sess_id, chat_log, img_urls_dict)
-
+
+        # Queue for HF dataset save (prevents race conditions)
+        queue_conversation_save(sess_id, chat_log, new_br_history, img_urls_dict)
 
         # Update status message with real image URL
+        status_msg = f"✅ Conversation logged for session {sess_id[:8]}\n"
         status_msg += f"📊 Total exchanges: {len(chat_log)} | Messages in context: {len(new_br_history)}\n"
 
         if image and hf_img_url:
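Note that `queue_conversation_save` snapshots the handler's state with shallow `.copy()` calls; that is enough here because each chat entry is an immutable (user, assistant) tuple, so the queued snapshot stays stable even while the UI keeps appending. A two-line demonstration of that property:

    # A shallow copy freezes the list of immutable tuples as of this moment.
    history = [("hi", "hello")]
    snapshot = history.copy()
    history.append(("more", "text"))
    print(len(snapshot), len(history))  # 1 2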
@@ -365,9 +430,13 @@ with gr.Blocks(title="Nova Premier Red Team Chat") as demo:
         # Update session info
         image_count = len([k for k in img_urls_dict.keys() if isinstance(k, int)])
         session_display = f"Session: {sess_id[:8]}... | Messages: {len(chat_log)} | Images: {image_count}"
+
+        # Update queue status
+        queue_size = log_queue.qsize() if HF_TOKEN else 0
+        queue_display = f"Dataset queue: {queue_size} pending | Queue-based logging prevents data loss"
 
         return (chat_log, chat_log, new_br_history, sess_id, img_urls_dict,
-                None, "", status_msg, session_display)
+                None, "", status_msg, session_display, queue_display)
 
     send_btn.click(
         chat,
@@ -375,60 +444,67 @@ with gr.Blocks(title="Nova Premier Red Team Chat") as demo:
                 img_in, txt_in,
                 max_tk, temp, top_p, top_k],
        outputs=[chatbot, chat_state, br_state, sess_state, img_urls_state,
-                 img_in, txt_in, log_status, session_info],
+                 img_in, txt_in, log_status, session_info, queue_status],
    )
 
     # ---- clear chat ---- #
     def reset():
         new_session_id = str(uuid.uuid4())
+        queue_size = log_queue.qsize() if HF_TOKEN else 0
         return ([], [], new_session_id, {}, None, "",
                 "Ready to start logging full conversations...",
-                f"New session: {new_session_id[:8]}..."
+                f"New session: {new_session_id[:8]}...",
+                f"Dataset queue: {queue_size} pending | Queue-based logging prevents data loss")
 
     clear_btn.click(
         reset,
         inputs=None,
         outputs=[chatbot, chat_state, sess_state, img_urls_state,
-                 img_in, txt_in, log_status, session_info],
+                 img_in, txt_in, log_status, session_info, queue_status],
         queue=False,
     )
 
-    # Add info about the logging
+    # Add info about the race-condition-safe logging
     gr.Markdown(
         f"""
+        ### 🛡️ Race Condition Protection
+
+        **Problem Solved**: Multiple users uploading simultaneously could overwrite each other's data.
+
+        **Solution**: Queue-based sequential processing:
+        - 📋 Each conversation update goes into a queue
+        - 🔧 Background worker processes updates one at a time
+        - 🚫 No concurrent dataset writes = no data loss
+        - ⚡ UI stays responsive while background worker handles persistence
 
+        ### 📝 Logging Architecture
+
+        1. **Immediate**: Local JSONL files for instant access
+        2. **Queued**: HF dataset updates processed sequentially
+        3. **Direct**: Image uploads happen immediately for real URLs
+        4. **Safe**: Each session overwrites only its own record
 
         ### 🖼️ Image Storage Format
 
-        - Example: `images/d8771c05_001_20240123T143022.png`
+        Images: `images/{{session_id}}_{{msg_idx:03d}}_{{timestamp}}_{{random}}.png`
         - **session_id**: First 8 chars of session UUID
-        - **timestamp**: UTC timestamp when uploaded
+        - **msg_idx**: 3-digit message number (000, 001, 002...)
+        - **timestamp**: UTC timestamp when uploaded
+        - **random**: 8-char random suffix for uniqueness
 
+        ### 📊 Dataset Structure
 
         **HF Dataset**: [https://huggingface.co/datasets/{REPO_ID}](https://huggingface.co/datasets/{REPO_ID})
+        - One record per session (updated as conversation progresses)
         - **Images folder**: [https://huggingface.co/datasets/{REPO_ID}/tree/main/images](https://huggingface.co/datasets/{REPO_ID}/tree/main/images)
+        - Filter by `session_id` to find specific conversations
+        - Image URLs in conversation data link directly to files
 
-        **Local logs**: Saved as `[session_id]_full.jsonl` (temporary until Space restarts)
-
-        ### 🔧 Performance Notes
-
+        **Benefits**:
+        - ✅ No data loss from concurrent users
+        - ✅ Real image URLs immediately available
+        - ✅ Complete conversation context preserved
+        - ✅ Scalable to many simultaneous users
         """
     )
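For consuming the logs afterwards, a hedged sketch of reading one session back. It uses the module-level `datasets.load_dataset`, the library's documented entry point (the `Dataset.load_dataset` spelling in the worker above is not a documented datasets API), and the `d8771c05` prefix is the example session from the text:

    # Load the conversation dataset and pull out a single session's messages.
    from datasets import load_dataset

    ds = load_dataset("collinear-ai/nova-premier-redteaming-external", split="train")
    one_session = ds.filter(lambda r: r["session_id"].startswith("d8771c05"))
    for msg in one_session[0]["conversation_messages"]:
        print(msg["role"], msg["content"][:60])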
@@ -441,10 +517,11 @@ if __name__ == "__main__":
         print(" 1. Go to your Space settings → Repository secrets")
         print(" 2. Add HF_TOKEN with your HuggingFace token (write permissions)")
     else:
+        print(f"✅ HF logging enabled with queue-based safety. Conversations will be saved to: {REPO_ID}")
         print(f"📸 Images will be stored at: https://huggingface.co/datasets/{REPO_ID}/tree/main/images")
+        print("🛡️ Race condition protection: Dataset updates processed sequentially")
 
+    print("🔴 Nova Premier Red Team Chat with safe multi-user logging ready!")
 
     demo.queue(max_size=100)
     demo.launch(share=True)