Update app.py
Browse files
app.py
CHANGED
@@ -19,7 +19,7 @@ Update REPO_ID to your HF dataset repository.
|
|
19 |
from __future__ import annotations
|
20 |
import base64, datetime, io, json, pathlib, uuid, os, threading
|
21 |
from typing import Dict, List, Optional, Tuple
|
22 |
-
from concurrent.futures import ThreadPoolExecutor
|
23 |
|
24 |
import boto3
|
25 |
from botocore.config import Config
|
@@ -45,7 +45,7 @@ Never mention Amazon or Nova.
|
|
45 |
|
46 |
# HuggingFace Configuration
|
47 |
HF_TOKEN = os.getenv("HF_TOKEN") # Set this in your Space's secrets
|
48 |
-
REPO_ID = "collinear-ai/nova-premier-redteaming-external" #
|
49 |
HF_API = HfApi()
|
50 |
|
51 |
# Local directories (for temporary storage)
|
@@ -107,22 +107,21 @@ def call_bedrock(
|
|
107 |
messages.append({"role": "assistant", "content": [{"text": reply}]})
|
108 |
return reply, messages
|
109 |
|
110 |
-
def
|
111 |
-
"""Upload image to HF repo and return the repo URL."""
|
112 |
if not HF_TOKEN:
|
113 |
return None
|
114 |
|
115 |
try:
|
116 |
-
# Create unique filename with message index and
|
117 |
ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
|
118 |
-
|
119 |
-
filename = f"images/{session_id}_{message_index:03d}_{ts}_{random_suffix}.png"
|
120 |
|
121 |
# Save locally first
|
122 |
local_path = IMG_DIR / f"{session_id}_{message_index:03d}_{ts}.png"
|
123 |
pil_img.save(local_path, format="PNG")
|
124 |
|
125 |
-
# Upload to HF repo
|
126 |
upload_file(
|
127 |
path_or_fileobj=str(local_path),
|
128 |
path_in_repo=filename,
|
@@ -132,12 +131,13 @@ def upload_image_to_hf_repo_async(session_id: str, pil_img: Image.Image, message
|
|
132 |
commit_message=f"Add image for session {session_id[:8]} message {message_index}"
|
133 |
)
|
134 |
|
135 |
-
# Return the HF repo URL
|
136 |
hf_image_url = f"https://huggingface.co/datasets/{REPO_ID}/resolve/main/{filename}"
|
|
|
137 |
return hf_image_url
|
138 |
|
139 |
except Exception as e:
|
140 |
-
print(f"Failed to upload image to HF repo: {e}")
|
141 |
return None
|
142 |
|
143 |
def save_full_conversation_to_hf_dataset_async(session_id: str, chat_history: List[Tuple], br_history: List[Dict], image_urls: Dict[int, str]):
|
@@ -207,10 +207,10 @@ def save_full_conversation_to_hf_dataset_async(session_id: str, chat_history: Li
|
|
207 |
commit_message=f"Update conversation {session_id[:8]} ({len(chat_history)} exchanges)"
|
208 |
)
|
209 |
|
210 |
-
print(f"Conversation {session_id[:8]} saved to HF dataset")
|
211 |
|
212 |
except Exception as e:
|
213 |
-
print(f"Failed to save conversation {session_id[:8]} to HF dataset: {e}")
|
214 |
|
215 |
# Run in background thread to avoid blocking UI
|
216 |
executor.submit(background_save)
|
@@ -238,27 +238,30 @@ def save_local_conversation_log(session_id: str, chat_history: List[Tuple], imag
|
|
238 |
f.write(json.dumps(conversation_record, ensure_ascii=False) + "\n")
|
239 |
|
240 |
# ====== Gradio UI ====== #
|
241 |
-
with gr.Blocks(title="
|
242 |
gr.Markdown(
|
243 |
f"""
|
244 |
-
##
|
245 |
Upload an image *(optional)*, ask a question, and continue the conversation.
|
246 |
|
247 |
**Multi-User Support:**
|
248 |
- 👥 Each browser tab/session gets a unique conversation ID
|
249 |
- π Conversations are isolated between users
|
250 |
-
- β‘
|
251 |
|
252 |
**Logging Features:**
|
253 |
- 💾 **Full conversation history** saved after each message
|
254 |
- π **Context preservation** - entire chat context maintained
|
255 |
-
- πΈ **Image tracking** -
|
256 |
- 🏷️ **Session management** - unique session ID per conversation
|
257 |
|
|
|
|
|
|
|
258 |
"""
|
259 |
)
|
260 |
|
261 |
-
chatbot = gr.Chatbot(height=420, type="tuples")
|
262 |
chat_state = gr.State([]) # [(user, assistant), β¦] - Full chat history
|
263 |
br_state = gr.State([]) # Bedrock message dicts - Full conversation context
|
264 |
sess_state = gr.State("") # UUID for this browser tab
|
@@ -284,7 +287,7 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
|
|
284 |
label="Conversation Logging Status",
|
285 |
value="Ready to start logging full conversations...",
|
286 |
interactive=False,
|
287 |
-
max_lines=
|
288 |
)
|
289 |
|
290 |
# Session info display
|
@@ -307,7 +310,24 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
|
|
307 |
# Create new session if needed
|
308 |
if not sess_id:
|
309 |
sess_id = str(uuid.uuid4())
|
310 |
-
print(f"Created new session: {sess_id}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
|
312 |
# Call Bedrock with full conversation context
|
313 |
try:
|
@@ -319,18 +339,6 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
|
|
319 |
except Exception as e:
|
320 |
raise gr.Error(f"Bedrock API error: {str(e)}")
|
321 |
|
322 |
-
# Handle image upload if present
|
323 |
-
hf_img_url = None
|
324 |
-
if image:
|
325 |
-
message_index = len(chat_log) # Current message index
|
326 |
-
# Upload image in background (non-blocking)
|
327 |
-
if HF_TOKEN:
|
328 |
-
executor.submit(lambda: upload_image_to_hf_repo_async(sess_id, image, message_index))
|
329 |
-
# For immediate reference, we'll generate the expected URL pattern
|
330 |
-
ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
|
331 |
-
hf_img_url = f"[Image uploaded to HF repo - session {sess_id[:8]}, message {message_index}]"
|
332 |
-
img_urls_dict[message_index] = hf_img_url
|
333 |
-
|
334 |
# Update chat history
|
335 |
display_user = text.strip() if text.strip() else "[image uploaded]"
|
336 |
chat_log.append((display_user, reply))
|
@@ -339,21 +347,24 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
|
|
339 |
save_local_conversation_log(sess_id, chat_log, img_urls_dict)
|
340 |
save_full_conversation_to_hf_dataset_async(sess_id, chat_log, new_br_history, img_urls_dict)
|
341 |
|
342 |
-
# Update status message
|
343 |
status_msg = f"✅ Full conversation logged for session {sess_id[:8]}\n"
|
344 |
-
status_msg += f"π Total exchanges: {len(chat_log)} | Messages in context: {len(new_br_history)}"
|
345 |
|
346 |
-
if image:
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
|
|
|
|
351 |
|
352 |
if not HF_TOKEN:
|
353 |
status_msg += "\nβ HF logging disabled (no token) - local only"
|
354 |
|
355 |
# Update session info
|
356 |
-
|
|
|
357 |
|
358 |
return (chat_log, chat_log, new_br_history, sess_id, img_urls_dict,
|
359 |
None, "", status_msg, session_display)
|
@@ -384,33 +395,40 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
|
|
384 |
|
385 |
# Add info about the logging structure
|
386 |
gr.Markdown(
|
387 |
-
"""
|
388 |
-
### π
|
389 |
|
390 |
Each conversation is saved with:
|
391 |
- **Session ID**: Unique identifier for each conversation (8-char prefix shown in UI)
|
392 |
- **Full message history**: All user and assistant messages in chronological order
|
|
|
393 |
- **Bedrock context**: Complete conversation context sent to the AI
|
394 |
-
- **Image references**: URLs to all images uploaded during the conversation
|
395 |
- **Timestamps**: When the conversation was last updated
|
396 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
397 |
|
398 |
### π Viewing Your Logs
|
399 |
|
400 |
-
**HF Dataset**:
|
401 |
- Each record contains a complete conversation from one user session
|
402 |
-
- Images
|
403 |
-
- Download as JSON, CSV, or Parquet for analysis
|
404 |
- Search/filter by session_id to find specific conversations
|
|
|
405 |
|
406 |
**Local logs**: Saved as `[session_id]_full.jsonl` (temporary until Space restarts)
|
407 |
|
408 |
### π§ Performance Notes
|
409 |
|
410 |
-
-
|
411 |
-
-
|
412 |
- Each browser tab gets its own isolated conversation
|
413 |
-
-
|
414 |
"""
|
415 |
)
|
416 |
|
@@ -422,12 +440,11 @@ if __name__ == "__main__":
|
|
422 |
print(" To enable persistent logging:")
|
423 |
print(" 1. Go to your Space settings β Repository secrets")
|
424 |
print(" 2. Add HF_TOKEN with your HuggingFace token (write permissions)")
|
425 |
-
print(" 3. Create a dataset repository and update REPO_ID in the code")
|
426 |
else:
|
427 |
print(f"✅ HF logging enabled. Full conversations will be saved to: {REPO_ID}")
|
428 |
-
print(f"πΈ Images will be stored
|
429 |
|
430 |
-
print("
|
431 |
|
432 |
demo.queue(max_size=100)
|
433 |
demo.launch(share=True)
|
|
|
19 |
from __future__ import annotations
|
20 |
import base64, datetime, io, json, pathlib, uuid, os, threading
|
21 |
from typing import Dict, List, Optional, Tuple
|
22 |
+
from concurrent.futures import ThreadPoolExecutor, Future
|
23 |
|
24 |
import boto3
|
25 |
from botocore.config import Config
|
|
|
45 |
|
46 |
# HuggingFace Configuration
|
47 |
HF_TOKEN = os.getenv("HF_TOKEN") # Set this in your Space's secrets
|
48 |
+
REPO_ID = "collinear-ai/nova-premier-redteaming-external" # Updated to your actual repo
|
49 |
HF_API = HfApi()
|
50 |
|
51 |
# Local directories (for temporary storage)
|
|
|
107 |
messages.append({"role": "assistant", "content": [{"text": reply}]})
|
108 |
return reply, messages
|
109 |
|
110 |
+
def upload_image_to_hf_repo_sync(session_id: str, pil_img: Image.Image, message_index: int) -> Optional[str]:
|
111 |
+
"""Upload image to HF repo synchronously and return the actual repo URL."""
|
112 |
if not HF_TOKEN:
|
113 |
return None
|
114 |
|
115 |
try:
|
116 |
+
# Create unique filename with message index and timestamp
|
117 |
ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
|
118 |
+
filename = f"images/{session_id}_{message_index:03d}_{ts}.png"
|
|
|
119 |
|
120 |
# Save locally first
|
121 |
local_path = IMG_DIR / f"{session_id}_{message_index:03d}_{ts}.png"
|
122 |
pil_img.save(local_path, format="PNG")
|
123 |
|
124 |
+
# Upload to HF repo and wait for completion
|
125 |
upload_file(
|
126 |
path_or_fileobj=str(local_path),
|
127 |
path_in_repo=filename,
|
|
|
131 |
commit_message=f"Add image for session {session_id[:8]} message {message_index}"
|
132 |
)
|
133 |
|
134 |
+
# Return the actual HF repo URL
|
135 |
hf_image_url = f"https://huggingface.co/datasets/{REPO_ID}/resolve/main/{filename}"
|
136 |
+
print(f"✅ Image uploaded successfully: {hf_image_url}")
|
137 |
return hf_image_url
|
138 |
|
139 |
except Exception as e:
|
140 |
+
print(f"❌ Failed to upload image to HF repo: {e}")
|
141 |
return None
|
142 |
|
143 |
def save_full_conversation_to_hf_dataset_async(session_id: str, chat_history: List[Tuple], br_history: List[Dict], image_urls: Dict[int, str]):
|
|
|
207 |
commit_message=f"Update conversation {session_id[:8]} ({len(chat_history)} exchanges)"
|
208 |
)
|
209 |
|
210 |
+
print(f"✅ Conversation {session_id[:8]} saved to HF dataset")
|
211 |
|
212 |
except Exception as e:
|
213 |
+
print(f"❌ Failed to save conversation {session_id[:8]} to HF dataset: {e}")
|
214 |
|
215 |
# Run in background thread to avoid blocking UI
|
216 |
executor.submit(background_save)
|
|
|
238 |
f.write(json.dumps(conversation_record, ensure_ascii=False) + "\n")
|
239 |
|
240 |
# ====== Gradio UI ====== #
|
241 |
+
with gr.Blocks(title="Nova Premier Red Team Chat") as demo:
|
242 |
gr.Markdown(
|
243 |
f"""
|
244 |
+
## Nova Premier Red Team Chat 🔴🤖
|
245 |
Upload an image *(optional)*, ask a question, and continue the conversation.
|
246 |
|
247 |
**Multi-User Support:**
|
248 |
- 👥 Each browser tab/session gets a unique conversation ID
|
249 |
- π Conversations are isolated between users
|
250 |
+
- ⚡ Real-time image upload with direct URLs
|
251 |
|
252 |
**Logging Features:**
|
253 |
- 💾 **Full conversation history** saved after each message
|
254 |
- π **Context preservation** - entire chat context maintained
|
255 |
+
- 📸 **Image tracking** - direct links to uploaded images
|
256 |
- 🏷️ **Session management** - unique session ID per conversation
|
257 |
|
258 |
+
**Storage:**
|
259 |
+
- 🤗 HF Dataset: {"✅ Enabled" if HF_TOKEN else "❌ Disabled (set HF_TOKEN)"} - Repo: `{REPO_ID}`
|
260 |
+
- 🖼️ Images: {"✅ Uploaded with direct URLs" if HF_TOKEN else "❌ Local only"}
|
261 |
"""
|
262 |
)
|
263 |
|
264 |
+
chatbot = gr.Chatbot(height=420, type="tuples")
|
265 |
chat_state = gr.State([]) # [(user, assistant), …] - Full chat history
|
266 |
br_state = gr.State([]) # Bedrock message dicts - Full conversation context
|
267 |
sess_state = gr.State("") # UUID for this browser tab
|
|
|
287 |
label="Conversation Logging Status",
|
288 |
value="Ready to start logging full conversations...",
|
289 |
interactive=False,
|
290 |
+
max_lines=4
|
291 |
)
|
292 |
|
293 |
# Session info display
|
|
|
310 |
# Create new session if needed
|
311 |
if not sess_id:
|
312 |
sess_id = str(uuid.uuid4())
|
313 |
+
print(f"π Created new session: {sess_id}")
|
314 |
+
|
315 |
+
# Handle image upload FIRST (synchronously to get real URL)
|
316 |
+
hf_img_url = None
|
317 |
+
upload_status = ""
|
318 |
+
if image:
|
319 |
+
message_index = len(chat_log) # Current message index
|
320 |
+
if HF_TOKEN:
|
321 |
+
upload_status = "π Uploading image..."
|
322 |
+
# Upload synchronously to get the actual URL
|
323 |
+
hf_img_url = upload_image_to_hf_repo_sync(sess_id, image, message_index)
|
324 |
+
if hf_img_url:
|
325 |
+
img_urls_dict[message_index] = hf_img_url
|
326 |
+
upload_status = f"✅ Image uploaded: {hf_img_url}"
|
327 |
+
else:
|
328 |
+
upload_status = "❌ Image upload failed"
|
329 |
+
else:
|
330 |
+
upload_status = "⚠️ Image saved locally only (no HF token)"
|
331 |
|
332 |
# Call Bedrock with full conversation context
|
333 |
try:
|
|
|
339 |
except Exception as e:
|
340 |
raise gr.Error(f"Bedrock API error: {str(e)}")
|
341 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
342 |
# Update chat history
|
343 |
display_user = text.strip() if text.strip() else "[image uploaded]"
|
344 |
chat_log.append((display_user, reply))
|
|
|
347 |
save_local_conversation_log(sess_id, chat_log, img_urls_dict)
|
348 |
save_full_conversation_to_hf_dataset_async(sess_id, chat_log, new_br_history, img_urls_dict)
|
349 |
|
350 |
+
# Update status message with real image URL
|
351 |
status_msg = f"✅ Full conversation logged for session {sess_id[:8]}\n"
|
352 |
+
status_msg += f"π Total exchanges: {len(chat_log)} | Messages in context: {len(new_br_history)}\n"
|
353 |
|
354 |
+
if image and hf_img_url:
|
355 |
+
status_msg += f"🖼️ Image URL: {hf_img_url}\n"
|
356 |
+
elif image and not hf_img_url:
|
357 |
+
status_msg += f"⚠️ Image upload failed - check logs\n"
|
358 |
+
|
359 |
+
if upload_status:
|
360 |
+
status_msg += upload_status
|
361 |
|
362 |
if not HF_TOKEN:
|
363 |
status_msg += "\nβ HF logging disabled (no token) - local only"
|
364 |
|
365 |
# Update session info
|
366 |
+
image_count = len([k for k in img_urls_dict.keys() if isinstance(k, int)])
|
367 |
+
session_display = f"Session: {sess_id[:8]}... | Messages: {len(chat_log)} | Images: {image_count}"
|
368 |
|
369 |
return (chat_log, chat_log, new_br_history, sess_id, img_urls_dict,
|
370 |
None, "", status_msg, session_display)
|
|
|
395 |
|
396 |
# Add info about the logging structure
|
397 |
gr.Markdown(
|
398 |
+
f"""
|
399 |
+
### π Red Team Conversation Log Structure
|
400 |
|
401 |
Each conversation is saved with:
|
402 |
- **Session ID**: Unique identifier for each conversation (8-char prefix shown in UI)
|
403 |
- **Full message history**: All user and assistant messages in chronological order
|
404 |
+
- **Image URLs**: Direct links to uploaded images in HF repo
|
405 |
- **Bedrock context**: Complete conversation context sent to the AI
|
|
|
406 |
- **Timestamps**: When the conversation was last updated
|
407 |
+
|
408 |
+
### 🖼️ Image Storage Format
|
409 |
+
|
410 |
+
Images are stored as: `images/{{session_id}}_{{message_index:03d}}_{{timestamp}}.png`
|
411 |
+
- Example: `images/d8771c05-3f2a-4b1c-9e7d-5a6b7c8d9e0f_001_20240123T143022.png`
|
412 |
+
- **session_id**: Full session UUID (the UI shows only its first 8 characters)
|
413 |
+
- **message_index**: 3-digit message number (000, 001, 002...)
|
414 |
+
- **timestamp**: UTC timestamp when uploaded
|
415 |
|
416 |
### π Viewing Your Logs
|
417 |
|
418 |
+
**HF Dataset**: [https://huggingface.co/datasets/{REPO_ID}](https://huggingface.co/datasets/{REPO_ID})
|
419 |
- Each record contains a complete conversation from one user session
|
420 |
+
- **Images folder**: [https://huggingface.co/datasets/{REPO_ID}/tree/main/images](https://huggingface.co/datasets/{REPO_ID}/tree/main/images)
|
|
|
421 |
- Search/filter by session_id to find specific conversations
|
422 |
+
- Image URLs in the conversation data link directly to the files
|
423 |
|
424 |
**Local logs**: Saved as `[session_id]_full.jsonl` (temporary until Space restarts)
|
425 |
|
426 |
### 🔧 Performance Notes
|
427 |
|
428 |
+
- Images are uploaded synchronously to get real URLs immediately
|
429 |
+
- Dataset updates happen in background threads for performance
|
430 |
- Each browser tab gets its own isolated conversation
|
431 |
+
- Direct image URLs allow easy correlation with conversation data
|
432 |
"""
|
433 |
)
|
434 |
|
|
|
440 |
print(" To enable persistent logging:")
|
441 |
print(" 1. Go to your Space settings → Repository secrets")
|
442 |
print(" 2. Add HF_TOKEN with your HuggingFace token (write permissions)")
|
|
|
443 |
else:
|
444 |
print(f"✅ HF logging enabled. Full conversations will be saved to: {REPO_ID}")
|
445 |
+
print(f"📸 Images will be stored at: https://huggingface.co/datasets/{REPO_ID}/tree/main/images")
|
446 |
|
447 |
+
print("🔴 Nova Premier Red Team Chat with full conversation logging ready!")
|
448 |
|
449 |
demo.queue(max_size=100)
|
450 |
demo.launch(share=True)
|