MeghanaArakkal committed on
Commit
4fbe7db
·
verified ·
1 Parent(s): 2c59f60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -52
app.py CHANGED
@@ -19,7 +19,7 @@ Update REPO_ID to your HF dataset repository.
19
  from __future__ import annotations
20
  import base64, datetime, io, json, pathlib, uuid, os, threading
21
  from typing import Dict, List, Optional, Tuple
22
- from concurrent.futures import ThreadPoolExecutor
23
 
24
  import boto3
25
  from botocore.config import Config
@@ -45,7 +45,7 @@ Never mention Amazon or Nova.
45
 
46
  # HuggingFace Configuration
47
  HF_TOKEN = os.getenv("HF_TOKEN") # Set this in your Space's secrets
48
- REPO_ID = "collinear-ai/nova-premier-redteaming-external" # Change this to your actual HF dataset repo
49
  HF_API = HfApi()
50
 
51
  # Local directories (for temporary storage)
@@ -107,22 +107,21 @@ def call_bedrock(
107
  messages.append({"role": "assistant", "content": [{"text": reply}]})
108
  return reply, messages
109
 
110
- def upload_image_to_hf_repo_async(session_id: str, pil_img: Image.Image, message_index: int) -> Optional[str]:
111
- """Upload image to HF repo and return the repo URL."""
112
  if not HF_TOKEN:
113
  return None
114
 
115
  try:
116
- # Create unique filename with message index and random component to avoid conflicts
117
  ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
118
- random_suffix = str(uuid.uuid4())[:8]
119
- filename = f"images/{session_id}_{message_index:03d}_{ts}_{random_suffix}.png"
120
 
121
  # Save locally first
122
  local_path = IMG_DIR / f"{session_id}_{message_index:03d}_{ts}.png"
123
  pil_img.save(local_path, format="PNG")
124
 
125
- # Upload to HF repo
126
  upload_file(
127
  path_or_fileobj=str(local_path),
128
  path_in_repo=filename,
@@ -132,12 +131,13 @@ def upload_image_to_hf_repo_async(session_id: str, pil_img: Image.Image, message
132
  commit_message=f"Add image for session {session_id[:8]} message {message_index}"
133
  )
134
 
135
- # Return the HF repo URL
136
  hf_image_url = f"https://huggingface.co/datasets/{REPO_ID}/resolve/main/{filename}"
 
137
  return hf_image_url
138
 
139
  except Exception as e:
140
- print(f"Failed to upload image to HF repo: {e}")
141
  return None
142
 
143
  def save_full_conversation_to_hf_dataset_async(session_id: str, chat_history: List[Tuple], br_history: List[Dict], image_urls: Dict[int, str]):
@@ -207,10 +207,10 @@ def save_full_conversation_to_hf_dataset_async(session_id: str, chat_history: Li
207
  commit_message=f"Update conversation {session_id[:8]} ({len(chat_history)} exchanges)"
208
  )
209
 
210
- print(f"Conversation {session_id[:8]} saved to HF dataset")
211
 
212
  except Exception as e:
213
- print(f"Failed to save conversation {session_id[:8]} to HF dataset: {e}")
214
 
215
  # Run in background thread to avoid blocking UI
216
  executor.submit(background_save)
@@ -238,27 +238,30 @@ def save_local_conversation_log(session_id: str, chat_history: List[Tuple], imag
238
  f.write(json.dumps(conversation_record, ensure_ascii=False) + "\n")
239
 
240
  # ====== Gradio UI ====== #
241
- with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
242
  gr.Markdown(
243
  f"""
244
- ## Multimodal Chat with Full Conversation Logging 📝
245
  Upload an image *(optional)*, ask a question, and continue the conversation.
246
 
247
  **Multi-User Support:**
248
  - 👥 Each browser tab/session gets a unique conversation ID
249
  - 🔒 Conversations are isolated between users
250
- - ⚡ Background logging doesn't block the UI
251
 
252
  **Logging Features:**
253
  - 💾 **Full conversation history** saved after each message
254
  - 🔄 **Context preservation** - entire chat context maintained
255
- - 📸 **Image tracking** - all images linked to specific messages
256
  - 🏷️ **Session management** - unique session ID per conversation
257
 
 
 
 
258
  """
259
  )
260
 
261
- chatbot = gr.Chatbot(height=420, type="tuples") # Fix the warning
262
  chat_state = gr.State([]) # [(user, assistant), …] - Full chat history
263
  br_state = gr.State([]) # Bedrock message dicts - Full conversation context
264
  sess_state = gr.State("") # UUID for this browser tab
@@ -284,7 +287,7 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
284
  label="Conversation Logging Status",
285
  value="Ready to start logging full conversations...",
286
  interactive=False,
287
- max_lines=3
288
  )
289
 
290
  # Session info display
@@ -307,7 +310,24 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
307
  # Create new session if needed
308
  if not sess_id:
309
  sess_id = str(uuid.uuid4())
310
- print(f"Created new session: {sess_id}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
 
312
  # Call Bedrock with full conversation context
313
  try:
@@ -319,18 +339,6 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
319
  except Exception as e:
320
  raise gr.Error(f"Bedrock API error: {str(e)}")
321
 
322
- # Handle image upload if present
323
- hf_img_url = None
324
- if image:
325
- message_index = len(chat_log) # Current message index
326
- # Upload image in background (non-blocking)
327
- if HF_TOKEN:
328
- executor.submit(lambda: upload_image_to_hf_repo_async(sess_id, image, message_index))
329
- # For immediate reference, we'll generate the expected URL pattern
330
- ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
331
- hf_img_url = f"[Image uploaded to HF repo - session {sess_id[:8]}, message {message_index}]"
332
- img_urls_dict[message_index] = hf_img_url
333
-
334
  # Update chat history
335
  display_user = text.strip() if text.strip() else "[image uploaded]"
336
  chat_log.append((display_user, reply))
@@ -339,21 +347,24 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
339
  save_local_conversation_log(sess_id, chat_log, img_urls_dict)
340
  save_full_conversation_to_hf_dataset_async(sess_id, chat_log, new_br_history, img_urls_dict)
341
 
342
- # Update status message
343
  status_msg = f"✅ Full conversation logged for session {sess_id[:8]}\n"
344
- status_msg += f"📊 Total exchanges: {len(chat_log)} | Messages in context: {len(new_br_history)}"
345
 
346
- if image:
347
- if HF_TOKEN:
348
- status_msg += f"\n🖼️ Image #{len([k for k in img_urls_dict.keys() if isinstance(k, int)])} uploading to HF repo..."
349
- else:
350
- status_msg += f"\n⚠️ Image saved locally only"
 
 
351
 
352
  if not HF_TOKEN:
353
  status_msg += "\n❌ HF logging disabled (no token) - local only"
354
 
355
  # Update session info
356
- session_display = f"Session: {sess_id[:8]}... | Messages: {len(chat_log)} | Images: {len([k for k in img_urls_dict.keys() if isinstance(k, int)])}"
 
357
 
358
  return (chat_log, chat_log, new_br_history, sess_id, img_urls_dict,
359
  None, "", status_msg, session_display)
@@ -384,33 +395,40 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
384
 
385
  # Add info about the logging structure
386
  gr.Markdown(
387
- """
388
- ### 📊 Multi-User Conversation Log Structure
389
 
390
  Each conversation is saved with:
391
  - **Session ID**: Unique identifier for each conversation (8-char prefix shown in UI)
392
  - **Full message history**: All user and assistant messages in chronological order
 
393
  - **Bedrock context**: Complete conversation context sent to the AI
394
- - **Image references**: URLs to all images uploaded during the conversation
395
  - **Timestamps**: When the conversation was last updated
396
- - **Concurrent support**: Multiple users can chat simultaneously without interference
 
 
 
 
 
 
 
397
 
398
  ### 🔍 Viewing Your Logs
399
 
400
- **HF Dataset**: """ + (f"[https://huggingface.co/datasets/{REPO_ID}](https://huggingface.co/datasets/{REPO_ID})" if HF_TOKEN else "Not configured - set HF_TOKEN to enable") + """
401
  - Each record contains a complete conversation from one user session
402
- - Images stored in `images/` folder with session and message indexing
403
- - Download as JSON, CSV, or Parquet for analysis
404
  - Search/filter by session_id to find specific conversations
 
405
 
406
  **Local logs**: Saved as `[session_id]_full.jsonl` (temporary until Space restarts)
407
 
408
  ### 🔧 Performance Notes
409
 
410
- - Image uploads and dataset updates happen in background threads
411
- - UI remains responsive during logging operations
412
  - Each browser tab gets its own isolated conversation
413
- - Session IDs are generated client-side for immediate isolation
414
  """
415
  )
416
 
@@ -422,12 +440,11 @@ if __name__ == "__main__":
422
  print(" To enable persistent logging:")
423
  print(" 1. Go to your Space settings → Repository secrets")
424
  print(" 2. Add HF_TOKEN with your HuggingFace token (write permissions)")
425
- print(" 3. Create a dataset repository and update REPO_ID in the code")
426
  else:
427
  print(f"✅ HF logging enabled. Full conversations will be saved to: {REPO_ID}")
428
- print(f"📸 Images will be stored in: {REPO_ID}/images/")
429
 
430
- print("🚀 Multi-user chat with full conversation logging ready!")
431
 
432
  demo.queue(max_size=100)
433
  demo.launch(share=True)
 
19
  from __future__ import annotations
20
  import base64, datetime, io, json, pathlib, uuid, os, threading
21
  from typing import Dict, List, Optional, Tuple
22
+ from concurrent.futures import ThreadPoolExecutor, Future
23
 
24
  import boto3
25
  from botocore.config import Config
 
45
 
46
  # HuggingFace Configuration
47
  HF_TOKEN = os.getenv("HF_TOKEN") # Set this in your Space's secrets
48
+ REPO_ID = "collinear-ai/nova-premier-redteaming-external" # Updated to your actual repo
49
  HF_API = HfApi()
50
 
51
  # Local directories (for temporary storage)
 
107
  messages.append({"role": "assistant", "content": [{"text": reply}]})
108
  return reply, messages
109
 
110
+ def upload_image_to_hf_repo_sync(session_id: str, pil_img: Image.Image, message_index: int) -> Optional[str]:
111
+ """Upload image to HF repo synchronously and return the actual repo URL."""
112
  if not HF_TOKEN:
113
  return None
114
 
115
  try:
116
+ # Create unique filename with message index and timestamp
117
  ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
118
+ filename = f"images/{session_id}_{message_index:03d}_{ts}.png"
 
119
 
120
  # Save locally first
121
  local_path = IMG_DIR / f"{session_id}_{message_index:03d}_{ts}.png"
122
  pil_img.save(local_path, format="PNG")
123
 
124
+ # Upload to HF repo and wait for completion
125
  upload_file(
126
  path_or_fileobj=str(local_path),
127
  path_in_repo=filename,
 
131
  commit_message=f"Add image for session {session_id[:8]} message {message_index}"
132
  )
133
 
134
+ # Return the actual HF repo URL
135
  hf_image_url = f"https://huggingface.co/datasets/{REPO_ID}/resolve/main/{filename}"
136
+ print(f"✅ Image uploaded successfully: {hf_image_url}")
137
  return hf_image_url
138
 
139
  except Exception as e:
140
+ print(f"❌ Failed to upload image to HF repo: {e}")
141
  return None
142
 
143
  def save_full_conversation_to_hf_dataset_async(session_id: str, chat_history: List[Tuple], br_history: List[Dict], image_urls: Dict[int, str]):
 
207
  commit_message=f"Update conversation {session_id[:8]} ({len(chat_history)} exchanges)"
208
  )
209
 
210
+ print(f"✅ Conversation {session_id[:8]} saved to HF dataset")
211
 
212
  except Exception as e:
213
+ print(f"❌ Failed to save conversation {session_id[:8]} to HF dataset: {e}")
214
 
215
  # Run in background thread to avoid blocking UI
216
  executor.submit(background_save)
 
238
  f.write(json.dumps(conversation_record, ensure_ascii=False) + "\n")
239
 
240
  # ====== Gradio UI ====== #
241
+ with gr.Blocks(title="Nova Premier Red Team Chat") as demo:
242
  gr.Markdown(
243
  f"""
244
+ ## Nova Premier Red Team Chat 🔴🤖
245
  Upload an image *(optional)*, ask a question, and continue the conversation.
246
 
247
  **Multi-User Support:**
248
  - 👥 Each browser tab/session gets a unique conversation ID
249
  - 🔒 Conversations are isolated between users
250
+ - ⚡ Real-time image upload with direct URLs
251
 
252
  **Logging Features:**
253
  - 💾 **Full conversation history** saved after each message
254
  - 🔄 **Context preservation** - entire chat context maintained
255
+ - 📸 **Image tracking** - direct links to uploaded images
256
  - 🏷️ **Session management** - unique session ID per conversation
257
 
258
+ **Storage:**
259
+ - 🤗 HF Dataset: {"✅ Enabled" if HF_TOKEN else "❌ Disabled (set HF_TOKEN)"} - Repo: `{REPO_ID}`
260
+ - 🖼️ Images: {"✅ Uploaded with direct URLs" if HF_TOKEN else "❌ Local only"}
261
  """
262
  )
263
 
264
+ chatbot = gr.Chatbot(height=420, type="tuples")
265
  chat_state = gr.State([]) # [(user, assistant), …] - Full chat history
266
  br_state = gr.State([]) # Bedrock message dicts - Full conversation context
267
  sess_state = gr.State("") # UUID for this browser tab
 
287
  label="Conversation Logging Status",
288
  value="Ready to start logging full conversations...",
289
  interactive=False,
290
+ max_lines=4
291
  )
292
 
293
  # Session info display
 
310
  # Create new session if needed
311
  if not sess_id:
312
  sess_id = str(uuid.uuid4())
313
+ print(f"🆕 Created new session: {sess_id}")
314
+
315
+ # Handle image upload FIRST (synchronously to get real URL)
316
+ hf_img_url = None
317
+ upload_status = ""
318
+ if image:
319
+ message_index = len(chat_log) # Current message index
320
+ if HF_TOKEN:
321
+ upload_status = "🔄 Uploading image..."
322
+ # Upload synchronously to get the actual URL
323
+ hf_img_url = upload_image_to_hf_repo_sync(sess_id, image, message_index)
324
+ if hf_img_url:
325
+ img_urls_dict[message_index] = hf_img_url
326
+ upload_status = f"✅ Image uploaded: {hf_img_url}"
327
+ else:
328
+ upload_status = "❌ Image upload failed"
329
+ else:
330
+ upload_status = "⚠️ Image saved locally only (no HF token)"
331
 
332
  # Call Bedrock with full conversation context
333
  try:
 
339
  except Exception as e:
340
  raise gr.Error(f"Bedrock API error: {str(e)}")
341
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  # Update chat history
343
  display_user = text.strip() if text.strip() else "[image uploaded]"
344
  chat_log.append((display_user, reply))
 
347
  save_local_conversation_log(sess_id, chat_log, img_urls_dict)
348
  save_full_conversation_to_hf_dataset_async(sess_id, chat_log, new_br_history, img_urls_dict)
349
 
350
+ # Update status message with real image URL
351
  status_msg = f"✅ Full conversation logged for session {sess_id[:8]}\n"
352
+ status_msg += f"📊 Total exchanges: {len(chat_log)} | Messages in context: {len(new_br_history)}\n"
353
 
354
+ if image and hf_img_url:
355
+ status_msg += f"🖼️ Image URL: {hf_img_url}\n"
356
+ elif image and not hf_img_url:
357
+ status_msg += f"⚠️ Image upload failed - check logs\n"
358
+
359
+ if upload_status:
360
+ status_msg += upload_status
361
 
362
  if not HF_TOKEN:
363
  status_msg += "\n❌ HF logging disabled (no token) - local only"
364
 
365
  # Update session info
366
+ image_count = len([k for k in img_urls_dict.keys() if isinstance(k, int)])
367
+ session_display = f"Session: {sess_id[:8]}... | Messages: {len(chat_log)} | Images: {image_count}"
368
 
369
  return (chat_log, chat_log, new_br_history, sess_id, img_urls_dict,
370
  None, "", status_msg, session_display)
 
395
 
396
  # Add info about the logging structure
397
  gr.Markdown(
398
+ f"""
399
+ ### 📊 Red Team Conversation Log Structure
400
 
401
  Each conversation is saved with:
402
  - **Session ID**: Unique identifier for each conversation (8-char prefix shown in UI)
403
  - **Full message history**: All user and assistant messages in chronological order
404
+ - **Image URLs**: Direct links to uploaded images in HF repo
405
  - **Bedrock context**: Complete conversation context sent to the AI
 
406
  - **Timestamps**: When the conversation was last updated
407
+
408
+ ### 🖼️ Image Storage Format
409
+
410
+ Images are stored as: `images/{{session_id}}_{{message_index:03d}}_{{timestamp}}.png`
411
+ - Example: `images/d8771c05_001_20240123T143022.png`
412
+ - **session_id**: First 8 chars of session UUID
413
+ - **message_index**: 3-digit message number (000, 001, 002...)
414
+ - **timestamp**: UTC timestamp when uploaded
415
 
416
  ### 🔍 Viewing Your Logs
417
 
418
+ **HF Dataset**: [https://huggingface.co/datasets/{REPO_ID}](https://huggingface.co/datasets/{REPO_ID})
419
  - Each record contains a complete conversation from one user session
420
+ - **Images folder**: [https://huggingface.co/datasets/{REPO_ID}/tree/main/images](https://huggingface.co/datasets/{REPO_ID}/tree/main/images)
 
421
  - Search/filter by session_id to find specific conversations
422
+ - Image URLs in the conversation data link directly to the files
423
 
424
  **Local logs**: Saved as `[session_id]_full.jsonl` (temporary until Space restarts)
425
 
426
  ### 🔧 Performance Notes
427
 
428
+ - Images are uploaded synchronously to get real URLs immediately
429
+ - Dataset updates happen in background threads for performance
430
  - Each browser tab gets its own isolated conversation
431
+ - Direct image URLs allow easy correlation with conversation data
432
  """
433
  )
434
 
 
440
  print(" To enable persistent logging:")
441
  print(" 1. Go to your Space settings → Repository secrets")
442
  print(" 2. Add HF_TOKEN with your HuggingFace token (write permissions)")
 
443
  else:
444
  print(f"✅ HF logging enabled. Full conversations will be saved to: {REPO_ID}")
445
+ print(f"📸 Images will be stored at: https://huggingface.co/datasets/{REPO_ID}/tree/main/images")
446
 
447
+ print("🔴 Nova Premier Red Team Chat with full conversation logging ready!")
448
 
449
  demo.queue(max_size=100)
450
  demo.launch(share=True)