MeghanaArakkal committed on
Commit
786d0d0
·
verified ·
1 Parent(s): 293183a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -92
app.py CHANGED
@@ -3,6 +3,7 @@
3
  Multimodal chat front‑end for Amazon Bedrock Nova Premier v1
4
  (text and/or image ➜ assistant text) with per‑session JSONL logging.
5
  Logs FULL conversations to HF Dataset and images to HF repo via Git LFS.
 
6
 
7
  Prereqs:
8
  pip install gradio==5.38 boto3 pillow datasets huggingface_hub
@@ -16,8 +17,9 @@ Update REPO_ID to your HF dataset repository.
16
  """
17
 
18
  from __future__ import annotations
19
- import base64, datetime, io, json, pathlib, uuid, os
20
  from typing import Dict, List, Optional, Tuple
 
21
 
22
  import boto3
23
  from botocore.config import Config
@@ -52,6 +54,10 @@ IMG_DIR = LOG_DIR / "imgs"
52
  LOG_DIR.mkdir(exist_ok=True)
53
  IMG_DIR.mkdir(exist_ok=True)
54
 
 
 
 
 
55
  # ====== Bedrock client ====== #
56
  bedrock = boto3.client(
57
  "bedrock-runtime",
@@ -101,16 +107,16 @@ def call_bedrock(
101
  messages.append({"role": "assistant", "content": [{"text": reply}]})
102
  return reply, messages
103
 
104
- def upload_image_to_hf_repo(session_id: str, pil_img: Image.Image, message_index: int) -> Optional[str]:
105
  """Upload image to HF repo and return the repo URL."""
106
  if not HF_TOKEN:
107
- print("Warning: HF_TOKEN not set, skipping image upload to HF repo")
108
  return None
109
 
110
  try:
111
- # Create unique filename with message index
112
  ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
113
- filename = f"images/{session_id}_{message_index:03d}_{ts}.png"
 
114
 
115
  # Save locally first
116
  local_path = IMG_DIR / f"{session_id}_{message_index:03d}_{ts}.png"
@@ -128,85 +134,86 @@ def upload_image_to_hf_repo(session_id: str, pil_img: Image.Image, message_index
128
 
129
  # Return the HF repo URL
130
  hf_image_url = f"https://huggingface.co/datasets/{REPO_ID}/resolve/main/{filename}"
131
- print(f"Image uploaded to HF repo: {hf_image_url}")
132
  return hf_image_url
133
 
134
  except Exception as e:
135
  print(f"Failed to upload image to HF repo: {e}")
136
  return None
137
 
138
- def save_full_conversation_to_hf_dataset(session_id: str, chat_history: List[Tuple], br_history: List[Dict], image_urls: Dict[int, str]):
139
- """Save the ENTIRE conversation to HF Dataset."""
140
  if not HF_TOKEN:
141
- print("Warning: HF_TOKEN not set, skipping upload to HF dataset")
142
  return
143
 
144
- try:
145
- # Convert chat history to structured format
146
- messages = []
147
- for i, (user_msg, assistant_msg) in enumerate(chat_history):
148
- messages.append({
149
- "message_index": i,
150
- "role": "user",
151
- "content": user_msg,
152
- "image_url": image_urls.get(i, ""),
153
- "has_image": i in image_urls
154
- })
155
- messages.append({
156
- "message_index": i,
157
- "role": "assistant",
158
- "content": assistant_msg,
159
- "image_url": "",
160
- "has_image": False
161
- })
162
-
163
- # Create conversation record
164
- conversation_record = {
165
- "session_id": session_id,
166
- "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
167
- "message_count": len(chat_history),
168
- "total_messages": len(messages),
169
- "conversation_messages": messages,
170
- "bedrock_history": br_history, # Full Bedrock conversation context
171
- "images_count": len(image_urls)
172
- }
173
-
174
- # Load existing dataset
175
  try:
176
- existing_dataset = Dataset.load_dataset(REPO_ID, token=HF_TOKEN, split="train")
177
- records = existing_dataset.to_list()
178
-
179
- # Check if session already exists and update it
180
- session_exists = False
181
- for idx, record in enumerate(records):
182
- if record.get("session_id") == session_id:
183
- records[idx] = conversation_record
184
- session_exists = True
185
- print(f"Updated existing session {session_id[:8]} in dataset")
186
- break
187
-
188
- if not session_exists:
189
- records.append(conversation_record)
190
- print(f"Added new session {session_id[:8]} to dataset")
 
 
 
191
 
192
- updated_dataset = Dataset.from_list(records)
193
-
194
- except Exception as load_error:
195
- print(f"Could not load existing dataset (creating new): {load_error}")
196
- updated_dataset = Dataset.from_list([conversation_record])
197
-
198
- # Push updated dataset
199
- updated_dataset.push_to_hub(
200
- REPO_ID,
201
- token=HF_TOKEN,
202
- private=True,
203
- commit_message=f"Update full conversation for session {session_id[:8]} ({len(chat_history)} exchanges)"
204
- )
205
-
206
- print(f"Full conversation logged to HF dataset: {REPO_ID}")
207
-
208
- except Exception as e:
209
- print(f"Failed to upload full conversation to HF dataset: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
  def save_local_conversation_log(session_id: str, chat_history: List[Tuple], image_urls: Dict[int, str]):
212
  """Save full conversation to local JSONL file."""
@@ -237,11 +244,16 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
237
  ## Multimodal Chat with Full Conversation Logging 📝
238
  Upload an image *(optional)*, ask a question, and continue the conversation.
239
 
 
 
 
 
 
240
  **Logging Features:**
241
  - 💾 **Full conversation history** saved after each message
242
  - 🔄 **Context preservation** - entire chat context maintained
243
  - 📸 **Image tracking** - all images linked to specific messages
244
- - 🏷️ **Session management** - each browser tab gets unique session ID
245
 
246
  **Storage:**
247
  - 🤗 HF Dataset: {"✅ Enabled" if HF_TOKEN else "❌ Disabled (set HF_TOKEN)"} - Repo: `{REPO_ID}`
@@ -249,7 +261,7 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
249
  """
250
  )
251
 
252
- chatbot = gr.Chatbot(height=420)
253
  chat_state = gr.State([]) # [(user, assistant), …] - Full chat history
254
  br_state = gr.State([]) # Bedrock message dicts - Full conversation context
255
  sess_state = gr.State("") # UUID for this browser tab
@@ -278,6 +290,15 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
278
  max_lines=3
279
  )
280
 
 
 
 
 
 
 
 
 
 
281
  # ---- main handler ---- #
282
  def chat(chat_log, br_history, sess_id, img_urls_dict,
283
  image, text,
@@ -286,8 +307,10 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
286
  if image is None and not text.strip():
287
  raise gr.Error("Upload an image or enter a message.")
288
 
 
289
  if not sess_id:
290
  sess_id = str(uuid.uuid4())
 
291
 
292
  # Call Bedrock with full conversation context
293
  try:
@@ -303,32 +326,40 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
303
  hf_img_url = None
304
  if image:
305
  message_index = len(chat_log) # Current message index
306
- hf_img_url = upload_image_to_hf_repo(sess_id, image, message_index)
307
- if hf_img_url:
 
 
 
 
308
  img_urls_dict[message_index] = hf_img_url
309
 
310
  # Update chat history
311
  display_user = text.strip() if text.strip() else "[image uploaded]"
312
  chat_log.append((display_user, reply))
313
 
314
- # Save FULL conversation (local and HF)
315
  save_local_conversation_log(sess_id, chat_log, img_urls_dict)
316
- save_full_conversation_to_hf_dataset(sess_id, chat_log, new_br_history, img_urls_dict)
317
 
318
  # Update status message
319
  status_msg = f"✅ Full conversation logged for session {sess_id[:8]}\n"
320
  status_msg += f"📊 Total exchanges: {len(chat_log)} | Messages in context: {len(new_br_history)}"
321
 
322
  if image:
323
- if hf_img_url:
324
- status_msg += f"\n🖼️ Image #{len(img_urls_dict)} uploaded to HF repo"
325
  else:
326
  status_msg += f"\n⚠️ Image saved locally only"
327
 
328
  if not HF_TOKEN:
329
  status_msg += "\n❌ HF logging disabled (no token) - local only"
330
 
331
- return chat_log, chat_log, new_br_history, sess_id, img_urls_dict, None, "", status_msg
 
 
 
 
332
 
333
  send_btn.click(
334
  chat,
@@ -336,41 +367,53 @@ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
336
  img_in, txt_in,
337
  max_tk, temp, top_p, top_k],
338
  outputs=[chatbot, chat_state, br_state, sess_state, img_urls_state,
339
- img_in, txt_in, log_status],
340
  )
341
 
342
  # ---- clear chat ---- #
343
  def reset():
344
- return [], [], "", {}, None, "", "Ready to start logging full conversations..."
 
 
 
345
 
346
  clear_btn.click(
347
  reset,
348
  inputs=None,
349
  outputs=[chatbot, chat_state, sess_state, img_urls_state,
350
- img_in, txt_in, log_status],
351
  queue=False,
352
  )
353
 
354
  # Add info about the logging structure
355
  gr.Markdown(
356
- f"""
357
- ### 📊 Conversation Log Structure
358
 
359
  Each conversation is saved with:
360
- - **Session ID**: Unique identifier for this browser tab/conversation
361
- - **Full message history**: All user and assistant messages in order
362
  - **Bedrock context**: Complete conversation context sent to the AI
363
  - **Image references**: URLs to all images uploaded during the conversation
364
  - **Timestamps**: When the conversation was last updated
 
365
 
366
  ### 🔍 Viewing Your Logs
367
 
368
- {"**HF Dataset**: [https://huggingface.co/datasets/" + REPO_ID + "](https://huggingface.co/datasets/" + REPO_ID + ")" if HF_TOKEN else "**HF Dataset**: Not configured - set HF_TOKEN to enable"}
369
- - Each record contains a complete conversation
370
  - Images stored in `images/` folder with session and message indexing
371
  - Download as JSON, CSV, or Parquet for analysis
 
372
 
373
- **Local logs**: Saved as `{session_id}_full.jsonl` (temporary until Space restarts)
 
 
 
 
 
 
 
374
  """
375
  )
376
 
@@ -387,5 +430,7 @@ if __name__ == "__main__":
387
  print(f"✅ HF logging enabled. Full conversations will be saved to: {REPO_ID}")
388
  print(f"📸 Images will be stored in: {REPO_ID}/images/")
389
 
 
 
390
  demo.queue(max_size=100)
391
  demo.launch(share=True)
 
3
  Multimodal chat front‑end for Amazon Bedrock Nova Premier v1
4
  (text and/or image ➜ assistant text) with per‑session JSONL logging.
5
  Logs FULL conversations to HF Dataset and images to HF repo via Git LFS.
6
+ Supports multiple concurrent users with separate sessions.
7
 
8
  Prereqs:
9
  pip install gradio==5.38 boto3 pillow datasets huggingface_hub
 
17
  """
18
 
19
  from __future__ import annotations
20
+ import base64, datetime, io, json, pathlib, uuid, os, threading
21
  from typing import Dict, List, Optional, Tuple
22
+ from concurrent.futures import ThreadPoolExecutor
23
 
24
  import boto3
25
  from botocore.config import Config
 
54
  LOG_DIR.mkdir(exist_ok=True)
55
  IMG_DIR.mkdir(exist_ok=True)
56
 
57
+ # Thread pool for background operations
58
+ executor = ThreadPoolExecutor(max_workers=4)
59
+ dataset_lock = threading.Lock() # Prevent concurrent dataset updates
60
+
61
  # ====== Bedrock client ====== #
62
  bedrock = boto3.client(
63
  "bedrock-runtime",
 
107
  messages.append({"role": "assistant", "content": [{"text": reply}]})
108
  return reply, messages
109
 
110
+ def upload_image_to_hf_repo_async(session_id: str, pil_img: Image.Image, message_index: int) -> Optional[str]:
111
  """Upload image to HF repo and return the repo URL."""
112
  if not HF_TOKEN:
 
113
  return None
114
 
115
  try:
116
+ # Create unique filename with message index and random component to avoid conflicts
117
  ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
118
+ random_suffix = str(uuid.uuid4())[:8]
119
+ filename = f"images/{session_id}_{message_index:03d}_{ts}_{random_suffix}.png"
120
 
121
  # Save locally first
122
  local_path = IMG_DIR / f"{session_id}_{message_index:03d}_{ts}.png"
 
134
 
135
  # Return the HF repo URL
136
  hf_image_url = f"https://huggingface.co/datasets/{REPO_ID}/resolve/main/{filename}"
 
137
  return hf_image_url
138
 
139
  except Exception as e:
140
  print(f"Failed to upload image to HF repo: {e}")
141
  return None
142
 
143
+ def save_full_conversation_to_hf_dataset_async(session_id: str, chat_history: List[Tuple], br_history: List[Dict], image_urls: Dict[int, str]):
144
+ """Save the ENTIRE conversation to HF Dataset with thread safety."""
145
  if not HF_TOKEN:
 
146
  return
147
 
148
+ def background_save():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  try:
150
+ with dataset_lock: # Prevent concurrent dataset updates
151
+ # Convert chat history to structured format
152
+ messages = []
153
+ for i, (user_msg, assistant_msg) in enumerate(chat_history):
154
+ messages.append({
155
+ "message_index": i,
156
+ "role": "user",
157
+ "content": user_msg,
158
+ "image_url": image_urls.get(i, ""),
159
+ "has_image": i in image_urls
160
+ })
161
+ messages.append({
162
+ "message_index": i,
163
+ "role": "assistant",
164
+ "content": assistant_msg,
165
+ "image_url": "",
166
+ "has_image": False
167
+ })
168
 
169
+ # Create conversation record
170
+ conversation_record = {
171
+ "session_id": session_id,
172
+ "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
173
+ "message_count": len(chat_history),
174
+ "total_messages": len(messages),
175
+ "conversation_messages": messages,
176
+ "bedrock_history": br_history, # Full Bedrock conversation context
177
+ "images_count": len(image_urls)
178
+ }
179
+
180
+ # Load existing dataset
181
+ try:
182
+ existing_dataset = Dataset.load_dataset(REPO_ID, token=HF_TOKEN, split="train")
183
+ records = existing_dataset.to_list()
184
+
185
+ # Check if session already exists and update it
186
+ session_exists = False
187
+ for idx, record in enumerate(records):
188
+ if record.get("session_id") == session_id:
189
+ records[idx] = conversation_record
190
+ session_exists = True
191
+ break
192
+
193
+ if not session_exists:
194
+ records.append(conversation_record)
195
+
196
+ updated_dataset = Dataset.from_list(records)
197
+
198
+ except Exception as load_error:
199
+ # Dataset doesn't exist yet, create new one
200
+ updated_dataset = Dataset.from_list([conversation_record])
201
+
202
+ # Push updated dataset
203
+ updated_dataset.push_to_hub(
204
+ REPO_ID,
205
+ token=HF_TOKEN,
206
+ private=True,
207
+ commit_message=f"Update conversation {session_id[:8]} ({len(chat_history)} exchanges)"
208
+ )
209
+
210
+ print(f"Conversation {session_id[:8]} saved to HF dataset")
211
+
212
+ except Exception as e:
213
+ print(f"Failed to save conversation {session_id[:8]} to HF dataset: {e}")
214
+
215
+ # Run in background thread to avoid blocking UI
216
+ executor.submit(background_save)
217
 
218
  def save_local_conversation_log(session_id: str, chat_history: List[Tuple], image_urls: Dict[int, str]):
219
  """Save full conversation to local JSONL file."""
 
244
  ## Multimodal Chat with Full Conversation Logging 📝
245
  Upload an image *(optional)*, ask a question, and continue the conversation.
246
 
247
+ **Multi-User Support:**
248
+ - 👥 Each browser tab/session gets a unique conversation ID
249
+ - 🔒 Conversations are isolated between users
250
+ - ⚡ Background logging doesn't block the UI
251
+
252
  **Logging Features:**
253
  - 💾 **Full conversation history** saved after each message
254
  - 🔄 **Context preservation** - entire chat context maintained
255
  - 📸 **Image tracking** - all images linked to specific messages
256
+ - 🏷️ **Session management** - unique session ID per conversation
257
 
258
  **Storage:**
259
  - 🤗 HF Dataset: {"✅ Enabled" if HF_TOKEN else "❌ Disabled (set HF_TOKEN)"} - Repo: `{REPO_ID}`
 
261
  """
262
  )
263
 
264
+ chatbot = gr.Chatbot(height=420, type="tuples") # Fix the warning
265
  chat_state = gr.State([]) # [(user, assistant), …] - Full chat history
266
  br_state = gr.State([]) # Bedrock message dicts - Full conversation context
267
  sess_state = gr.State("") # UUID for this browser tab
 
290
  max_lines=3
291
  )
292
 
293
+ # Session info display
294
+ with gr.Row():
295
+ session_info = gr.Textbox(
296
+ label="Session Info",
297
+ value="New session will be created on first message",
298
+ interactive=False,
299
+ max_lines=1
300
+ )
301
+
302
  # ---- main handler ---- #
303
  def chat(chat_log, br_history, sess_id, img_urls_dict,
304
  image, text,
 
307
  if image is None and not text.strip():
308
  raise gr.Error("Upload an image or enter a message.")
309
 
310
+ # Create new session if needed
311
  if not sess_id:
312
  sess_id = str(uuid.uuid4())
313
+ print(f"Created new session: {sess_id}")
314
 
315
  # Call Bedrock with full conversation context
316
  try:
 
326
  hf_img_url = None
327
  if image:
328
  message_index = len(chat_log) # Current message index
329
+ # Upload image in background (non-blocking)
330
+ if HF_TOKEN:
331
+ executor.submit(lambda: upload_image_to_hf_repo_async(sess_id, image, message_index))
332
+ # For immediate reference, we'll generate the expected URL pattern
333
+ ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
334
+ hf_img_url = f"[Image uploaded to HF repo - session {sess_id[:8]}, message {message_index}]"
335
  img_urls_dict[message_index] = hf_img_url
336
 
337
  # Update chat history
338
  display_user = text.strip() if text.strip() else "[image uploaded]"
339
  chat_log.append((display_user, reply))
340
 
341
+ # Save FULL conversation (local immediately, HF in background)
342
  save_local_conversation_log(sess_id, chat_log, img_urls_dict)
343
+ save_full_conversation_to_hf_dataset_async(sess_id, chat_log, new_br_history, img_urls_dict)
344
 
345
  # Update status message
346
  status_msg = f"✅ Full conversation logged for session {sess_id[:8]}\n"
347
  status_msg += f"📊 Total exchanges: {len(chat_log)} | Messages in context: {len(new_br_history)}"
348
 
349
  if image:
350
+ if HF_TOKEN:
351
+ status_msg += f"\n🖼️ Image #{len([k for k in img_urls_dict.keys() if isinstance(k, int)])} uploading to HF repo..."
352
  else:
353
  status_msg += f"\n⚠️ Image saved locally only"
354
 
355
  if not HF_TOKEN:
356
  status_msg += "\n❌ HF logging disabled (no token) - local only"
357
 
358
+ # Update session info
359
+ session_display = f"Session: {sess_id[:8]}... | Messages: {len(chat_log)} | Images: {len([k for k in img_urls_dict.keys() if isinstance(k, int)])}"
360
+
361
+ return (chat_log, chat_log, new_br_history, sess_id, img_urls_dict,
362
+ None, "", status_msg, session_display)
363
 
364
  send_btn.click(
365
  chat,
 
367
  img_in, txt_in,
368
  max_tk, temp, top_p, top_k],
369
  outputs=[chatbot, chat_state, br_state, sess_state, img_urls_state,
370
+ img_in, txt_in, log_status, session_info],
371
  )
372
 
373
  # ---- clear chat ---- #
374
  def reset():
375
+ new_session_id = str(uuid.uuid4())
376
+ return ([], [], new_session_id, {}, None, "",
377
+ "Ready to start logging full conversations...",
378
+ f"New session: {new_session_id[:8]}...")
379
 
380
  clear_btn.click(
381
  reset,
382
  inputs=None,
383
  outputs=[chatbot, chat_state, sess_state, img_urls_state,
384
+ img_in, txt_in, log_status, session_info],
385
  queue=False,
386
  )
387
 
388
  # Add info about the logging structure
389
  gr.Markdown(
390
+ """
391
+ ### 📊 Multi-User Conversation Log Structure
392
 
393
  Each conversation is saved with:
394
+ - **Session ID**: Unique identifier for each conversation (8-char prefix shown in UI)
395
+ - **Full message history**: All user and assistant messages in chronological order
396
  - **Bedrock context**: Complete conversation context sent to the AI
397
  - **Image references**: URLs to all images uploaded during the conversation
398
  - **Timestamps**: When the conversation was last updated
399
+ - **Concurrent support**: Multiple users can chat simultaneously without interference
400
 
401
  ### 🔍 Viewing Your Logs
402
 
403
+ **HF Dataset**: """ + (f"[https://huggingface.co/datasets/{REPO_ID}](https://huggingface.co/datasets/{REPO_ID})" if HF_TOKEN else "Not configured - set HF_TOKEN to enable") + """
404
+ - Each record contains a complete conversation from one user session
405
  - Images stored in `images/` folder with session and message indexing
406
  - Download as JSON, CSV, or Parquet for analysis
407
+ - Search/filter by session_id to find specific conversations
408
 
409
+ **Local logs**: Saved as `[session_id]_full.jsonl` (temporary until Space restarts)
410
+
411
+ ### 🔧 Performance Notes
412
+
413
+ - Image uploads and dataset updates happen in background threads
414
+ - UI remains responsive during logging operations
415
+ - Each browser tab gets its own isolated conversation
416
+ - Session IDs are generated client-side for immediate isolation
417
  """
418
  )
419
 
 
430
  print(f"✅ HF logging enabled. Full conversations will be saved to: {REPO_ID}")
431
  print(f"📸 Images will be stored in: {REPO_ID}/images/")
432
 
433
+ print("🚀 Multi-user chat with full conversation logging ready!")
434
+
435
  demo.queue(max_size=100)
436
  demo.launch(share=True)