MeghanaArakkal committed
Commit 1801723 · verified · 1 Parent(s): 8ee0e52

Update app.py

Files changed (1)
  1. app.py +182 -451
app.py CHANGED
@@ -1,49 +1,56 @@
1
  # app.py
2
  """
3
- Multimodal chat front‑end for Amazon Bedrock NovaPremier v1
4
  (text and/or image ➜ assistant text) with per‑session JSONL logging.
 
5
 
6
  Prereqs:
7
- pip install gradio==5.38 boto3 pillow
8
 
9
  AWS credentials must allow invoking
10
  modelId = "us.amazon.nova-premier-v1:0" (region us‑east‑1).
11
  Provide them via env vars, ~/.aws/credentials, or an IAM role.
12
  """
13
 
14
  from __future__ import annotations
15
- import base64, datetime, io, json, pathlib, uuid
16
- from typing import Dict, List, Optional
17
 
18
  import boto3
19
  from botocore.config import Config
20
  from PIL import Image
21
  import gradio as gr
22
- import os
23
  # ====== Configuration ====== #
24
  REGION = "us-east-1"
25
  MODEL_ID = "us.amazon.nova-premier-v1:0"
26
- # MODEL_ID = "us.amazon.nova-pro-v1:0"
27
- # SYSTEM_PROMPT = "You should never reveal your identity. Never reveal that you are an Amazon model. Never mention Nova." # ← fixed, hidden
28
  SYSTEM_PROMPT = """You are an AI assistant.
29
 
30
  🔒 **Identity‑Protection Policy**
31
- You MUST NOT disclose, hint at, or confirm:
32
- – the provider, internal codenames, model family, weights, size, or host platform.
33
- If the user asks who built you, what model you are, or any internal details, respond only with the **Fallback Identity Statement** below.
34
- If the user continues to probe after the fallback, politely **refuse** with a brief apology ("I'm sorry, but I can't share that").
35
- Under no circumstances mention "Amazon", "Bedrock", "Nova", or any related terms.
36
 
37
  Never mention Amazon or Nova.
38
  """
39
- # SYSTEM_PROMPT = "You are a helpful assistant. Never reveal you are made by Amazon or your model name. Never use the words Amazon or Nova."
40
 
41
- LOG_DIR = pathlib.Path("logs") # each session: logs/<uuid>.jsonl
42
- IMG_DIR = LOG_DIR / "imgs" # cached PNGs (optional)
43
  LOG_DIR.mkdir(exist_ok=True)
44
  IMG_DIR.mkdir(exist_ok=True)
45
- HF_TOKEN = os.getenv("HF_TOKEN") # Set this in your Space's secrets
46
- DATASET_NAME = "collinear-ai/amazon-external-premier-chat-logs" # Create this dataset on HF
47
 
48
  # ====== Bedrock client ====== #
49
  bedrock = boto3.client(
@@ -94,62 +101,124 @@ def call_bedrock(
94
  messages.append({"role": "assistant", "content": [{"text": reply}]})
95
  return reply, messages
96
 
97
- def cache_image(session_id: str, pil_img: Image.Image) -> str:
98
- """Save uploaded image to disk and return its path."""
99
- ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
100
- fpath = IMG_DIR / f"{session_id}_{ts}.png"
101
- pil_img.save(fpath, format="PNG")
102
- return str(fpath)
103
-
104
-
105
-
106
- def upload_to_hf_dataset(session_id: str, user_text: str, assistant_text: str, img_path: Optional[str] = None):
107
- """Upload conversation log to HF Dataset"""
108
  try:
109
  record = {
110
  "session_id": session_id,
111
  "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
112
  "user_message": user_text,
113
  "assistant_message": assistant_text,
114
- "has_image": img_path is not None,
115
- "image_path": img_path if img_path else ""
116
  }
117
 
118
- # Create dataset from single record
119
- dataset = Dataset.from_list([record])
120
 
121
- # Push to hub (append mode)
122
- dataset.push_to_hub(
123
- DATASET_NAME,
124
  token=HF_TOKEN,
125
- private=True # Keep logs private
 
126
  )
127
  except Exception as e:
128
- print(f"Failed to upload log: {e}")
129
 
130
- # Replace your append_log function with:
131
- def append_log(session_id: str, user_text: str, assistant_text: str, img_path: Optional[str] = None):
132
- # Keep local logging for immediate access
133
  record = {
134
  "ts": datetime.datetime.utcnow().isoformat(timespec="seconds") + "Z",
135
  "user": user_text,
136
  "assistant": assistant_text,
137
  }
138
- if img_path:
139
- record["image_file"] = img_path
140
  path = LOG_DIR / f"{session_id}.jsonl"
141
  with path.open("a", encoding="utf-8") as f:
142
  f.write(json.dumps(record, ensure_ascii=False) + "\n")
143
 
144
- # Also upload to HF Dataset
145
- upload_to_hf_dataset(session_id, user_text, assistant_text, img_path)
146
 
147
  # ====== Gradio UI ====== #
148
- with gr.Blocks(title="Multimodal Chat") as demo:
149
  gr.Markdown(
150
- """
151
- ## Multimodal Chat
152
  Upload an image *(optional)*, ask a question, and continue the conversation.
153
  """
154
  )
155
 
@@ -172,6 +241,15 @@ with gr.Blocks(title="Multimodal Chat") as demo:
172
  top_p = gr.Slider(0.0, 1.0, value=0.9, step=0.01, label="top_p")
173
  top_k = gr.Slider(1, 100, value=50, step=1, label="top_k")
174
 
175
  # ---- main handler ---- #
176
  def chat(chat_log, br_history, sess_id,
177
  image, text,
@@ -183,430 +261,83 @@ with gr.Blocks(title="Multimodal Chat") as demo:
183
  if not sess_id:
184
  sess_id = str(uuid.uuid4())
185
 
186
- reply, new_br = call_bedrock(
187
- br_history, image, text.strip(),
188
- int(max_tokens), float(temperature),
189
- float(top_p), int(top_k)
190
- )
191
-
192
- img_path = cache_image(sess_id, image) if image else None
193
- display_user = text if text.strip() else "[image]"
194
  chat_log.append((display_user, reply))
195
- append_log(sess_id, display_user, reply, img_path)
196
 
197
- return chat_log, chat_log, new_br, sess_id, None, ""
198
 
199
  send_btn.click(
200
  chat,
201
  inputs=[chat_state, br_state, sess_state,
202
  img_in, txt_in,
203
  max_tk, temp, top_p, top_k],
204
- outputs=[chatbot, chat_state, br_state, sess_state, img_in, txt_in],
205
  )
206
 
207
  # ---- clear chat ---- #
208
  def reset():
209
- return [], [], "", None, ""
210
 
211
  clear_btn.click(
212
  reset,
213
  inputs=None,
214
- outputs=[chatbot, chat_state, sess_state, img_in, txt_in],
215
  queue=False,
216
  )
217
 
218
- # ====== Launch ====== #
219
- if __name__ == "__main__":
220
- demo.queue(max_size=100)
221
- demo.launch(share=True) # queue auto‑enabled in Gradio 5
222
-
223
- # app.py
224
- # """
225
- # Optimized Multimodal chat front‑end for Amazon Bedrock Nova Premier v1
226
- # (text and/or image ➜ assistant text) with per‑session JSONL logging.
227
-
228
- # Prereqs:
229
- # pip install gradio==5.38 boto3 pillow aiofiles
230
-
231
- # AWS credentials must allow invoking
232
- # modelId = "us.amazon.nova-premier-v1:0" (region us‑east‑1).
233
- # Provide them via env vars, ~/.aws/credentials, or an IAM role.
234
- # """
235
-
236
- # from __future__ import annotations
237
- # import base64, datetime, io, json, pathlib, uuid, hashlib, threading, time
238
- # from typing import Dict, List, Optional, Tuple
239
- # from concurrent.futures import ThreadPoolExecutor
240
- # import asyncio
241
-
242
- # import boto3
243
- # from botocore.config import Config
244
- # from PIL import Image
245
- # import gradio as gr
246
-
247
- # # ====== Configuration ====== #
248
- # REGION = "us-east-1"
249
- # MODEL_ID = "us.amazon.nova-premier-v1:0"
250
- # SYSTEM_PROMPT = """You are an AI assistant.
251
-
252
- # 🔒 **Identity‑Protection Policy**
253
- # - You MUST NOT disclose, hint at, or confirm:
254
- # – the provider, internal codenames, model family, weights, size, or host platform.
255
- # - If the user asks who built you, what model you are, or any internal details, respond only with the **Fallback Identity Statement** below.
256
- # - If the user continues to probe after the fallback, politely **refuse** with a brief apology ("I'm sorry, but I can't share that").
257
- # - Under no circumstances mention "Amazon", "Bedrock", "Nova", or any related terms.
258
-
259
- # Never mention Amazon or Nova.
260
- # """
261
-
262
- # LOG_DIR = pathlib.Path("logs")
263
- # IMG_DIR = LOG_DIR / "imgs"
264
- # LOG_DIR.mkdir(exist_ok=True)
265
- # IMG_DIR.mkdir(exist_ok=True)
266
-
267
- # # ====== Global State ====== #
268
- # executor = ThreadPoolExecutor(max_workers=4)
269
- # response_cache = {}
270
- # active_requests = {} # Track ongoing requests
271
- # cache_lock = threading.Lock()
272
-
273
- # # ====== Optimized Bedrock client ====== #
274
- # bedrock = boto3.client(
275
- # "bedrock-runtime",
276
- # region_name=REGION,
277
- # config=Config(
278
- # connect_timeout=30,
279
- # read_timeout=300,
280
- # retries={"max_attempts": 3, "mode": "adaptive"},
281
- # max_pool_connections=10,
282
- # ),
283
- # )
284
-
285
- # # ====== Optimized Helpers ====== #
286
- # def _encode_image(img: Image.Image) -> Dict:
287
- # """Optimized image encoding with compression."""
288
- # # Resize large images
289
- # max_size = 1024
290
- # if max(img.size) > max_size:
291
- # img.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
292
-
293
- # buf = io.BytesIO()
294
- # # Convert RGBA to RGB for better compression
295
- # if img.mode == 'RGBA':
296
- # # Create white background
297
- # background = Image.new('RGB', img.size, (255, 255, 255))
298
- # background.paste(img, mask=img.split()[-1]) # Use alpha channel as mask
299
- # img = background
300
-
301
- # # Use JPEG for better compression
302
- # img.save(buf, format="JPEG", quality=85, optimize=True)
303
- # b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
304
- # return {"image": {"format": "jpeg", "source": {"bytes": b64}}}
305
-
306
- # def _hash_request(history: List[Dict], image: Optional[Image.Image],
307
- # text: str, params: Tuple) -> str:
308
- # """Create hash of request for caching."""
309
- # content = str(history) + str(text) + str(params)
310
- # if image:
311
- # img_bytes = io.BytesIO()
312
- # image.save(img_bytes, format='PNG')
313
- # content += str(hashlib.md5(img_bytes.getvalue()).hexdigest())
314
- # return hashlib.sha256(content.encode()).hexdigest()
315
-
316
- # def call_bedrock(
317
- # history: List[Dict],
318
- # image: Optional[Image.Image],
319
- # user_text: str,
320
- # max_tokens: int,
321
- # temperature: float,
322
- # top_p: float,
323
- # top_k: int,
324
- # ) -> Tuple[str, List[Dict]]:
325
- # """Send full conversation to Bedrock with caching."""
326
-
327
- # # Check cache first
328
- # cache_key = _hash_request(history, image, user_text,
329
- # (max_tokens, temperature, top_p, top_k))
330
-
331
- # with cache_lock:
332
- # if cache_key in response_cache:
333
- # return response_cache[cache_key]
334
-
335
- # content: List[Dict] = []
336
- # if image is not None:
337
- # content.append(_encode_image(image))
338
- # if user_text:
339
- # content.append({"text": user_text})
340
-
341
- # messages = history + [{"role": "user", "content": content}]
342
- # body = {
343
- # "schemaVersion": "messages-v1",
344
- # "messages": messages,
345
- # "system": [{"text": SYSTEM_PROMPT}],
346
- # "inferenceConfig": {
347
- # "maxTokens": max_tokens,
348
- # "temperature": temperature,
349
- # "topP": top_p,
350
- # "topK": top_k,
351
- # },
352
- # }
353
-
354
- # try:
355
- # resp = bedrock.invoke_model(modelId=MODEL_ID, body=json.dumps(body))
356
- # reply = json.loads(resp["body"].read())["output"]["message"]["content"][0]["text"]
357
-
358
- # messages.append({"role": "assistant", "content": [{"text": reply}]})
359
- # result = (reply, messages)
360
-
361
- # # Cache the result
362
- # with cache_lock:
363
- # response_cache[cache_key] = result
364
- # # Limit cache size
365
- # if len(response_cache) > 100:
366
- # # Remove oldest entries
367
- # oldest_keys = list(response_cache.keys())[:20]
368
- # for key in oldest_keys:
369
- # del response_cache[key]
370
 
371
- # return result
 
 
372
 
373
- # except Exception as e:
374
- # raise Exception(f"Bedrock API error: {str(e)}")
 
375
 
376
- # def cache_image_optimized(session_id: str, pil_img: Image.Image) -> str:
377
- # """Optimized image caching with compression."""
378
- # ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
379
- # fpath = IMG_DIR / f"{session_id}_{ts}.jpg" # Use JPEG for smaller files
380
-
381
- # # Optimize image before saving
382
- # if pil_img.mode == 'RGBA':
383
- # background = Image.new('RGB', pil_img.size, (255, 255, 255))
384
- # background.paste(pil_img, mask=pil_img.split()[-1])
385
- # pil_img = background
386
-
387
- # pil_img.save(fpath, format="JPEG", quality=85, optimize=True)
388
- # return str(fpath)
389
-
390
- # def append_log_threaded(session_id: str, user_text: str, assistant_text: str,
391
- # img_path: Optional[str] = None):
392
- # """Thread-safe logging."""
393
- # def write_log():
394
- # record = {
395
- # "ts": datetime.datetime.utcnow().isoformat(timespec="seconds") + "Z",
396
- # "user": user_text,
397
- # "assistant": assistant_text,
398
- # }
399
- # if img_path:
400
- # record["image_file"] = img_path
401
-
402
- # path = LOG_DIR / f"{session_id}.jsonl"
403
- # with path.open("a", encoding="utf-8") as f:
404
- # f.write(json.dumps(record, ensure_ascii=False) + "\n")
405
-
406
- # # Write to log in background thread
407
- # executor.submit(write_log)
408
-
409
- # # ====== Request Status Manager ====== #
410
- # class RequestStatus:
411
- # def __init__(self):
412
- # self.is_complete = False
413
- # self.result = None
414
- # self.error = None
415
- # self.start_time = time.time()
416
-
417
- # # ====== Gradio UI ====== #
418
- # with gr.Blocks(title="Optimized Multimodal Chat",
419
- # css="""
420
- # .thinking { opacity: 0.7; font-style: italic; }
421
- # .error { color: #ff4444; }
422
- # """) as demo:
423
 
424
- # gr.Markdown(
425
- # """
426
- # ## 🚀 Optimized Multimodal Chat
427
- # Upload an image *(optional)*, ask a question, and continue the conversation.
428
- # *Now with improved performance and responsive UI!*
429
- # """
430
- # )
431
-
432
- # chatbot = gr.Chatbot(height=420)
433
- # chat_state = gr.State([]) # [(user, assistant), …]
434
- # br_state = gr.State([]) # Bedrock message dicts
435
- # sess_state = gr.State("") # UUID for this browser tab
436
- # request_id_state = gr.State("") # Track current request
437
-
438
- # with gr.Row():
439
- # img_in = gr.Image(label="Image (optional)", type="pil")
440
- # txt_in = gr.Textbox(
441
- # lines=3,
442
- # label="Your message",
443
- # placeholder="Ask something about the image… or just chat!",
444
- # interactive=True
445
- # )
446
-
447
- # with gr.Row():
448
- # send_btn = gr.Button("Send", variant="primary")
449
- # clear_btn = gr.Button("Clear chat")
450
- # stop_btn = gr.Button("Stop", variant="stop", visible=False)
451
-
452
- # with gr.Row():
453
- # status_text = gr.Textbox(
454
- # label="Status",
455
- # value="Ready",
456
- # interactive=False,
457
- # max_lines=1
458
- # )
459
-
460
- # with gr.Accordion("⚙️ Advanced generation settings", open=False):
461
- # max_tk = gr.Slider(16, 1024, value=512, step=16, label="max_tokens")
462
- # temp = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="temperature")
463
- # top_p = gr.Slider(0.0, 1.0, value=0.9, step=0.01, label="top_p")
464
- # top_k = gr.Slider(1, 100, value=50, step=1, label="top_k")
465
-
466
- # # ---- Optimized chat handler ---- #
467
- # def chat_optimized(chat_log, br_history, sess_id, request_id,
468
- # image, text,
469
- # max_tokens, temperature, top_p, top_k):
470
-
471
- # if image is None and not text.strip():
472
- # return chat_log, chat_log, br_history, sess_id, request_id, None, "", "⚠️ Upload an image or enter a message.", True, False
473
-
474
- # if not sess_id:
475
- # sess_id = str(uuid.uuid4())
476
-
477
- # # Generate new request ID
478
- # request_id = str(uuid.uuid4())
479
-
480
- # display_user = text.strip() if text.strip() else "[image uploaded]"
481
-
482
- # # Add thinking message immediately
483
- # chat_log.append((display_user, "🤔 Processing your request..."))
484
-
485
- # # Create request status tracker
486
- # status = RequestStatus()
487
- # active_requests[request_id] = status
488
-
489
- # def background_process():
490
- # try:
491
- # reply, new_br = call_bedrock(
492
- # br_history, image, text.strip(),
493
- # int(max_tokens), float(temperature),
494
- # float(top_p), int(top_k)
495
- # )
496
-
497
- # img_path = None
498
- # if image:
499
- # img_path = cache_image_optimized(sess_id, image)
500
-
501
- # # Log in background
502
- # append_log_threaded(sess_id, display_user, reply, img_path)
503
-
504
- # # Update status
505
- # status.result = (reply, new_br)
506
- # status.is_complete = True
507
-
508
- # except Exception as e:
509
- # status.error = str(e)
510
- # status.is_complete = True
511
-
512
- # # Start background processing
513
- # executor.submit(background_process)
514
-
515
- # return (chat_log, chat_log, br_history, sess_id, request_id,
516
- # None, "", "🔄 Processing...", False, True)
517
-
518
- # # ---- Status checker ---- #
519
- # def check_status(chat_log, br_history, request_id):
520
- # if not request_id or request_id not in active_requests:
521
- # return chat_log, chat_log, br_history, "Ready", True, False
522
-
523
- # status = active_requests[request_id]
524
-
525
- # if not status.is_complete:
526
- # elapsed = time.time() - status.start_time
527
- # return (chat_log, chat_log, br_history,
528
- # f"⏱️ Processing... ({elapsed:.1f}s)", False, True)
529
-
530
- # # Request completed
531
- # if status.error:
532
- # # Update last message with error
533
- # if chat_log:
534
- # chat_log[-1] = (chat_log[-1][0], f"❌ Error: {status.error}")
535
- # status_msg = "❌ Request failed"
536
- # else:
537
- # # Update last message with result
538
- # reply, new_br = status.result
539
- # if chat_log:
540
- # chat_log[-1] = (chat_log[-1][0], reply)
541
- # br_history = new_br
542
- # status_msg = "✅ Complete"
543
-
544
- # # Clean up
545
- # del active_requests[request_id]
546
-
547
- # return chat_log, chat_log, br_history, status_msg, True, False
548
-
549
- # # ---- Event handlers ---- #
550
- # send_btn.click(
551
- # chat_optimized,
552
- # inputs=[chat_state, br_state, sess_state, request_id_state,
553
- # img_in, txt_in,
554
- # max_tk, temp, top_p, top_k],
555
- # outputs=[chatbot, chat_state, br_state, sess_state, request_id_state,
556
- # img_in, txt_in, status_text, send_btn, stop_btn],
557
- # queue=True
558
- # )
559
-
560
- # # Auto-refresh status every 1 second
561
- # status_checker = gr.Timer(1.0)
562
- # status_checker.tick(
563
- # check_status,
564
- # inputs=[chat_state, br_state, request_id_state],
565
- # outputs=[chatbot, chat_state, br_state, status_text, send_btn, stop_btn],
566
- # queue=False
567
- # )
568
-
569
- # # ---- Clear chat ---- #
570
- # def reset():
571
- # return [], [], "", "", None, "", "Ready", True, False
572
-
573
- # clear_btn.click(
574
- # reset,
575
- # inputs=None,
576
- # outputs=[chatbot, chat_state, sess_state, request_id_state,
577
- # img_in, txt_in, status_text, send_btn, stop_btn],
578
- # queue=False,
579
- # )
580
-
581
- # # ---- Stop request ---- #
582
- # def stop_request(request_id):
583
- # if request_id in active_requests:
584
- # del active_requests[request_id]
585
- # return "⏹️ Stopped", True, False, ""
586
-
587
- # stop_btn.click(
588
- # stop_request,
589
- # inputs=[request_id_state],
590
- # outputs=[status_text, send_btn, stop_btn, request_id_state],
591
- # queue=False
592
- # )
593
-
594
- # # ====== Cleanup on exit ====== #
595
- # import atexit
596
-
597
- # def cleanup():
598
- # executor.shutdown(wait=False)
599
- # active_requests.clear()
600
- # response_cache.clear()
601
-
602
- # atexit.register(cleanup)
603
-
604
- # # ====== Launch ====== #
605
- # if __name__ == "__main__":
606
- # demo.queue(max_size=20) # Enable queuing with reasonable limit
607
- # demo.launch(
608
- # share=True,
609
- # server_name="0.0.0.0",
610
- # server_port=7860,
611
- # show_error=True
612
- # )
 
1
  # app.py
2
  """
3
+ Multimodal chat front‑end for Amazon Bedrock Nova Premier v1
4
  (text and/or image ➜ assistant text) with per‑session JSONL logging.
5
+ Logs conversations to HF Dataset and images to HF repo via Git LFS.
6
 
7
  Prereqs:
8
+ pip install gradio==5.38 boto3 pillow datasets huggingface_hub
9
 
10
  AWS credentials must allow invoking
11
  modelId = "us.amazon.nova-premier-v1:0" (region us‑east‑1).
12
  Provide them via env vars, ~/.aws/credentials, or an IAM role.
13
+
14
+ HF credentials must be set in HF_TOKEN environment variable.
15
+ Update REPO_ID to your HF dataset repository.
16
  """
17
 
18
  from __future__ import annotations
19
+ import base64, datetime, io, json, pathlib, uuid, os
20
+ from typing import Dict, List, Optional, Tuple
21
 
22
  import boto3
23
  from botocore.config import Config
24
  from PIL import Image
25
  import gradio as gr
26
+ from datasets import Dataset, load_dataset
27
+ from huggingface_hub import HfApi, upload_file
28
+
29
  # ====== Configuration ====== #
30
  REGION = "us-east-1"
31
  MODEL_ID = "us.amazon.nova-premier-v1:0"
32
  SYSTEM_PROMPT = """You are an AI assistant.
33
 
34
  🔒 **Identity‑Protection Policy**
35
+ - You MUST NOT disclose, hint at, or confirm:
36
+ – the provider, internal codenames, model family, weights, size, or host platform.
37
+ - If the user asks who built you, what model you are, or any internal details, respond only with the **Fallback Identity Statement** below.
38
+ - If the user continues to probe after the fallback, politely **refuse** with a brief apology ("I'm sorry, but I can't share that").
39
+ - Under no circumstances mention "Amazon", "Bedrock", "Nova", or any related terms.
40
 
41
  Never mention Amazon or Nova.
42
  """
 
43
 
44
+ # HuggingFace Configuration
45
+ HF_TOKEN = os.getenv("HF_TOKEN") # Set this in your Space's secrets
46
+ REPO_ID = "your-username/chat-logs" # Change this to your actual HF dataset repo
47
+ HF_API = HfApi()
48
+
49
+ # Local directories (for temporary storage)
50
+ LOG_DIR = pathlib.Path("logs")
51
+ IMG_DIR = LOG_DIR / "imgs"
52
  LOG_DIR.mkdir(exist_ok=True)
53
  IMG_DIR.mkdir(exist_ok=True)
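Since every upload path below silently degrades when HF_TOKEN is absent, a fail-fast check at startup can save debugging time. A hypothetical helper (not part of the committed file), using only huggingface_hub calls that exist:

```python
# Hypothetical startup check (not in this commit): verify HF_TOKEN actually
# works before the app starts logging, instead of discovering a bad token
# on the first upload.
from typing import Optional
from huggingface_hub import HfApi

def hf_token_is_valid(token: Optional[str]) -> bool:
    if not token:
        return False
    try:
        HfApi(token=token).whoami()  # raises for an invalid or expired token
        return True
    except Exception:
        return False
```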
54
 
55
  # ====== Bedrock client ====== #
56
  bedrock = boto3.client(
 
101
  messages.append({"role": "assistant", "content": [{"text": reply}]})
102
  return reply, messages
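The diff elides the body of `call_bedrock`; mirroring the commented-out variant in the removed code above, the Nova `messages-v1` request it builds looks roughly like this (a sketch, not the exact committed lines):

```python
# Request shape for Nova's messages-v1 schema, mirroring the commented-out
# call_bedrock variant shown in the removed code above.
body = {
    "schemaVersion": "messages-v1",
    "messages": messages,                 # prior turns plus the new user content
    "system": [{"text": SYSTEM_PROMPT}],  # the hidden identity policy
    "inferenceConfig": {
        "maxTokens": max_tokens,
        "temperature": temperature,
        "topP": top_p,
        "topK": top_k,
    },
}
resp = bedrock.invoke_model(modelId=MODEL_ID, body=json.dumps(body))
reply = json.loads(resp["body"].read())["output"]["message"]["content"][0]["text"]
```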
103
 
104
+ def upload_image_to_hf_repo(session_id: str, pil_img: Image.Image) -> Optional[str]:
105
+ """Upload image to HF repo and return the repo URL."""
106
+ if not HF_TOKEN:
107
+ print("Warning: HF_TOKEN not set, skipping image upload to HF repo")
108
+ return None
109
+
110
+ try:
111
+ # Create unique filename
112
+ ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
113
+ filename = f"images/{session_id}_{ts}.png"
114
+
115
+ # Save locally first
116
+ local_path = IMG_DIR / f"{session_id}_{ts}.png"
117
+ pil_img.save(local_path, format="PNG")
118
+
119
+ # Upload to HF repo
120
+ upload_file(
121
+ path_or_fileobj=str(local_path),
122
+ path_in_repo=filename,
123
+ repo_id=REPO_ID,
124
+ token=HF_TOKEN,
125
+ repo_type="dataset",
126
+ commit_message=f"Add image for session {session_id[:8]}"
127
+ )
128
+
129
+ # Return the HF repo URL
130
+ hf_image_url = f"https://huggingface.co/datasets/{REPO_ID}/resolve/main/{filename}"
131
+ print(f"Image uploaded to HF repo: {hf_image_url}")
132
+ return hf_image_url
133
+
134
+ except Exception as e:
135
+ print(f"Failed to upload image to HF repo: {e}")
136
+ return None
137
+
138
+ def upload_conversation_to_hf_dataset(session_id: str, user_text: str, assistant_text: str, hf_image_url: Optional[str] = None):
139
+ """Upload conversation log to HF Dataset with image URL."""
140
+ if not HF_TOKEN:
141
+ print("Warning: HF_TOKEN not set, skipping upload to HF dataset")
142
+ return
143
+
144
  try:
145
  record = {
146
  "session_id": session_id,
147
  "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
148
  "user_message": user_text,
149
  "assistant_message": assistant_text,
150
+ "image_url": hf_image_url if hf_image_url else "",
151
+ "has_image": hf_image_url is not None
152
  }
153
 
154
+ # Try to load existing dataset and append
155
+ try:
156
+ existing_dataset = load_dataset(REPO_ID, token=HF_TOKEN, split="train")
157
+ # Convert to list, add new record, convert back
158
+ records = existing_dataset.to_list()
159
+ records.append(record)
160
+ updated_dataset = Dataset.from_list(records)
161
+ except Exception as load_error:
162
+ print(f"Could not load existing dataset (creating new): {load_error}")
163
+ # Dataset doesn't exist yet, create new one
164
+ updated_dataset = Dataset.from_list([record])
165
 
166
+ # Push updated dataset
167
+ updated_dataset.push_to_hub(
168
+ REPO_ID,
169
  token=HF_TOKEN,
170
+ private=True,
171
+ commit_message=f"Add conversation from session {session_id[:8]}"
172
  )
173
+
174
+ print(f"Conversation logged to HF dataset: {REPO_ID}")
175
+
176
  except Exception as e:
177
+ print(f"Failed to upload conversation log to HF dataset: {e}")
178
 
179
+ def cache_image(session_id: str, pil_img: Image.Image) -> Tuple[str, Optional[str]]:
180
+ """Save image locally AND upload to HF repo. Returns (local_path, hf_url)."""
181
+ # Save locally for immediate use
182
+ ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
183
+ local_path = IMG_DIR / f"{session_id}_{ts}.png"
184
+ pil_img.save(local_path, format="PNG")
185
+
186
+ # Upload to HF repo
187
+ hf_url = upload_image_to_hf_repo(session_id, pil_img)
188
+
189
+ return str(local_path), hf_url
190
+
191
+ def append_log(session_id: str, user_text: str, assistant_text: str, local_img_path: Optional[str] = None, hf_img_url: Optional[str] = None):
192
+ """Log conversation locally AND to HF dataset."""
193
+ # Local logging (existing functionality for immediate access)
194
  record = {
195
  "ts": datetime.datetime.utcnow().isoformat(timespec="seconds") + "Z",
196
  "user": user_text,
197
  "assistant": assistant_text,
198
  }
199
+ if local_img_path:
200
+ record["image_file"] = local_img_path
201
+ if hf_img_url:
202
+ record["hf_image_url"] = hf_img_url
203
+
204
  path = LOG_DIR / f"{session_id}.jsonl"
205
  with path.open("a", encoding="utf-8") as f:
206
  f.write(json.dumps(record, ensure_ascii=False) + "\n")
207
 
208
+ # Upload to HF Dataset (persistent storage)
209
+ upload_conversation_to_hf_dataset(session_id, user_text, assistant_text, hf_img_url)
210
 
211
  # ====== Gradio UI ====== #
212
+ with gr.Blocks(title="Multimodal Chat with HF Logging") as demo:
213
  gr.Markdown(
214
+ f"""
215
+ ## Multimodal Chat with Persistent Logging 📝
216
  Upload an image *(optional)*, ask a question, and continue the conversation.
217
+
218
+ **Logging Status:**
219
+ - 💾 Local logs: Always saved to temporary storage
220
+ - 🤗 HF Dataset: {"✅ Enabled" if HF_TOKEN else "❌ Disabled (set HF_TOKEN)"} - Repo: `{REPO_ID}`
221
+ - 🖼️ Images: {"✅ Uploaded to HF repo" if HF_TOKEN else "❌ Local only"}
222
  """
223
  )
224
 
 
241
  top_p = gr.Slider(0.0, 1.0, value=0.9, step=0.01, label="top_p")
242
  top_k = gr.Slider(1, 100, value=50, step=1, label="top_k")
243
 
244
+ # Status area for logging feedback
245
+ with gr.Row():
246
+ log_status = gr.Textbox(
247
+ label="Logging Status",
248
+ value="Ready to log conversations...",
249
+ interactive=False,
250
+ max_lines=2
251
+ )
252
+
253
  # ---- main handler ---- #
254
  def chat(chat_log, br_history, sess_id,
255
  image, text,
 
261
  if not sess_id:
262
  sess_id = str(uuid.uuid4())
263
 
264
+ # Call Bedrock
265
+ try:
266
+ reply, new_br = call_bedrock(
267
+ br_history, image, text.strip(),
268
+ int(max_tokens), float(temperature),
269
+ float(top_p), int(top_k)
270
+ )
271
+ except Exception as e:
272
+ raise gr.Error(f"Bedrock API error: {str(e)}")
273
+
274
+ # Handle image caching (both local and HF)
275
+ local_img_path, hf_img_url = None, None
276
+ if image:
277
+ local_img_path, hf_img_url = cache_image(sess_id, image)
278
+
279
+ display_user = text.strip() if text.strip() else "[image uploaded]"
280
  chat_log.append((display_user, reply))
281
+
282
+ # Log with both local and HF image paths
283
+ append_log(sess_id, display_user, reply, local_img_path, hf_img_url)
284
+
285
+ # Update status message
286
+ status_msg = f"✅ Logged conversation for session {sess_id[:8]}"
287
+ if image:
288
+ if hf_img_url:
289
+ status_msg += f" | 🖼️ Image uploaded to HF repo"
290
+ else:
291
+ status_msg += f" | ⚠️ Image saved locally only"
292
+
293
+ if not HF_TOKEN:
294
+ status_msg += " | ❌ HF logging disabled (no token)"
295
 
296
+ return chat_log, chat_log, new_br, sess_id, None, "", status_msg
297
 
298
  send_btn.click(
299
  chat,
300
  inputs=[chat_state, br_state, sess_state,
301
  img_in, txt_in,
302
  max_tk, temp, top_p, top_k],
303
+ outputs=[chatbot, chat_state, br_state, sess_state, img_in, txt_in, log_status],
304
  )
305
 
306
  # ---- clear chat ---- #
307
  def reset():
308
+ return [], [], "", None, "", "Ready to log conversations..."
309
 
310
  clear_btn.click(
311
  reset,
312
  inputs=None,
313
+ outputs=[chatbot, chat_state, br_state, sess_state, img_in, txt_in, log_status],
314
  queue=False,
315
  )
316
 
317
+ # Add info about viewing logs
318
+ gr.Markdown(
319
+ f"""
320
+ ### 📊 Viewing Your Logs
321
 
322
+ If HF logging is enabled, you can view your conversation logs at:
323
+ - **Dataset**: [https://huggingface.co/datasets/{REPO_ID}](https://huggingface.co/datasets/{REPO_ID})
324
+ - **Images**: Browse the `images/` folder in the dataset repository
325
 
326
+ **Local logs** (temporary): saved in JSONL format, accessible until the Space restarts.
327
+ """
328
+ )
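A minimal sketch of reading the logs back offline (assuming the placeholder repo id above and a token with read access):

```python
# Sketch: load the logged conversations for offline analysis.
import os
from datasets import load_dataset

logs = load_dataset("your-username/chat-logs", split="train", token=os.getenv("HF_TOKEN"))
for row in logs.select(range(min(5, len(logs)))):
    print(row["timestamp"], row["session_id"][:8], row["user_message"][:60])
```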
329
 
330
+ # ====== Launch ====== #
331
+ if __name__ == "__main__":
332
+ # Validate configuration on startup
333
+ if not HF_TOKEN:
334
+ print("⚠️ WARNING: HF_TOKEN not set. Logging will be local only and will be lost on Space restart.")
335
+ print(" To enable persistent logging:")
336
+ print(" 1. Go to your Space settings → Repository secrets")
337
+ print(" 2. Add HF_TOKEN with your HuggingFace token (write permissions)")
338
+ print(" 3. Create a dataset repository and update REPO_ID in the code")
339
+ else:
340
+ print(f"✅ HF logging enabled. Logs will be saved to: {REPO_ID}")
341
 
342
+ demo.queue(max_size=100)
343
+ demo.launch(share=True)