MeghanaArakkal committed
Commit 293183a · verified · 1 Parent(s): c54b322

Update app.py

Files changed (1)
  1. app.py +139 -91
app.py CHANGED
@@ -2,7 +2,7 @@
"""
Multimodal chat front‑end for Amazon Bedrock Nova Premier v1
(text and/or image ➜ assistant text) with per‑session JSONL logging.
- Logs conversations to HF Dataset and images to HF repo via Git LFS.
+ Logs FULL conversations to HF Dataset and images to HF repo via Git LFS.

Prereqs:
    pip install gradio==5.38 boto3 pillow datasets huggingface_hub
@@ -43,7 +43,7 @@ Never mention Amazon or Nova.

# HuggingFace Configuration
HF_TOKEN = os.getenv("HF_TOKEN")  # Set this in your Space's secrets
- REPO_ID = "collinear-ai/nova-premier-redteaming-external"  # Change this to your actual HF dataset repo
+ REPO_ID = "your-username/chat-logs"  # Change this to your actual HF dataset repo
HF_API = HfApi()

# Local directories (for temporary storage)
@@ -101,19 +101,19 @@ def call_bedrock(
    messages.append({"role": "assistant", "content": [{"text": reply}]})
    return reply, messages

- def upload_image_to_hf_repo(session_id: str, pil_img: Image.Image) -> Optional[str]:
+ def upload_image_to_hf_repo(session_id: str, pil_img: Image.Image, message_index: int) -> Optional[str]:
    """Upload image to HF repo and return the repo URL."""
    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set, skipping image upload to HF repo")
        return None

    try:
-         # Create unique filename
+         # Create unique filename with message index
        ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
-         filename = f"images/{session_id}_{ts}.png"
+         filename = f"images/{session_id}_{message_index:03d}_{ts}.png"

        # Save locally first
-         local_path = IMG_DIR / f"{session_id}_{ts}.png"
+         local_path = IMG_DIR / f"{session_id}_{message_index:03d}_{ts}.png"
        pil_img.save(local_path, format="PNG")

        # Upload to HF repo
@@ -123,7 +123,7 @@ def upload_image_to_hf_repo(session_id: str, pil_img: Image.Image) -> Optional[s
            repo_id=REPO_ID,
            token=HF_TOKEN,
            repo_type="dataset",
-             commit_message=f"Add image for session {session_id[:8]}"
+             commit_message=f"Add image for session {session_id[:8]} message {message_index}"
        )

        # Return the HF repo URL
@@ -135,123 +135,151 @@ def upload_image_to_hf_repo(session_id: str, pil_img: Image.Image) -> Optional[s
        print(f"Failed to upload image to HF repo: {e}")
        return None

- def upload_conversation_to_hf_dataset(session_id: str, user_text: str, assistant_text: str, hf_image_url: Optional[str] = None):
-     """Upload conversation log to HF Dataset with image URL."""
+ def save_full_conversation_to_hf_dataset(session_id: str, chat_history: List[Tuple], br_history: List[Dict], image_urls: Dict[int, str]):
+     """Save the ENTIRE conversation to HF Dataset."""
    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set, skipping upload to HF dataset")
        return

    try:
-         record = {
+         # Convert chat history to structured format
+         messages = []
+         for i, (user_msg, assistant_msg) in enumerate(chat_history):
+             messages.append({
+                 "message_index": i,
+                 "role": "user",
+                 "content": user_msg,
+                 "image_url": image_urls.get(i, ""),
+                 "has_image": i in image_urls
+             })
+             messages.append({
+                 "message_index": i,
+                 "role": "assistant",
+                 "content": assistant_msg,
+                 "image_url": "",
+                 "has_image": False
+             })
+
+         # Create conversation record
+         conversation_record = {
            "session_id": session_id,
            "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
-             "user_message": user_text,
-             "assistant_message": assistant_text,
-             "image_url": hf_image_url if hf_image_url else "",
-             "has_image": hf_image_url is not None
+             "message_count": len(chat_history),
+             "total_messages": len(messages),
+             "conversation_messages": messages,
+             "bedrock_history": br_history,  # Full Bedrock conversation context
+             "images_count": len(image_urls)
        }

-         # Try to load existing dataset and append
+         # Load existing dataset
        try:
            existing_dataset = Dataset.load_dataset(REPO_ID, token=HF_TOKEN, split="train")
-             # Convert to list, add new record, convert back
            records = existing_dataset.to_list()
-             records.append(record)
+
+             # Check if session already exists and update it
+             session_exists = False
+             for idx, record in enumerate(records):
+                 if record.get("session_id") == session_id:
+                     records[idx] = conversation_record
+                     session_exists = True
+                     print(f"Updated existing session {session_id[:8]} in dataset")
+                     break
+
+             if not session_exists:
+                 records.append(conversation_record)
+                 print(f"Added new session {session_id[:8]} to dataset")
+
            updated_dataset = Dataset.from_list(records)
+
        except Exception as load_error:
            print(f"Could not load existing dataset (creating new): {load_error}")
-             # Dataset doesn't exist yet, create new one
-             updated_dataset = Dataset.from_list([record])
+             updated_dataset = Dataset.from_list([conversation_record])

        # Push updated dataset
        updated_dataset.push_to_hub(
            REPO_ID,
            token=HF_TOKEN,
            private=True,
-             commit_message=f"Add conversation from session {session_id[:8]}"
+             commit_message=f"Update full conversation for session {session_id[:8]} ({len(chat_history)} exchanges)"
        )

-         print(f"Conversation logged to HF dataset: {REPO_ID}")
+         print(f"Full conversation logged to HF dataset: {REPO_ID}")

    except Exception as e:
-         print(f"Failed to upload conversation log to HF dataset: {e}")
-
- def cache_image(session_id: str, pil_img: Image.Image) -> Tuple[str, Optional[str]]:
-     """Save image locally AND upload to HF repo. Returns (local_path, hf_url)."""
-     # Save locally for immediate use
-     ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
-     local_path = IMG_DIR / f"{session_id}_{ts}.png"
-     pil_img.save(local_path, format="PNG")
-
-     # Upload to HF repo
-     hf_url = upload_image_to_hf_repo(session_id, pil_img)
-
-     return str(local_path), hf_url
-
- def append_log(session_id: str, user_text: str, assistant_text: str, local_img_path: Optional[str] = None, hf_img_url: Optional[str] = None):
-     """Log conversation locally AND to HF dataset."""
-     # Local logging (existing functionality for immediate access)
-     record = {
-         "ts": datetime.datetime.utcnow().isoformat(timespec="seconds") + "Z",
-         "user": user_text,
-         "assistant": assistant_text,
+         print(f"Failed to upload full conversation to HF dataset: {e}")
+
+ def save_local_conversation_log(session_id: str, chat_history: List[Tuple], image_urls: Dict[int, str]):
+     """Save full conversation to local JSONL file."""
+     conversation_record = {
+         "session_id": session_id,
+         "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
+         "conversation": []
    }
-     if local_img_path:
-         record["image_file"] = local_img_path
-     if hf_img_url:
-         record["hf_image_url"] = hf_img_url
-
-     path = LOG_DIR / f"{session_id}.jsonl"
-     with path.open("a", encoding="utf-8") as f:
-         f.write(json.dumps(record, ensure_ascii=False) + "\n")

-     # Upload to HF Dataset (persistent storage)
-     upload_conversation_to_hf_dataset(session_id, user_text, assistant_text, hf_img_url)
+     for i, (user_msg, assistant_msg) in enumerate(chat_history):
+         conversation_record["conversation"].append({
+             "turn": i + 1,
+             "user": user_msg,
+             "assistant": assistant_msg,
+             "image_url": image_urls.get(i, ""),
+             "has_image": i in image_urls
+         })
+
+     # Save to session-specific file (overwrite with full conversation each time)
+     path = LOG_DIR / f"{session_id}_full.jsonl"
+     with path.open("w", encoding="utf-8") as f:
+         f.write(json.dumps(conversation_record, ensure_ascii=False) + "\n")

# ====== Gradio UI ====== #
- with gr.Blocks(title="Multimodal Chat with HF Logging") as demo:
+ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
    gr.Markdown(
        f"""
-         ## Multimodal Chat with Persistent Logging 📝
+         ## Multimodal Chat with Full Conversation Logging 📝
        Upload an image *(optional)*, ask a question, and continue the conversation.

-         **Logging Status:**
-         - 💾 Local logs: Always saved to temporary storage
+         **Logging Features:**
+         - 💾 **Full conversation history** saved after each message
+         - 🔄 **Context preservation** - entire chat context maintained
+         - 📸 **Image tracking** - all images linked to specific messages
+         - 🏷️ **Session management** - each browser tab gets unique session ID
+
+         **Storage:**
        - 🤗 HF Dataset: {"✅ Enabled" if HF_TOKEN else "❌ Disabled (set HF_TOKEN)"} - Repo: `{REPO_ID}`
        - 🖼️ Images: {"✅ Uploaded to HF repo" if HF_TOKEN else "❌ Local only"}
        """
    )

-     chatbot = gr.Chatbot(height=420)
-     chat_state = gr.State([])   # [(user, assistant), …]
-     br_state = gr.State([])     # Bedrock message dicts
+     chatbot = gr.Chatbot(height=420)
+     chat_state = gr.State([])   # [(user, assistant), …] - Full chat history
+     br_state = gr.State([])     # Bedrock message dicts - Full conversation context
    sess_state = gr.State("")   # UUID for this browser tab
+     img_urls_state = gr.State({})  # Dict mapping message index to image URLs

    with gr.Row():
        img_in = gr.Image(label="Image (optional)", type="pil")
        txt_in = gr.Textbox(lines=3, label="Your message",
                            placeholder="Ask something about the image… or just chat!")

-     send_btn = gr.Button("Send", variant="primary")
+     send_btn = gr.Button("Send", variant="primary")
    clear_btn = gr.Button("Clear chat")

    with gr.Accordion("Advanced generation settings", open=False):
        max_tk = gr.Slider(16, 1024, value=512, step=16, label="max_tokens")
-         temp = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="temperature")
-         top_p = gr.Slider(0.0, 1.0, value=0.9, step=0.01, label="top_p")
-         top_k = gr.Slider(1, 100, value=50, step=1, label="top_k")
+         temp = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="temperature")
+         top_p = gr.Slider(0.0, 1.0, value=0.9, step=0.01, label="top_p")
+         top_k = gr.Slider(1, 100, value=50, step=1, label="top_k")

    # Status area for logging feedback
    with gr.Row():
        log_status = gr.Textbox(
-             label="Logging Status",
-             value="Ready to log conversations...",
+             label="Conversation Logging Status",
+             value="Ready to start logging full conversations...",
            interactive=False,
-             max_lines=2
+             max_lines=3
        )

    # ---- main handler ---- #
-     def chat(chat_log, br_history, sess_id,
+     def chat(chat_log, br_history, sess_id, img_urls_dict,
             image, text,
             max_tokens, temperature, top_p, top_k):

@@ -261,9 +289,9 @@ with gr.Blocks(title="Multimodal Chat with HF Logging") as demo:
        if not sess_id:
            sess_id = str(uuid.uuid4())

-         # Call Bedrock
+         # Call Bedrock with full conversation context
        try:
-             reply, new_br = call_bedrock(
+             reply, new_br_history = call_bedrock(
                br_history, image, text.strip(),
                int(max_tokens), float(temperature),
                float(top_p), int(top_k)
@@ -271,59 +299,78 @@ with gr.Blocks(title="Multimodal Chat with HF Logging") as demo:
        except Exception as e:
            raise gr.Error(f"Bedrock API error: {str(e)}")

-         # Handle image caching (both local and HF)
-         local_img_path, hf_img_url = None, None
+         # Handle image upload if present
+         hf_img_url = None
        if image:
-             local_img_path, hf_img_url = cache_image(sess_id, image)
-
+             message_index = len(chat_log)  # Current message index
+             hf_img_url = upload_image_to_hf_repo(sess_id, image, message_index)
+             if hf_img_url:
+                 img_urls_dict[message_index] = hf_img_url
+
+         # Update chat history
        display_user = text.strip() if text.strip() else "[image uploaded]"
        chat_log.append((display_user, reply))

-         # Log with both local and HF image paths
-         append_log(sess_id, display_user, reply, local_img_path, hf_img_url)
+         # Save FULL conversation (local and HF)
+         save_local_conversation_log(sess_id, chat_log, img_urls_dict)
+         save_full_conversation_to_hf_dataset(sess_id, chat_log, new_br_history, img_urls_dict)

        # Update status message
-         status_msg = f"✅ Logged conversation for session {sess_id[:8]}"
+         status_msg = f"✅ Full conversation logged for session {sess_id[:8]}\n"
+         status_msg += f"📊 Total exchanges: {len(chat_log)} | Messages in context: {len(new_br_history)}"
+
        if image:
            if hf_img_url:
-                 status_msg += f" | 🖼️ Image uploaded to HF repo"
+                 status_msg += f"\n🖼️ Image #{len(img_urls_dict)} uploaded to HF repo"
            else:
-                 status_msg += f" | ⚠️ Image saved locally only"
+                 status_msg += f"\n⚠️ Image saved locally only"

        if not HF_TOKEN:
-             status_msg += " | ❌ HF logging disabled (no token)"
+             status_msg += "\n❌ HF logging disabled (no token) - local only"

-         return chat_log, chat_log, new_br, sess_id, None, "", status_msg
+         return chat_log, chat_log, new_br_history, sess_id, img_urls_dict, None, "", status_msg

    send_btn.click(
        chat,
-         inputs=[chat_state, br_state, sess_state,
+         inputs=[chat_state, br_state, sess_state, img_urls_state,
                img_in, txt_in,
                max_tk, temp, top_p, top_k],
-         outputs=[chatbot, chat_state, br_state, sess_state, img_in, txt_in, log_status],
+         outputs=[chatbot, chat_state, br_state, sess_state, img_urls_state,
+                  img_in, txt_in, log_status],
    )

    # ---- clear chat ---- #
    def reset():
-         return [], [], "", None, "", "Ready to log conversations..."
+         return [], [], "", {}, None, "", "Ready to start logging full conversations..."

    clear_btn.click(
        reset,
        inputs=None,
-         outputs=[chatbot, chat_state, sess_state, img_in, txt_in, log_status],
+         outputs=[chatbot, chat_state, sess_state, img_urls_state,
+                  img_in, txt_in, log_status],
        queue=False,
    )

-     # Add info about viewing logs
+     # Add info about the logging structure
    gr.Markdown(
        f"""
-         ### 📊 Viewing Your Logs
+         ### 📊 Conversation Log Structure
+
+         Each conversation is saved with:
+         - **Session ID**: Unique identifier for this browser tab/conversation
+         - **Full message history**: All user and assistant messages in order
+         - **Bedrock context**: Complete conversation context sent to the AI
+         - **Image references**: URLs to all images uploaded during the conversation
+         - **Timestamps**: When the conversation was last updated
+
+         ### 🔍 Viewing Your Logs

-         If HF logging is enabled, you can view your conversation logs at:
-         - **Dataset**: [https://huggingface.co/datasets/{REPO_ID}](https://huggingface.co/datasets/{REPO_ID})
-         - **Images**: Browse the `images/` folder in the dataset repository
+         {"**HF Dataset**: [https://huggingface.co/datasets/" + REPO_ID + "](https://huggingface.co/datasets/" + REPO_ID + ")" if HF_TOKEN else "**HF Dataset**: Not configured - set HF_TOKEN to enable"}
+         - Each record contains a complete conversation
+         - Images stored in `images/` folder with session and message indexing
+         - Download as JSON, CSV, or Parquet for analysis

-         **Local logs** (temporary): Saved in JSONL format, accessible until Space restarts.
+         **Local logs**: Saved as `{session_id}_full.jsonl` (temporary until Space restarts)
        """
    )

@@ -337,7 +384,8 @@ if __name__ == "__main__":
        print(" 2. Add HF_TOKEN with your HuggingFace token (write permissions)")
        print(" 3. Create a dataset repository and update REPO_ID in the code")
    else:
-         print(f"✅ HF logging enabled. Logs will be saved to: {REPO_ID}")
+         print(f"✅ HF logging enabled. Full conversations will be saved to: {REPO_ID}")
+         print(f"📸 Images will be stored in: {REPO_ID}/images/")

    demo.queue(max_size=100)
    demo.launch(share=True)
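
The records pushed by `save_full_conversation_to_hf_dataset` can be read back with the `datasets` library for analysis. A minimal sketch, not part of this commit, assuming `datasets>=2.14` (for the `token` argument), an HF token with read access to the private repo, and the same placeholder `REPO_ID` used above:

import os
from datasets import load_dataset

REPO_ID = "your-username/chat-logs"  # placeholder repo id, as in the commit

# One row per session; each row holds the latest snapshot of that conversation.
logs = load_dataset(REPO_ID, split="train", token=os.getenv("HF_TOKEN"))

for record in logs:
    print(record["session_id"][:8], record["timestamp"], record["message_count"], "exchanges")
    for msg in record["conversation_messages"]:
        tag = f' [image: {msg["image_url"]}]' if msg["has_image"] else ""
        print(f'  {msg["role"]}: {msg["content"][:80]}{tag}')

Because the app overwrites a session's record on every turn rather than appending per-message rows, each row already contains the full conversation, so no grouping by `session_id` is needed when analyzing the logs.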