MeghanaArakkal committed
Commit 293183a · verified · 1 Parent(s): c54b322

Update app.py

Files changed (1)
  1. app.py +139 -91
app.py CHANGED
@@ -2,7 +2,7 @@
"""
Multimodal chat front‑end for Amazon Bedrock Nova Premier v1
(text and/or image ➜ assistant text) with per‑session JSONL logging.
- Logs conversations to HF Dataset and images to HF repo via Git LFS.
+ Logs FULL conversations to HF Dataset and images to HF repo via Git LFS.

Prereqs:
    pip install gradio==5.38 boto3 pillow datasets huggingface_hub
@@ -43,7 +43,7 @@ Never mention Amazon or Nova.

# HuggingFace Configuration
HF_TOKEN = os.getenv("HF_TOKEN")  # Set this in your Space's secrets
- REPO_ID = "collinear-ai/nova-premier-redteaming-external"  # Change this to your actual HF dataset repo
+ REPO_ID = "your-username/chat-logs"  # Change this to your actual HF dataset repo
HF_API = HfApi()

# Local directories (for temporary storage)
@@ -101,19 +101,19 @@ def call_bedrock(
    messages.append({"role": "assistant", "content": [{"text": reply}]})
    return reply, messages

- def upload_image_to_hf_repo(session_id: str, pil_img: Image.Image) -> Optional[str]:
+ def upload_image_to_hf_repo(session_id: str, pil_img: Image.Image, message_index: int) -> Optional[str]:
    """Upload image to HF repo and return the repo URL."""
    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set, skipping image upload to HF repo")
        return None

    try:
-         # Create unique filename
+         # Create unique filename with message index
        ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
-         filename = f"images/{session_id}_{ts}.png"
+         filename = f"images/{session_id}_{message_index:03d}_{ts}.png"

        # Save locally first
-         local_path = IMG_DIR / f"{session_id}_{ts}.png"
+         local_path = IMG_DIR / f"{session_id}_{message_index:03d}_{ts}.png"
        pil_img.save(local_path, format="PNG")

        # Upload to HF repo
@@ -123,7 +123,7 @@ def upload_image_to_hf_repo(session_id: str, pil_img: Image.Image) -> Optional[s
            repo_id=REPO_ID,
            token=HF_TOKEN,
            repo_type="dataset",
-             commit_message=f"Add image for session {session_id[:8]}"
+             commit_message=f"Add image for session {session_id[:8]} message {message_index}"
        )

        # Return the HF repo URL
@@ -135,123 +135,151 @@ def upload_image_to_hf_repo(session_id: str, pil_img: Image.Image) -> Optional[s
        print(f"Failed to upload image to HF repo: {e}")
        return None

- def upload_conversation_to_hf_dataset(session_id: str, user_text: str, assistant_text: str, hf_image_url: Optional[str] = None):
-     """Upload conversation log to HF Dataset with image URL."""
+ def save_full_conversation_to_hf_dataset(session_id: str, chat_history: List[Tuple], br_history: List[Dict], image_urls: Dict[int, str]):
+     """Save the ENTIRE conversation to HF Dataset."""
    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set, skipping upload to HF dataset")
        return

    try:
-         record = {
+         # Convert chat history to structured format
+         messages = []
+         for i, (user_msg, assistant_msg) in enumerate(chat_history):
+             messages.append({
+                 "message_index": i,
+                 "role": "user",
+                 "content": user_msg,
+                 "image_url": image_urls.get(i, ""),
+                 "has_image": i in image_urls
+             })
+             messages.append({
+                 "message_index": i,
+                 "role": "assistant",
+                 "content": assistant_msg,
+                 "image_url": "",
+                 "has_image": False
+             })
+
+         # Create conversation record
+         conversation_record = {
            "session_id": session_id,
            "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
-             "user_message": user_text,
-             "assistant_message": assistant_text,
-             "image_url": hf_image_url if hf_image_url else "",
-             "has_image": hf_image_url is not None
+             "message_count": len(chat_history),
+             "total_messages": len(messages),
+             "conversation_messages": messages,
+             "bedrock_history": br_history,  # Full Bedrock conversation context
+             "images_count": len(image_urls)
        }

-         # Try to load existing dataset and append
+         # Load existing dataset
        try:
            existing_dataset = Dataset.load_dataset(REPO_ID, token=HF_TOKEN, split="train")
-             # Convert to list, add new record, convert back
            records = existing_dataset.to_list()
-             records.append(record)
+
+             # Check if session already exists and update it
+             session_exists = False
+             for idx, record in enumerate(records):
+                 if record.get("session_id") == session_id:
+                     records[idx] = conversation_record
+                     session_exists = True
+                     print(f"Updated existing session {session_id[:8]} in dataset")
+                     break
+
+             if not session_exists:
+                 records.append(conversation_record)
+                 print(f"Added new session {session_id[:8]} to dataset")
+
            updated_dataset = Dataset.from_list(records)
+
        except Exception as load_error:
            print(f"Could not load existing dataset (creating new): {load_error}")
-             # Dataset doesn't exist yet, create new one
-             updated_dataset = Dataset.from_list([record])
+             updated_dataset = Dataset.from_list([conversation_record])

        # Push updated dataset
        updated_dataset.push_to_hub(
            REPO_ID,
            token=HF_TOKEN,
            private=True,
-             commit_message=f"Add conversation from session {session_id[:8]}"
+             commit_message=f"Update full conversation for session {session_id[:8]} ({len(chat_history)} exchanges)"
        )

-         print(f"Conversation logged to HF dataset: {REPO_ID}")
+         print(f"Full conversation logged to HF dataset: {REPO_ID}")

    except Exception as e:
-         print(f"Failed to upload conversation log to HF dataset: {e}")
-
- def cache_image(session_id: str, pil_img: Image.Image) -> Tuple[str, Optional[str]]:
-     """Save image locally AND upload to HF repo. Returns (local_path, hf_url)."""
-     # Save locally for immediate use
-     ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S")
-     local_path = IMG_DIR / f"{session_id}_{ts}.png"
-     pil_img.save(local_path, format="PNG")
-
-     # Upload to HF repo
-     hf_url = upload_image_to_hf_repo(session_id, pil_img)
-
-     return str(local_path), hf_url
-
- def append_log(session_id: str, user_text: str, assistant_text: str, local_img_path: Optional[str] = None, hf_img_url: Optional[str] = None):
-     """Log conversation locally AND to HF dataset."""
-     # Local logging (existing functionality for immediate access)
-     record = {
-         "ts": datetime.datetime.utcnow().isoformat(timespec="seconds") + "Z",
-         "user": user_text,
-         "assistant": assistant_text,
+         print(f"Failed to upload full conversation to HF dataset: {e}")
+
+ def save_local_conversation_log(session_id: str, chat_history: List[Tuple], image_urls: Dict[int, str]):
+     """Save full conversation to local JSONL file."""
+     conversation_record = {
+         "session_id": session_id,
+         "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
+         "conversation": []
    }
-     if local_img_path:
-         record["image_file"] = local_img_path
-     if hf_img_url:
-         record["hf_image_url"] = hf_img_url
-
-     path = LOG_DIR / f"{session_id}.jsonl"
-     with path.open("a", encoding="utf-8") as f:
-         f.write(json.dumps(record, ensure_ascii=False) + "\n")

-     # Upload to HF Dataset (persistent storage)
-     upload_conversation_to_hf_dataset(session_id, user_text, assistant_text, hf_img_url)
+     for i, (user_msg, assistant_msg) in enumerate(chat_history):
+         conversation_record["conversation"].append({
+             "turn": i + 1,
+             "user": user_msg,
+             "assistant": assistant_msg,
+             "image_url": image_urls.get(i, ""),
+             "has_image": i in image_urls
+         })
+
+     # Save to session-specific file (overwrite with full conversation each time)
+     path = LOG_DIR / f"{session_id}_full.jsonl"
+     with path.open("w", encoding="utf-8") as f:
+         f.write(json.dumps(conversation_record, ensure_ascii=False) + "\n")

# ====== Gradio UI ====== #
- with gr.Blocks(title="Multimodal Chat with HF Logging") as demo:
+ with gr.Blocks(title="Multimodal Chat with Full Conversation Logging") as demo:
    gr.Markdown(
        f"""
-         ## Multimodal Chat with Persistent Logging 📝
+         ## Multimodal Chat with Full Conversation Logging 📝
        Upload an image *(optional)*, ask a question, and continue the conversation.

-         **Logging Status:**
-         - 💾 Local logs: Always saved to temporary storage
+         **Logging Features:**
+         - 💾 **Full conversation history** saved after each message
+         - 🔄 **Context preservation** - entire chat context maintained
+         - 📸 **Image tracking** - all images linked to specific messages
+         - 🏷️ **Session management** - each browser tab gets unique session ID
+
+         **Storage:**
        - 🤗 HF Dataset: {"✅ Enabled" if HF_TOKEN else "❌ Disabled (set HF_TOKEN)"} - Repo: `{REPO_ID}`
        - 🖼️ Images: {"✅ Uploaded to HF repo" if HF_TOKEN else "❌ Local only"}
        """
    )

-     chatbot = gr.Chatbot(height=420)
-     chat_state = gr.State([])   # [(user, assistant), …]
-     br_state = gr.State([])     # Bedrock message dicts
+     chatbot = gr.Chatbot(height=420)
+     chat_state = gr.State([])   # [(user, assistant), …] - Full chat history
+     br_state = gr.State([])     # Bedrock message dicts - Full conversation context
    sess_state = gr.State("")   # UUID for this browser tab
+     img_urls_state = gr.State({})  # Dict mapping message index to image URLs

    with gr.Row():
        img_in = gr.Image(label="Image (optional)", type="pil")
        txt_in = gr.Textbox(lines=3, label="Your message",
                            placeholder="Ask something about the image… or just chat!")

-     send_btn = gr.Button("Send", variant="primary")
+     send_btn = gr.Button("Send", variant="primary")
    clear_btn = gr.Button("Clear chat")

    with gr.Accordion("Advanced generation settings", open=False):
        max_tk = gr.Slider(16, 1024, value=512, step=16, label="max_tokens")
-         temp = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="temperature")
-         top_p = gr.Slider(0.0, 1.0, value=0.9, step=0.01, label="top_p")
-         top_k = gr.Slider(1, 100, value=50, step=1, label="top_k")
+         temp = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="temperature")
+         top_p = gr.Slider(0.0, 1.0, value=0.9, step=0.01, label="top_p")
+         top_k = gr.Slider(1, 100, value=50, step=1, label="top_k")

    # Status area for logging feedback
    with gr.Row():
        log_status = gr.Textbox(
-             label="Logging Status",
-             value="Ready to log conversations...",
+             label="Conversation Logging Status",
+             value="Ready to start logging full conversations...",
            interactive=False,
-             max_lines=2
+             max_lines=3
        )

    # ---- main handler ---- #
-     def chat(chat_log, br_history, sess_id,
+     def chat(chat_log, br_history, sess_id, img_urls_dict,
             image, text,
             max_tokens, temperature, top_p, top_k):

@@ -261,9 +289,9 @@ with gr.Blocks(title="Multimodal Chat with HF Logging") as demo:
        if not sess_id:
            sess_id = str(uuid.uuid4())

-         # Call Bedrock
+         # Call Bedrock with full conversation context
        try:
-             reply, new_br = call_bedrock(
+             reply, new_br_history = call_bedrock(
                br_history, image, text.strip(),
                int(max_tokens), float(temperature),
                float(top_p), int(top_k)
@@ -271,59 +299,78 @@ with gr.Blocks(title="Multimodal Chat with HF Logging") as demo:
        except Exception as e:
            raise gr.Error(f"Bedrock API error: {str(e)}")

-         # Handle image caching (both local and HF)
-         local_img_path, hf_img_url = None, None
+         # Handle image upload if present
+         hf_img_url = None
        if image:
-             local_img_path, hf_img_url = cache_image(sess_id, image)
-
+             message_index = len(chat_log)  # Current message index
+             hf_img_url = upload_image_to_hf_repo(sess_id, image, message_index)
+             if hf_img_url:
+                 img_urls_dict[message_index] = hf_img_url
+
+         # Update chat history
        display_user = text.strip() if text.strip() else "[image uploaded]"
        chat_log.append((display_user, reply))

-         # Log with both local and HF image paths
-         append_log(sess_id, display_user, reply, local_img_path, hf_img_url)
+         # Save FULL conversation (local and HF)
+         save_local_conversation_log(sess_id, chat_log, img_urls_dict)
+         save_full_conversation_to_hf_dataset(sess_id, chat_log, new_br_history, img_urls_dict)

        # Update status message
-         status_msg = f"✅ Logged conversation for session {sess_id[:8]}"
+         status_msg = f"✅ Full conversation logged for session {sess_id[:8]}\n"
+         status_msg += f"📊 Total exchanges: {len(chat_log)} | Messages in context: {len(new_br_history)}"
+
        if image:
            if hf_img_url:
-                 status_msg += f" | 🖼️ Image uploaded to HF repo"
+                 status_msg += f"\n🖼️ Image #{len(img_urls_dict)} uploaded to HF repo"
            else:
-                 status_msg += f" | ⚠️ Image saved locally only"
+                 status_msg += f"\n⚠️ Image saved locally only"

        if not HF_TOKEN:
-             status_msg += " | ❌ HF logging disabled (no token)"
+             status_msg += "\n❌ HF logging disabled (no token) - local only"

-         return chat_log, chat_log, new_br, sess_id, None, "", status_msg
+         return chat_log, chat_log, new_br_history, sess_id, img_urls_dict, None, "", status_msg

    send_btn.click(
        chat,
-         inputs=[chat_state, br_state, sess_state,
+         inputs=[chat_state, br_state, sess_state, img_urls_state,
                img_in, txt_in,
                max_tk, temp, top_p, top_k],
-         outputs=[chatbot, chat_state, br_state, sess_state, img_in, txt_in, log_status],
+         outputs=[chatbot, chat_state, br_state, sess_state, img_urls_state,
+                  img_in, txt_in, log_status],
    )

    # ---- clear chat ---- #
    def reset():
-         return [], [], "", None, "", "Ready to log conversations..."
+         return [], [], "", {}, None, "", "Ready to start logging full conversations..."

    clear_btn.click(
        reset,
        inputs=None,
-         outputs=[chatbot, chat_state, sess_state, img_in, txt_in, log_status],
+         outputs=[chatbot, chat_state, sess_state, img_urls_state,
+                  img_in, txt_in, log_status],
        queue=False,
    )

-     # Add info about viewing logs
+     # Add info about the logging structure
    gr.Markdown(
        f"""
-         ### 📊 Viewing Your Logs
+         ### 📊 Conversation Log Structure
+
+         Each conversation is saved with:
+         - **Session ID**: Unique identifier for this browser tab/conversation
+         - **Full message history**: All user and assistant messages in order
+         - **Bedrock context**: Complete conversation context sent to the AI
+         - **Image references**: URLs to all images uploaded during the conversation
+         - **Timestamps**: When the conversation was last updated
+
+         ### 🔍 Viewing Your Logs

-         If HF logging is enabled, you can view your conversation logs at:
-         - **Dataset**: [https://huggingface.co/datasets/{REPO_ID}](https://huggingface.co/datasets/{REPO_ID})
-         - **Images**: Browse the `images/` folder in the dataset repository
+         {"**HF Dataset**: [https://huggingface.co/datasets/" + REPO_ID + "](https://huggingface.co/datasets/" + REPO_ID + ")" if HF_TOKEN else "**HF Dataset**: Not configured - set HF_TOKEN to enable"}
+         - Each record contains a complete conversation
+         - Images stored in `images/` folder with session and message indexing
+         - Download as JSON, CSV, or Parquet for analysis

-         **Local logs** (temporary): Saved in JSONL format, accessible until Space restarts.
+         **Local logs**: Saved as `{session_id}_full.jsonl` (temporary until Space restarts)
        """
    )

@@ -337,7 +384,8 @@ if __name__ == "__main__":
        print(" 2. Add HF_TOKEN with your HuggingFace token (write permissions)")
        print(" 3. Create a dataset repository and update REPO_ID in the code")
    else:
-         print(f"✅ HF logging enabled. Logs will be saved to: {REPO_ID}")
+         print(f"✅ HF logging enabled. Full conversations will be saved to: {REPO_ID}")
+         print(f"📸 Images will be stored in: {REPO_ID}/images/")

    demo.queue(max_size=100)
    demo.launch(share=True)
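
The records pushed by `save_full_conversation_to_hf_dataset` can be read back with the `datasets` library for analysis. A minimal sketch, not part of this commit, assuming `datasets>=2.14` (for the `token` argument), an HF token with read access to the private repo, and the same placeholder `REPO_ID` used above:

import os
from datasets import load_dataset

REPO_ID = "your-username/chat-logs"  # placeholder repo id, as in the commit

# One row per session; each row holds the latest snapshot of that conversation.
logs = load_dataset(REPO_ID, split="train", token=os.getenv("HF_TOKEN"))

for record in logs:
    print(record["session_id"][:8], record["timestamp"], record["message_count"], "exchanges")
    for msg in record["conversation_messages"]:
        tag = f' [image: {msg["image_url"]}]' if msg["has_image"] else ""
        print(f'  {msg["role"]}: {msg["content"][:80]}{tag}')

Because the app overwrites a session's record on every turn rather than appending per-message rows, each row already contains the full conversation, so no grouping by `session_id` is needed when analyzing the logs.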