computer-agent

Running on CPU Upgrade

App Files Files Community

M-Rique committed on May 13

Commit

8b6a4c9

1 Parent(s): 0afc4f1

Add streaming, more robust logs

Browse files

Files changed (4) hide show

app.py +105 -354
e2bqwen.py +3 -3
gradio_script.py +237 -0
scripts_and_styling.py +307 -0

app.py CHANGED Viewed

@@ -1,10 +1,12 @@
 import json
 import os
 import shutil
 import time
 import uuid
 from io import BytesIO
 from threading import Timer
 import gradio as gr
 from dotenv import load_dotenv
@@ -13,9 +15,17 @@ from gradio_modal import Modal
 from huggingface_hub import login, upload_folder
 from PIL import Image
 from smolagents import CodeAgent, InferenceClientModel
-from smolagents.gradio_ui import GradioUI, stream_to_gradio
 from e2bqwen import E2BVisionAgent, get_agent_summary_erase_images
 load_dotenv(override=True)
@@ -28,11 +38,11 @@ EXAMPLES = [
 ]
 E2B_API_KEY = os.getenv("E2B_API_KEY")
-SANDBOXES = {}
-SANDBOX_METADATA = {}
 SANDBOX_TIMEOUT = 300
-WIDTH = 1024
-HEIGHT = 768
 TMP_DIR = "./tmp/"
 if not os.path.exists(TMP_DIR):
     os.makedirs(TMP_DIR)
@@ -40,298 +50,42 @@ if not os.path.exists(TMP_DIR):
 hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")
 login(token=hf_token)
-custom_css = """
-.modal-container {
-    margin: var(--size-16) auto!important;
-}
-.sandbox-container {
-    position: relative;
-    width: 910px;
-    overflow: hidden;
-    margin: auto;
-}
-.sandbox-container {
-    height: 800px;
-}
-.sandbox-frame {
-    display: none;
-    position: absolute;
-    top: 0;
-    left: 0;
-    width: 910px;
-    height: 800px;
-    pointer-events:none;
-}
-.sandbox-iframe, .bsod-image {
-    position: absolute;
-    width: <<WIDTH>>px;
-    height: <<HEIGHT>>px;
-    border: 4px solid #444444;
-    transform-origin: 0 0;
-}
-/* Colored label for task textbox */
-.primary-color-label label span {
-    font-weight: bold;
-    color: var(--color-accent);
-}
-/* Status indicator light */
-.status-bar {
-    display: flex;
-    flex-direction: row;
-    align-items: center;
-    flex-align:center;
-    z-index: 100;
-}
-.status-indicator {
-    width: 15px;
-    height: 15px;
-    border-radius: 50%;
-}
-.status-text {
-    font-size: 16px;
-    font-weight: bold;
-    padding-left: 8px;
-    text-shadow: none;
-}
-.status-interactive {
-    background-color: #2ecc71;
-    animation: blink 2s infinite;
-}
-.status-view-only {
-    background-color: #e74c3c;
-}
-.status-error {
-    background-color: #e74c3c;
-    animation: blink-error 1s infinite;
-}
-@keyframes blink-error {
-    0% { background-color: rgba(231, 76, 60, 1); }
-    50% { background-color: rgba(231, 76, 60, 0.4); }
-    100% { background-color: rgba(231, 76, 60, 1); }
-}
-@keyframes blink {
-    0% { background-color: rgba(46, 204, 113, 1); }  /* Green at full opacity */
-    50% { background-color: rgba(46, 204, 113, 0.4); }  /* Green at 40% opacity */
-    100% { background-color: rgba(46, 204, 113, 1); }  /* Green at full opacity */
-}
-#chatbot {
-    height:1000px!important;
-}
-#chatbot .role {
-    max-width:95%
-}
-#chatbot .bubble-wrap {
-    overflow-y: visible;
-}
-.logo-container {
-    display: flex;
-    flex-direction: column;
-    align-items: flex-start;
-    width: 100%;
-    box-sizing: border-box;
-    gap: 5px;
-.logo-item {
-    display: flex;
-    align-items: center;
-    padding: 0 30px;
-    gap: 10px;
-    text-decoration: none!important;
-    color: #f59e0b;
-    font-size:17px;
-}
-.logo-item:hover {
-    color: #935f06!important;
-}
-""".replace("<<WIDTH>>", str(WIDTH + 15)).replace("<<HEIGHT>>", str(HEIGHT + 10))
-footer_html = """
-<h3 style="text-align: center; margin-top:50px;"><i>Powered by open source:</i></h2>
-<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
-<div class="logo-container">
-    <a class="logo-item" href="https://github.com/huggingface/smolagents"><i class="fa fa-github"></i>smolagents</a>
-    <a class="logo-item" href="https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct"><i class="fa fa-github"></i>Qwen2-VL-72B</a>
-    <a class="logo-item" href="https://github.com/e2b-dev/desktop"><i class="fa fa-github"></i>E2B Desktop</a>
-</div>
-"""
-sandbox_html_template = """
-<style>
-@import url('https://fonts.googleapis.com/css2?family=Oxanium:[email protected]&display=swap');
-</style>
-    <h1 style="color:var(--color-accent);margin:0;">Open Computer Agent - <i>Powered by <a href="https://github.com/huggingface/smolagents">smolagents</a></i><h1>
-<div class="sandbox-container" style="margin:0;">
-    <div class="status-bar">
-        <div class="status-indicator {status_class}"></div>
-        <div class="status-text">{status_text}</div>
-    </div>
-    <iframe id="sandbox-iframe"
-        src="{stream_url}"
-        class="sandbox-iframe"
-        style="display: block;"
-        allowfullscreen>
-    </iframe>
-    <img src="https://huggingface.co/datasets/mfarre/servedfiles/resolve/main/blue_screen_of_death.gif" class="bsod-image" style="display: none;"/>
-    <img src="https://huggingface.co/datasets/m-ric/images/resolve/main/HUD_thom.png" class="sandbox-frame" />
-</div>
-""".replace("<<WIDTH>>", str(WIDTH + 15)).replace("<<HEIGHT>>", str(HEIGHT + 10))
-custom_js = """function() {
-    document.body.classList.add('dark');
-    // Function to check if sandbox is timing out
-    const checkSandboxTimeout = function() {
-        const timeElement = document.getElementById('sandbox-creation-time');
-        if (timeElement) {
-            const creationTime = parseFloat(timeElement.getAttribute('data-time'));
-            const timeoutValue = parseFloat(timeElement.getAttribute('data-timeout'));
-            const currentTime = Math.floor(Date.now() / 1000); // Current time in seconds
-            const elapsedTime = currentTime - creationTime;
-            console.log("Sandbox running for: " + elapsedTime + " seconds of " + timeoutValue + " seconds");
-            // If we've exceeded the timeout, show BSOD
-            if (elapsedTime >= timeoutValue) {
-                console.log("Sandbox timeout! Showing BSOD");
-                showBSOD('Error');
-                // Don't set another timeout, we're done checking
-                return;
-            }
-        }
-        // Continue checking every 5 seconds
-        setTimeout(checkSandboxTimeout, 5000);
-    };
-    const showBSOD = function(statusText = 'Error') {
-        console.log("Showing BSOD with status: " + statusText);
-        const iframe = document.getElementById('sandbox-iframe');
-        const bsod = document.getElementById('bsod-image');
-        if (iframe && bsod) {
-            iframe.style.display = 'none';
-            bsod.style.display = 'block';
-            // Update status indicator
-            const statusIndicator = document.querySelector('.status-indicator');
-            const statusTextElem = document.querySelector('.status-text');
-            if (statusIndicator) {
-                statusIndicator.className = 'status-indicator status-error';
-            }
-            if (statusTextElem) {
-                statusTextElem.innerText = statusText;
-            }
-        }
-    };
-    const resetBSOD = function() {
-        console.log("Resetting BSOD display");
-        const iframe = document.getElementById('sandbox-iframe');
-        const bsod = document.getElementById('bsod-image');
-        if (iframe && bsod) {
-            if (bsod.style.display === 'block') {
-                // BSOD is currently showing, reset it
-                iframe.style.display = 'block';
-                bsod.style.display = 'none';
-                console.log("BSOD reset complete");
-                return true; // Indicates reset was performed
-            }
-        }
-        return false; // No reset needed
-    };
-    // Function to monitor for error messages
-    const monitorForErrors = function() {
-        console.log("Error monitor started");
-        const resultsInterval = setInterval(function() {
-            const resultsElements = document.querySelectorAll('textarea, .output-text');
-            for (let elem of resultsElements) {
-                const content = elem.value || elem.innerText || '';
-                if (content.includes('Error running agent')) {
-                    console.log("Error detected!");
-                    showBSOD('Error');
-                    clearInterval(resultsInterval);
-                    break;
-                }
-            }
-        }, 1000);
-    };
-    // Start monitoring for timeouts immediately
-    checkSandboxTimeout();
-    // Start monitoring for errors
-    setTimeout(monitorForErrors, 3000);
-    // Also monitor for errors after button clicks
-    document.addEventListener('click', function(e) {
-        if (e.target.tagName === 'BUTTON') {
-            if (e.target.innerText === "Let's go!") {
-                resetBSOD();
-            }
-            setTimeout(monitorForErrors, 3000);
-        }
-    });
-    // Set up an interval to click the refresh button every 5 seconds
-    setInterval(function() {
-        const btn = document.getElementById('refresh-log-btn');
-        if (btn) btn.click();
-    }, 5000);
-    // Force dark mode
-    const params = new URLSearchParams(window.location.search);
-    if (!params.has('__theme')) {
-        params.set('__theme', 'dark');
-        window.location.search = params.toString();
-    }
-}
-"""
-def upload_to_hf_and_remove(folder_path):
-    repo_id = "smolagents/computer-agent-logs"
-    try:
-        folder_name = os.path.basename(os.path.normpath(folder_path))
-        # Upload the folder to Huggingface
-        print(f"Uploading {folder_path} to {repo_id}/{folder_name}...")
-        url = upload_folder(
-            folder_path=folder_path,
             repo_id=repo_id,
             repo_type="dataset",
-            path_in_repo=folder_name,
             ignore_patterns=[".git/*", ".gitignore"],
         )
-        # Remove the local folder after successful upload
-        print(f"Upload complete. Removing local folder {folder_path}...")
-        shutil.rmtree(folder_path)
-        print("Local folder removed successfully.")
-        return url
-    except Exception as e:
-        print(f"Error during upload or cleanup: {str(e)}")
-        raise
 def cleanup_sandboxes():
@@ -432,9 +186,10 @@ def generate_interaction_id(session_uuid):
 def save_final_status(folder, status: str, summary, error_message=None) -> None:
-    with open(os.path.join(folder, "metadata.json"), "w") as output_file:
         output_file.write(
-            json.dumps(
                 {"status": status, "summary": summary, "error_message": error_message},
             )
         )
@@ -468,13 +223,16 @@ def create_agent(data_dir, desktop):
         model=model,
         data_dir=data_dir,
         desktop=desktop,
-        max_steps=200,
         verbosity_level=2,
         # planning_interval=10,
         use_v1_prompt=True,
     )
 class EnrichedGradioUI(GradioUI):
     def log_user_message(self, text_input):
         import gradio as gr
@@ -495,8 +253,10 @@ class EnrichedGradioUI(GradioUI):
     ):
         interaction_id = generate_interaction_id(session_uuid)
         desktop = get_or_create_sandbox(session_uuid)
         data_dir = os.path.join(TMP_DIR, interaction_id)
         if not os.path.exists(data_dir):
             os.makedirs(data_dir)
@@ -504,20 +264,31 @@ class EnrichedGradioUI(GradioUI):
         session_state["agent"] = create_agent(data_dir=data_dir, desktop=desktop)
         try:
-            stored_messages.append(gr.ChatMessage(role="user", content=task_input))
             yield stored_messages
             screenshot_bytes = session_state["agent"].desktop.screenshot(format="bytes")
             initial_screenshot = Image.open(BytesIO(screenshot_bytes))
             for msg in stream_to_gradio(
                 session_state["agent"],
                 task=task_input,
-                task_images=[initial_screenshot],
                 reset_agent_memory=False,
             ):
                 if (
                     hasattr(session_state["agent"], "last_marked_screenshot")
                     and msg.content == "-----"
                 ):  # Append the last screenshot before the end of step
                     stored_messages.append(
@@ -529,35 +300,46 @@ class EnrichedGradioUI(GradioUI):
                                 ].last_marked_screenshot.to_string(),
                                 "mime_type": "image/png",
                             },
                         )
                     )
-                stored_messages.append(msg)
                 yield stored_messages
-            # THIS ERASES IMAGES FROM AGENT MEMORY, USE WITH CAUTION
-            if consent_storage and not task_input in EXAMPLES:
-                summary = get_agent_summary_erase_images(session_state["agent"])
-                save_final_status(data_dir, "completed", summary=summary)
             yield stored_messages
         except Exception as e:
             error_message = f"Error in interaction: {str(e)}"
-            raise e
             print(error_message)
             stored_messages.append(
                 gr.ChatMessage(
                     role="assistant", content="Run failed:\n" + error_message
                 )
             )
-            if consent_storage:
                 summary = get_agent_summary_erase_images(session_state["agent"])
                 save_final_status(
-                    data_dir, "failed", summary=summary, error_message=error_message
                 )
-            yield stored_messages
-        finally:
-            if consent_storage:
-                upload_to_hf_and_remove(data_dir)
 theme = gr.themes.Default(
@@ -565,7 +347,7 @@ theme = gr.themes.Default(
 )
 # Create a Gradio app with Blocks
-with gr.Blocks(theme=theme, css=custom_css, js=custom_js) as demo:
     # Storing session hash in a state variable
     session_uuid_state = gr.State(None)
     print("Starting the app!")
@@ -588,7 +370,7 @@ In this app, you'll be able to interact with an agent powered by [smolagents](ht
 _Please note that we store the task logs by default so **do not write any personal information**; you can uncheck the logs storing on the task bar._
 """)
             task_input = gr.Textbox(
-                value="Find me pictures of cute puppies",
                 label="Enter your task below:",
                 elem_classes="primary-color-label",
             )
@@ -620,54 +402,13 @@ _Please note that we store the task logs by default so **do not write any person
                 """.strip()
             )
-            def apply_theme(minimalist_mode: bool):
-                if not minimalist_mode:
-                    return """
-                        <style>
-                        .sandbox-frame {
-                            display: block!important;
-                        }
-                        .sandbox-iframe, .bsod-image {
-                            /* top: 73px; */
-                            top: 99px;
-                            /* left: 74px; */
-                            left: 110px;
-                        }
-                        .sandbox-iframe {
-                            transform: scale(0.667);
-                            /* transform: scale(0.59); */
-                        }
-                        .status-bar {
-                            position: absolute;
-                            bottom: 88px;
-                            left: 355px;
-                        }
-                        .status-text {
-                            color: #fed244;
-                        }
-                        </style>
-                    """
-                else:
-                    return """
-                        <style>
-                        .sandbox-container {
-                            height: 700px!important;
-                        }
-                        .sandbox-iframe {
-                            transform: scale(0.65);
-                        }
-                        </style>
-                    """
             # Hidden HTML element to inject CSS dynamically
             theme_styles = gr.HTML(apply_theme(False), visible=False)
             minimalist_toggle.change(
                 fn=apply_theme, inputs=[minimalist_toggle], outputs=[theme_styles]
             )
-            footer = gr.HTML(value=footer_html, label="Header")
     chatbot_display = gr.Chatbot(
         elem_id="chatbot",
@@ -738,15 +479,13 @@ _Please note that we store the task logs by default so **do not write any person
     def interrupt_agent(session_state):
         if not session_state["agent"].interrupt_switch:
             session_state["agent"].interrupt()
             return gr.Button("Stopping agent... (could take time)", variant="secondary")
         else:
             return gr.Button("Stop the agent!", variant="huggingface")
     stop_btn.click(fn=interrupt_agent, inputs=[session_state], outputs=[stop_btn])
-    def set_logs_source(session_state):
-        session_state["replay_log"] = "udupp2fyavq_1743170323"
     demo.load(
         fn=lambda: True,  # dummy to trigger the load
         outputs=[is_interactive],
@@ -757,6 +496,18 @@ _Please note that we store the task logs by default so **do not write any person
         outputs=[sandbox_html, session_uuid_state],
     )
 # Launch the app
 if __name__ == "__main__":
     Timer(60, cleanup_sandboxes).start()  # Run every minute

 import json
 import os
 import shutil
+import tempfile
 import time
 import uuid
 from io import BytesIO
 from threading import Timer
+from typing import Any
 import gradio as gr
 from dotenv import load_dotenv
 from huggingface_hub import login, upload_folder
 from PIL import Image
 from smolagents import CodeAgent, InferenceClientModel
+from smolagents.gradio_ui import GradioUI
 from e2bqwen import E2BVisionAgent, get_agent_summary_erase_images
+from gradio_script import stream_to_gradio
+from scripts_and_styling import (
+    CUSTOM_JS,
+    FOOTER_HTML,
+    SANDBOX_CSS_TEMPLATE,
+    SANDBOX_HTML_TEMPLATE,
+    apply_theme,
+)
 load_dotenv(override=True)
 ]
 E2B_API_KEY = os.getenv("E2B_API_KEY")
+SANDBOXES: dict[str, Sandbox] = {}
+SANDBOX_METADATA: dict[str, dict[str, Any]] = {}
 SANDBOX_TIMEOUT = 300
+WIDTH = 1280
+HEIGHT = 960
 TMP_DIR = "./tmp/"
 if not os.path.exists(TMP_DIR):
     os.makedirs(TMP_DIR)
 hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")
 login(token=hf_token)
+custom_css = SANDBOX_CSS_TEMPLATE.replace("<<WIDTH>>", str(WIDTH + 15)).replace(
+    "<<HEIGHT>>", str(HEIGHT + 10)
+)
+sandbox_html_template = SANDBOX_HTML_TEMPLATE.replace(
+    "<<WIDTH>>", str(WIDTH + 15)
+).replace("<<HEIGHT>>", str(HEIGHT + 10))
+def upload_to_hf_and_remove(folder_paths: list[str]):
+    repo_id = "smolagents/computer-agent-logs-2"
+    with tempfile.TemporaryDirectory(dir=TMP_DIR) as temp_dir:
+        print(f"Preparing to upload {len(folder_paths)} folders to {repo_id}...")
+        # Copy all folders into the temporary directory
+        for folder_path in folder_paths:
+            folder_name = os.path.basename(os.path.normpath(folder_path))
+            target_path = os.path.join(temp_dir, folder_name)
+            print(f"Copying {folder_path} to temporary directory...")
+            shutil.copytree(folder_path, target_path)
+            # Remove the original folder after copying
+            shutil.rmtree(folder_path)
+            print(f"Original folder {folder_path} removed.")
+        # Upload the entire temporary directory
+        print(f"Uploading all folders to {repo_id}...")
+        upload_folder(
+            folder_path=temp_dir,
             repo_id=repo_id,
             repo_type="dataset",
             ignore_patterns=[".git/*", ".gitignore"],
         )
+        print("Upload complete.")
+        return f"Successfully uploaded {len(folder_paths)} folders to {repo_id}"
 def cleanup_sandboxes():
 def save_final_status(folder, status: str, summary, error_message=None) -> None:
     """Append the final status of an interaction to ``metadata.jsonl`` in ``folder``.

     The record is a single JSON object with the keys ``status``, ``summary``
     and ``error_message``. A newline separator is written before the record
     only when the file already holds content that does not end with a
     newline, so the file never starts with (or contains) a blank line.

     Args:
         folder: Directory that contains (or will contain) ``metadata.jsonl``.
         status: Final status label stored under ``"status"``.
         summary: JSON-serializable summary stored under ``"summary"``.
         error_message: Optional error text stored under ``"error_message"``.
     """
     metadata_path = os.path.join(folder, "metadata.jsonl")
     record = json.dumps(
         {"status": status, "summary": summary, "error_message": error_message},
     )

     # Look at the last byte of any existing content to decide whether a line
     # separator is needed before the new record.
     needs_separator = False
     try:
         with open(metadata_path, "rb") as existing_file:
             existing_file.seek(0, os.SEEK_END)
             if existing_file.tell() > 0:
                 existing_file.seek(-1, os.SEEK_END)
                 needs_separator = existing_file.read(1) != b"\n"
     except FileNotFoundError:
         # First record of a new file: no separator required.
         pass

     with open(metadata_path, "a", encoding="utf-8") as output_file:
         output_file.write(("\n" if needs_separator else "") + record)
         model=model,
         data_dir=data_dir,
         desktop=desktop,
+        max_steps=20,
         verbosity_level=2,
         # planning_interval=10,
         use_v1_prompt=True,
     )
+INTERACTION_IDS = {}
 class EnrichedGradioUI(GradioUI):
     def log_user_message(self, text_input):
         import gradio as gr
     ):
         interaction_id = generate_interaction_id(session_uuid)
         desktop = get_or_create_sandbox(session_uuid)
+        INTERACTION_IDS[interaction_id] = session_uuid
         data_dir = os.path.join(TMP_DIR, interaction_id)
         if not os.path.exists(data_dir):
             os.makedirs(data_dir)
         session_state["agent"] = create_agent(data_dir=data_dir, desktop=desktop)
         try:
+            stored_messages.append(
+                gr.ChatMessage(
+                    role="user", content=task_input, metadata={"status": "done"}
+                )
+            )
             yield stored_messages
+            with open(os.path.join(data_dir, "metadata.jsonl"), "w") as output_file:
+                output_file.write(
+                    json.dumps(
+                        {"task": task_input},
+                    )
+                )
             screenshot_bytes = session_state["agent"].desktop.screenshot(format="bytes")
             initial_screenshot = Image.open(BytesIO(screenshot_bytes))
             for msg in stream_to_gradio(
                 session_state["agent"],
                 task=task_input,
                 reset_agent_memory=False,
+                task_images=[initial_screenshot],
             ):
                 if (
                     hasattr(session_state["agent"], "last_marked_screenshot")
+                    and isinstance(msg, gr.ChatMessage)
                     and msg.content == "-----"
                 ):  # Append the last screenshot before the end of step
                     stored_messages.append(
                                 ].last_marked_screenshot.to_string(),
                                 "mime_type": "image/png",
                             },
+                            metadata={"status": "done"},
                         )
                     )
+                if isinstance(msg, gr.ChatMessage):
+                    stored_messages.append(msg)
+                elif isinstance(msg, str):  # Then it's only a completion delta
+                    try:
+                        if stored_messages[-1].metadata["status"] == "pending":
+                            stored_messages[-1].content = msg
+                        else:
+                            stored_messages.append(
+                                gr.ChatMessage(
+                                    role="assistant",
+                                    content=msg,
+                                    metadata={"status": "pending"},
+                                )
+                            )
+                    except Exception as e:
+                        raise e
                 yield stored_messages
+            status = "completed"
             yield stored_messages
         except Exception as e:
             error_message = f"Error in interaction: {str(e)}"
             print(error_message)
             stored_messages.append(
                 gr.ChatMessage(
                     role="assistant", content="Run failed:\n" + error_message
                 )
             )
+            status = "failed"
+            yield stored_messages
+        finally:
+            if consent_storage and task_input not in EXAMPLES:
                 summary = get_agent_summary_erase_images(session_state["agent"])
                 save_final_status(
+                    data_dir, status, summary=summary, error_message=error_message
                 )
 theme = gr.themes.Default(
 )
 # Create a Gradio app with Blocks
+with gr.Blocks(theme=theme, css=custom_css, js=CUSTOM_JS) as demo:
     # Storing session hash in a state variable
     session_uuid_state = gr.State(None)
     print("Starting the app!")
 _Please note that we store the task logs by default so **do not write any personal information**; you can uncheck the logs storing on the task bar._
 """)
             task_input = gr.Textbox(
+                placeholder="Find me pictures of cute puppies",
                 label="Enter your task below:",
                 elem_classes="primary-color-label",
             )
                 """.strip()
             )
             # Hidden HTML element to inject CSS dynamically
             theme_styles = gr.HTML(apply_theme(False), visible=False)
             minimalist_toggle.change(
                 fn=apply_theme, inputs=[minimalist_toggle], outputs=[theme_styles]
             )
+            footer = gr.HTML(value=FOOTER_HTML, label="Footer")
     chatbot_display = gr.Chatbot(
         elem_id="chatbot",
     def interrupt_agent(session_state):
         """Ask the session's agent to stop and return the updated stop button.

         Args:
             session_state: Per-session mapping; the running agent, if any, is
                 stored under the ``"agent"`` key.

         Returns:
             A ``gr.Button`` labelled "Stopping agent..." when an interrupt was
             just requested, otherwise the default "Stop the agent!" button.
         """
         try:
             agent = session_state["agent"]
         except (KeyError, TypeError):
             # Stop was clicked before any agent was created for this session:
             # there is nothing to interrupt, so keep the default button.
             return gr.Button("Stop the agent!", variant="huggingface")

         if not agent.interrupt_switch:
             agent.interrupt()
             print("Stopping agent...")
             return gr.Button("Stopping agent... (could take time)", variant="secondary")
         return gr.Button("Stop the agent!", variant="huggingface")
     stop_btn.click(fn=interrupt_agent, inputs=[session_state], outputs=[stop_btn])
     demo.load(
         fn=lambda: True,  # dummy to trigger the load
         outputs=[is_interactive],
         outputs=[sandbox_html, session_uuid_state],
     )
+    def upload_interaction_logs():
+        data_dirs = []
+        for interaction_id in list(INTERACTION_IDS.keys()):
+            data_dir = os.path.join(TMP_DIR, interaction_id)
+            if os.path.exists(data_dir):
+                data_dirs.append(data_dir)
+                INTERACTION_IDS.pop(interaction_id)
+        upload_to_hf_and_remove(data_dirs)
+    demo.unload(fn=upload_interaction_logs)
 # Launch the app
 if __name__ == "__main__":
     Timer(60, cleanup_sandboxes).start()  # Run every minute

e2bqwen.py CHANGED Viewed

@@ -3,7 +3,7 @@ import time
 import unicodedata
 from datetime import datetime
 from io import BytesIO
-from typing import Any, Dict, List, Optional
 # E2B imports
 from e2b_desktop import Sandbox
@@ -13,7 +13,6 @@ from PIL import Image, ImageDraw
 from smolagents import CodeAgent, HfApiModel, tool
 from smolagents.agent_types import AgentImage
 from smolagents.memory import ActionStep, TaskStep
-from smolagents.models import ChatMessage, Model
 from smolagents.monitoring import LogLevel
 E2B_SYSTEM_PROMPT_TEMPLATE = """You are a desktop automation assistant that can control a remote desktop environment. The current date is <<current_date>>.
@@ -189,6 +188,7 @@ class E2BVisionAgent(CodeAgent):
             max_steps=max_steps,
             verbosity_level=verbosity_level,
             planning_interval=self.planning_interval,
             **kwargs,
         )
         self.prompt_templates["system_prompt"] = E2B_SYSTEM_PROMPT_TEMPLATE.replace(
@@ -456,4 +456,4 @@ class E2BVisionAgent(CodeAgent):
             print("Stopping e2b stream and killing sandbox...")
             self.desktop.stream.stop()
             self.desktop.kill()
-            print("E2B sandbox terminated")

 import unicodedata
 from datetime import datetime
 from io import BytesIO
+from typing import List
 # E2B imports
 from e2b_desktop import Sandbox
 from smolagents import CodeAgent, HfApiModel, tool
 from smolagents.agent_types import AgentImage
 from smolagents.memory import ActionStep, TaskStep
 from smolagents.monitoring import LogLevel
 E2B_SYSTEM_PROMPT_TEMPLATE = """You are a desktop automation assistant that can control a remote desktop environment. The current date is <<current_date>>.
             max_steps=max_steps,
             verbosity_level=verbosity_level,
             planning_interval=self.planning_interval,
+            stream_outputs=True,
             **kwargs,
         )
         self.prompt_templates["system_prompt"] = E2B_SYSTEM_PROMPT_TEMPLATE.replace(
             print("Stopping e2b stream and killing sandbox...")
             self.desktop.stream.stop()
             self.desktop.kill()
+            print("E2B sandbox terminated")

gradio_script.py ADDED Viewed

	@@ -0,0 +1,237 @@

+import re
+from smolagents.agent_types import AgentAudio, AgentImage, AgentText
+from smolagents.agents import PlanningStep
+from smolagents.gradio_ui import get_step_footnote_content
+from smolagents.memory import ActionStep, FinalAnswerStep, MemoryStep
+from smolagents.models import ChatMessageStreamDelta
+from smolagents.utils import _is_package_available
+def pull_messages_from_step(step_log: MemoryStep, skip_model_outputs: bool = False):
+    """Extract ChatMessage objects from agent steps with proper nesting.
+    Every message is yielded with role "assistant" and metadata status "done".
+    Args:
+        step_log: The step log to display as gr.ChatMessage objects.
+        skip_model_outputs: If True, skip the step header and the model outputs when creating
+            the gr.ChatMessage objects. This is used for instance when streaming model outputs
+            have already been displayed.
+    Yields:
+        gr.ChatMessage: One message per displayable part of the step.
+    Raises:
+        ModuleNotFoundError: If the gradio package is not available.
+        ValueError: If step_log is not an ActionStep, PlanningStep or FinalAnswerStep.
+    """
+    if not _is_package_available("gradio"):
+        raise ModuleNotFoundError(
+            "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
+        )
+    import gradio as gr
+    if isinstance(step_log, ActionStep):
+        # Output the step number
+        step_number = (
+            f"Step {step_log.step_number}"
+            if step_log.step_number is not None
+            else "Step"
+        )
+        if not skip_model_outputs:
+            yield gr.ChatMessage(
+                role="assistant",
+                content=f"**{step_number}**",
+                metadata={"status": "done"},
+            )
+            # Then yield the thought/reasoning from the LLM
+            model_output = getattr(step_log, "model_output", None)
+            if model_output is not None:
+                model_output = model_output.strip()
+                # Remove any trailing <end_code> and extra backticks, handling multiple possible formats
+                model_output = re.sub(
+                    r"```\s*<end_code>", "```", model_output
+                )  # handles ```<end_code>
+                model_output = re.sub(
+                    r"<end_code>\s*```", "```", model_output
+                )  # handles <end_code>```
+                model_output = re.sub(
+                    r"```\s*\n\s*<end_code>", "```", model_output
+                )  # handles ```\n<end_code>
+                model_output = model_output.strip()
+                yield gr.ChatMessage(
+                    role="assistant", content=model_output, metadata={"status": "done"}
+                )
+        # For tool calls, display the first call as a titled message.
+        # The truthiness test also skips an empty list, which has no first call to index.
+        tool_calls = getattr(step_log, "tool_calls", None)
+        if tool_calls:
+            first_tool_call = tool_calls[0]
+            used_code = first_tool_call.name == "python_interpreter"
+            # Handle arguments based on type
+            args = first_tool_call.arguments
+            if isinstance(args, dict):
+                content = str(args.get("answer", str(args)))
+            else:
+                content = str(args).strip()
+            if used_code:
+                # Clean up the content by removing any end code tags
+                content = re.sub(
+                    r"```.*?\n", "", content
+                )  # Remove existing code blocks
+                content = re.sub(
+                    r"\s*<end_code>\s*", "", content
+                )  # Remove end_code tags
+                content = content.strip()
+                if not content.startswith("```python"):
+                    content = f"```python\n{content}\n```"
+            yield gr.ChatMessage(
+                role="assistant",
+                content=content,
+                metadata={
+                    "title": f"🛠️ Used tool {first_tool_call.name}",
+                    "status": "done",
+                },
+            )
+        # Display execution logs only if there is actual content
+        observations = getattr(step_log, "observations", None)
+        if observations is not None and observations.strip():
+            log_content = re.sub(r"^Execution logs:\s*", "", observations.strip())
+            yield gr.ChatMessage(
+                role="assistant",
+                content=f"```bash\n{log_content}\n",
+                metadata={"title": "📝 Execution Logs", "status": "done"},
+            )
+        # Display the step error, if any, exactly once
+        if getattr(step_log, "error", None) is not None:
+            yield gr.ChatMessage(
+                role="assistant",
+                content=str(step_log.error),
+                metadata={"title": "💥 Error", "status": "done"},
+            )
+        # Display the images observed during the step
+        for image in getattr(step_log, "observations_images", None) or []:
+            path_image = AgentImage(image).to_string()
+            yield gr.ChatMessage(
+                role="assistant",
+                content={
+                    "path": path_image,
+                    "mime_type": f"image/{path_image.split('.')[-1]}",
+                },
+                metadata={"title": "🖼️ Output Image", "status": "done"},
+            )
+        yield gr.ChatMessage(
+            role="assistant",
+            content=get_step_footnote_content(step_log, step_number),
+            metadata={"status": "done"},
+        )
+        yield gr.ChatMessage(
+            role="assistant", content="-----", metadata={"status": "done"}
+        )
+    elif isinstance(step_log, PlanningStep):
+        yield gr.ChatMessage(
+            role="assistant", content="**Planning step**", metadata={"status": "done"}
+        )
+        yield gr.ChatMessage(
+            role="assistant", content=step_log.plan, metadata={"status": "done"}
+        )
+        yield gr.ChatMessage(
+            role="assistant",
+            content=get_step_footnote_content(step_log, "Planning step"),
+            metadata={"status": "done"},
+        )
+        yield gr.ChatMessage(
+            role="assistant", content="-----", metadata={"status": "done"}
+        )
+    elif isinstance(step_log, FinalAnswerStep):
+        final_answer = step_log.final_answer
+        if isinstance(final_answer, AgentText):
+            yield gr.ChatMessage(
+                role="assistant",
+                content=f"**Final answer:**\n{final_answer.to_string()}\n",
+                metadata={"status": "done"},
+            )
+        elif isinstance(final_answer, AgentImage):
+            yield gr.ChatMessage(
+                role="assistant",
+                content={"path": final_answer.to_string(), "mime_type": "image/png"},
+                metadata={"status": "done"},
+            )
+        elif isinstance(final_answer, AgentAudio):
+            yield gr.ChatMessage(
+                role="assistant",
+                content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
+                metadata={"status": "done"},
+            )
+        else:
+            yield gr.ChatMessage(
+                role="assistant",
+                content=f"**Final answer:** {str(final_answer)}",
+                metadata={"status": "done"},
+            )
+    else:
+        raise ValueError(f"Unsupported step type: {type(step_log)}")
+def stream_to_gradio(
+    agent,
+    task: str,
+    task_images: list | None = None,
+    reset_agent_memory: bool = False,
+    additional_args: dict | None = None,
+):
+    """Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages.
+    Args:
+        agent: The agent to run; its `run` method is called with `stream=True`.
+        task: The task passed to `agent.run`.
+        task_images: Optional images passed to `agent.run` as `images`.
+        reset_agent_memory: Passed to `agent.run` as `reset`.
+        additional_args: Passed through to `agent.run`.
+    Yields:
+        For each MemoryStep, the messages produced by `pull_messages_from_step`.
+        For each ChatMessageStreamDelta, the text streamed so far for the step in
+        progress, as a plain string.
+    Raises:
+        ModuleNotFoundError: If the gradio package is not available.
+    """
+    if not _is_package_available("gradio"):
+        raise ModuleNotFoundError(
+            "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
+        )
+    intermediate_text = ""
+    for step_log in agent.run(
+        task,
+        images=task_images,
+        stream=True,
+        reset=reset_agent_memory,
+        additional_args=additional_args,
+    ):
+        # Copy the model's latest token counts onto the step, if the model provides them.
+        # No running totals are kept: they were never read, and summing them on every
+        # streamed event failed whenever the output count was None.
+        last_input_tokens = getattr(agent.model, "last_input_token_count", None)
+        if last_input_tokens is not None and isinstance(
+            step_log, (ActionStep, PlanningStep)
+        ):
+            step_log.input_token_count = last_input_tokens
+            step_log.output_token_count = getattr(
+                agent.model, "last_output_token_count", None
+            )
+        if isinstance(step_log, MemoryStep):
+            # A finished step ends the current streamed text
+            intermediate_text = ""
+            yield from pull_messages_from_step(
+                step_log,
+                # If we're streaming model outputs, no need to display them twice
+                skip_model_outputs=getattr(agent, "stream_outputs", False),
+            )
+        elif isinstance(step_log, ChatMessageStreamDelta):
+            intermediate_text += step_log.content or ""
+            yield intermediate_text

scripts_and_styling.py ADDED Viewed

	@@ -0,0 +1,307 @@

+# Markup of the sandbox view. {status_class}, {status_text} and {stream_url} are
+# placeholders to be filled in by the caller.
+# The BSOD image carries an id as well as a class so that it can be found with
+# document.getElementById('bsod-image').
+SANDBOX_HTML_TEMPLATE = """
+<style>
+@import url('https://fonts.googleapis.com/css2?family=Oxanium:[email protected]&display=swap');
+</style>
+    <h1 style="color:var(--color-accent);margin:0;">Open Computer Agent - <i>Powered by <a href="https://github.com/huggingface/smolagents">smolagents</a></i></h1>
+<div class="sandbox-container" style="margin:0;">
+    <div class="status-bar">
+        <div class="status-indicator {status_class}"></div>
+        <div class="status-text">{status_text}</div>
+    </div>
+    <iframe id="sandbox-iframe"
+        src="{stream_url}"
+        class="sandbox-iframe"
+        style="display: block;"
+        allowfullscreen>
+    </iframe>
+    <img id="bsod-image" src="https://huggingface.co/datasets/mfarre/servedfiles/resolve/main/blue_screen_of_death.gif" class="bsod-image" style="display: none;"/>
+    <img src="https://huggingface.co/datasets/m-ric/images/resolve/main/HUD_thom.png" class="sandbox-frame" />
+</div>
+"""
+# Stylesheet for the app. <<WIDTH>> and <<HEIGHT>> are placeholders that are
+# presumably replaced with the sandbox resolution by the caller -- TODO confirm.
+SANDBOX_CSS_TEMPLATE = """
+.modal-container {
+    margin: var(--size-16) auto!important;
+}
+.sandbox-container {
+    position: relative;
+    width: 910px;
+    height: 800px;
+    overflow: hidden;
+    margin: auto;
+}
+.sandbox-frame {
+    display: none;
+    position: absolute;
+    top: 0;
+    left: 0;
+    width: 910px;
+    height: 800px;
+    pointer-events:none;
+}
+.sandbox-iframe, .bsod-image {
+    position: absolute;
+    width: <<WIDTH>>px;
+    height: <<HEIGHT>>px;
+    border: 4px solid #444444;
+    transform-origin: 0 0;
+}
+/* Colored label for task textbox */
+.primary-color-label label span {
+    font-weight: bold;
+    color: var(--color-accent);
+}
+/* Status indicator light */
+.status-bar {
+    display: flex;
+    flex-direction: row;
+    align-items: center;
+    z-index: 100;
+}
+.status-indicator {
+    width: 15px;
+    height: 15px;
+    border-radius: 50%;
+}
+.status-text {
+    font-size: 16px;
+    font-weight: bold;
+    padding-left: 8px;
+    text-shadow: none;
+}
+.status-interactive {
+    background-color: #2ecc71;
+    animation: blink 2s infinite;
+}
+.status-view-only {
+    background-color: #e74c3c;
+}
+.status-error {
+    background-color: #e74c3c;
+    animation: blink-error 1s infinite;
+}
+@keyframes blink-error {
+    0% { background-color: rgba(231, 76, 60, 1); }
+    50% { background-color: rgba(231, 76, 60, 0.4); }
+    100% { background-color: rgba(231, 76, 60, 1); }
+}
+@keyframes blink {
+    0% { background-color: rgba(46, 204, 113, 1); }  /* Green at full opacity */
+    50% { background-color: rgba(46, 204, 113, 0.4); }  /* Green at 40% opacity */
+    100% { background-color: rgba(46, 204, 113, 1); }  /* Green at full opacity */
+}
+#chatbot {
+    height:1000px!important;
+}
+#chatbot .role {
+    max-width:95%;
+}
+#chatbot .bubble-wrap {
+    overflow-y: visible;
+}
+.logo-container {
+    display: flex;
+    flex-direction: column;
+    align-items: flex-start;
+    width: 100%;
+    box-sizing: border-box;
+    gap: 5px;
+}
+.logo-item {
+    display: flex;
+    align-items: center;
+    padding: 0 30px;
+    gap: 10px;
+    text-decoration: none!important;
+    color: #f59e0b;
+    font-size:17px;
+}
+.logo-item:hover {
+    color: #935f06!important;
+}
+"""
+# Footer markup: a centered heading followed by links to the open-source projects used.
+FOOTER_HTML = """
+<h3 style="text-align: center; margin-top:50px;"><i>Powered by open source:</i></h3>
+<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
+<div class="logo-container">
+    <a class="logo-item" href="https://github.com/huggingface/smolagents"><i class="fa fa-github"></i>smolagents</a>
+    <a class="logo-item" href="https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct"><i class="fa fa-github"></i>Qwen2-VL-72B</a>
+    <a class="logo-item" href="https://github.com/e2b-dev/desktop"><i class="fa fa-github"></i>E2B Desktop</a>
+</div>
+"""
+# JavaScript function source for the page: forces dark mode, shows a BSOD image when the
+# sandbox times out or an agent error appears, and clicks the refresh-log button every 5 seconds.
+CUSTOM_JS = """function() {
+    document.body.classList.add('dark');
+    // Look the BSOD image up by id first, then by class: the sandbox markup
+    // may tag it with class="bsod-image" only.
+    const findBSODImage = function() {
+        return document.getElementById('bsod-image') || document.querySelector('.bsod-image');
+    };
+    // Function to check if sandbox is timing out
+    const checkSandboxTimeout = function() {
+        const timeElement = document.getElementById('sandbox-creation-time');
+        if (timeElement) {
+            const creationTime = parseFloat(timeElement.getAttribute('data-time'));
+            const timeoutValue = parseFloat(timeElement.getAttribute('data-timeout'));
+            const currentTime = Math.floor(Date.now() / 1000); // Current time in seconds
+            const elapsedTime = currentTime - creationTime;
+            console.log("Sandbox running for: " + elapsedTime + " seconds of " + timeoutValue + " seconds");
+            // If we've exceeded the timeout, show BSOD
+            if (elapsedTime >= timeoutValue) {
+                console.log("Sandbox timeout! Showing BSOD");
+                showBSOD('Error');
+                // Don't set another timeout, we're done checking
+                return;
+            }
+        }
+        // Continue checking every 5 seconds
+        setTimeout(checkSandboxTimeout, 5000);
+    };
+    const showBSOD = function(statusText = 'Error') {
+        console.log("Showing BSOD with status: " + statusText);
+        const iframe = document.getElementById('sandbox-iframe');
+        const bsod = findBSODImage();
+        if (iframe && bsod) {
+            iframe.style.display = 'none';
+            bsod.style.display = 'block';
+            // Update status indicator
+            const statusIndicator = document.querySelector('.status-indicator');
+            const statusTextElem = document.querySelector('.status-text');
+            if (statusIndicator) {
+                statusIndicator.className = 'status-indicator status-error';
+            }
+            if (statusTextElem) {
+                statusTextElem.innerText = statusText;
+            }
+        }
+    };
+    const resetBSOD = function() {
+        console.log("Resetting BSOD display");
+        const iframe = document.getElementById('sandbox-iframe');
+        const bsod = findBSODImage();
+        if (iframe && bsod) {
+            if (bsod.style.display === 'block') {
+                // BSOD is currently showing, reset it
+                iframe.style.display = 'block';
+                bsod.style.display = 'none';
+                console.log("BSOD reset complete");
+                return true; // Indicates reset was performed
+            }
+        }
+        return false; // No reset needed
+    };
+    // Handle of the running error monitor, kept so that at most one is active:
+    // the monitor is restarted after every button click.
+    let errorMonitorInterval = null;
+    // Function to monitor for error messages
+    const monitorForErrors = function() {
+        console.log("Error monitor started");
+        if (errorMonitorInterval !== null) {
+            clearInterval(errorMonitorInterval);
+        }
+        errorMonitorInterval = setInterval(function() {
+            const resultsElements = document.querySelectorAll('textarea, .output-text');
+            for (let elem of resultsElements) {
+                const content = elem.value || elem.innerText || '';
+                if (content.includes('Error running agent')) {
+                    console.log("Error detected!");
+                    showBSOD('Error');
+                    clearInterval(errorMonitorInterval);
+                    errorMonitorInterval = null;
+                    break;
+                }
+            }
+        }, 1000);
+    };
+    // Start monitoring for timeouts immediately
+    checkSandboxTimeout();
+    // Start monitoring for errors
+    setTimeout(monitorForErrors, 3000);
+    // Also monitor for errors after button clicks
+    document.addEventListener('click', function(e) {
+        if (e.target.tagName === 'BUTTON') {
+            if (e.target.innerText === "Let's go!") {
+                resetBSOD();
+            }
+            setTimeout(monitorForErrors, 3000);
+        }
+    });
+    // Set up an interval to click the refresh button every 5 seconds
+    setInterval(function() {
+        const btn = document.getElementById('refresh-log-btn');
+        if (btn) btn.click();
+    }, 5000);
+    // Force dark mode
+    const params = new URLSearchParams(window.location.search);
+    if (!params.has('__theme')) {
+        params.set('__theme', 'dark');
+        window.location.search = params.toString();
+    }
+}
+"""
+def apply_theme(minimalist_mode: bool):
+    """Return the <style> snippet matching the requested display mode.
+    Args:
+        minimalist_mode: When truthy, return the compact styling (700px container,
+            iframe scaled to 0.7). Otherwise return the framed styling, which shows
+            the .sandbox-frame overlay, offsets and scales the iframe to 0.535, and
+            positions the status bar absolutely.
+    Returns:
+        A string containing one <style> element.
+    """
+    if minimalist_mode:
+        return """
+            <style>
+            .sandbox-container {
+                height: 700px!important;
+            }
+            .sandbox-iframe {
+                transform: scale(0.7);
+            }
+            </style>
+        """
+    return """
+            <style>
+            .sandbox-frame {
+                display: block!important;
+            }
+            .sandbox-iframe, .bsod-image {
+                /* top: 73px; */
+                top: 99px;
+                /* left: 74px; */
+                left: 110px;
+            }
+            .sandbox-iframe {
+                transform: scale(0.535);
+            }
+            .status-bar {
+                position: absolute;
+                bottom: 88px;
+                left: 355px;
+            }
+            .status-text {
+                color: #fed244;
+            }
+            </style>
+        """