"""DataForge - Gradio front-end for an AI CodeAct agent.

Flow: the user uploads a file and asks a question; the agent writes Python
analysis code which runs locally in a Deno-backed Pyodide sandbox, so the
file contents never have to fit inside an LLM context window.
"""

import asyncio
import os
import shutil
import subprocess
import tempfile

import gradio as gr
from dotenv import find_dotenv, load_dotenv
from langchain.chat_models import init_chat_model


# --- Deno bootstrap -------------------------------------------------------
# PyodideSandbox needs a Deno runtime. Deployment platforms (e.g. Hugging
# Face Spaces) may not ship one, so we try to install it at startup.

def ensure_deno_installed():
    """Install Deno if not already installed (for Hugging Face Spaces compatibility).

    Returns:
        bool: True when a working ``deno`` binary is available afterwards.
    """
    try:
        # Fast path: a deno binary is already on PATH.
        result = subprocess.run(['deno', '--version'], capture_output=True, text=True)
        if result.returncode == 0:
            # `deno --version` prints e.g. "deno 1.x.y ..." -> token [1] is the version.
            print(f"βœ… Deno already installed: {result.stdout.split()[1]}")
            return True
    except FileNotFoundError:
        # No `deno` executable on PATH at all; fall through to the installer.
        pass

    print("πŸ”§ Deno not found. Installing Deno for PyodideSandbox...")
    try:
        # Official installer script. shell=True is required for the pipe;
        # NOTE(review): this pipes a remote script into sh — acceptable for a
        # trusted first-party installer URL, but worth confirming for hardened
        # deployments.
        install_cmd = "curl -fsSL https://deno.land/install.sh | sh"
        result = subprocess.run(install_cmd, shell=True, capture_output=True, text=True)
        if result.returncode == 0:
            # The installer drops the binary in ~/.deno/bin; make sure the
            # current process (and its children) can find it.
            deno_path = os.path.expanduser("~/.deno/bin")
            if deno_path not in os.environ.get("PATH", ""):
                os.environ["PATH"] = f"{deno_path}:{os.environ.get('PATH', '')}"
            print("βœ… Deno installed successfully!")
            return True
        else:
            print(f"❌ Deno installation failed: {result.stderr}")
            return False
    except Exception as e:
        print(f"❌ Error installing Deno: {e}")
        return False


# Install Deno before importing sandbox dependencies
print("πŸ” Checking Deno installation...")
deno_available = ensure_deno_installed()

# Import the CodeAct agent functionality
from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact

# Import the new guided analysis functionality
from graph import analyze_file_with_guidance_sync, guided_analysis_graph
from graph_streaming import streaming_analyze_file_with_guidance

# Load environment variables
load_dotenv(find_dotenv())

# Initialize model for file analysis
codeact_model = init_chat_model("o3-2025-04-16", model_provider="openai")

# Store uploaded file path globally (single-user app state; one upload at a time).
uploaded_file_path = None


def handle_file_upload(file):
    """Handle file upload and store the path globally.

    Args:
        file: Path string supplied by Gradio (``type="filepath"``), or None
            when the upload is cleared.

    Returns:
        str: Human-readable status message for the upload status textbox.
    """
    global uploaded_file_path
    try:
        if file is not None:
            # With type="filepath", Gradio returns the file path as a string
            uploaded_file_path = file
            filename = os.path.basename(file)
            # BUGFIX: original message hard-coded "(unknown)" and never used
            # the computed `filename` local; interpolate it properly.
            return f"βœ… File uploaded successfully: {filename}"
        else:
            uploaded_file_path = None
            return "❌ No file uploaded"
    except Exception as e:
        uploaded_file_path = None
        return f"❌ Upload error: {str(e)}"


def streaming_analyze_file_with_question(user_question):
    """
    Streaming version that yields progress updates in real-time.

    Yields:
        str: Incremental analysis output (or a single error message).
    """
    global uploaded_file_path, deno_available
    try:
        if not uploaded_file_path or not os.path.exists(uploaded_file_path):
            yield "❌ No file uploaded or file not found. Please upload a file first."
            return

        if not user_question or user_question.strip() == "":
            # Default prompt when the user leaves the question box empty.
            user_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

        # Check if Deno is available for sandbox operations
        if not deno_available:
            yield """❌ Deno runtime not available. This is required for code execution in the sandbox.

πŸ“‹ Troubleshooting:
1. This usually happens on deployment platforms that don't have Deno pre-installed
2. The app attempted to install Deno automatically but failed
3. Try restarting the space or contact support

πŸ”„ Alternative: You can still upload files, but advanced code analysis may be limited."""
            return

        # Use the streaming guided analysis approach
        for chunk in streaming_analyze_file_with_guidance(uploaded_file_path, user_question):
            yield chunk

    except Exception as e:
        error_msg = str(e)
        # Deno failures get a dedicated message because they indicate a
        # runtime/platform problem rather than an analysis bug.
        if "Deno" in error_msg or "deno" in error_msg:
            yield f"""❌ Deno-related error in analysis: {error_msg}

πŸ”§ This appears to be a Deno runtime issue. The sandbox requires Deno for code execution.
Try restarting the application or contact support if this persists."""
        else:
            yield f"❌ Error in guided analysis: {error_msg}"


def analyze_file_with_question(user_question):
    """
    Non-streaming version for backward compatibility.

    Returns:
        str: Full analysis result or an error message.
    """
    global uploaded_file_path, deno_available
    try:
        if not uploaded_file_path or not os.path.exists(uploaded_file_path):
            return "❌ No file uploaded or file not found. Please upload a file first."

        if not user_question or user_question.strip() == "":
            user_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

        # Check if Deno is available for sandbox operations
        if not deno_available:
            return """❌ Deno runtime not available. This is required for code execution in the sandbox.

πŸ“‹ Troubleshooting:
1. This usually happens on deployment platforms that don't have Deno pre-installed
2. The app attempted to install Deno automatically but failed
3. Try restarting the space or contact support

πŸ”„ Alternative: You can still upload files, but advanced code analysis may be limited."""

        # Use the new guided analysis approach
        result = analyze_file_with_guidance_sync(uploaded_file_path, user_question)
        return result

    except Exception as e:
        error_msg = str(e)
        if "Deno" in error_msg or "deno" in error_msg:
            return f"""❌ Deno-related error in analysis: {error_msg}

πŸ”§ This appears to be a Deno runtime issue. The sandbox requires Deno for code execution.
Try restarting the application or contact support if this persists."""
        else:
            return f"❌ Error in guided analysis: {error_msg}"


async def analyze_uploaded_file():
    """Legacy function - kept for backward compatibility."""
    return analyze_file_with_question("Provide a comprehensive analysis of this file.")


def run_file_analysis():
    """Wrapper to run async file analysis in sync context."""
    return asyncio.run(analyze_uploaded_file())


# --- UI -------------------------------------------------------------------

with gr.Blocks(title="DataForge - AI CodeAct Agent") as demo:
    gr.Markdown("# πŸ€– DataForge - AI CodeAct Agent")

    # Demo Video Section
    gr.Markdown("""
## πŸŽ₯ **Demo Video - See DataForge in Action!**

**πŸ“Ί [Watch the full demo on YouTube](https://www.youtube.com/watch?v=f5jp2i3engM)** - Learn how to use DataForge in just a few minutes!

---
""")

    gr.Markdown("""
## πŸ”‘ **AI Writes Code to Analyze Your Data Locally**

**Why DataForge handles massive files when other AI tools fail:**

❌ **Other AI Tools**: Upload data to LLM β†’ Hit limits β†’ Fail on large files

βœ… **DataForge**: AI writes code β†’ Code processes data locally β†’ No limits!

### πŸ’ͺ **Key Benefits:**
- **♾️ No Size Limits** - Process GB+ files locally
- **πŸ›‘οΈ Complete Privacy** - Data never leaves your machine
- **⚑ Lightning Fast** - No uploads, pure local processing
- **🎯 Custom Analysis** - Code written for your specific question
""")

    # Supported File Types - Simple Version
    gr.Markdown("## πŸ“‹ **Supported Files**")
    gr.Markdown("""
**πŸ“Š Data:** CSV, JSON, XML, TSV

**πŸ“ Logs:** Application, access, error, audit logs

**πŸ—‚οΈ Text:** Any text file, code files, configs

**πŸ’Ύ Size:** No limits - handles multi-GB files locally
""")

    with gr.Row():
        with gr.Column(scale=1):
            # File Upload Section
            gr.Markdown("### πŸ“€ File Upload")
            file_upload = gr.File(
                label="Upload File for Analysis",
                type="filepath"
            )
            upload_status = gr.Textbox(
                label="Upload Status",
                value="No file uploaded",
                interactive=False
            )

            # Question Section
            gr.Markdown("### ❓ Ask Your Question")
            user_question = gr.Textbox(
                label="Your Question about the File",
                placeholder="What would you like to know about this file? (e.g., 'Find security threats', 'Show performance issues', 'What errors are present?')",
                lines=4,
                value=""
            )

            analyze_btn = gr.Button("πŸ€– Activate CodeAct Agent", variant="primary", size="lg")

            # How it works
            gr.Markdown("### πŸ”¬ **How It Works**")
            gr.Markdown("""
1. **πŸ” AI samples** your file structure
2. **⚑ AI writes** custom analysis code
3. **πŸš€ Code processes** your entire file locally
4. **πŸ“Š Results** delivered to you

**Your data never leaves your machine!**
""")

        with gr.Column(scale=2):
            analysis_output = gr.Textbox(
                label="πŸ€– CodeAct Agent Analysis Results",
                lines=25,
                max_lines=35,
                placeholder="Upload a file, ask your question, and click 'Activate CodeAct Agent' to watch the AI write and execute custom analysis code in real-time...",
                interactive=False
            )

    # Event handlers
    file_upload.change(
        fn=handle_file_upload,
        inputs=[file_upload],
        outputs=[upload_status]
    )

    # Generator function -> Gradio streams each yielded chunk to the textbox.
    analyze_btn.click(
        fn=streaming_analyze_file_with_question,
        inputs=[user_question],
        outputs=[analysis_output]
    )

    gr.Markdown("---")
    gr.Markdown("## πŸ’‘ **Real CodeAct Use Cases - When LLMs Fail, Code Succeeds**")
    gr.Markdown("""
**πŸ”₯ Real Problem:** You have a 500MB server log file and want to ask: *"Which IP addresses made the most requests yesterday?"*

❌ **Traditional LLM:** "File too large, please upload smaller chunks"

βœ… **DataForge CodeAct:** AI writes Python code with regex + Counter to process entire file β†’ Gets exact answer

**πŸ”₯ Real Problem:** You have a 2GB CSV with sales data and ask: *"How many orders were placed in each month of 2024?"*

❌ **Traditional LLM:** Crashes on upload or hits token limits

βœ… **DataForge CodeAct:** AI writes pandas code to parse dates and count by month β†’ Simple monthly breakdown

**πŸ”₯ Real Problem:** You have 1GB of JSON API logs and ask: *"Find all 500 errors and group them by endpoint and time of day"*

❌ **Traditional LLM:** "Cannot process this much data"

βœ… **DataForge CodeAct:** AI writes JSON parsing + datetime analysis code β†’ Complete error breakdown

**🎯 The Key:** Instead of sending your data to an LLM, the LLM writes code that processes your data locally!
""")


if __name__ == "__main__":
    print("πŸ€– Starting DataForge CodeAct Agent Application...")
    print("πŸš€ Initializing advanced AI-powered file analysis capabilities...")
    demo.launch()