# DataForge / app.py
# ai-puppy
# Revert "update better model"
# b7e87c1
import os
import gradio as gr
import asyncio
import tempfile
import subprocess
import shutil
from dotenv import find_dotenv, load_dotenv
from langchain.chat_models import init_chat_model
# Auto-install Deno if not found (for Hugging Face Spaces)
def _probe_deno_version():
    """Return the version token printed by ``deno --version``, or None if deno cannot be run."""
    try:
        probe = subprocess.run(['deno', '--version'], capture_output=True, text=True)
    except OSError:
        # FileNotFoundError (not on PATH), PermissionError, etc. all mean
        # "no usable deno here".
        return None
    if probe.returncode != 0:
        return None
    # The second whitespace-separated token is reported as the version;
    # never crash on unexpected output, just report it as unknown.
    tokens = probe.stdout.split()
    return tokens[1] if len(tokens) > 1 else "unknown version"


def _prepend_to_path(directory):
    """Put *directory* at the front of PATH unless it already is one of its entries."""
    current = os.environ.get("PATH", "")
    # Compare whole entries (not substrings) and use the platform separator.
    if directory not in current.split(os.pathsep):
        os.environ["PATH"] = f"{directory}{os.pathsep}{current}" if current else directory


def ensure_deno_installed():
    """Install Deno if not already installed (for Hugging Face Spaces compatibility).

    Steps, in order:

    1. Run ``deno --version``; a zero exit status means Deno is ready.
    2. Otherwise reuse an earlier install found in ``~/.deno/bin`` by putting
       that directory on ``PATH`` (avoids re-downloading on every start).
    3. Otherwise run the official install script, put ``~/.deno/bin`` on
       ``PATH`` and confirm that ``deno`` can actually be executed.

    Progress and failures are reported with ``print``.

    Returns:
        bool: True if a runnable ``deno`` is available afterwards, else False.
    """
    version = _probe_deno_version()
    if version is not None:
        print(f"βœ… Deno already installed: {version}")
        return True

    deno_path = os.path.expanduser("~/.deno/bin")

    # A previous run may have installed Deno without it being on PATH of this
    # process; reuse it instead of downloading again.
    if os.path.isfile(os.path.join(deno_path, "deno")):
        _prepend_to_path(deno_path)
        version = _probe_deno_version()
        if version is not None:
            print(f"βœ… Deno already installed: {version}")
            return True

    print("πŸ”§ Deno not found. Installing Deno for PyodideSandbox...")
    try:
        # Install Deno using the official installer. The command is a fixed
        # literal (no user input), so running it through the shell is safe.
        install_cmd = "curl -fsSL https://deno.land/install.sh | sh"
        result = subprocess.run(install_cmd, shell=True, capture_output=True, text=True)
    except Exception as e:
        print(f"❌ Error installing Deno: {e}")
        return False

    if result.returncode != 0:
        print(f"❌ Deno installation failed: {result.stderr}")
        return False

    # Add Deno to PATH
    _prepend_to_path(deno_path)

    # The pipeline's exit status is that of `sh`, which is 0 even when `curl`
    # failed and piped nothing into it. Only trust the install if deno runs.
    if _probe_deno_version() is None:
        print(f"❌ Deno installation failed: {result.stderr}")
        return False

    print("βœ… Deno installed successfully!")
    return True
# Install Deno before importing sandbox dependencies
# This runs at import time; the boolean result is stored in `deno_available`
# and checked by the analysis entry points before any sandbox work starts.
print("πŸ” Checking Deno installation...")
deno_available = ensure_deno_installed()
# Import the CodeAct agent functionality
# (kept after the Deno check on purpose, see the comment above)
from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact
# Import the new guided analysis functionality
from graph import analyze_file_with_guidance_sync, guided_analysis_graph
from graph_streaming import streaming_analyze_file_with_guidance
# Load environment variables
# NOTE(review): this happens after the project imports above; if `agent` or
# `graph` read environment variables at import time they would not see values
# from the .env file yet - confirm against those modules.
load_dotenv(find_dotenv())
# Initialize model for file analysis
# NOTE(review): `codeact_model` is not referenced in the visible part of this
# file - presumably kept for other callers; verify before removing.
codeact_model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")
# Store uploaded file path globally
# Written by handle_file_upload() and read by the analysis functions.
# NOTE(review): being module-level, this value is shared by everything running
# in this process - confirm that is acceptable for concurrent sessions.
uploaded_file_path = None
def handle_file_upload(file):
    """Remember the uploaded file's path and return a status line for the UI.

    Args:
        file: Path of the uploaded file (the upload widget is configured with
            type="filepath"), or None when no file is present.

    Returns:
        str: A human-readable status message. On any error, and when no file
        is present, the module-level `uploaded_file_path` is reset to None.
    """
    global uploaded_file_path

    # Nothing uploaded (or the upload was cleared): forget any earlier path.
    if file is None:
        uploaded_file_path = None
        return "❌ No file uploaded"

    try:
        # With type="filepath", Gradio hands over the path as a plain string.
        uploaded_file_path = file
        return f"βœ… File uploaded successfully: {os.path.basename(file)}"
    except Exception as upload_error:
        uploaded_file_path = None
        return f"❌ Upload error: {upload_error!s}"
def streaming_analyze_file_with_question(user_question):
    """
    Generator entry point: analyze the uploaded file and yield progress text.

    Reads the module-level `uploaded_file_path` and `deno_available`. Yields a
    single error message (and stops) when no file is available or Deno is
    missing; otherwise forwards every chunk produced by
    `streaming_analyze_file_with_guidance`. Exceptions are turned into a
    final yielded error message instead of propagating.
    """
    try:
        if not (uploaded_file_path and os.path.exists(uploaded_file_path)):
            yield "❌ No file uploaded or file not found. Please upload a file first."
            return

        # Fall back to a generic request when the question is missing or blank.
        question = user_question
        if not (question and question.strip()):
            question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

        # The sandbox cannot execute code without Deno.
        if not deno_available:
            yield """❌ Deno runtime not available. This is required for code execution in the sandbox.
πŸ“‹ Troubleshooting:
1. This usually happens on deployment platforms that don't have Deno pre-installed
2. The app attempted to install Deno automatically but failed
3. Try restarting the space or contact support
πŸ”„ Alternative: You can still upload files, but advanced code analysis may be limited."""
            return

        # Stream the guided analysis straight through to the caller.
        yield from streaming_analyze_file_with_guidance(uploaded_file_path, question)
    except Exception as failure:
        details = str(failure)
        # Give Deno problems a dedicated hint; everything else is generic.
        if any(marker in details for marker in ("Deno", "deno")):
            yield f"""❌ Deno-related error in analysis: {details}
πŸ”§ This appears to be a Deno runtime issue. The sandbox requires Deno for code execution.
Try restarting the application or contact support if this persists."""
        else:
            yield f"❌ Error in guided analysis: {details}"
def analyze_file_with_question(user_question):
    """
    Blocking entry point: analyze the uploaded file and return the full result.

    Reads the module-level `uploaded_file_path` and `deno_available`. Returns
    an error message when no file is available or Deno is missing; otherwise
    returns whatever `analyze_file_with_guidance_sync` produces. Exceptions
    are converted into a returned error message instead of propagating.
    """
    try:
        if not (uploaded_file_path and os.path.exists(uploaded_file_path)):
            return "❌ No file uploaded or file not found. Please upload a file first."

        # Fall back to a generic request when the question is missing or blank.
        question = user_question
        if not (question and question.strip()):
            question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

        # The sandbox cannot execute code without Deno.
        if not deno_available:
            return """❌ Deno runtime not available. This is required for code execution in the sandbox.
πŸ“‹ Troubleshooting:
1. This usually happens on deployment platforms that don't have Deno pre-installed
2. The app attempted to install Deno automatically but failed
3. Try restarting the space or contact support
πŸ”„ Alternative: You can still upload files, but advanced code analysis may be limited."""

        # Run the guided analysis to completion and hand back its result.
        return analyze_file_with_guidance_sync(uploaded_file_path, question)
    except Exception as failure:
        details = str(failure)
        # Give Deno problems a dedicated hint; everything else is generic.
        if any(marker in details for marker in ("Deno", "deno")):
            return f"""❌ Deno-related error in analysis: {details}
πŸ”§ This appears to be a Deno runtime issue. The sandbox requires Deno for code execution.
Try restarting the application or contact support if this persists."""
        return f"❌ Error in guided analysis: {details}"
async def analyze_uploaded_file():
    """Legacy coroutine kept for backward compatibility.

    Delegates to `analyze_file_with_question` with a fixed, generic prompt and
    returns its result. The delegated call is a plain synchronous call; nothing
    is awaited here.
    """
    default_prompt = "Provide a comprehensive analysis of this file."
    report = analyze_file_with_question(default_prompt)
    return report
def run_file_analysis():
    """Synchronous wrapper: drive `analyze_uploaded_file` to completion and return its result."""
    pending_analysis = analyze_uploaded_file()
    # asyncio.run creates a fresh event loop for this single coroutine.
    return asyncio.run(pending_analysis)
# Create the Gradio interface
# Page layout, top to bottom: title and intro Markdown, then a two-column row
# (left: upload, question and trigger button; right: result box), then the
# event wiring and a list of example questions.
with gr.Blocks(title="DataForge - AI CodeAct Agent") as demo:
    gr.Markdown("# πŸ€– DataForge - AI CodeAct Agent")
    # Demo Video Section
    gr.Markdown("""
## πŸŽ₯ **Demo Video - See DataForge in Action!**
**πŸ“Ί [Watch the full demo on YouTube](https://www.youtube.com/watch?v=f5jp2i3engM)** - Learn how to use DataForge in just a few minutes!
---
""")
    # Product pitch shown above the controls
    gr.Markdown("""
## πŸ”‘ **AI Writes Code to Analyze Your Data Locally**
**Why DataForge handles massive files when other AI tools fail:**
❌ **Other AI Tools**: Upload data to LLM β†’ Hit limits β†’ Fail on large files
βœ… **DataForge**: AI writes code β†’ Code processes data locally β†’ No limits!
### πŸ’ͺ **Key Benefits:**
- **♾️ No Size Limits** - Process GB+ files locally
- **πŸ›‘οΈ Complete Privacy** - Data never leaves your machine
- **⚑ Lightning Fast** - No uploads, pure local processing
- **🎯 Custom Analysis** - Code written for your specific question
""")
    # Supported File Types - Simple Version
    gr.Markdown("## πŸ“‹ **Supported Files**")
    gr.Markdown("""
**πŸ“Š Data:** CSV, JSON, XML, TSV
**πŸ“ Logs:** Application, access, error, audit logs
**πŸ—‚οΈ Text:** Any text file, code files, configs
**πŸ’Ύ Size:** No limits - handles multi-GB files locally
""")
    with gr.Row():
        # Left column: inputs
        with gr.Column(scale=1):
            # File Upload Section
            gr.Markdown("### πŸ“€ File Upload")
            # type="filepath" makes the change handler receive the path as a string
            file_upload = gr.File(
                label="Upload File for Analysis",
                type="filepath"
            )
            # Read-only status line filled by handle_file_upload
            upload_status = gr.Textbox(
                label="Upload Status",
                value="No file uploaded",
                interactive=False
            )
            # Question Section
            gr.Markdown("### ❓ Ask Your Question")
            user_question = gr.Textbox(
                label="Your Question about the File",
                placeholder="What would you like to know about this file? (e.g., 'Find security threats', 'Show performance issues', 'What errors are present?')",
                lines=4,
                value=""
            )
            analyze_btn = gr.Button("πŸ€– Activate CodeAct Agent", variant="primary", size="lg")
            # How it works
            gr.Markdown("### πŸ”¬ **How It Works**")
            gr.Markdown("""
1. **πŸ” AI samples** your file structure
2. **⚑ AI writes** custom analysis code
3. **πŸš€ Code processes** your entire file locally
4. **πŸ“Š Results** delivered to you
**Your data never leaves your machine!**
""")
        # Right column: read-only output box that receives the analysis text
        with gr.Column(scale=2):
            analysis_output = gr.Textbox(
                label="πŸ€– CodeAct Agent Analysis Results",
                lines=25,
                max_lines=35,
                placeholder="Upload a file, ask your question, and click 'Activate CodeAct Agent' to watch the AI write and execute custom analysis code in real-time...",
                interactive=False
            )
    # Event handlers
    # Upload changes only record the path (via handle_file_upload) and update
    # the status line.
    file_upload.change(
        fn=handle_file_upload,
        inputs=[file_upload],
        outputs=[upload_status]
    )
    # The click handler receives only the question text; the file to analyze is
    # taken from the module-level `uploaded_file_path` set by the upload handler.
    # The handler is a generator, so each yielded value is written to the output box.
    analyze_btn.click(
        fn=streaming_analyze_file_with_question,
        inputs=[user_question],
        outputs=[analysis_output]
    )
    gr.Markdown("---")
    gr.Markdown("## πŸ’‘ **Example Questions**")
    gr.Markdown("""
**πŸ” Security:** "Find failed login attempts and suspicious IPs"
**⚑ Performance:** "Identify slowest API endpoints and bottlenecks"
**πŸ“Š Data:** "Analyze statistical patterns and outliers"
**πŸ” General:** "Summarize key insights and anomalies"
""")
if __name__ == "__main__":
    # Print the startup banners, then start the Gradio app.
    for banner in (
        "πŸ€– Starting DataForge CodeAct Agent Application...",
        "πŸš€ Initializing advanced AI-powered file analysis capabilities...",
    ):
        print(banner)
    demo.launch()