# DataForge / app.py
# ai-puppy
# save
# bb43287
# raw
# history blame
# 11.4 kB
import os
import gradio as gr
import asyncio
import tempfile
import subprocess
import shutil
from dotenv import find_dotenv, load_dotenv
from langchain.chat_models import init_chat_model
# Auto-install Deno if not found (for Hugging Face Spaces)
def _deno_version():
    """Return the version reported by ``deno --version``, or None if Deno cannot be run."""
    try:
        probe = subprocess.run(['deno', '--version'], capture_output=True, text=True)
    except OSError:
        # FileNotFoundError (not on PATH) is the common case, but any failure
        # to launch the executable (e.g. PermissionError) means "not usable".
        return None
    if probe.returncode != 0:
        return None
    fields = probe.stdout.split()
    # The second whitespace-separated field is taken as the version; fall
    # back to a placeholder instead of raising if the output is shorter.
    return fields[1] if len(fields) > 1 else "unknown"


def _add_to_path(directory):
    """Prepend *directory* to PATH unless it is already one of its entries."""
    current = os.environ.get("PATH", "")
    # Compare whole entries (not substrings) and use the platform separator.
    if directory in current.split(os.pathsep):
        return
    os.environ["PATH"] = f"{directory}{os.pathsep}{current}" if current else directory


def ensure_deno_installed():
    """Install Deno if not already installed (for Hugging Face Spaces compatibility).

    First probes for a runnable ``deno``. If none is found, runs the official
    install script through the shell, prepends ``~/.deno/bin`` to this
    process's PATH, and probes again to confirm the install really worked.

    Returns:
        bool: True if Deno was already runnable or is runnable after the
        install; False if the installer failed, raised, or left no runnable
        ``deno`` behind. Progress and errors are reported with ``print``.
    """
    version = _deno_version()
    if version is not None:
        print(f"βœ… Deno already installed: {version}")
        return True

    print("πŸ”§ Deno not found. Installing Deno for PyodideSandbox...")
    try:
        # Install Deno using the official installer
        install_cmd = "curl -fsSL https://deno.land/install.sh | sh"
        result = subprocess.run(install_cmd, shell=True, capture_output=True, text=True)
        if result.returncode != 0:
            print(f"❌ Deno installation failed: {result.stderr}")
            return False

        # The installer is expected to place the binary under ~/.deno/bin.
        _add_to_path(os.path.expanduser("~/.deno/bin"))

        # The pipeline's exit status is that of `sh`, which is 0 even when
        # `curl` failed and fed it an empty script, so verify explicitly.
        if _deno_version() is None:
            print("❌ Deno installation failed: installer exited 0 but 'deno' is still not runnable")
            return False

        print("βœ… Deno installed successfully!")
        return True
    except Exception as e:
        # Best-effort at startup: report the problem instead of crashing import.
        print(f"❌ Error installing Deno: {e}")
        return False
# Install Deno before importing sandbox dependencies.
# Statement order matters in this section: the Deno check runs first, and the
# project imports below are deliberately placed after it
# (presumably because the sandbox modules need Deno to be present — confirm).
print("πŸ” Checking Deno installation...")
# Boolean result; read later by the analysis handlers to decide whether to run.
deno_available = ensure_deno_installed()
# Import the CodeAct agent functionality
# NOTE(review): none of these three names is referenced elsewhere in the
# visible file — confirm whether the import is needed only for side effects.
from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact
# Import the new guided analysis functionality
# NOTE(review): guided_analysis_graph is not referenced in the visible file.
from graph import analyze_file_with_guidance_sync, guided_analysis_graph
from graph_streaming import streaming_analyze_file_with_guidance
# Load environment variables from the nearest .env file found by find_dotenv()
# (presumably so API credentials are set before the model is created — confirm).
load_dotenv(find_dotenv())
# Initialize model for file analysis
# NOTE(review): codeact_model is not referenced elsewhere in the visible file.
codeact_model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")
# Store uploaded file path globally: written by handle_file_upload and read by
# the analysis functions; None means no file is currently uploaded.
uploaded_file_path = None
# Chat functionality removed - focusing on file analysis
def handle_file_upload(file):
    """Remember the uploaded file's path in the module global and report status.

    Args:
        file: Value delivered by the upload widget. The widget is configured
            with type="filepath", so this is a path string, or None when no
            file is present.

    Returns:
        str: A status message describing success, absence of a file, or the
        error that occurred. On absence or error the stored path is reset
        to None.
    """
    global uploaded_file_path
    try:
        if file is None:
            uploaded_file_path = None
            return "❌ No file uploaded"
        # Store first, then derive the display name; a failure while deriving
        # the name is handled below and clears the stored path again.
        uploaded_file_path = file
        return f"βœ… File uploaded successfully: {os.path.basename(file)}"
    except Exception as exc:
        uploaded_file_path = None
        return f"❌ Upload error: {exc!s}"
def streaming_analyze_file_with_question(user_question):
    """
    Generator that streams guided-analysis progress for the uploaded file.

    Yields one error message and stops when the stored upload path is unset or
    missing on disk, or when Deno is unavailable. Otherwise it re-yields every
    chunk produced by streaming_analyze_file_with_guidance. A missing or blank
    question is replaced by a default comprehensive-analysis prompt. Any
    exception is turned into a yielded error message instead of propagating.
    """
    global uploaded_file_path, deno_available
    try:
        has_file = bool(uploaded_file_path) and os.path.exists(uploaded_file_path)
        if not has_file:
            yield "❌ No file uploaded or file not found. Please upload a file first."
            return

        # Fall back to a generic prompt when the user typed nothing useful.
        if not (user_question and user_question.strip()):
            user_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

        # The sandbox cannot execute code without Deno, so stop early.
        if not deno_available:
            yield (
                "❌ Deno runtime not available. This is required for code execution in the sandbox.\n"
                "πŸ“‹ Troubleshooting:\n"
                "1. This usually happens on deployment platforms that don't have Deno pre-installed\n"
                "2. The app attempted to install Deno automatically but failed\n"
                "3. Try restarting the space or contact support\n"
                "πŸ”„ Alternative: You can still upload files, but advanced code analysis may be limited."
            )
            return

        # Relay the streaming guided analysis chunk by chunk.
        for piece in streaming_analyze_file_with_guidance(uploaded_file_path, user_question):
            yield piece
    except Exception as exc:
        error_text = str(exc)
        # Give Deno-specific advice when the failure mentions Deno.
        if any(tag in error_text for tag in ("Deno", "deno")):
            yield (
                f"❌ Deno-related error in analysis: {error_text}\n"
                "πŸ”§ This appears to be a Deno runtime issue. The sandbox requires Deno for code execution.\n"
                "Try restarting the application or contact support if this persists."
            )
        else:
            yield f"❌ Error in guided analysis: {error_text}"
def analyze_file_with_question(user_question):
    """
    Blocking counterpart of the streaming analysis, kept for backward compatibility.

    Returns an error message when the stored upload path is unset or missing
    on disk, or when Deno is unavailable. Otherwise returns whatever
    analyze_file_with_guidance_sync produces for the file and question. A
    missing or blank question is replaced by a default comprehensive-analysis
    prompt. Any exception is converted into a returned error message.
    """
    global uploaded_file_path, deno_available
    try:
        has_file = bool(uploaded_file_path) and os.path.exists(uploaded_file_path)
        if not has_file:
            return "❌ No file uploaded or file not found. Please upload a file first."

        # Fall back to a generic prompt when the user typed nothing useful.
        if not (user_question and user_question.strip()):
            user_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

        # The sandbox cannot execute code without Deno, so stop early.
        if not deno_available:
            return (
                "❌ Deno runtime not available. This is required for code execution in the sandbox.\n"
                "πŸ“‹ Troubleshooting:\n"
                "1. This usually happens on deployment platforms that don't have Deno pre-installed\n"
                "2. The app attempted to install Deno automatically but failed\n"
                "3. Try restarting the space or contact support\n"
                "πŸ”„ Alternative: You can still upload files, but advanced code analysis may be limited."
            )

        # Run the guided analysis and hand its result straight back.
        return analyze_file_with_guidance_sync(uploaded_file_path, user_question)
    except Exception as exc:
        error_text = str(exc)
        # Give Deno-specific advice when the failure mentions Deno.
        if any(tag in error_text for tag in ("Deno", "deno")):
            return (
                f"❌ Deno-related error in analysis: {error_text}\n"
                "πŸ”§ This appears to be a Deno runtime issue. The sandbox requires Deno for code execution.\n"
                "Try restarting the application or contact support if this persists."
            )
        return f"❌ Error in guided analysis: {error_text}"
async def analyze_uploaded_file():
    """Legacy coroutine entry point, kept for backward compatibility.

    Calls the synchronous analyze_file_with_question with a fixed
    comprehensive-analysis prompt and returns its result unchanged.
    """
    default_prompt = "Provide a comprehensive analysis of this file."
    return analyze_file_with_question(default_prompt)
def run_file_analysis():
    """Synchronous bridge: run the legacy analysis coroutine to completion.

    Returns whatever analyze_uploaded_file() returns.
    """
    pending = analyze_uploaded_file()
    return asyncio.run(pending)
# Build the Gradio interface.
# NOTE(review): the nesting below is reconstructed from statement order;
# confirm the layout (which widgets live in which column) against the
# running app.
with gr.Blocks(title="DataForge - AI-Powered File Analysis") as demo:
    # Page header and introduction.
    gr.Markdown("# πŸ” DataForge - AI-Powered File Analysis")
    gr.Markdown("""
Upload any file and ask specific questions for targeted AI analysis. Our guided approach:
1. πŸ“‹ **Examines** your file structure and patterns automatically
2. 🎯 **Generates** specific code guidance based on your question
3. πŸš€ **Executes** enhanced analysis with improved accuracy
**Simply upload a file and ask any question you want!**
""")

    with gr.Row():
        # Narrow column: upload, question input and explanatory text.
        with gr.Column(scale=1):
            # File upload widgets.
            gr.Markdown("### πŸ“€ File Upload")
            file_upload = gr.File(label="Upload File for Analysis", type="filepath")
            upload_status = gr.Textbox(
                label="Upload Status",
                value="No file uploaded",
                interactive=False,
            )

            # Question entry and the button that triggers the analysis.
            gr.Markdown("### ❓ Ask Your Question")
            user_question = gr.Textbox(
                label="Your Question about the File",
                placeholder="What would you like to know about this file? (e.g., 'Find security threats', 'Show performance issues', 'What errors are present?')",
                lines=4,
                value="",
            )
            analyze_btn = gr.Button("πŸ” Run Guided Analysis", variant="primary", size="lg")

            # Static description of the analysis process.
            gr.Markdown("### ℹ️ How It Works")
            gr.Markdown("""
**Guided Analysis Process:**
- 🎯 **Question-aware**: Code generation tailored to your specific question
- πŸ“‹ **Smart examination**: Automatically detects file structure and patterns
- πŸš€ **Dynamic optimization**: Creates targeted analysis approach
- βœ… **Higher accuracy**: Prevents common code generation errors
- πŸ”§ **Quality control**: Built-in validation to avoid syntax issues
""")

        # Wide column: read-only box that receives the analysis output.
        with gr.Column(scale=2):
            analysis_output = gr.Textbox(
                label="πŸ“Š Guided Analysis Results",
                lines=25,
                max_lines=35,
                placeholder="Upload a file, type your question, and click 'Run Guided Analysis' to see detailed results here...",
                interactive=False,
            )

    # Event wiring: uploads update the status box; the button streams results.
    file_upload.change(fn=handle_file_upload, inputs=[file_upload], outputs=[upload_status])
    analyze_btn.click(
        fn=streaming_analyze_file_with_question,
        inputs=[user_question],
        outputs=[analysis_output],
    )

    # Collapsible example questions, grouped by theme.
    gr.Markdown("---")
    gr.Markdown("## πŸ’‘ Example Questions by File Type")

    with gr.Accordion("πŸ” Security Analysis Questions", open=False):
        gr.Markdown("""
**For Log Files:**
- "Find any failed login attempts and suspicious IP addresses"
- "Identify potential security threats or anomalies"
- "Show me authentication errors and user access patterns"
- "Are there any brute force attacks or repeated failures?"
**For Access Logs:**
- "Detect unusual access patterns or potential intrusions"
- "Find requests with suspicious user agents or payloads"
- "Identify high-frequency requests from single IPs"
""")

    with gr.Accordion("⚑ Performance Analysis Questions", open=False):
        gr.Markdown("""
**For Application Logs:**
- "Which API endpoints are slowest and why?"
- "Find performance bottlenecks and response time issues"
- "Show me timeout errors and failed requests"
- "What are the peak usage times and load patterns?"
**For System Logs:**
- "Identify resource usage spikes and memory issues"
- "Find database query performance problems"
- "Show me error rates and system health indicators"
""")

    with gr.Accordion("πŸ“ˆ Data Analysis Questions", open=False):
        gr.Markdown("""
**For CSV/Data Files:**
- "Analyze data distribution and find statistical insights"
- "Identify outliers and anomalies in the dataset"
- "What correlations exist between different columns?"
- "Generate a comprehensive data quality report"
**For JSON Files:**
- "Parse the structure and extract key information"
- "Find patterns in nested data and relationships"
- "Summarize the main data points and values"
""")
# Script entry point: start the Gradio server only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    print("Starting DataForge application...")
    demo.launch()