Spaces:

Agents-MCP-Hackathon
/

DataForge

Runtime error

DataForge / app.py

ai-puppy

save

bb43287 19 days ago

11.4 kB

	import os
	import gradio as gr
	import asyncio
	import tempfile
	import subprocess
	import shutil
	from dotenv import find_dotenv, load_dotenv
	from langchain.chat_models import init_chat_model

	# Auto-install Deno if not found (for Hugging Face Spaces)
	def ensure_deno_installed():
	    """Ensure a runnable ``deno`` executable is available, installing it if needed.

	    The function first probes ``deno --version``. If that fails and
	    ``~/.deno/bin`` already exists, that directory is put on ``PATH`` and the
	    probe is repeated. If Deno is still missing, the official installer
	    script is downloaded and run. Success is only reported once
	    ``deno --version`` actually works.

	    Returns:
	        bool: True if Deno can be executed when the function returns,
	        False if the installation failed or raised.
	    """
	    deno_bin_dir = os.path.expanduser("~/.deno/bin")

	    def _probe_deno():
	        """Return the stdout of ``deno --version``, or None if Deno cannot be run."""
	        try:
	            probe = subprocess.run(['deno', '--version'], capture_output=True, text=True)
	        except OSError:
	            # Covers FileNotFoundError (not on PATH) and PermissionError alike.
	            return None
	        return probe.stdout if probe.returncode == 0 else None

	    def _put_deno_dir_on_path():
	        """Prepend the installer's bin directory to PATH unless it is already an entry."""
	        current = os.environ.get("PATH", "")
	        # Compare whole PATH entries; a substring test would also match
	        # unrelated directories that merely contain this path as a prefix.
	        if deno_bin_dir not in current.split(os.pathsep):
	            os.environ["PATH"] = f"{deno_bin_dir}{os.pathsep}{current}"

	    version_output = _probe_deno()
	    if version_output is None and os.path.isdir(deno_bin_dir):
	        # A previous install may exist without being on PATH; try it before
	        # downloading anything.
	        _put_deno_dir_on_path()
	        version_output = _probe_deno()

	    if version_output is not None:
	        # Do not assume a fixed "deno <version> ..." layout of the output.
	        tokens = version_output.split()
	        version = tokens[1] if len(tokens) > 1 else (version_output.strip() or "unknown version")
	        print(f"✅ Deno already installed: {version}")
	        return True

	    print("🔧 Deno not found. Installing Deno for PyodideSandbox...")

	    try:
	        # Install Deno using the official installer. The pipe must be a real
	        # shell pipe: an escaped "\|" would be handed to curl as an argument.
	        install_cmd = "curl -fsSL https://deno.land/install.sh | sh"
	        result = subprocess.run(install_cmd, shell=True, capture_output=True, text=True)

	        if result.returncode != 0:
	            print(f"❌ Deno installation failed: {result.stderr}")
	            return False

	        _put_deno_dir_on_path()

	        # The pipeline's exit status is that of `sh`, so a failed download can
	        # still end with return code 0. Only trust a working executable.
	        if _probe_deno() is None:
	            print(f"❌ Deno installation failed: {result.stderr or 'deno is still not runnable after install'}")
	            return False

	        print("✅ Deno installed successfully!")
	        return True

	    except Exception as e:
	        # Best effort: the app keeps running without the sandbox.
	        print(f"❌ Error installing Deno: {e}")
	        return False

	# Install Deno before importing sandbox dependencies.
	# This runs at import time, ahead of the `agent` / `graph` imports below. The
	# boolean result is kept in `deno_available`, which the analysis handlers
	# check before they start any sandbox work.
	print("🔍 Checking Deno installation...")
	deno_available = ensure_deno_installed()

	# Import the CodeAct agent functionality
	from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact

	# Import the new guided analysis functionality
	from graph import analyze_file_with_guidance_sync, guided_analysis_graph
	from graph_streaming import streaming_analyze_file_with_guidance

	# Load environment variables from the .env file located by find_dotenv()
	load_dotenv(find_dotenv())

	# Initialize model for file analysis
	# NOTE(review): created at import time, after load_dotenv(); presumably it
	# needs OpenAI credentials from the environment - confirm.
	codeact_model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")

	# Store uploaded file path globally
	# Written by handle_file_upload() and read by the analysis handlers; None
	# means no file is currently uploaded.
	# NOTE(review): module-level state, so presumably shared by every session
	# served by this process - confirm whether per-session state is needed.
	uploaded_file_path = None

	# Chat functionality removed - focusing on file analysis

	def handle_file_upload(file):
	    """Record the uploaded file's path in the module global and report status.

	    Args:
	        file: Path string of the uploaded file (the upload widget is
	            configured with type="filepath"), or None when nothing is
	            uploaded.

	    Returns:
	        str: A status message naming the uploaded file, stating that no
	        file was uploaded, or describing the error that occurred.
	    """
	    global uploaded_file_path
	    try:
	        # Guard clause: nothing uploaded, so forget any earlier path.
	        if file is None:
	            uploaded_file_path = None
	            return "❌ No file uploaded"

	        uploaded_file_path = file
	        return f"✅ File uploaded successfully: {os.path.basename(file)}"
	    except Exception as exc:
	        # Any failure leaves the app without a current file.
	        uploaded_file_path = None
	        return f"❌ Upload error: {exc}"

	def streaming_analyze_file_with_question(user_question):
	    """Generator that streams guided-analysis output for the uploaded file.

	    Yields a single error message and stops when no uploaded file exists on
	    disk or when Deno is unavailable. Otherwise it relays every chunk
	    produced by streaming_analyze_file_with_guidance(). A blank question is
	    replaced by a default comprehensive-analysis prompt. Exceptions are
	    reported as a final yielded message instead of being raised.

	    Args:
	        user_question: The user's question about the file; may be empty or None.

	    Yields:
	        str: Progress/result chunks, or an error message.
	    """
	    global uploaded_file_path, deno_available

	    try:
	        file_ready = uploaded_file_path and os.path.exists(uploaded_file_path)
	        if not file_ready:
	            yield "❌ No file uploaded or file not found. Please upload a file first."
	            return

	        # Fall back to a generic prompt when the question is missing or blank.
	        if not (user_question and user_question.strip() != ""):
	            user_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

	        # The sandbox cannot execute code without Deno.
	        if not deno_available:
	            yield """❌ Deno runtime not available. This is required for code execution in the sandbox.

	📋 Troubleshooting:
	1. This usually happens on deployment platforms that don't have Deno pre-installed
	2. The app attempted to install Deno automatically but failed
	3. Try restarting the space or contact support

	🔄 Alternative: You can still upload files, but advanced code analysis may be limited."""
	            return

	        # Relay the streaming guided analysis chunk by chunk.
	        yield from streaming_analyze_file_with_guidance(uploaded_file_path, user_question)

	    except Exception as exc:
	        error_msg = str(exc)
	        mentions_deno = any(tag in error_msg for tag in ("Deno", "deno"))
	        if not mentions_deno:
	            yield f"❌ Error in guided analysis: {error_msg}"
	        else:
	            yield f"""❌ Deno-related error in analysis: {error_msg}

	🔧 This appears to be a Deno runtime issue. The sandbox requires Deno for code execution.
	Try restarting the application or contact support if this persists."""

	def analyze_file_with_question(user_question):
	    """Run the guided analysis on the uploaded file and return the full result.

	    Non-streaming counterpart kept for backward compatibility. Returns an
	    error message when no uploaded file exists on disk or when Deno is
	    unavailable. A blank question is replaced by a default
	    comprehensive-analysis prompt. Exceptions are converted into an error
	    message rather than raised.

	    Args:
	        user_question: The user's question about the file; may be empty or None.

	    Returns:
	        The value returned by analyze_file_with_guidance_sync(), or an
	        error message string.
	    """
	    global uploaded_file_path, deno_available

	    try:
	        file_ready = uploaded_file_path and os.path.exists(uploaded_file_path)
	        if not file_ready:
	            return "❌ No file uploaded or file not found. Please upload a file first."

	        # Fall back to a generic prompt when the question is missing or blank.
	        if not (user_question and user_question.strip() != ""):
	            user_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

	        # The sandbox cannot execute code without Deno.
	        if not deno_available:
	            return """❌ Deno runtime not available. This is required for code execution in the sandbox.

	📋 Troubleshooting:
	1. This usually happens on deployment platforms that don't have Deno pre-installed
	2. The app attempted to install Deno automatically but failed
	3. Try restarting the space or contact support

	🔄 Alternative: You can still upload files, but advanced code analysis may be limited."""

	        # Delegate to the guided analysis pipeline.
	        return analyze_file_with_guidance_sync(uploaded_file_path, user_question)

	    except Exception as exc:
	        error_msg = str(exc)
	        mentions_deno = any(tag in error_msg for tag in ("Deno", "deno"))
	        if not mentions_deno:
	            return f"❌ Error in guided analysis: {error_msg}"
	        return f"""❌ Deno-related error in analysis: {error_msg}

	🔧 This appears to be a Deno runtime issue. The sandbox requires Deno for code execution.
	Try restarting the application or contact support if this persists."""

	async def analyze_uploaded_file():
	    """Legacy entry point kept for backward compatibility.

	    Declared ``async`` but awaits nothing: it delegates directly to the
	    synchronous analyze_file_with_question() with a fixed default question.
	    """
	    default_question = "Provide a comprehensive analysis of this file."
	    return analyze_file_with_question(default_question)

	def run_file_analysis():
	    """Run the async legacy analysis to completion from synchronous code."""
	    legacy_coroutine = analyze_uploaded_file()
	    return asyncio.run(legacy_coroutine)

	# Create the Gradio interface
	with gr.Blocks(title="DataForge - AI-Powered File Analysis") as demo:
	    # Page header and a short description of the guided workflow.
	    gr.Markdown("# 🔍 DataForge - AI-Powered File Analysis")
	    gr.Markdown("""
	Upload any file and ask specific questions for targeted AI analysis. Our guided approach:

	1. 📋 Examines your file structure and patterns automatically
	2. 🎯 Generates specific code guidance based on your question
	3. 🚀 Executes enhanced analysis with improved accuracy

	Simply upload a file and ask any question you want!
	""")

	    with gr.Row():
	        # Left column: inputs (file, question, trigger button) and help text.
	        with gr.Column(scale=1):
	            # File Upload Section
	            gr.Markdown("### 📤 File Upload")
	            file_upload = gr.File(label="Upload File for Analysis", type="filepath")
	            upload_status = gr.Textbox(label="Upload Status", value="No file uploaded", interactive=False)

	            # Question Section
	            gr.Markdown("### ❓ Ask Your Question")
	            user_question = gr.Textbox(
	                label="Your Question about the File",
	                placeholder="What would you like to know about this file? (e.g., 'Find security threats', 'Show performance issues', 'What errors are present?')",
	                lines=4,
	                value="",
	            )

	            analyze_btn = gr.Button("🔍 Run Guided Analysis", variant="primary", size="lg")

	            # Analysis Info
	            gr.Markdown("### ℹ️ How It Works")
	            gr.Markdown("""
	Guided Analysis Process:
	- 🎯 Question-aware: Code generation tailored to your specific question
	- 📋 Smart examination: Automatically detects file structure and patterns
	- 🚀 Dynamic optimization: Creates targeted analysis approach
	- ✅ Higher accuracy: Prevents common code generation errors
	- 🔧 Quality control: Built-in validation to avoid syntax issues
	""")

	        # Right column: read-only results pane.
	        with gr.Column(scale=2):
	            analysis_output = gr.Textbox(
	                label="📊 Guided Analysis Results",
	                lines=25,
	                max_lines=35,
	                placeholder="Upload a file, type your question, and click 'Run Guided Analysis' to see detailed results here...",
	                interactive=False,
	            )

	    # Event handlers
	    # Uploading (or clearing) a file updates the status box.
	    file_upload.change(fn=handle_file_upload, inputs=[file_upload], outputs=[upload_status])

	    # The button streams analysis output into the results pane.
	    analyze_btn.click(fn=streaming_analyze_file_with_question, inputs=[user_question], outputs=[analysis_output])

	    gr.Markdown("---")
	    gr.Markdown("## 💡 Example Questions by File Type")

	    # Collapsed example-question sections, one per analysis theme.
	    with gr.Accordion("🔐 Security Analysis Questions", open=False):
	        gr.Markdown("""
	For Log Files:
	- "Find any failed login attempts and suspicious IP addresses"
	- "Identify potential security threats or anomalies"
	- "Show me authentication errors and user access patterns"
	- "Are there any brute force attacks or repeated failures?"

	For Access Logs:
	- "Detect unusual access patterns or potential intrusions"
	- "Find requests with suspicious user agents or payloads"
	- "Identify high-frequency requests from single IPs"
	""")

	    with gr.Accordion("⚡ Performance Analysis Questions", open=False):
	        gr.Markdown("""
	For Application Logs:
	- "Which API endpoints are slowest and why?"
	- "Find performance bottlenecks and response time issues"
	- "Show me timeout errors and failed requests"
	- "What are the peak usage times and load patterns?"

	For System Logs:
	- "Identify resource usage spikes and memory issues"
	- "Find database query performance problems"
	- "Show me error rates and system health indicators"
	""")

	    with gr.Accordion("📈 Data Analysis Questions", open=False):
	        gr.Markdown("""
	For CSV/Data Files:
	- "Analyze data distribution and find statistical insights"
	- "Identify outliers and anomalies in the dataset"
	- "What correlations exist between different columns?"
	- "Generate a comprehensive data quality report"

	For JSON Files:
	- "Parse the structure and extract key information"
	- "Find patterns in nested data and relationships"
	- "Summarize the main data points and values"
	""")

	if __name__ == "__main__":
	    # Start the Gradio server only when this file is executed as a script.
	    print("Starting DataForge application...")
	    demo.launch()