Spaces:

Agents-MCP-Hackathon
/

DataForge

Running

DataForge / app.py

ai-puppy

Update app.py

c7ebfd3 6 days ago

7.26 kB

	import os
	import gradio as gr
	import asyncio
	import tempfile
	from dotenv import find_dotenv, load_dotenv
	from langchain.chat_models import init_chat_model
	# Simplified imports - focusing on file analysis

	# Import the CodeAct agent functionality
	from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact

	# Import the new guided analysis functionality
	from graph import analyze_file_with_guidance_sync, guided_analysis_graph

	# Load environment variables
	# (find_dotenv() supplies the .env path; this runs before the model below is
	# created — presumably so provider credentials from .env are available; confirm.)
	load_dotenv(find_dotenv())

	# Initialize model for file analysis
	# NOTE(review): codeact_model is not referenced again in the visible part of
	# this file — confirm whether another module relies on it before removing.
	codeact_model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")

	# Store uploaded file path globally
	# Written by handle_file_upload() and read by analyze_file_with_question();
	# None means no file is currently available for analysis.
	uploaded_file_path = None

	# Chat functionality removed - focusing on file analysis

	def handle_file_upload(file):
	    """Record the uploaded file's path in the module-level ``uploaded_file_path``.

	    Args:
	        file: Path of the uploaded file (``str`` or ``os.PathLike``) as
	            delivered by a ``gr.File(type="filepath")`` component, or
	            ``None``/empty when there is no file.

	    Returns:
	        A status message for the UI: success with the file's base name,
	        "no file uploaded", or a description of the upload error.

	    NOTE(review): the path is stored in a process-wide global, so it is
	    shared by every caller of this module — confirm that is acceptable if
	    the app serves more than one user at a time.
	    """
	    global uploaded_file_path

	    # An empty path is as unusable as a missing one: report both as
	    # "no file" rather than announcing a successful upload with no name.
	    if not file:
	        uploaded_file_path = None
	        return "❌ No file uploaded"

	    try:
	        # Normalise path-like objects and reject values that are not paths.
	        path = os.fspath(file)
	    except TypeError as e:
	        uploaded_file_path = None
	        return f"❌ Upload error: {str(e)}"

	    uploaded_file_path = path
	    return f"✅ File uploaded successfully: {os.path.basename(path)}"

	def analyze_file_with_question(user_question):
	    """Answer ``user_question`` about the previously uploaded file.

	    The file is taken from the module-level ``uploaded_file_path``. A missing
	    or blank question is replaced by a default comprehensive-analysis prompt.

	    Returns:
	        Whatever ``analyze_file_with_guidance_sync`` returns, or a "❌ ..."
	        message when no uploaded file is available or the analysis raises.
	    """
	    fallback_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

	    try:
	        # Nothing to analyse unless a path is recorded and still exists on disk.
	        have_file = uploaded_file_path and os.path.exists(uploaded_file_path)
	        if not have_file:
	            return "❌ No file uploaded or file not found. Please upload a file first."

	        question = user_question
	        if not question or question.strip() == "":
	            question = fallback_question

	        # Use the new guided analysis approach
	        return analyze_file_with_guidance_sync(uploaded_file_path, question)
	    except Exception as err:
	        # Any failure is reported to the UI as text instead of propagating.
	        return f"❌ Error in guided analysis: {str(err)}"

	async def analyze_uploaded_file():
	    """Legacy entry point, kept for backward compatibility.

	    Delegates to ``analyze_file_with_question`` with a fixed generic prompt.
	    The call is made directly; nothing is awaited inside this coroutine.
	    """
	    legacy_prompt = "Provide a comprehensive analysis of this file."
	    return analyze_file_with_question(legacy_prompt)

	def run_file_analysis():
	    """Run the legacy async file analysis to completion from synchronous code."""
	    pending_analysis = analyze_uploaded_file()
	    return asyncio.run(pending_analysis)

	# Markdown texts shown in the interface, kept apart from the layout code.
	_INTRO_MD = """
	Upload any file and ask specific questions for targeted AI analysis. Our guided approach:

	1. 📋 Examines your file structure and patterns automatically
	2. 🎯 Generates specific code guidance based on your question
	3. 🚀 Executes enhanced analysis with improved accuracy

	Simply upload a file and ask any question you want!
	"""

	_HOW_IT_WORKS_MD = """
	Guided Analysis Process:
	- 🎯 Question-aware: Code generation tailored to your specific question
	- 📋 Smart examination: Automatically detects file structure and patterns
	- 🚀 Dynamic optimization: Creates targeted analysis approach
	- ✅ Higher accuracy: Prevents common code generation errors
	- 🔧 Quality control: Built-in validation to avoid syntax issues
	"""

	_SECURITY_EXAMPLES_MD = """
	For Log Files:
	- "Find any failed login attempts and suspicious IP addresses"
	- "Identify potential security threats or anomalies"
	- "Show me authentication errors and user access patterns"
	- "Are there any brute force attacks or repeated failures?"

	For Access Logs:
	- "Detect unusual access patterns or potential intrusions"
	- "Find requests with suspicious user agents or payloads"
	- "Identify high-frequency requests from single IPs"
	"""

	_PERFORMANCE_EXAMPLES_MD = """
	For Application Logs:
	- "Which API endpoints are slowest and why?"
	- "Find performance bottlenecks and response time issues"
	- "Show me timeout errors and failed requests"
	- "What are the peak usage times and load patterns?"

	For System Logs:
	- "Identify resource usage spikes and memory issues"
	- "Find database query performance problems"
	- "Show me error rates and system health indicators"
	"""

	_DATA_EXAMPLES_MD = """
	For CSV/Data Files:
	- "Analyze data distribution and find statistical insights"
	- "Identify outliers and anomalies in the dataset"
	- "What correlations exist between different columns?"
	- "Generate a comprehensive data quality report"

	For JSON Files:
	- "Parse the structure and extract key information"
	- "Find patterns in nested data and relationships"
	- "Summarize the main data points and values"
	"""

	# Build the Gradio interface.
	# NOTE(review): the nesting below was reconstructed from a source whose
	# indentation had been flattened — confirm the examples section belongs
	# directly under the Blocks root rather than inside the Row.
	with gr.Blocks(title="DataForge - AI-Powered File Analysis") as demo:
	    gr.Markdown("# 🔍 DataForge - AI-Powered File Analysis")
	    gr.Markdown(_INTRO_MD)

	    with gr.Row():
	        # Left column: upload, question entry and process description.
	        with gr.Column(scale=1):
	            gr.Markdown("### 📤 File Upload")
	            file_upload = gr.File(label="Upload File for Analysis", type="filepath")
	            upload_status = gr.Textbox(
	                label="Upload Status",
	                value="No file uploaded",
	                interactive=False,
	            )

	            gr.Markdown("### ❓ Ask Your Question")
	            user_question = gr.Textbox(
	                label="Your Question about the File",
	                placeholder="What would you like to know about this file? (e.g., 'Find security threats', 'Show performance issues', 'What errors are present?')",
	                lines=4,
	                value="",
	            )

	            analyze_btn = gr.Button("🔍 Run Guided Analysis", variant="primary", size="lg")

	            gr.Markdown("### ℹ️ How It Works")
	            gr.Markdown(_HOW_IT_WORKS_MD)

	        # Right column: read-only analysis output.
	        with gr.Column(scale=2):
	            analysis_output = gr.Textbox(
	                label="📊 Guided Analysis Results",
	                lines=25,
	                max_lines=35,
	                placeholder="Upload a file, type your question, and click 'Run Guided Analysis' to see detailed results here...",
	                interactive=False,
	            )

	    # Wire components to their handlers: the upload status follows the file
	    # widget, and the button sends the question text to the analysis function.
	    file_upload.change(fn=handle_file_upload, inputs=[file_upload], outputs=[upload_status])
	    analyze_btn.click(fn=analyze_file_with_question, inputs=[user_question], outputs=[analysis_output])

	    gr.Markdown("---")
	    gr.Markdown("## 💡 Example Questions by File Type")

	    with gr.Accordion("🔐 Security Analysis Questions", open=False):
	        gr.Markdown(_SECURITY_EXAMPLES_MD)

	    with gr.Accordion("⚡ Performance Analysis Questions", open=False):
	        gr.Markdown(_PERFORMANCE_EXAMPLES_MD)

	    with gr.Accordion("📈 Data Analysis Questions", open=False):
	        gr.Markdown(_DATA_EXAMPLES_MD)

	if __name__ == "__main__":
	    # Script entry point: announce startup, then serve the Blocks app.
	    print("Starting DataForge application...")
	    demo.launch()