Spaces:

Agents-MCP-Hackathon
/

DataForge

Runtime error

DataForge / app.py

ai-puppy

save

3774bab 20 days ago

12.6 kB

	import os
	import gradio as gr
	import asyncio
	import tempfile
	from dotenv import find_dotenv, load_dotenv
	from langchain.chat_models import init_chat_model
	from langchain.schema import HumanMessage, SystemMessage
	from langgraph.prebuilt import create_react_agent
	from langsmith import traceable

	# Import the CodeAct agent functionality
	from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact

	# Import the new guided analysis functionality
	from graph import analyze_file_with_guidance_sync, guided_analysis_graph

	# Populate os.environ from the .env file located by find_dotenv()
	load_dotenv(find_dotenv())

	# Chat model used by the basic chat agent; the key is read from OPENAI_API_KEY
	openai_model = init_chat_model(
	    model="gpt-4.1-nano-2025-04-14",
	    api_key=os.environ.get("OPENAI_API_KEY"),
	)

	# ReAct agent with no tools, wrapping the chat model above
	chat_agent = create_react_agent(openai_model, tools=[])

	# Model initialised for CodeAct file analysis
	# (not referenced again in the visible part of this file)
	codeact_model = init_chat_model(
	    "gpt-4.1-2025-04-14",
	    model_provider="openai",
	)

	# Path of the most recently uploaded file; module-global, read by the handlers below
	uploaded_file_path = None

	@traceable
	def respond(
	    message,
	    history: list[tuple[str, str]],
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	):
	    """
	    Stream the chat agent's reply to ``message``.

	    Args:
	        message: The user's newest message.
	        history: Earlier ``(user, assistant)`` turns of the conversation.
	        system_message: System prompt placed first in the message list.
	        max_tokens: Accepted because the UI passes it; not forwarded to the model.
	        temperature: Accepted because the UI passes it; not forwarded to the model.
	        top_p: Accepted because the UI passes it; not forwarded to the model.

	    Yields:
	        The assistant's reply text each time a longer version becomes
	        available, a fallback notice if no reply was produced, or an
	        ``Error: ...`` string if anything raised.
	    """
	    # Local import: the module header only brings in HumanMessage/SystemMessage.
	    from langchain.schema import AIMessage

	    try:
	        # Convert history to LangChain message format
	        messages = [SystemMessage(content=system_message)]

	        for user_msg, assistant_msg in history:
	            if user_msg:
	                messages.append(HumanMessage(content=user_msg))
	            if assistant_msg:
	                # Earlier replies are assistant turns, not extra system prompts.
	                messages.append(AIMessage(content=assistant_msg))

	        # Add current user message
	        messages.append(HumanMessage(content=message))

	        input_data = {"messages": messages}

	        response_text = ""
	        for chunk in chat_agent.stream(input_data, stream_mode="values"):
	            if "messages" not in chunk or not chunk["messages"]:
	                continue

	            latest_message = chunk["messages"][-1]
	            # stream_mode="values" also emits the input state, whose last
	            # message is the user's own prompt. Only assistant messages may
	            # be shown, otherwise the prompt is echoed and a shorter reply
	            # would lose the length comparison below.
	            if not isinstance(latest_message, AIMessage):
	                continue

	            current_content = latest_message.content
	            if current_content and len(current_content) > len(response_text):
	                response_text = current_content
	                yield response_text

	        # Ensure we return something even if streaming produced no reply
	        if not response_text:
	            yield "I'm sorry, I couldn't process your message. Please check your OpenAI API key."

	    except Exception as e:
	        # UI boundary: surface the failure in the chat window instead of crashing the handler.
	        yield f"Error: {str(e)}. Please make sure your OpenAI API key is set correctly."

	def handle_file_upload(file):
	    """
	    Record the uploaded file's path in the module-level ``uploaded_file_path``.

	    Args:
	        file: ``None`` when the upload was cleared; otherwise either a path
	            (``str`` / ``os.PathLike``) or an object exposing the path as
	            its ``.name`` attribute.

	    Returns:
	        A status string describing the outcome of the upload.
	    """
	    global uploaded_file_path

	    if file is None:
	        uploaded_file_path = None
	        return "❌ No file uploaded"

	    # The upload component is declared with type="filepath", so a plain path
	    # can arrive here; a plain str has no ``.name``. Paths are checked first
	    # because ``pathlib.Path.name`` is only the basename.
	    if isinstance(file, (str, os.PathLike)):
	        path = os.fspath(file)
	    else:
	        path = file.name

	    uploaded_file_path = path
	    return f"✅ File uploaded successfully: {os.path.basename(path)}"

	def analyze_file_with_question(user_question):
	    """
	    Run the guided analysis on the currently uploaded file.

	    An empty or whitespace-only question is replaced by a generic
	    comprehensive-analysis prompt. Returns whatever
	    ``analyze_file_with_guidance_sync`` returns, or a "❌ ..." message when
	    no uploaded file is available or the analysis raises.
	    """
	    global uploaded_file_path

	    target_path = uploaded_file_path
	    if not (target_path and os.path.exists(target_path)):
	        return "❌ No file uploaded or file not found. Please upload a file first."

	    question = user_question
	    if not question or question.strip() == "":
	        question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

	    try:
	        # Delegate to the guided analysis pipeline
	        return analyze_file_with_guidance_sync(target_path, question)
	    except Exception as exc:
	        return f"❌ Error in guided analysis: {str(exc)}"

	def get_question_suggestions(file_path):
	    """
	    Build the list of suggested questions for ``file_path``.

	    Returns an empty list when the path is falsy or does not exist.
	    Otherwise returns the extension-specific suggestions (matched
	    case-insensitively for .log/.txt, .csv and .json) followed by the
	    generic ones; unknown extensions get only the generic ones.
	    """
	    if not (file_path and os.path.exists(file_path)):
	        return []

	    generic = [
	        "What are the main patterns in this file?",
	        "Are there any security issues or anomalies?",
	        "Provide a statistical summary of the data",
	        "What insights can you extract from this file?",
	    ]
	    for_logs = [
	        "Find any security threats or failed login attempts",
	        "Identify performance bottlenecks and slow operations",
	        "What errors or warnings are present?",
	        "Show me time-based trends in the data",
	        "Are there any suspicious IP addresses or user activities?",
	    ]
	    for_csv = [
	        "Analyze the data distribution and statistics",
	        "Find correlations between columns",
	        "Identify outliers or anomalies in the data",
	        "What are the key insights from this dataset?",
	    ]
	    for_json = [
	        "Parse and analyze the JSON structure",
	        "What are the key data fields and their values?",
	        "Find any nested patterns or relationships",
	    ]

	    # Extension -> specific suggestions; anything else falls back to generic only
	    by_extension = {
	        ".log": for_logs,
	        ".txt": for_logs,
	        ".csv": for_csv,
	        ".json": for_json,
	    }

	    _, extension = os.path.splitext(file_path)
	    return by_extension.get(extension.lower(), []) + generic

	async def analyze_uploaded_file():
	    """Legacy entry point kept for backward compatibility.

	    Delegates to ``analyze_file_with_question`` with a fixed generic question.
	    """
	    default_question = "Provide a comprehensive analysis of this file."
	    return analyze_file_with_question(default_question)

	def run_file_analysis():
	    """Run ``analyze_uploaded_file`` to completion with ``asyncio.run`` and return its result."""
	    legacy_coroutine = analyze_uploaded_file()
	    return asyncio.run(legacy_coroutine)

	def update_question_suggestions():
	    """
	    Refresh the suggestions dropdown for the currently uploaded file.

	    Returns:
	        A Gradio update that replaces the dropdown's choices with the
	        suggestions for ``uploaded_file_path`` and selects the first one
	        (or an empty value when there are none).
	    """
	    global uploaded_file_path
	    suggestions = get_question_suggestions(uploaded_file_path)
	    # Component-level ``gr.Dropdown.update`` was removed in Gradio 4;
	    # ``gr.update`` is the supported way to patch a component's properties.
	    return gr.update(
	        choices=suggestions,
	        value=suggestions[0] if suggestions else "",
	    )

	# Create the Gradio interface
	# NOTE(review): indentation was lost in this copy of the file; the nesting of
	# the `with` blocks below has to be restored before the module can run.
	# The layout has three tabs: chat, guided file analysis, and example questions.
	with gr.Blocks(title="DataForge - AI Assistant with Advanced File Analysis") as demo:
	gr.Markdown("# 🔍 DataForge - AI Assistant with Advanced File Analysis")
	gr.Markdown("Upload files and ask specific questions for AI-powered guided analysis using LangGraph.")

	# Tab 1: chat UI driven by `respond`. The additional inputs line up with
	# respond's system_message, max_tokens, temperature and top_p parameters.
	with gr.Tab("💬 Chat Assistant"):
	chat_interface = gr.ChatInterface(
	respond,
	additional_inputs=[
	gr.Textbox(
	value="You are a helpful AI assistant. Be friendly, informative, and concise in your responses.",
	label="System message"
	),
	gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
	# NOTE(review): upper bound is 4.0 — confirm against the range the model accepts
	gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
	gr.Slider(
	minimum=0.1,
	maximum=1.0,
	value=0.95,
	step=0.05,
	label="Top-p (nucleus sampling)",
	),
	],
	title="Chat with AI Assistant",
	description="Ask questions or get help with any topic."
	)

	# Tab 2: upload a file, pick or type a question, run the guided analysis
	with gr.Tab("📁 Advanced File Analysis"):
	gr.Markdown("## 🚀 Guided File Analysis with LangGraph")
	gr.Markdown("""
	Upload files and ask specific questions for targeted AI analysis. Our guided approach:

	1. 📋 Examines your file structure and patterns
	2. 🎯 Generates specific code guidance based on your question
	3. 🚀 Executes enhanced analysis with improved accuracy
	""")

	with gr.Row():
	with gr.Column(scale=1):
	# File Upload Section
	gr.Markdown("### 📤 File Upload")
	# type="filepath": the change handler is given a path rather than file contents
	file_upload = gr.File(
	label="Upload File for Analysis",
	file_types=[".txt", ".log", ".csv", ".json", ".xml", ".py", ".js", ".html", ".md"],
	type="filepath"
	)
	# Read-only status line written by handle_file_upload
	upload_status = gr.Textbox(
	label="Upload Status",
	value="No file uploaded",
	interactive=False
	)

	# Question Section
	gr.Markdown("### ❓ Ask Your Question")
	# Starts empty; choices are filled in by update_question_suggestions after an upload
	question_suggestions = gr.Dropdown(
	label="Question Suggestions (select or type your own)",
	choices=[],
	allow_custom_value=True,
	value=""
	)

	# Free-text question; this is the value passed to analyze_file_with_question
	user_question = gr.Textbox(
	label="Your Question about the File",
	placeholder="What would you like to know about this file?",
	lines=3
	)

	analyze_btn = gr.Button("🔍 Run Guided Analysis", variant="primary", size="lg")

	# Analysis Info
	gr.Markdown("### ℹ️ Analysis Method")
	gr.Markdown("""
	Guided Analysis Features:
	- 🎯 Question-aware code generation
	- 📋 File structure examination
	- 🚀 Dynamic prompt optimization
	- ✅ Higher accuracy than generic analysis
	""")

	# Wider right-hand column holding the read-only analysis output
	with gr.Column(scale=2):
	analysis_output = gr.Textbox(
	label="📊 Guided Analysis Results",
	lines=25,
	max_lines=35,
	placeholder="Upload a file, ask a question, and click 'Run Guided Analysis' to see detailed results here...",
	interactive=False
	)

	# Event handlers
	# On upload change: first store the path and show the status, then refresh
	# the suggestions (the second step reads the path stored by the first).
	file_upload.change(
	fn=handle_file_upload,
	inputs=[file_upload],
	outputs=[upload_status]
	).then(
	fn=update_question_suggestions,
	inputs=[],
	outputs=[question_suggestions]
	)

	# Copy the selected (or custom) suggestion into the question textbox
	question_suggestions.change(
	fn=lambda x: x,
	inputs=[question_suggestions],
	outputs=[user_question]
	)

	# Run the guided analysis on click and show the result in the output box
	analyze_btn.click(
	fn=analyze_file_with_question,
	inputs=[user_question],
	outputs=[analysis_output]
	)

	# Tab 3: static example questions grouped into collapsed accordions
	with gr.Tab("📊 Analysis Examples"):
	gr.Markdown("## 💡 Example Questions by File Type")

	with gr.Accordion("🔐 Security Analysis Questions", open=False):
	gr.Markdown("""
	For Log Files:
	- "Find any failed login attempts and suspicious IP addresses"
	- "Identify potential security threats or anomalies"
	- "Show me authentication errors and user access patterns"
	- "Are there any brute force attacks or repeated failures?"

	For Access Logs:
	- "Detect unusual access patterns or potential intrusions"
	- "Find requests with suspicious user agents or payloads"
	- "Identify high-frequency requests from single IPs"
	""")

	with gr.Accordion("⚡ Performance Analysis Questions", open=False):
	gr.Markdown("""
	For Application Logs:
	- "Which API endpoints are slowest and why?"
	- "Find performance bottlenecks and response time issues"
	- "Show me timeout errors and failed requests"
	- "What are the peak usage times and load patterns?"

	For System Logs:
	- "Identify resource usage spikes and memory issues"
	- "Find database query performance problems"
	- "Show me error rates and system health indicators"
	""")

	with gr.Accordion("📈 Data Analysis Questions", open=False):
	gr.Markdown("""
	For CSV/Data Files:
	- "Analyze data distribution and find statistical insights"
	- "Identify outliers and anomalies in the dataset"
	- "What correlations exist between different columns?"
	- "Generate a comprehensive data quality report"

	For JSON Files:
	- "Parse the structure and extract key information"
	- "Find patterns in nested data and relationships"
	- "Summarize the main data points and values"
	""")

	# Launch the Gradio app only when this file is executed as a script, not on import
	if __name__ == "__main__":
	demo.launch()