# DataForge / app.py
import os
import asyncio

import gradio as gr
from dotenv import find_dotenv, load_dotenv
from langchain.chat_models import init_chat_model
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langgraph.prebuilt import create_react_agent
from langsmith import traceable

# Import the CodeAct agent functionality
from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact

# Load environment variables
load_dotenv(find_dotenv())

# Initialize the OpenAI chat model
openai_model = init_chat_model(
    model="gpt-4.1-nano-2025-04-14",
    api_key=os.getenv("OPENAI_API_KEY"),
)
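
# The app reads credentials from the environment; a minimal `.env` next to
# app.py might look like the sketch below (values are placeholders, and the
# LangSmith key is optional, only needed if @traceable tracing is wanted):
#
#   OPENAI_API_KEY=sk-...
#   LANGSMITH_API_KEY=...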

# Create the basic chat agent (a plain ReAct agent with no tools)
chat_agent = create_react_agent(openai_model, tools=[])

# Initialize a separate CodeAct model for file analysis
codeact_model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")

# Store the uploaded file path globally so the analysis handler can find it
uploaded_file_path = None


@traceable
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Main chat function that processes user input and streams the AI response.

    Note: max_tokens, temperature, and top_p come from the UI sliders but are
    not currently forwarded to the model.
    """
    try:
        # Convert history to LangChain message format
        messages = [SystemMessage(content=system_message)]

        # Add conversation history; assistant turns must be AIMessage, not
        # SystemMessage, or the model will read them as instructions
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append(HumanMessage(content=user_msg))
            if assistant_msg:
                messages.append(AIMessage(content=assistant_msg))

        # Add the current user message
        messages.append(HumanMessage(content=message))

        # Prepare input for the agent
        input_data = {"messages": messages}

        # Stream the response, yielding the growing assistant message
        response_text = ""
        for chunk in chat_agent.stream(input_data, stream_mode="values"):
            if "messages" in chunk and chunk["messages"]:
                latest_message = chunk["messages"][-1]
                if hasattr(latest_message, "content"):
                    current_content = latest_message.content
                    if current_content and len(current_content) > len(response_text):
                        response_text = current_content
                        yield response_text

        # Ensure we return something even if streaming produced no content
        if not response_text:
            yield "I'm sorry, I couldn't process your message. Please check your OpenAI API key."
    except Exception as e:
        yield f"Error: {str(e)}. Please make sure your OpenAI API key is set correctly."


def handle_file_upload(file):
    """Handle a file upload and store the path globally."""
    global uploaded_file_path
    if file is not None:
        # gr.File(type="filepath") passes a path-like string; older Gradio
        # versions pass a tempfile wrapper with a .name attribute instead
        uploaded_file_path = file if isinstance(file, str) else file.name
        return f"βœ… File uploaded successfully: {os.path.basename(uploaded_file_path)}"
    else:
        uploaded_file_path = None
        return "❌ No file uploaded"


async def analyze_uploaded_file():
    """Analyze the uploaded file using the CodeAct agent."""
    global uploaded_file_path
    if not uploaded_file_path or not os.path.exists(uploaded_file_path):
        return "❌ No file uploaded or file not found. Please upload a file first."

    try:
        # Create a sandbox with the uploaded file injected at a virtual path
        sandbox = FileInjectedPyodideSandbox(
            file_path=uploaded_file_path,
            virtual_path="/uploaded_file.log",
            sessions_dir=None,  # will create a temp directory automatically
            allow_net=True,
        )

        eval_fn = create_pyodide_eval_fn(sandbox)
        code_act = create_codeact(codeact_model, [], eval_fn)
        agent = code_act.compile()
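
        # FileInjectedPyodideSandbox comes from the local `agent` module; based
        # on the prompts below, it is expected to expose the injected file at
        # /uploaded_file.log and to provide `file_content`, `log_lines`, and
        # `total_lines` inside the Pyodide runtime (assumed behavior).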

        # Create an analysis query based on the file type
        file_ext = os.path.splitext(uploaded_file_path)[1].lower()

        if file_ext in ['.log', '.txt']:
            query = """
            Analyze this uploaded file and provide:

            1. **Content Overview** - What type of data/logs this file contains
            2. **Key Patterns** - Important patterns, trends, or anomalies found
            3. **Statistical Summary** - Basic statistics (line count, data distribution, etc.)
            4. **Insights & Findings** - Key takeaways from the analysis
            5. **Recommendations** - Suggested actions based on the analysis

            DATA SOURCES AVAILABLE:
            - `file_content`: Raw file content as a string
            - `log_lines`: List of individual lines
            - `total_lines`: Number of lines in the file
            - File path: `/uploaded_file.log` (can be read with open('/uploaded_file.log', 'r'))

            Generate Python code to analyze the file and provide comprehensive insights.
            """
        else:
            query = f"""
            Analyze this uploaded {file_ext} file and provide:

            1. **File Type Analysis** - What type of file this is and its structure
            2. **Content Summary** - Overview of the file contents
            3. **Key Information** - Important data points or patterns found
            4. **Statistical Analysis** - Basic statistics and data distribution
            5. **Recommendations** - Suggested next steps or insights

            DATA SOURCES AVAILABLE:
            - `file_content`: Raw file content as a string
            - `log_lines`: List of individual lines
            - `total_lines`: Number of lines in the file
            - File path: `/uploaded_file.log`

            Generate Python code to analyze this file and provide comprehensive insights.
            """

        # Run the analysis, collecting streamed tokens and the final state
        result_parts = []
        async for typ, chunk in agent.astream(
            {"messages": query},
            stream_mode=["values", "messages"],
        ):
            if typ == "messages":
                # message-mode chunks arrive as (message, metadata) tuples
                result_parts.append(chunk[0].content)
            elif typ == "values":
                if chunk and "messages" in chunk:
                    final_message = chunk["messages"][-1]
                    if hasattr(final_message, "content"):
                        result_parts.append(f"\n\n**Final Analysis:**\n{final_message.content}")

        return "\n".join(result_parts) if result_parts else "Analysis completed but no output generated."
    except Exception as e:
        return f"❌ Error analyzing file: {str(e)}"


def run_file_analysis():
    """Wrapper to run the async file analysis in a sync context."""
    return asyncio.run(analyze_uploaded_file())
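
# Note: asyncio.run() is safe here because Gradio executes sync handlers in a
# worker thread with no running event loop; if this were ever called from async
# code, prefer `await analyze_uploaded_file()` directly.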


# Create the Gradio interface
with gr.Blocks(title="DataForge - AI Assistant with File Analysis") as demo:
    gr.Markdown("# πŸ” DataForge - AI Assistant with File Analysis")
    gr.Markdown("Upload files for analysis or chat with the AI assistant.")

    with gr.Tab("πŸ’¬ Chat Assistant"):
        chat_interface = gr.ChatInterface(
            respond,
            additional_inputs=[
                gr.Textbox(
                    value="You are a helpful AI assistant. Be friendly, informative, and concise in your responses.",
                    label="System message",
                ),
                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p (nucleus sampling)",
                ),
            ],
            title="Chat with AI Assistant",
            description="Ask questions or get help with any topic.",
        )

    with gr.Tab("πŸ“ File Analysis"):
        gr.Markdown("## Upload and Analyze Files")
        gr.Markdown("Upload log files, text files, or other data files for comprehensive AI-powered analysis.")

        with gr.Row():
            with gr.Column(scale=1):
                file_upload = gr.File(
                    label="Upload File for Analysis",
                    file_types=[".txt", ".log", ".csv", ".json", ".xml", ".py", ".js", ".html", ".md"],
                    type="filepath",
                )
                upload_status = gr.Textbox(
                    label="Upload Status",
                    value="No file uploaded",
                    interactive=False,
                )
                analyze_btn = gr.Button("πŸ” Analyze File", variant="primary", size="lg")
            with gr.Column(scale=2):
                analysis_output = gr.Textbox(
                    label="Analysis Results",
                    lines=20,
                    max_lines=30,
                    placeholder="Upload a file and click 'Analyze File' to see detailed analysis results here...",
                    interactive=False,
                )

        # Event handlers; the analyze button takes no inputs because the
        # uploaded path is stored in the module-level global
        file_upload.change(
            fn=handle_file_upload,
            inputs=[file_upload],
            outputs=[upload_status],
        )
        analyze_btn.click(
            fn=run_file_analysis,
            inputs=[],
            outputs=[analysis_output],
        )


if __name__ == "__main__":
    demo.launch()
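    # For container or Space deployment you may want to bind to all interfaces;
    # a hedged variant (server_name/server_port are standard launch kwargs, the
    # values here are assumptions):
    #
    #   demo.launch(server_name="0.0.0.0", server_port=7860)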