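# NOTE(review): the lines below look like file-viewer page chrome that was
# captured together with the source; kept as comments so the module parses.
# DataForge / app.py
# ai-puppy
# save
# 3774bab
# raw
# history blame
# 12.6 kB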
import asyncio
import os
import tempfile

import gradio as gr
from dotenv import find_dotenv, load_dotenv
from langchain.chat_models import init_chat_model
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langgraph.prebuilt import create_react_agent
from langsmith import traceable

# Import the CodeAct agent functionality
from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact
# Import the new guided analysis functionality
from graph import analyze_file_with_guidance_sync, guided_analysis_graph
# Load environment variables from the .env file located by find_dotenv(), so
# that OPENAI_API_KEY can be read with os.getenv below.
load_dotenv(find_dotenv())
# Initialize OpenAI model used by the chat tab
openai_model = init_chat_model(
    model="gpt-4.1-nano-2025-04-14",
    api_key=os.getenv("OPENAI_API_KEY"),
)
# Create the basic chat agent (a ReAct agent with an empty tool list)
chat_agent = create_react_agent(openai_model, tools=[])
# Initialize CodeAct model for file analysis
# NOTE(review): not referenced anywhere else in the visible part of this file.
codeact_model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")
# Store uploaded file path globally. Written by handle_file_upload and read by
# analyze_file_with_question / update_question_suggestions; being module-level,
# the value is shared by every caller of those functions in this process.
uploaded_file_path = None
@traceable
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """
    Main chat function that processes user input and streams the AI response.

    Args:
        message: The user's new message.
        history: Earlier turns as ``(user_message, assistant_message)`` pairs.
        system_message: Text placed first in the conversation as the system
            prompt.
        max_tokens: Accepted for interface compatibility with the chat UI;
            currently not forwarded to the model.
        temperature: Accepted for interface compatibility; currently unused.
        top_p: Accepted for interface compatibility; currently unused.

    Yields:
        The assistant's reply text. If the agent produces no reply, or an
        exception is raised, a single human-readable error string is yielded
        instead (exceptions are not propagated to the caller).
    """
    try:
        # Convert history to LangChain message format
        messages = [SystemMessage(content=system_message)]
        # Add conversation history. Earlier assistant turns must be replayed
        # as AI messages; replaying them as system messages would present the
        # model's own past answers to it as instructions.
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append(HumanMessage(content=user_msg))
            if assistant_msg:
                messages.append(AIMessage(content=assistant_msg))
        # Add current user message
        messages.append(HumanMessage(content=message))
        # Prepare input for the agent
        input_data = {"messages": messages}
        # Stream the response
        response_text = ""
        for chunk in chat_agent.stream(input_data, stream_mode="values"):
            if "messages" not in chunk or not chunk["messages"]:
                continue
            latest_message = chunk["messages"][-1]
            # "values" mode emits the whole state after each step, starting
            # with the input state whose last entry is the user's own message.
            # Only assistant messages are valid replies; anything else would
            # echo the user's text back as the answer.
            if not isinstance(latest_message, AIMessage):
                continue
            current_content = latest_message.content
            if current_content and current_content != response_text:
                response_text = current_content
                yield response_text
        # Ensure we return something even if streaming doesn't work
        if not response_text:
            yield "I'm sorry, I couldn't process your message. Please check your OpenAI API key."
    except Exception as e:
        # UI boundary: surface the failure in the chat window instead of
        # crashing the request.
        yield f"Error: {str(e)}. Please make sure your OpenAI API key is set correctly."
def handle_file_upload(file):
    """
    Record the uploaded file's path in the module-level ``uploaded_file_path``.

    Args:
        file: ``None`` when the upload was cleared, a filesystem path
            (``str`` / ``os.PathLike``), or an object exposing the path as
            ``.name`` (for example a temporary-file wrapper).

    Returns:
        A status string for the "Upload Status" textbox.
    """
    global uploaded_file_path
    if file is None:
        path = None
    elif isinstance(file, (str, os.PathLike)):
        # The upload widget is configured with type="filepath", so a plain
        # path string must be accepted as-is.
        path = os.fspath(file)
    else:
        path = getattr(file, "name", None)

    if not path:
        uploaded_file_path = None
        return "❌ No file uploaded"

    uploaded_file_path = path
    return f"✅ File uploaded successfully: {os.path.basename(path)}"
def analyze_file_with_question(user_question):
    """
    Analyze the uploaded file using the guided approach with a user question.

    Args:
        user_question: The question to ask about the file. ``None``, an empty
            string or a whitespace-only string is replaced by a default
            "comprehensive analysis" question.

    Returns:
        The value returned by ``analyze_file_with_guidance_sync`` on success,
        or an error string when no file is available or the analysis raises.
    """
    # Reads the module-level path recorded by handle_file_upload.
    if not uploaded_file_path or not os.path.exists(uploaded_file_path):
        return "❌ No file uploaded or file not found. Please upload a file first."
    if not user_question or not user_question.strip():
        user_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."
    try:
        # Use the new guided analysis approach
        return analyze_file_with_guidance_sync(uploaded_file_path, user_question)
    except Exception as e:
        # UI boundary: report the failure in the results box rather than
        # letting the exception escape the event handler.
        return f"❌ Error in guided analysis: {str(e)}"
def get_question_suggestions(file_path):
    """
    Build the list of suggested questions for a file, keyed on its extension.

    Returns an empty list when ``file_path`` is falsy or does not exist on
    disk. Otherwise returns the extension-specific questions (for ``.log`` /
    ``.txt``, ``.csv`` and ``.json``; matching is case-insensitive) followed
    by the generic questions; any other extension gets only the generic ones.
    """
    # Nothing to suggest without a real file on disk.
    if not (file_path and os.path.exists(file_path)):
        return []

    _, extension = os.path.splitext(file_path)
    extension = extension.lower()

    # Questions that apply to every file type; always appended last.
    generic_questions = [
        "What are the main patterns in this file?",
        "Are there any security issues or anomalies?",
        "Provide a statistical summary of the data",
        "What insights can you extract from this file?",
    ]
    text_log_questions = [
        "Find any security threats or failed login attempts",
        "Identify performance bottlenecks and slow operations",
        "What errors or warnings are present?",
        "Show me time-based trends in the data",
        "Are there any suspicious IP addresses or user activities?",
    ]
    csv_questions = [
        "Analyze the data distribution and statistics",
        "Find correlations between columns",
        "Identify outliers or anomalies in the data",
        "What are the key insights from this dataset?",
    ]
    json_questions = [
        "Parse and analyze the JSON structure",
        "What are the key data fields and their values?",
        "Find any nested patterns or relationships",
    ]

    questions_by_extension = {
        '.log': text_log_questions,
        '.txt': text_log_questions,
        '.csv': csv_questions,
        '.json': json_questions,
    }
    specific_questions = questions_by_extension.get(extension, [])
    return specific_questions + generic_questions
async def analyze_uploaded_file():
    """
    Legacy coroutine kept for backward compatibility.

    Delegates to ``analyze_file_with_question`` with a fixed generic question
    and returns its result unchanged.
    """
    default_question = "Provide a comprehensive analysis of this file."
    report = analyze_file_with_question(default_question)
    return report
def run_file_analysis():
    """
    Synchronous wrapper around the ``analyze_uploaded_file`` coroutine.

    Drives the coroutine to completion with ``asyncio.run`` and returns
    whatever it returns.
    """
    pending_analysis = analyze_uploaded_file()
    return asyncio.run(pending_analysis)
def update_question_suggestions():
    """
    Refresh the question-suggestion dropdown for the currently uploaded file.

    Returns:
        A Gradio update that replaces the dropdown's choices with the
        suggestions for ``uploaded_file_path`` and selects the first one
        (or an empty value when there are no suggestions).
    """
    suggestions = get_question_suggestions(uploaded_file_path)
    # gr.update(...) is used instead of gr.Dropdown.update(...): the
    # per-component ``update`` classmethods were removed in Gradio 4, while
    # the generic helper is available in both 3.x and later releases.
    return gr.update(choices=suggestions, value=suggestions[0] if suggestions else "")
# Create the Gradio interface: a chat tab, a guided file-analysis tab and a
# static tab with example questions.
# NOTE(review): emoji in the labels below were reconstructed from mis-encoded
# text; the magnifier / folder / lock glyphs were chosen from context because
# the original bytes were ambiguous - confirm against the intended design.
# Multi-line markdown literals are kept flush-left so their runtime text is
# unchanged.
with gr.Blocks(title="DataForge - AI Assistant with Advanced File Analysis") as demo:
    gr.Markdown("# 🔍 DataForge - AI Assistant with Advanced File Analysis")
    gr.Markdown("Upload files and ask specific questions for AI-powered guided analysis using LangGraph.")
    with gr.Tab("💬 Chat Assistant"):
        # The additional inputs are handed to respond() after (message,
        # history), in the order listed here: system message, max tokens,
        # temperature, top-p.
        chat_interface = gr.ChatInterface(
            respond,
            additional_inputs=[
                gr.Textbox(
                    value="You are a helpful AI assistant. Be friendly, informative, and concise in your responses.",
                    label="System message"
                ),
                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p (nucleus sampling)",
                ),
            ],
            title="Chat with AI Assistant",
            description="Ask questions or get help with any topic."
        )
    with gr.Tab("📁 Advanced File Analysis"):
        gr.Markdown("## 🚀 Guided File Analysis with LangGraph")
        gr.Markdown("""
Upload files and ask specific questions for targeted AI analysis. Our guided approach:
1. 📋 **Examines** your file structure and patterns
2. 🎯 **Generates** specific code guidance based on your question
3. 🚀 **Executes** enhanced analysis with improved accuracy
""")
        with gr.Row():
            with gr.Column(scale=1):
                # File Upload Section
                gr.Markdown("### 📤 File Upload")
                file_upload = gr.File(
                    label="Upload File for Analysis",
                    file_types=[".txt", ".log", ".csv", ".json", ".xml", ".py", ".js", ".html", ".md"],
                    type="filepath"
                )
                upload_status = gr.Textbox(
                    label="Upload Status",
                    value="No file uploaded",
                    interactive=False
                )
                # Question Section
                gr.Markdown("### ❓ Ask Your Question")
                question_suggestions = gr.Dropdown(
                    label="Question Suggestions (select or type your own)",
                    choices=[],
                    allow_custom_value=True,
                    value=""
                )
                user_question = gr.Textbox(
                    label="Your Question about the File",
                    placeholder="What would you like to know about this file?",
                    lines=3
                )
                analyze_btn = gr.Button("🔍 Run Guided Analysis", variant="primary", size="lg")
                # Analysis Info
                gr.Markdown("### ℹ️ Analysis Method")
                gr.Markdown("""
**Guided Analysis Features:**
- 🎯 Question-aware code generation
- 📋 File structure examination
- 🚀 Dynamic prompt optimization
- ✅ Higher accuracy than generic analysis
""")
            with gr.Column(scale=2):
                analysis_output = gr.Textbox(
                    label="📊 Guided Analysis Results",
                    lines=25,
                    max_lines=35,
                    placeholder="Upload a file, ask a question, and click 'Run Guided Analysis' to see detailed results here...",
                    interactive=False
                )
        # Event handlers
        # Uploading (or clearing) a file first records its path and updates
        # the status box, then refreshes the suggestions for that file.
        file_upload.change(
            fn=handle_file_upload,
            inputs=[file_upload],
            outputs=[upload_status]
        ).then(
            fn=update_question_suggestions,
            inputs=[],
            outputs=[question_suggestions]
        )
        # Picking a suggestion copies it into the free-text question box.
        question_suggestions.change(
            fn=lambda x: x,
            inputs=[question_suggestions],
            outputs=[user_question]
        )
        analyze_btn.click(
            fn=analyze_file_with_question,
            inputs=[user_question],
            outputs=[analysis_output]
        )
    with gr.Tab("📊 Analysis Examples"):
        gr.Markdown("## 💡 Example Questions by File Type")
        with gr.Accordion("🔐 Security Analysis Questions", open=False):
            gr.Markdown("""
**For Log Files:**
- "Find any failed login attempts and suspicious IP addresses"
- "Identify potential security threats or anomalies"
- "Show me authentication errors and user access patterns"
- "Are there any brute force attacks or repeated failures?"
**For Access Logs:**
- "Detect unusual access patterns or potential intrusions"
- "Find requests with suspicious user agents or payloads"
- "Identify high-frequency requests from single IPs"
""")
        with gr.Accordion("⚡ Performance Analysis Questions", open=False):
            gr.Markdown("""
**For Application Logs:**
- "Which API endpoints are slowest and why?"
- "Find performance bottlenecks and response time issues"
- "Show me timeout errors and failed requests"
- "What are the peak usage times and load patterns?"
**For System Logs:**
- "Identify resource usage spikes and memory issues"
- "Find database query performance problems"
- "Show me error rates and system health indicators"
""")
        with gr.Accordion("📈 Data Analysis Questions", open=False):
            gr.Markdown("""
**For CSV/Data Files:**
- "Analyze data distribution and find statistical insights"
- "Identify outliers and anomalies in the dataset"
- "What correlations exist between different columns?"
- "Generate a comprehensive data quality report"
**For JSON Files:**
- "Parse the structure and extract key information"
- "Find patterns in nested data and relationships"
- "Summarize the main data points and values"
""")
# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()