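# DataForge / app.py
# Hosting-page metadata captured together with this file (not part of the
# program): ai-puppy, "Update app.py", commit c7ebfd3, 7.26 kB.
# The page links "raw" and "history blame" were navigation chrome.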
import os
import gradio as gr
import asyncio
import tempfile
from dotenv import find_dotenv, load_dotenv
from langchain.chat_models import init_chat_model
# Simplified imports - focusing on file analysis
# Import the CodeAct agent functionality
from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact
# Import the new guided analysis functionality
from graph import analyze_file_with_guidance_sync, guided_analysis_graph
# Load environment variables from the .env file located by find_dotenv().
# This runs at import time, before the model client below is created.
# NOTE(review): presumably the OpenAI credentials come from that .env file — confirm.
load_dotenv(find_dotenv())
# Initialize model for file analysis
# NOTE(review): codeact_model is not referenced again in the visible part of
# this file; confirm it is still needed before keeping this import-time setup.
codeact_model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")
# Store uploaded file path globally.
# Written by handle_file_upload() and read by analyze_file_with_question().
# None means "nothing uploaded". Because this is a single module-level name,
# every caller in this process sees the same value.
uploaded_file_path = None
# Chat functionality removed - focusing on file analysis
def handle_file_upload(file):
    """Record the uploaded file's path globally and return a status message.

    Args:
        file: Path of the uploaded file as a string (the upload widget is
            configured with ``type="filepath"``), or ``None`` / an empty
            value when the upload has been cleared.

    Returns:
        A human-readable status line: a success message containing the
        file's base name, a "no file" message, or an error message.

    Side effects:
        Sets the module-level ``uploaded_file_path`` to ``file`` on success
        and to ``None`` in every other case.
    """
    global uploaded_file_path

    # An empty path is as unusable as None: the analysis step rejects a
    # falsy path, so report it here instead of claiming success.
    if not file:
        uploaded_file_path = None
        return "❌ No file uploaded"

    # Only the base-name extraction can fail (e.g. a non-path value), so it
    # is the only statement guarded.
    try:
        filename = os.path.basename(file)
    except Exception as e:
        uploaded_file_path = None
        return f"❌ Upload error: {e}"

    uploaded_file_path = file
    return f"✅ File uploaded successfully: {filename}"
def analyze_file_with_question(user_question):
    """Run the guided analysis on the currently uploaded file.

    Args:
        user_question: The question to answer about the file. When it is
            empty or only whitespace, a generic comprehensive-analysis
            question is used instead.

    Returns:
        Whatever ``analyze_file_with_guidance_sync`` returns, or an error
        message string when no usable file is available or any exception is
        raised along the way.
    """
    fallback_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."
    try:
        # The path is only read here; it is set by the upload handler.
        target_path = uploaded_file_path
        file_ready = bool(target_path) and os.path.exists(target_path)
        if not file_ready:
            return "❌ No file uploaded or file not found. Please upload a file first."

        has_question = bool(user_question) and user_question.strip() != ""
        question = user_question if has_question else fallback_question

        # Use the new guided analysis approach
        return analyze_file_with_guidance_sync(target_path, question)
    except Exception as err:
        return f"❌ Error in guided analysis: {err}"
async def analyze_uploaded_file():
    """Legacy entry point, kept for backward compatibility.

    Delegates to ``analyze_file_with_question`` with a fixed generic
    question and returns its result. Nothing is awaited inside; the call
    runs synchronously within the coroutine.
    """
    legacy_question = "Provide a comprehensive analysis of this file."
    report = analyze_file_with_question(legacy_question)
    return report
def run_file_analysis():
    """Run the legacy async analysis from synchronous code and return its result."""
    pending_analysis = analyze_uploaded_file()
    return asyncio.run(pending_analysis)
# Create the Gradio interface.
#
# Layout: a two-column row (controls on the left, results on the right),
# followed by collapsible lists of example questions.
#
# The multi-line Markdown literals are kept flush-left on purpose so their
# content does not depend on how the component treats leading indentation.
# Blank lines separate headings from lists; without them Markdown folds a
# bold heading into the preceding bullet as a continuation line.
with gr.Blocks(title="DataForge - AI-Powered File Analysis") as demo:
    gr.Markdown("# 🔍 DataForge - AI-Powered File Analysis")
    gr.Markdown("""
Upload any file and ask specific questions for targeted AI analysis. Our guided approach:

1. 📋 **Examines** your file structure and patterns automatically
2. 🎯 **Generates** specific code guidance based on your question
3. 🚀 **Executes** enhanced analysis with improved accuracy

**Simply upload a file and ask any question you want!**
""")

    with gr.Row():
        with gr.Column(scale=1):
            # File Upload Section
            gr.Markdown("### 📤 File Upload")
            file_upload = gr.File(
                label="Upload File for Analysis",
                type="filepath",
            )
            upload_status = gr.Textbox(
                label="Upload Status",
                value="No file uploaded",
                interactive=False,
            )

            # Question Section
            gr.Markdown("### ❓ Ask Your Question")
            user_question = gr.Textbox(
                label="Your Question about the File",
                placeholder="What would you like to know about this file? (e.g., 'Find security threats', 'Show performance issues', 'What errors are present?')",
                lines=4,
                value="",
            )
            analyze_btn = gr.Button("🔍 Run Guided Analysis", variant="primary", size="lg")

            # Analysis Info
            gr.Markdown("### ℹ️ How It Works")
            gr.Markdown("""
**Guided Analysis Process:**

- 🎯 **Question-aware**: Code generation tailored to your specific question
- 📋 **Smart examination**: Automatically detects file structure and patterns
- 🚀 **Dynamic optimization**: Creates targeted analysis approach
- ✅ **Higher accuracy**: Prevents common code generation errors
- 🔧 **Quality control**: Built-in validation to avoid syntax issues
""")

        with gr.Column(scale=2):
            analysis_output = gr.Textbox(
                label="📊 Guided Analysis Results",
                lines=25,
                max_lines=35,
                placeholder="Upload a file, type your question, and click 'Run Guided Analysis' to see detailed results here...",
                interactive=False,
            )

    # Event handlers
    # Any change of the upload widget (new file or cleared) refreshes the
    # stored path and the status box.
    file_upload.change(
        fn=handle_file_upload,
        inputs=[file_upload],
        outputs=[upload_status],
    )
    # The button passes only the question text; the handler takes the file
    # path from the value stored by the upload handler.
    analyze_btn.click(
        fn=analyze_file_with_question,
        inputs=[user_question],
        outputs=[analysis_output],
    )

    gr.Markdown("---")
    gr.Markdown("## 💡 Example Questions by File Type")

    # NOTE(review): the icon in this title was mis-encoded beyond exact
    # recovery; a lock is assumed for the security section — confirm.
    with gr.Accordion("🔐 Security Analysis Questions", open=False):
        gr.Markdown("""
**For Log Files:**

- "Find any failed login attempts and suspicious IP addresses"
- "Identify potential security threats or anomalies"
- "Show me authentication errors and user access patterns"
- "Are there any brute force attacks or repeated failures?"

**For Access Logs:**

- "Detect unusual access patterns or potential intrusions"
- "Find requests with suspicious user agents or payloads"
- "Identify high-frequency requests from single IPs"
""")

    with gr.Accordion("⚡ Performance Analysis Questions", open=False):
        gr.Markdown("""
**For Application Logs:**

- "Which API endpoints are slowest and why?"
- "Find performance bottlenecks and response time issues"
- "Show me timeout errors and failed requests"
- "What are the peak usage times and load patterns?"

**For System Logs:**

- "Identify resource usage spikes and memory issues"
- "Find database query performance problems"
- "Show me error rates and system health indicators"
""")

    with gr.Accordion("📈 Data Analysis Questions", open=False):
        gr.Markdown("""
**For CSV/Data Files:**

- "Analyze data distribution and find statistical insights"
- "Identify outliers and anomalies in the dataset"
- "What correlations exist between different columns?"
- "Generate a comprehensive data quality report"

**For JSON Files:**

- "Parse the structure and extract key information"
- "Find patterns in nested data and relationships"
- "Summarize the main data points and values"
""")
# Script entry point: runs only when this file is executed directly, not
# when it is imported as a module.
if __name__ == "__main__":
    # Announce startup on stdout before handing control to the Gradio server.
    print("Starting DataForge application...")
    demo.launch()