# Spaces:
#
# Agents-MCP-Hackathon
# /
#
# DataForge
#
# Runtime error
#
# File size: 12,559 Bytes

import os
import gradio as gr
import asyncio
import tempfile
from dotenv import find_dotenv, load_dotenv
from langchain.chat_models import init_chat_model
from langchain.schema import HumanMessage, SystemMessage
from langgraph.prebuilt import create_react_agent
from langsmith import traceable

# Import the CodeAct agent functionality
from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact

# Import the new guided analysis functionality
from graph import analyze_file_with_guidance_sync, guided_analysis_graph

# Load environment variables from the .env file located by find_dotenv().
# This has to run before the os.getenv("OPENAI_API_KEY") lookup below.
load_dotenv(find_dotenv())

# Initialize OpenAI model backing the chat agent; the API key is read from the
# OPENAI_API_KEY environment variable (None is passed if it is not set).
openai_model = init_chat_model(
    model="gpt-4.1-nano-2025-04-14",
    api_key=os.getenv("OPENAI_API_KEY"),
)

# Create the basic chat agent. No tools are attached (tools=[]); respond()
# streams its replies from this agent.
chat_agent = create_react_agent(openai_model, tools=[])

# Initialize CodeAct model for file analysis
# NOTE(review): nothing else in the visible part of this file references
# codeact_model — presumably intended for the CodeAct helpers imported from
# `agent`; confirm before relying on or removing it.
codeact_model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")

# Store uploaded file path globally. Written by handle_file_upload(); read by
# analyze_file_with_question() and update_question_suggestions().
# None means no file is currently uploaded.
uploaded_file_path = None

@traceable
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """
    Stream the chat agent's reply to ``message`` for the Gradio chat UI.

    Args:
        message: The user's newest chat message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
        system_message: System prompt placed first in the conversation.
        max_tokens: Accepted because the UI passes it; not forwarded to the
            model by this function.
        temperature: Accepted because the UI passes it; not forwarded to the
            model by this function.
        top_p: Accepted because the UI passes it; not forwarded to the model
            by this function.

    Yields:
        The assistant's reply each time the agent produces new assistant
        content. If the agent produced no assistant content, a single
        fallback notice is yielded; if anything raises, a single error
        string describing the exception is yielded instead.
    """
    # Function-scope import keeps this block self-contained; the module-level
    # import from langchain.schema only brings in HumanMessage/SystemMessage.
    from langchain.schema import AIMessage

    try:
        # Convert history to LangChain message format
        messages = [SystemMessage(content=system_message)]

        # Add conversation history
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append(HumanMessage(content=user_msg))
            if assistant_msg:
                # Earlier replies are assistant turns. Replaying them as
                # system messages would turn them into instructions.
                messages.append(AIMessage(content=assistant_msg))

        # Add current user message
        messages.append(HumanMessage(content=message))

        # Prepare input for the agent
        input_data = {"messages": messages}

        # Stream the response
        response_text = ""
        for chunk in chat_agent.stream(input_data, stream_mode="values"):
            if "messages" not in chunk or not chunk["messages"]:
                continue
            latest_message = chunk["messages"][-1]
            # stream_mode="values" emits the whole state after every step,
            # starting with the input itself, whose last message is the
            # user's prompt. Only assistant messages are replies; skipping
            # the rest prevents echoing the prompt back to the user.
            if not isinstance(latest_message, AIMessage):
                continue
            current_content = latest_message.content
            if current_content and current_content != response_text:
                response_text = current_content
                yield response_text

        # Ensure we return something even if streaming doesn't work
        if not response_text:
            yield "I'm sorry, I couldn't process your message. Please check your OpenAI API key."

    except Exception as e:
        # UI boundary: surface the failure in the chat window instead of
        # crashing the Gradio event handler.
        yield f"Error: {str(e)}. Please make sure your OpenAI API key is set correctly."

def handle_file_upload(file):
    """Record the uploaded file's path globally and return a status message.

    Args:
        file: The value delivered by the upload widget. This may be a path
            (``str`` or ``os.PathLike``), an object exposing the path as
            ``.name``, or ``None`` when the upload was cleared.

    Returns:
        A success message containing the file's base name, or
        "❌ No file uploaded" when no usable path was supplied.
    """
    global uploaded_file_path

    if file is None:
        path = None
    elif isinstance(file, (str, os.PathLike)):
        # The upload widget is declared with type="filepath", which hands the
        # handler a plain path; a str has no ``.name`` attribute.
        path = os.fspath(file)
    else:
        # Older file wrappers expose the temp-file path as ``.name``.
        path = getattr(file, "name", None)

    if not path:
        uploaded_file_path = None
        return "❌ No file uploaded"

    uploaded_file_path = path
    return f"✅ File uploaded successfully: {os.path.basename(path)}"

def analyze_file_with_question(user_question):
    """
    Run the guided analysis on the currently uploaded file.

    Args:
        user_question: The question to steer the analysis. When it is empty
            or only whitespace, a generic comprehensive-analysis question is
            used instead.

    Returns:
        The result of ``analyze_file_with_guidance_sync``; or an error string
        when no uploaded file is available or the analysis raises.
    """
    global uploaded_file_path

    target_path = uploaded_file_path
    file_is_available = bool(target_path) and os.path.exists(target_path)
    if not file_is_available:
        return "❌ No file uploaded or file not found. Please upload a file first."

    # Substitute the default question for a missing or blank one
    question = user_question
    if not question or not question.strip():
        question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

    try:
        # Delegate to the guided analysis pipeline
        return analyze_file_with_guidance_sync(target_path, question)
    except Exception as e:
        return f"❌ Error in guided analysis: {str(e)}"

def get_question_suggestions(file_path):
    """
    Suggest analysis questions for a file, chosen by its extension.

    Args:
        file_path: Path of the file to suggest questions for.

    Returns:
        An empty list when ``file_path`` is falsy or does not exist.
        Otherwise the extension-specific questions (for .log/.txt, .csv and
        .json, matched case-insensitively) followed by the generic ones; for
        any other extension only the generic questions.
    """
    if not (file_path and os.path.exists(file_path)):
        return []

    generic_questions = [
        "What are the main patterns in this file?",
        "Are there any security issues or anomalies?",
        "Provide a statistical summary of the data",
        "What insights can you extract from this file?",
    ]

    log_questions = [
        "Find any security threats or failed login attempts",
        "Identify performance bottlenecks and slow operations",
        "What errors or warnings are present?",
        "Show me time-based trends in the data",
        "Are there any suspicious IP addresses or user activities?",
    ]

    # Extension -> questions that go in front of the generic ones
    questions_by_extension = {
        '.log': log_questions,
        '.txt': log_questions,
        '.csv': [
            "Analyze the data distribution and statistics",
            "Find correlations between columns",
            "Identify outliers or anomalies in the data",
            "What are the key insights from this dataset?",
        ],
        '.json': [
            "Parse and analyze the JSON structure",
            "What are the key data fields and their values?",
            "Find any nested patterns or relationships",
        ],
    }

    _, extension = os.path.splitext(file_path)
    specific_questions = questions_by_extension.get(extension.lower(), [])
    return specific_questions + generic_questions

async def analyze_uploaded_file():
    """Legacy coroutine: analyze the uploaded file with a fixed generic question.

    Kept for backward compatibility; it simply forwards to
    ``analyze_file_with_question`` and returns that function's result.
    """
    default_question = "Provide a comprehensive analysis of this file."
    report = analyze_file_with_question(default_question)
    return report

def run_file_analysis():
    """Synchronous wrapper around the ``analyze_uploaded_file`` coroutine.

    Returns:
        Whatever ``analyze_uploaded_file`` returns, obtained by driving the
        coroutine to completion with ``asyncio.run``.
    """
    pending_analysis = analyze_uploaded_file()
    return asyncio.run(pending_analysis)

def update_question_suggestions():
    """Refresh the suggestion dropdown for the currently uploaded file.

    Returns:
        A Gradio update that replaces the dropdown's choices with the
        suggestions for ``uploaded_file_path`` and selects the first one,
        or an empty value when there are no suggestions.
    """
    suggestions = get_question_suggestions(uploaded_file_path)
    # gr.update() is the component-agnostic way to patch an output component.
    # The per-component gr.Dropdown.update() classmethod no longer exists in
    # Gradio 4+, which is the API generation this app's File widget
    # (type="filepath") targets.
    return gr.update(choices=suggestions, value=suggestions[0] if suggestions else "")

# Create the Gradio interface: three tabs (chat, guided file analysis, and a
# static page of example questions) inside a single Blocks app bound to `demo`.
with gr.Blocks(title="DataForge - AI Assistant with Advanced File Analysis") as demo:
    gr.Markdown("# 🔍 DataForge - AI Assistant with Advanced File Analysis")
    gr.Markdown("Upload files and ask specific questions for AI-powered guided analysis using LangGraph.")
    
    # --- Tab 1: free-form chat backed by respond() ---
    with gr.Tab("💬 Chat Assistant"):
        chat_interface = gr.ChatInterface(
            respond,
            # Extra controls, listed in the same order as respond()'s
            # parameters after (message, history): system_message,
            # max_tokens, temperature, top_p.
            additional_inputs=[
                gr.Textbox(
                    value="You are a helpful AI assistant. Be friendly, informative, and concise in your responses.", 
                    label="System message"
                ),
                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p (nucleus sampling)",
                ),
            ],
            title="Chat with AI Assistant",
            description="Ask questions or get help with any topic."
        )
    
    # --- Tab 2: upload a file, pick or type a question, run guided analysis ---
    with gr.Tab("📁 Advanced File Analysis"):
        gr.Markdown("## 🚀 Guided File Analysis with LangGraph")
        gr.Markdown("""
        Upload files and ask specific questions for targeted AI analysis. Our guided approach:
        
        1. 📋 **Examines** your file structure and patterns
        2. 🎯 **Generates** specific code guidance based on your question  
        3. 🚀 **Executes** enhanced analysis with improved accuracy
        """)
        
        with gr.Row():
            # Left column (narrower): inputs and controls
            with gr.Column(scale=1):
                # File Upload Section
                gr.Markdown("### 📤 File Upload")
                # type="filepath" asks Gradio to hand the upload to event
                # handlers as a path rather than as file contents.
                file_upload = gr.File(
                    label="Upload File for Analysis",
                    file_types=[".txt", ".log", ".csv", ".json", ".xml", ".py", ".js", ".html", ".md"],
                    type="filepath"
                )
                # Read-only status line filled in by handle_file_upload()
                upload_status = gr.Textbox(
                    label="Upload Status",
                    value="No file uploaded",
                    interactive=False
                )
                
                # Question Section
                gr.Markdown("### ❓ Ask Your Question")
                # Starts empty; choices are filled in by
                # update_question_suggestions() after an upload.
                question_suggestions = gr.Dropdown(
                    label="Question Suggestions (select or type your own)",
                    choices=[],
                    allow_custom_value=True,
                    value=""
                )
                
                user_question = gr.Textbox(
                    label="Your Question about the File",
                    placeholder="What would you like to know about this file?",
                    lines=3
                )
                
                analyze_btn = gr.Button("🔍 Run Guided Analysis", variant="primary", size="lg")
                
                # Analysis Info
                gr.Markdown("### ℹ️ Analysis Method")
                gr.Markdown("""
                **Guided Analysis Features:**
                - 🎯 Question-aware code generation
                - 📋 File structure examination  
                - 🚀 Dynamic prompt optimization
                - ✅ Higher accuracy than generic analysis
                """)
            
            # Right column (wider): read-only results pane
            with gr.Column(scale=2):
                analysis_output = gr.Textbox(
                    label="📊 Guided Analysis Results",
                    lines=25,
                    max_lines=35,
                    placeholder="Upload a file, ask a question, and click 'Run Guided Analysis' to see detailed results here...",
                    interactive=False
                )
        
        # Event handlers
        # When the upload changes: record the path and show the status, then
        # refresh the suggestion dropdown for the new file.
        file_upload.change(
            fn=handle_file_upload,
            inputs=[file_upload],
            outputs=[upload_status]
        ).then(
            fn=update_question_suggestions,
            inputs=[],
            outputs=[question_suggestions]
        )
        
        # Changing the dropdown value copies it into the question box
        # (identity function).
        question_suggestions.change(
            fn=lambda x: x,
            inputs=[question_suggestions],
            outputs=[user_question]
        )
        
        # The button runs the guided analysis on the question box's text and
        # writes the returned string into the results pane.
        analyze_btn.click(
            fn=analyze_file_with_question,
            inputs=[user_question],
            outputs=[analysis_output]
        )

    # --- Tab 3: static reference of example questions (no event handlers) ---
    with gr.Tab("📊 Analysis Examples"):
        gr.Markdown("## 💡 Example Questions by File Type")
        
        with gr.Accordion("🔐 Security Analysis Questions", open=False):
            gr.Markdown("""
            **For Log Files:**
            - "Find any failed login attempts and suspicious IP addresses"
            - "Identify potential security threats or anomalies"
            - "Show me authentication errors and user access patterns"
            - "Are there any brute force attacks or repeated failures?"
            
            **For Access Logs:**
            - "Detect unusual access patterns or potential intrusions"
            - "Find requests with suspicious user agents or payloads"
            - "Identify high-frequency requests from single IPs"
            """)
        
        with gr.Accordion("⚡ Performance Analysis Questions", open=False):
            gr.Markdown("""
            **For Application Logs:**
            - "Which API endpoints are slowest and why?"
            - "Find performance bottlenecks and response time issues"
            - "Show me timeout errors and failed requests"
            - "What are the peak usage times and load patterns?"
            
            **For System Logs:**
            - "Identify resource usage spikes and memory issues"
            - "Find database query performance problems"
            - "Show me error rates and system health indicators"
            """)
        
        with gr.Accordion("📈 Data Analysis Questions", open=False):
            gr.Markdown("""
            **For CSV/Data Files:**
            - "Analyze data distribution and find statistical insights"
            - "Identify outliers and anomalies in the dataset"
            - "What correlations exist between different columns?"
            - "Generate a comprehensive data quality report"
            
            **For JSON Files:**
            - "Parse the structure and extract key information"
            - "Find patterns in nested data and relationships"
            - "Summarize the main data points and values"
            """)

# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()