import os
import gradio as gr
import asyncio
import tempfile
from dotenv import find_dotenv, load_dotenv
from langchain.chat_models import init_chat_model
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langgraph.prebuilt import create_react_agent
from langsmith import traceable

# Import the CodeAct agent functionality
from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact

# Import the new guided analysis functionality
from graph import analyze_file_with_guidance_sync, guided_analysis_graph

# Load environment variables
load_dotenv(find_dotenv())
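# OPENAI_API_KEY is read below; if LangSmith tracing via `traceable` is used,
# the usual LANGSMITH_* variables are presumably expected in the .env file as well.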

# Initialize OpenAI model
openai_model = init_chat_model(
    model="gpt-4.1-nano-2025-04-14",
    api_key=os.getenv("OPENAI_API_KEY"),
)

# Create the basic chat agent
chat_agent = create_react_agent(openai_model, tools=[])

# Initialize CodeAct model for file analysis
codeact_model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")

# Store uploaded file path globally
uploaded_file_path = None

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """
    Main chat function that processes user input and yields the AI response
    incrementally. (max_tokens, temperature and top_p come from the UI sliders
    but are not currently forwarded to the model.)
    """
    try:
        # Convert history to LangChain message format
        messages = [SystemMessage(content=system_message)]

        # Add conversation history
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append(HumanMessage(content=user_msg))
            if assistant_msg:
                messages.append(AIMessage(content=assistant_msg))

        # Add current user message
        messages.append(HumanMessage(content=message))

        # Prepare input for the agent
        input_data = {"messages": messages}

        # Stream the response: every chunk carries the full message list, so only
        # yield when the latest message's content has actually grown.
        response_text = ""
        for chunk in chat_agent.stream(input_data, stream_mode="values"):
            if "messages" in chunk and chunk["messages"]:
                latest_message = chunk["messages"][-1]
                if hasattr(latest_message, "content"):
                    current_content = latest_message.content
                    if current_content and len(current_content) > len(response_text):
                        response_text = current_content
                        yield response_text

        # Ensure we return something even if streaming doesn't work
        if not response_text:
            yield "I'm sorry, I couldn't process your message. Please check your OpenAI API key."

    except Exception as e:
        yield f"Error: {str(e)}. Please make sure your OpenAI API key is set correctly."

def handle_file_upload(file):
    """Handle file upload and store the path globally"""
    global uploaded_file_path

    if file is not None:
        uploaded_file_path = file.name
        return f"File uploaded successfully: {os.path.basename(file.name)}"
    else:
        uploaded_file_path = None
        return "No file uploaded"
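
# analyze_file_with_guidance_sync (imported from graph.py) is assumed to run the
# examine -> guide -> execute pipeline described in the UI below and to return a
# plain-text report suitable for display in a Textbox.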
def analyze_file_with_question(user_question):
    """
    Analyze the uploaded file using the new guided approach with the user's question
    """
    global uploaded_file_path

    if not uploaded_file_path or not os.path.exists(uploaded_file_path):
        return "No file uploaded or file not found. Please upload a file first."

    if not user_question or user_question.strip() == "":
        user_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

    try:
        # Use the new guided analysis approach
        result = analyze_file_with_guidance_sync(uploaded_file_path, user_question)
        return result
    except Exception as e:
        return f"Error in guided analysis: {str(e)}"

def get_question_suggestions(file_path):
    """
    Generate suggested questions based on file type and structure
    """
    if not file_path or not os.path.exists(file_path):
        return []

    file_ext = os.path.splitext(file_path)[1].lower()

    base_suggestions = [
        "What are the main patterns in this file?",
        "Are there any security issues or anomalies?",
        "Provide a statistical summary of the data",
        "What insights can you extract from this file?",
    ]

    if file_ext in ['.log', '.txt']:
        return [
            "Find any security threats or failed login attempts",
            "Identify performance bottlenecks and slow operations",
            "What errors or warnings are present?",
            "Show me time-based trends in the data",
            "Are there any suspicious IP addresses or user activities?",
        ] + base_suggestions
    elif file_ext == '.csv':
        return [
            "Analyze the data distribution and statistics",
            "Find correlations between columns",
            "Identify outliers or anomalies in the data",
            "What are the key insights from this dataset?",
        ] + base_suggestions
    elif file_ext == '.json':
        return [
            "Parse and analyze the JSON structure",
            "What are the key data fields and their values?",
            "Find any nested patterns or relationships",
        ] + base_suggestions
    else:
        return base_suggestions

async def analyze_uploaded_file():
    """Legacy function - kept for backward compatibility"""
    return analyze_file_with_question("Provide a comprehensive analysis of this file.")


def run_file_analysis():
    """Wrapper to run async file analysis in sync context"""
    return asyncio.run(analyze_uploaded_file())
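
# Note: asyncio.run() starts a fresh event loop and raises if one is already
# running, so run_file_analysis() is intended for synchronous callers such as
# Gradio callbacks.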

def update_question_suggestions():
    """Update question suggestions based on uploaded file"""
    global uploaded_file_path
    suggestions = get_question_suggestions(uploaded_file_path)
    # gr.update() works across Gradio versions; gr.Dropdown.update() was removed in Gradio 4.
    return gr.update(choices=suggestions, value=suggestions[0] if suggestions else "")

# Create the Gradio interface
with gr.Blocks(title="DataForge - AI Assistant with Advanced File Analysis") as demo:
    gr.Markdown("# DataForge - AI Assistant with Advanced File Analysis")
    gr.Markdown("Upload files and ask specific questions for AI-powered guided analysis using LangGraph.")

    with gr.Tab("Chat Assistant"):
        chat_interface = gr.ChatInterface(
            respond,
            additional_inputs=[
                gr.Textbox(
                    value="You are a helpful AI assistant. Be friendly, informative, and concise in your responses.",
                    label="System message"
                ),
                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p (nucleus sampling)",
                ),
            ],
            title="Chat with AI Assistant",
            description="Ask questions or get help with any topic."
        )

    with gr.Tab("Advanced File Analysis"):
        gr.Markdown("## Guided File Analysis with LangGraph")
        gr.Markdown("""
        Upload files and ask specific questions for targeted AI analysis. Our guided approach:

        1. **Examines** your file structure and patterns
        2. **Generates** specific code guidance based on your question
        3. **Executes** enhanced analysis with improved accuracy
        """)

        with gr.Row():
            with gr.Column(scale=1):
                # File Upload Section
                gr.Markdown("### File Upload")
                file_upload = gr.File(
                    label="Upload File for Analysis",
                    file_types=[".txt", ".log", ".csv", ".json", ".xml", ".py", ".js", ".html", ".md"],
                    type="filepath"
                )
                upload_status = gr.Textbox(
                    label="Upload Status",
                    value="No file uploaded",
                    interactive=False
                )

                # Question Section
                gr.Markdown("### Ask Your Question")
                question_suggestions = gr.Dropdown(
                    label="Question Suggestions (select or type your own)",
                    choices=[],
                    allow_custom_value=True,
                    value=""
                )
                user_question = gr.Textbox(
                    label="Your Question about the File",
                    placeholder="What would you like to know about this file?",
                    lines=3
                )

                analyze_btn = gr.Button("Run Guided Analysis", variant="primary", size="lg")

                # Analysis Info
                gr.Markdown("### Analysis Method")
                gr.Markdown("""
                **Guided Analysis Features:**
                - Question-aware code generation
                - File structure examination
                - Dynamic prompt optimization
                - Higher accuracy than generic analysis
                """)

            with gr.Column(scale=2):
                analysis_output = gr.Textbox(
                    label="Guided Analysis Results",
                    lines=25,
                    max_lines=35,
                    placeholder="Upload a file, ask a question, and click 'Run Guided Analysis' to see detailed results here...",
                    interactive=False
                )

    # Event handlers
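    # The upload handler records the path first; the chained .then() call then
    # refreshes the suggestion dropdown. Selecting a suggestion copies it into
    # the question textbox via the identity lambda below.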
    file_upload.change(
        fn=handle_file_upload,
        inputs=[file_upload],
        outputs=[upload_status]
    ).then(
        fn=update_question_suggestions,
        inputs=[],
        outputs=[question_suggestions]
    )

    question_suggestions.change(
        fn=lambda x: x,
        inputs=[question_suggestions],
        outputs=[user_question]
    )

    analyze_btn.click(
        fn=analyze_file_with_question,
        inputs=[user_question],
        outputs=[analysis_output]
    )

    with gr.Tab("Analysis Examples"):
        gr.Markdown("## Example Questions by File Type")

        with gr.Accordion("Security Analysis Questions", open=False):
            gr.Markdown("""
            **For Log Files:**
            - "Find any failed login attempts and suspicious IP addresses"
            - "Identify potential security threats or anomalies"
            - "Show me authentication errors and user access patterns"
            - "Are there any brute force attacks or repeated failures?"

            **For Access Logs:**
            - "Detect unusual access patterns or potential intrusions"
            - "Find requests with suspicious user agents or payloads"
            - "Identify high-frequency requests from single IPs"
            """)

        with gr.Accordion("Performance Analysis Questions", open=False):
            gr.Markdown("""
            **For Application Logs:**
            - "Which API endpoints are slowest and why?"
            - "Find performance bottlenecks and response time issues"
            - "Show me timeout errors and failed requests"
            - "What are the peak usage times and load patterns?"

            **For System Logs:**
            - "Identify resource usage spikes and memory issues"
            - "Find database query performance problems"
            - "Show me error rates and system health indicators"
            """)

        with gr.Accordion("Data Analysis Questions", open=False):
            gr.Markdown("""
            **For CSV/Data Files:**
            - "Analyze data distribution and find statistical insights"
            - "Identify outliers and anomalies in the dataset"
            - "What correlations exist between different columns?"
            - "Generate a comprehensive data quality report"

            **For JSON Files:**
            - "Parse the structure and extract key information"
            - "Find patterns in nested data and relationships"
            - "Summarize the main data points and values"
            """)

if __name__ == "__main__":
    demo.launch()
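    # If streamed chat replies show up all at once rather than incrementally,
    # enabling the request queue may help: demo.queue().launch()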