# (page-capture residue, not part of the program) Spaces: Running Running
import os | |
import gradio as gr | |
import asyncio | |
import tempfile | |
from dotenv import find_dotenv, load_dotenv | |
from langchain.chat_models import init_chat_model | |
# Simplified imports - focusing on file analysis | |
# Import the CodeAct agent functionality | |
from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact | |
# Import the new guided analysis functionality | |
from graph import analyze_file_with_guidance_sync, guided_analysis_graph | |
# Load environment variables (the .env path is resolved by find_dotenv())
load_dotenv(find_dotenv())
# Initialize model for file analysis
# NOTE(review): codeact_model is not referenced by the code visible in this
# file section - confirm it is still needed.
codeact_model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")
# Store uploaded file path globally: written by handle_file_upload and read by
# analyze_file_with_question; stays None until a file has been uploaded.
uploaded_file_path = None
# Chat functionality removed - focusing on file analysis
def handle_file_upload(file):
    """Record the uploaded file's path in module state and report the outcome.

    Args:
        file: Path of the uploaded file as handed over by the upload widget
            (a string when the widget is configured with ``type="filepath"``),
            or ``None`` when no file is present.

    Returns:
        A status message: success including the file's base name, a
        "no file" notice, or an error description if handling failed.
    """
    global uploaded_file_path

    # Guard clause: nothing selected, so clear any previously stored path.
    if file is None:
        uploaded_file_path = None
        return "β No file uploaded"

    try:
        # Remember the path first, then derive the display name from it.
        uploaded_file_path = file
        base_name = os.path.basename(file)
    except Exception as exc:
        # Do not leave a path behind that could not be processed.
        uploaded_file_path = None
        return f"β Upload error: {str(exc)}"
    return f"β File uploaded successfully: {base_name}"
def analyze_file_with_question(user_question):
    """Run the guided analysis on the currently stored uploaded file.

    Args:
        user_question: Free-text question about the file. When it is empty,
            ``None``, or whitespace-only, a default comprehensive-analysis
            prompt is substituted.

    Returns:
        Whatever ``analyze_file_with_guidance_sync`` returns, or a message
        string when no usable file is available or the analysis raises.

    NOTE(review): the file path comes from the module-level
    ``uploaded_file_path``; presumably this is shared by every session served
    by the same process - confirm whether per-session state is needed.
    """
    fallback_question = (
        "Provide a comprehensive analysis of this file including security, "
        "performance, and data insights."
    )
    try:
        target_path = uploaded_file_path
        # Both conditions must hold: a path was stored and it still exists.
        if not (target_path and os.path.exists(target_path)):
            return "β No file uploaded or file not found. Please upload a file first."

        question = user_question
        if not question or not question.strip():
            question = fallback_question

        # Use the guided analysis approach with the resolved question.
        return analyze_file_with_guidance_sync(target_path, question)
    except Exception as exc:
        return f"β Error in guided analysis: {str(exc)}"
async def analyze_uploaded_file():
    """Legacy coroutine entry point, retained for backward compatibility.

    Delegates to ``analyze_file_with_question`` with a fixed, generic prompt.
    The delegate is called synchronously; nothing is awaited here.
    """
    legacy_prompt = "Provide a comprehensive analysis of this file."
    report = analyze_file_with_question(legacy_prompt)
    return report
def run_file_analysis():
    """Synchronous bridge: run ``analyze_uploaded_file`` to completion and return its result."""
    pending_analysis = analyze_uploaded_file()
    return asyncio.run(pending_analysis)
# Build the Gradio interface: header, two-column work area, event wiring,
# and a collapsible list of example questions.
with gr.Blocks(title="DataForge - AI-Powered File Analysis") as demo:
    # --- Page header and overview -------------------------------------
    gr.Markdown("# π DataForge - AI-Powered File Analysis")
    gr.Markdown("""
    Upload any file and ask specific questions for targeted AI analysis. Our guided approach:
    1. π **Examines** your file structure and patterns automatically
    2. π― **Generates** specific code guidance based on your question
    3. π **Executes** enhanced analysis with improved accuracy
    **Simply upload a file and ask any question you want!**
    """)

    with gr.Row():
        # --- Left column: upload, question, trigger, explanation -------
        with gr.Column(scale=1):
            gr.Markdown("### π€ File Upload")
            # type="filepath" makes the component hand a path string to handlers.
            file_upload = gr.File(label="Upload File for Analysis", type="filepath")
            upload_status = gr.Textbox(
                label="Upload Status",
                value="No file uploaded",
                interactive=False,
            )

            gr.Markdown("### β Ask Your Question")
            user_question = gr.Textbox(
                label="Your Question about the File",
                placeholder="What would you like to know about this file? (e.g., 'Find security threats', 'Show performance issues', 'What errors are present?')",
                lines=4,
                value="",
            )
            analyze_btn = gr.Button("π Run Guided Analysis", variant="primary", size="lg")

            gr.Markdown("### βΉοΈ How It Works")
            gr.Markdown("""
            **Guided Analysis Process:**
            - π― **Question-aware**: Code generation tailored to your specific question
            - π **Smart examination**: Automatically detects file structure and patterns
            - π **Dynamic optimization**: Creates targeted analysis approach
            - β **Higher accuracy**: Prevents common code generation errors
            - π§ **Quality control**: Built-in validation to avoid syntax issues
            """)

        # --- Right column: read-only results pane ----------------------
        with gr.Column(scale=2):
            analysis_output = gr.Textbox(
                label="π Guided Analysis Results",
                lines=25,
                max_lines=35,
                placeholder="Upload a file, type your question, and click 'Run Guided Analysis' to see detailed results here...",
                interactive=False,
            )

    # --- Event wiring --------------------------------------------------
    # A change of the upload widget refreshes the status box.
    file_upload.change(fn=handle_file_upload, inputs=[file_upload], outputs=[upload_status])
    # The button sends the question text to the analysis function.
    analyze_btn.click(fn=analyze_file_with_question, inputs=[user_question], outputs=[analysis_output])

    # --- Example questions, one collapsed accordion per category -------
    gr.Markdown("---")
    gr.Markdown("## π‘ Example Questions by File Type")

    _example_sections = (
        (
            "π Security Analysis Questions",
            """
            **For Log Files:**
            - "Find any failed login attempts and suspicious IP addresses"
            - "Identify potential security threats or anomalies"
            - "Show me authentication errors and user access patterns"
            - "Are there any brute force attacks or repeated failures?"
            **For Access Logs:**
            - "Detect unusual access patterns or potential intrusions"
            - "Find requests with suspicious user agents or payloads"
            - "Identify high-frequency requests from single IPs"
            """,
        ),
        (
            "β‘ Performance Analysis Questions",
            """
            **For Application Logs:**
            - "Which API endpoints are slowest and why?"
            - "Find performance bottlenecks and response time issues"
            - "Show me timeout errors and failed requests"
            - "What are the peak usage times and load patterns?"
            **For System Logs:**
            - "Identify resource usage spikes and memory issues"
            - "Find database query performance problems"
            - "Show me error rates and system health indicators"
            """,
        ),
        (
            "π Data Analysis Questions",
            """
            **For CSV/Data Files:**
            - "Analyze data distribution and find statistical insights"
            - "Identify outliers and anomalies in the dataset"
            - "What correlations exist between different columns?"
            - "Generate a comprehensive data quality report"
            **For JSON Files:**
            - "Parse the structure and extract key information"
            - "Find patterns in nested data and relationships"
            - "Summarize the main data points and values"
            """,
        ),
    )
    for _section_title, _section_body in _example_sections:
        with gr.Accordion(_section_title, open=False):
            gr.Markdown(_section_body)
# Start the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    # Emit a startup message before the interface is launched.
    print("Starting DataForge application...")
    # Launch the Blocks app defined above with its default launch settings.
    demo.launch()