File size: 11,427 Bytes
2473fee
e7edd2e
b2ca056
 
4be3026
 
2473fee
 
4be3026
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e7edd2e
b2ca056
 
 
3774bab
 
bb43287
3774bab
2473fee
 
 
3c3b761
b2ca056
 
 
 
e7edd2e
3c3b761
e7edd2e
b2ca056
 
 
879668d
 
c7ebfd3
879668d
 
 
 
 
 
 
b2ca056
879668d
b2ca056
bb43287
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3774bab
 
bb43287
3774bab
4be3026
b2ca056
 
879668d
 
 
 
 
 
4be3026
 
 
 
 
 
 
 
 
 
 
3774bab
 
 
b2ca056
 
4be3026
 
 
 
 
 
 
 
3774bab
 
 
 
e7edd2e
b2ca056
 
 
 
 
3c3b761
 
 
 
b2ca056
3c3b761
 
 
b2ca056
3c3b761
 
 
 
 
 
 
 
c7ebfd3
 
3c3b761
 
 
 
 
 
b2ca056
3c3b761
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2ca056
3c3b761
 
 
 
 
 
 
 
 
 
c7ebfd3
3c3b761
c7ebfd3
 
3c3b761
 
 
bb43287
c7ebfd3
 
3c3b761
4e7bcf3
 
 
 
 
 
 
 
 
 
 
3774bab
4e7bcf3
 
 
 
 
 
 
 
 
 
 
 
 
3774bab
4e7bcf3
 
 
 
 
 
 
 
 
 
 
 
 
3774bab
4e7bcf3
 
 
 
 
3774bab
e7edd2e
879668d
c7ebfd3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
import os
import gradio as gr
import asyncio
import tempfile
import subprocess
import shutil
from dotenv import find_dotenv, load_dotenv
from langchain.chat_models import init_chat_model

# Auto-install Deno if not found (for Hugging Face Spaces)
def ensure_deno_installed():
    """Make sure a ``deno`` executable is usable, installing it if necessary.

    First probes ``deno --version``. If that does not succeed, the official
    install script is piped through the shell and ``~/.deno/bin`` is put on
    this process's ``PATH``.

    Returns:
        bool: True if Deno was already present, or if the installer ran and a
        ``deno`` executable can actually be located afterwards. False if the
        installer failed, raised, or left no ``deno`` executable behind.
    """
    try:
        # Check if Deno is already installed
        probe = subprocess.run(['deno', '--version'], capture_output=True, text=True)
    except OSError:
        # FileNotFoundError (no binary on PATH) and any other launch failure
        # such as PermissionError both mean "not usable" -> fall through to install.
        probe = None

    if probe is not None and probe.returncode == 0:
        # The version is taken from the second whitespace-separated field;
        # fall back to the raw output instead of raising IndexError when the
        # output has fewer fields than expected.
        fields = probe.stdout.split()
        version = fields[1] if len(fields) > 1 else probe.stdout.strip()
        print(f"βœ… Deno already installed: {version}")
        return True

    print("πŸ”§ Deno not found. Installing Deno for PyodideSandbox...")

    try:
        # Install Deno using the official installer.
        # shell=True is needed for the pipe; the command is a fixed constant,
        # no external input is interpolated into it.
        install_cmd = "curl -fsSL https://deno.land/install.sh | sh"
        result = subprocess.run(install_cmd, shell=True, capture_output=True, text=True)

        if result.returncode != 0:
            print(f"❌ Deno installation failed: {result.stderr}")
            return False

        # Build the PATH that would include the installer's target directory.
        # Compare whole entries (not substrings) and use the platform separator.
        deno_path = os.path.expanduser("~/.deno/bin")
        current_path = os.environ.get("PATH", "")
        entries = current_path.split(os.pathsep) if current_path else []
        if deno_path in entries:
            search_path = current_path
        else:
            search_path = os.pathsep.join([deno_path, *entries])

        # The pipeline's exit status is that of the trailing `sh`, which is 0
        # even when curl fails and feeds it nothing. Only report success when
        # the executable can really be found.
        if shutil.which("deno", path=search_path) is None:
            print(f"❌ Deno installation failed: {result.stderr}")
            return False

        # Add Deno to PATH
        os.environ["PATH"] = search_path

        print("βœ… Deno installed successfully!")
        return True

    except Exception as e:
        print(f"❌ Error installing Deno: {e}")
        return False

# Install Deno before importing sandbox dependencies
# The result is kept in the module-level flag `deno_available`, which the
# analysis functions below consult before attempting any sandboxed execution.
print("πŸ” Checking Deno installation...")
deno_available = ensure_deno_installed()

# Import the CodeAct agent functionality
# (deliberately placed after the Deno check above rather than at the top of the file)
from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact

# Import the new guided analysis functionality
from graph import analyze_file_with_guidance_sync, guided_analysis_graph
from graph_streaming import streaming_analyze_file_with_guidance

# Load environment variables
# NOTE(review): this runs after `agent` / `graph` / `graph_streaming` have been
# imported; if those modules read environment variables at import time they
# will not see values from the .env file — confirm.
load_dotenv(find_dotenv())

# Initialize model for file analysis
# NOTE(review): presumably relies on OpenAI credentials loaded from the
# environment just above — confirm.
codeact_model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")

# Store uploaded file path globally
# Written by handle_file_upload(); read by the analysis functions.
uploaded_file_path = None

# Chat functionality removed - focusing on file analysis

def handle_file_upload(file):
    """Remember the uploaded file's path in ``uploaded_file_path`` and report status.

    Args:
        file: Path of the uploaded file (the upload widget is configured with
            type="filepath", so Gradio passes a string), or None when there
            is no file.

    Returns:
        str: A status message — success with the file's base name, a
        "no file" notice, or an error description. On any error the stored
        path is reset to None.
    """
    global uploaded_file_path
    try:
        if file is None:
            uploaded_file_path = None
            return "❌ No file uploaded"

        # Store first, then derive the display name (a failure below clears it again).
        uploaded_file_path = file
        return f"βœ… File uploaded successfully: {os.path.basename(file)}"
    except Exception as exc:
        uploaded_file_path = None
        return f"❌ Upload error: {exc!s}"

def streaming_analyze_file_with_question(user_question):
    """
    Generator form of the guided analysis: yields progress text as it is produced.

    Reads the module-level ``uploaded_file_path`` and ``deno_available``.
    Yields a single explanatory message and stops when no uploaded file exists
    on disk or when Deno is unavailable; otherwise relays every chunk from
    ``streaming_analyze_file_with_guidance``. A blank or missing question is
    replaced by a default comprehensive-analysis prompt. Any exception is
    converted into a yielded error message instead of propagating.
    """
    global uploaded_file_path, deno_available

    try:
        have_file = uploaded_file_path and os.path.exists(uploaded_file_path)
        if not have_file:
            yield "❌ No file uploaded or file not found. Please upload a file first."
            return

        # Fall back to a generic prompt when the question is empty or whitespace-only.
        if not (user_question and user_question.strip() != ""):
            user_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

        # Check if Deno is available for sandbox operations
        if not deno_available:
            yield (
                "❌ Deno runtime not available. This is required for code execution in the sandbox.\n"
                "            \n"
                "πŸ“‹ Troubleshooting:\n"
                "1. This usually happens on deployment platforms that don't have Deno pre-installed\n"
                "2. The app attempted to install Deno automatically but failed\n"
                "3. Try restarting the space or contact support\n"
                "\n"
                "πŸ”„ Alternative: You can still upload files, but advanced code analysis may be limited."
            )
            return

        # Use the streaming guided analysis approach
        yield from streaming_analyze_file_with_guidance(uploaded_file_path, user_question)

    except Exception as exc:
        details = str(exc)
        mentions_deno = any(tag in details for tag in ("Deno", "deno"))
        if not mentions_deno:
            yield f"❌ Error in guided analysis: {details}"
        else:
            yield (
                f"❌ Deno-related error in analysis: {details}\n"
                "\n"
                "πŸ”§ This appears to be a Deno runtime issue. The sandbox requires Deno for code execution.\n"
                "Try restarting the application or contact support if this persists."
            )

def analyze_file_with_question(user_question):
    """
    Blocking counterpart of the streaming analysis, kept for backward compatibility.

    Reads the module-level ``uploaded_file_path`` and ``deno_available``.
    Returns an explanatory message when no uploaded file exists on disk or
    when Deno is unavailable; otherwise returns whatever
    ``analyze_file_with_guidance_sync`` produces. A blank or missing question
    is replaced by a default comprehensive-analysis prompt. Exceptions are
    turned into a returned error message rather than raised.
    """
    global uploaded_file_path, deno_available

    try:
        file_present = uploaded_file_path and os.path.exists(uploaded_file_path)
        if not file_present:
            return "❌ No file uploaded or file not found. Please upload a file first."

        # Empty or whitespace-only questions get the generic prompt.
        question_is_blank = (not user_question) or user_question.strip() == ""
        if question_is_blank:
            user_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

        # Check if Deno is available for sandbox operations
        if not deno_available:
            unavailable_notice = "\n".join([
                "❌ Deno runtime not available. This is required for code execution in the sandbox.",
                "            ",
                "πŸ“‹ Troubleshooting:",
                "1. This usually happens on deployment platforms that don't have Deno pre-installed",
                "2. The app attempted to install Deno automatically but failed",
                "3. Try restarting the space or contact support",
                "",
                "πŸ”„ Alternative: You can still upload files, but advanced code analysis may be limited.",
            ])
            return unavailable_notice

        # Use the new guided analysis approach
        return analyze_file_with_guidance_sync(uploaded_file_path, user_question)

    except Exception as exc:
        reason = str(exc)
        if "Deno" not in reason and "deno" not in reason:
            return f"❌ Error in guided analysis: {reason}"
        return "\n".join([
            f"❌ Deno-related error in analysis: {reason}",
            "",
            "πŸ”§ This appears to be a Deno runtime issue. The sandbox requires Deno for code execution.",
            "Try restarting the application or contact support if this persists.",
        ])

async def analyze_uploaded_file():
    """Legacy coroutine (kept for backward compatibility): run the default comprehensive analysis."""
    default_prompt = "Provide a comprehensive analysis of this file."
    # Delegates to the synchronous implementation; nothing is awaited here.
    return analyze_file_with_question(default_prompt)

def run_file_analysis():
    """Synchronous wrapper: run the analyze_uploaded_file() coroutine to completion and return its result."""
    pending = analyze_uploaded_file()
    return asyncio.run(pending)

# Create the Gradio interface
# Layout: a header, then one row with two columns — upload/question controls on
# the left (scale=1) and the results box on the right (scale=2) — followed by
# collapsible accordions of example questions. Components render in the order
# they are created inside each context manager.
with gr.Blocks(title="DataForge - AI-Powered File Analysis") as demo:
    gr.Markdown("# πŸ” DataForge - AI-Powered File Analysis")
    gr.Markdown("""
    Upload any file and ask specific questions for targeted AI analysis. Our guided approach:
    
    1. πŸ“‹ **Examines** your file structure and patterns automatically
    2. 🎯 **Generates** specific code guidance based on your question  
    3. πŸš€ **Executes** enhanced analysis with improved accuracy
    
    **Simply upload a file and ask any question you want!**
    """)
    
    with gr.Row():
        with gr.Column(scale=1):
            # File Upload Section
            gr.Markdown("### πŸ“€ File Upload")
            # type="filepath" makes the change handler receive a path string.
            file_upload = gr.File(
                label="Upload File for Analysis",
                type="filepath"
            )
            # Read-only box that shows the message returned by handle_file_upload.
            upload_status = gr.Textbox(
                label="Upload Status",
                value="No file uploaded",
                interactive=False
            )
            
            # Question Section
            gr.Markdown("### ❓ Ask Your Question")
            user_question = gr.Textbox(
                label="Your Question about the File",
                placeholder="What would you like to know about this file? (e.g., 'Find security threats', 'Show performance issues', 'What errors are present?')",
                lines=4,
                value=""
            )
            
            analyze_btn = gr.Button("πŸ” Run Guided Analysis", variant="primary", size="lg")
            
            # Analysis Info
            gr.Markdown("### ℹ️ How It Works")
            gr.Markdown("""
            **Guided Analysis Process:**
            - 🎯 **Question-aware**: Code generation tailored to your specific question
            - πŸ“‹ **Smart examination**: Automatically detects file structure and patterns  
            - πŸš€ **Dynamic optimization**: Creates targeted analysis approach
            - βœ… **Higher accuracy**: Prevents common code generation errors
            - πŸ”§ **Quality control**: Built-in validation to avoid syntax issues
            """)
        
        with gr.Column(scale=2):
            # Read-only results area written to by the analyze button's handler.
            analysis_output = gr.Textbox(
                label="πŸ“Š Guided Analysis Results",
                lines=25,
                max_lines=35,
                placeholder="Upload a file, type your question, and click 'Run Guided Analysis' to see detailed results here...",
                interactive=False
            )
    
    # Event handlers
    # On upload/clear: record the path in the module-level uploaded_file_path
    # and display the returned status text.
    file_upload.change(
        fn=handle_file_upload,
        inputs=[file_upload],
        outputs=[upload_status]
    )
    
    # The click handler is a generator, so each yielded value is sent to
    # analysis_output. Only the question text is passed in; the file path is
    # taken from the module-level uploaded_file_path set by handle_file_upload.
    analyze_btn.click(
        fn=streaming_analyze_file_with_question,
        inputs=[user_question],
        outputs=[analysis_output]
    )
    
    # Static help content: example questions grouped by analysis type,
    # each in an accordion that starts collapsed (open=False).
    gr.Markdown("---")
    gr.Markdown("## πŸ’‘ Example Questions by File Type")
    
    with gr.Accordion("πŸ” Security Analysis Questions", open=False):
        gr.Markdown("""
        **For Log Files:**
        - "Find any failed login attempts and suspicious IP addresses"
        - "Identify potential security threats or anomalies"
        - "Show me authentication errors and user access patterns"
        - "Are there any brute force attacks or repeated failures?"
        
        **For Access Logs:**
        - "Detect unusual access patterns or potential intrusions"
        - "Find requests with suspicious user agents or payloads"
        - "Identify high-frequency requests from single IPs"
        """)
    
    with gr.Accordion("⚑ Performance Analysis Questions", open=False):
        gr.Markdown("""
        **For Application Logs:**
        - "Which API endpoints are slowest and why?"
        - "Find performance bottlenecks and response time issues"
        - "Show me timeout errors and failed requests"
        - "What are the peak usage times and load patterns?"
        
        **For System Logs:**
        - "Identify resource usage spikes and memory issues"
        - "Find database query performance problems"
        - "Show me error rates and system health indicators"
        """)
    
    with gr.Accordion("πŸ“ˆ Data Analysis Questions", open=False):
        gr.Markdown("""
        **For CSV/Data Files:**
        - "Analyze data distribution and find statistical insights"
        - "Identify outliers and anomalies in the dataset"
        - "What correlations exist between different columns?"
        - "Generate a comprehensive data quality report"
        
        **For JSON Files:**
        - "Parse the structure and extract key information"
        - "Find patterns in nested data and relationships"
        - "Summarize the main data points and values"
        """)

# Start the web server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    print("Starting DataForge application...")
    demo.launch()