File size: 12,559 Bytes
2473fee
e7edd2e
b2ca056
 
2473fee
 
 
 
 
e7edd2e
b2ca056
 
 
3774bab
 
 
2473fee
 
 
 
 
 
 
 
 
b2ca056
2473fee
e7edd2e
b2ca056
 
 
 
 
e7edd2e
2473fee
e7edd2e
 
 
 
 
 
 
 
2473fee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e7edd2e
b2ca056
 
 
 
 
 
 
 
 
 
3774bab
 
 
 
b2ca056
 
 
 
 
3774bab
 
 
b2ca056
3774bab
 
 
b2ca056
 
3774bab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e7edd2e
b2ca056
 
 
 
3774bab
 
 
 
 
 
b2ca056
3774bab
 
 
b2ca056
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3774bab
 
 
 
 
 
 
 
 
b2ca056
 
 
3774bab
 
b2ca056
 
 
 
 
 
 
 
 
 
3774bab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2ca056
 
 
3774bab
 
 
 
b2ca056
 
 
 
 
 
 
 
3774bab
 
 
 
 
 
 
 
 
 
b2ca056
 
 
3774bab
 
b2ca056
 
e7edd2e
3774bab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e7edd2e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
import os
import gradio as gr
import asyncio
import tempfile
from dotenv import find_dotenv, load_dotenv
from langchain.chat_models import init_chat_model
from langchain.schema import HumanMessage, SystemMessage
from langgraph.prebuilt import create_react_agent
from langsmith import traceable

# Import the CodeAct agent functionality
from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact

# Import the new guided analysis functionality
from graph import analyze_file_with_guidance_sync, guided_analysis_graph

# Load environment variables
# The .env file is located via find_dotenv(); this runs before the
# os.getenv("OPENAI_API_KEY") lookup below so the key can come from that file.
load_dotenv(find_dotenv())

# Initialize OpenAI model
# NOTE(review): no model_provider is given here (unlike codeact_model below);
# presumably init_chat_model infers it from the model name - confirm.
openai_model = init_chat_model(
    model="gpt-4.1-nano-2025-04-14",
    api_key=os.getenv("OPENAI_API_KEY"),
)

# Create the basic chat agent
# It is built with an empty tool list and is the object respond() streams from.
chat_agent = create_react_agent(openai_model, tools=[])

# Initialize CodeAct model for file analysis
# NOTE(review): codeact_model is not referenced anywhere else in the visible
# part of this file - confirm whether it is still needed.
codeact_model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")

# Store uploaded file path globally
# Written by handle_file_upload() and read by analyze_file_with_question() and
# update_question_suggestions(). It is a single module-level value, so every
# caller in this process sees the same "current" file.
uploaded_file_path = None

@traceable
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """
    Main chat function that processes user input and yields the AI response.

    Args:
        message: The user's newest message.
        history: Earlier turns as (user_text, assistant_text) pairs.
        system_message: System prompt placed at the start of the conversation.
        max_tokens: Supplied by the UI; currently not forwarded to the model.
        temperature: Supplied by the UI; currently not forwarded to the model.
        top_p: Supplied by the UI; currently not forwarded to the model.

    Yields:
        The assistant's reply text. If no reply was produced, a fallback
        message is yielded; if anything raises, an error string is yielded
        instead of propagating the exception.
    """
    # Local import: the file header only pulls HumanMessage/SystemMessage from
    # langchain.schema, and replaying earlier assistant turns needs AIMessage.
    from langchain.schema import AIMessage

    try:
        # Convert history to LangChain message format
        messages = [SystemMessage(content=system_message)]

        # Add conversation history
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append(HumanMessage(content=user_msg))
            if assistant_msg:
                # Earlier replies are assistant turns, not extra system prompts.
                messages.append(AIMessage(content=assistant_msg))

        # Add current user message
        messages.append(HumanMessage(content=message))

        # Prepare input for the agent
        input_data = {"messages": messages}

        # Stream the response
        response_text = ""
        for chunk in chat_agent.stream(input_data, stream_mode="values"):
            if "messages" in chunk and chunk["messages"]:
                latest_message = chunk["messages"][-1]
                # "values" mode emits full state snapshots, which include the
                # input messages. Only assistant output may be surfaced,
                # otherwise the user's own text is echoed back as the reply.
                if not isinstance(latest_message, AIMessage):
                    continue
                current_content = latest_message.content
                if current_content and current_content != response_text:
                    response_text = current_content
                    yield response_text

        # Ensure we return something even if streaming doesn't work
        if not response_text:
            yield "I'm sorry, I couldn't process your message. Please check your OpenAI API key."

    except Exception as e:
        # Deliberate UI boundary: show the failure in the chat instead of crashing.
        yield f"Error: {str(e)}. Please make sure your OpenAI API key is set correctly."

def handle_file_upload(file):
    """Handle file upload and store the path globally.

    Args:
        file: ``None`` when the upload was cleared, a filesystem path
            (``str`` or ``os.PathLike``), or an object that exposes the path
            through a ``.name`` attribute.

    Returns:
        A status string describing the outcome of the upload.
    """
    global uploaded_file_path
    if file is None:
        uploaded_file_path = None
        return "❌ No file uploaded"

    # The upload widget is declared with type="filepath", so the value can be
    # a plain path; file-like objects carrying ``.name`` are still accepted.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    uploaded_file_path = path
    return f"βœ… File uploaded successfully: {os.path.basename(path)}"

def analyze_file_with_question(user_question):
    """
    Run the guided analysis on the currently uploaded file.

    An empty or whitespace-only question is replaced by a generic
    comprehensive-analysis prompt. Problems are reported as a message string
    rather than raised: a missing upload and any exception from the analysis
    call each produce their own error text.
    """
    target_path = uploaded_file_path
    file_available = bool(target_path) and os.path.exists(target_path)
    if not file_available:
        return "❌ No file uploaded or file not found. Please upload a file first."

    question = user_question
    if not question or not question.strip():
        question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

    try:
        # Delegate to the guided analysis entry point imported from graph.
        return analyze_file_with_guidance_sync(target_path, question)
    except Exception as exc:
        return f"❌ Error in guided analysis: {str(exc)}"

def get_question_suggestions(file_path):
    """
    Suggest starter questions tailored to the file's extension.

    Returns an empty list when ``file_path`` is falsy or does not exist on
    disk. Otherwise the extension-specific questions (for .log/.txt, .csv and
    .json, matched case-insensitively) come first, followed by the generic
    questions; any other extension gets only the generic questions.
    """
    if not (file_path and os.path.exists(file_path)):
        return []

    generic_questions = [
        "What are the main patterns in this file?",
        "Are there any security issues or anomalies?",
        "Provide a statistical summary of the data",
        "What insights can you extract from this file?",
    ]
    log_questions = [
        "Find any security threats or failed login attempts",
        "Identify performance bottlenecks and slow operations",
        "What errors or warnings are present?",
        "Show me time-based trends in the data",
        "Are there any suspicious IP addresses or user activities?",
    ]
    csv_questions = [
        "Analyze the data distribution and statistics",
        "Find correlations between columns",
        "Identify outliers or anomalies in the data",
        "What are the key insights from this dataset?",
    ]
    json_questions = [
        "Parse and analyze the JSON structure",
        "What are the key data fields and their values?",
        "Find any nested patterns or relationships",
    ]

    # Extension -> questions that should precede the generic ones.
    specific_by_extension = {
        ".log": log_questions,
        ".txt": log_questions,
        ".csv": csv_questions,
        ".json": json_questions,
    }

    _, extension = os.path.splitext(file_path)
    return specific_by_extension.get(extension.lower(), []) + generic_questions

async def analyze_uploaded_file():
    """Legacy coroutine kept for backward compatibility.

    Runs the question-driven analysis with a fixed generic prompt and returns
    its result.
    """
    default_question = "Provide a comprehensive analysis of this file."
    report = analyze_file_with_question(default_question)
    return report

def run_file_analysis():
    """Run the legacy async file analysis from synchronous code and return its result."""
    pending_analysis = analyze_uploaded_file()
    return asyncio.run(pending_analysis)

def update_question_suggestions():
    """Update question suggestions based on uploaded file.

    Returns:
        A Gradio update for the suggestions dropdown: its choices become the
        suggestions for the current upload, and its value becomes the first
        suggestion (or an empty string when there are none).
    """
    suggestions = get_question_suggestions(uploaded_file_path)
    # gr.update() is the component-agnostic update helper. The per-component
    # classmethod gr.Dropdown.update() no longer exists in Gradio 4+, which the
    # rest of this file targets (e.g. gr.File(type="filepath")).
    return gr.update(choices=suggestions, value=suggestions[0] if suggestions else "")

# Create the Gradio interface
# Layout: three tabs - a chat tab backed by respond(), a file-analysis tab
# backed by the upload/analysis helpers above, and a static tab of example
# questions.
with gr.Blocks(title="DataForge - AI Assistant with Advanced File Analysis") as demo:
    gr.Markdown("# πŸ” DataForge - AI Assistant with Advanced File Analysis")
    gr.Markdown("Upload files and ask specific questions for AI-powered guided analysis using LangGraph.")
    
    with gr.Tab("πŸ’¬ Chat Assistant"):
        # The additional inputs are passed to respond() after (message, history),
        # in the order listed: system_message, max_tokens, temperature, top_p.
        # NOTE(review): respond() does not forward the three slider values to
        # the model, so they currently have no effect on the reply.
        chat_interface = gr.ChatInterface(
            respond,
            additional_inputs=[
                gr.Textbox(
                    value="You are a helpful AI assistant. Be friendly, informative, and concise in your responses.", 
                    label="System message"
                ),
                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p (nucleus sampling)",
                ),
            ],
            title="Chat with AI Assistant",
            description="Ask questions or get help with any topic."
        )
    
    with gr.Tab("πŸ“ Advanced File Analysis"):
        gr.Markdown("## πŸš€ Guided File Analysis with LangGraph")
        gr.Markdown("""
        Upload files and ask specific questions for targeted AI analysis. Our guided approach:
        
        1. πŸ“‹ **Examines** your file structure and patterns
        2. 🎯 **Generates** specific code guidance based on your question  
        3. πŸš€ **Executes** enhanced analysis with improved accuracy
        """)
        
        # Left column: upload, question entry and the run button.
        # Right column: read-only results box.
        with gr.Row():
            with gr.Column(scale=1):
                # File Upload Section
                gr.Markdown("### πŸ“€ File Upload")
                file_upload = gr.File(
                    label="Upload File for Analysis",
                    file_types=[".txt", ".log", ".csv", ".json", ".xml", ".py", ".js", ".html", ".md"],
                    type="filepath"
                )
                upload_status = gr.Textbox(
                    label="Upload Status",
                    value="No file uploaded",
                    interactive=False
                )
                
                # Question Section
                gr.Markdown("### ❓ Ask Your Question")
                # Starts empty; choices are filled in after an upload by
                # update_question_suggestions().
                question_suggestions = gr.Dropdown(
                    label="Question Suggestions (select or type your own)",
                    choices=[],
                    allow_custom_value=True,
                    value=""
                )
                
                user_question = gr.Textbox(
                    label="Your Question about the File",
                    placeholder="What would you like to know about this file?",
                    lines=3
                )
                
                analyze_btn = gr.Button("πŸ” Run Guided Analysis", variant="primary", size="lg")
                
                # Analysis Info
                gr.Markdown("### ℹ️ Analysis Method")
                gr.Markdown("""
                **Guided Analysis Features:**
                - 🎯 Question-aware code generation
                - πŸ“‹ File structure examination  
                - πŸš€ Dynamic prompt optimization
                - βœ… Higher accuracy than generic analysis
                """)
            
            with gr.Column(scale=2):
                analysis_output = gr.Textbox(
                    label="πŸ“Š Guided Analysis Results",
                    lines=25,
                    max_lines=35,
                    placeholder="Upload a file, ask a question, and click 'Run Guided Analysis' to see detailed results here...",
                    interactive=False
                )
        
        # Event handlers
        # On upload change: first record the path and show the status text,
        # then refresh the suggestion dropdown. The second step takes no
        # inputs because it reads the module-level uploaded_file_path.
        file_upload.change(
            fn=handle_file_upload,
            inputs=[file_upload],
            outputs=[upload_status]
        ).then(
            fn=update_question_suggestions,
            inputs=[],
            outputs=[question_suggestions]
        )
        
        # Copy the chosen (or typed) suggestion into the question box.
        question_suggestions.change(
            fn=lambda x: x,
            inputs=[question_suggestions],
            outputs=[user_question]
        )
        
        # Run the guided analysis on the current question and show the result.
        analyze_btn.click(
            fn=analyze_file_with_question,
            inputs=[user_question],
            outputs=[analysis_output]
        )

    # Static reference content only; nothing in this tab is wired to a handler.
    with gr.Tab("πŸ“Š Analysis Examples"):
        gr.Markdown("## πŸ’‘ Example Questions by File Type")
        
        with gr.Accordion("πŸ” Security Analysis Questions", open=False):
            gr.Markdown("""
            **For Log Files:**
            - "Find any failed login attempts and suspicious IP addresses"
            - "Identify potential security threats or anomalies"
            - "Show me authentication errors and user access patterns"
            - "Are there any brute force attacks or repeated failures?"
            
            **For Access Logs:**
            - "Detect unusual access patterns or potential intrusions"
            - "Find requests with suspicious user agents or payloads"
            - "Identify high-frequency requests from single IPs"
            """)
        
        with gr.Accordion("⚑ Performance Analysis Questions", open=False):
            gr.Markdown("""
            **For Application Logs:**
            - "Which API endpoints are slowest and why?"
            - "Find performance bottlenecks and response time issues"
            - "Show me timeout errors and failed requests"
            - "What are the peak usage times and load patterns?"
            
            **For System Logs:**
            - "Identify resource usage spikes and memory issues"
            - "Find database query performance problems"
            - "Show me error rates and system health indicators"
            """)
        
        with gr.Accordion("πŸ“ˆ Data Analysis Questions", open=False):
            gr.Markdown("""
            **For CSV/Data Files:**
            - "Analyze data distribution and find statistical insights"
            - "Identify outliers and anomalies in the dataset"
            - "What correlations exist between different columns?"
            - "Generate a comprehensive data quality report"
            
            **For JSON Files:**
            - "Parse the structure and extract key information"
            - "Find patterns in nested data and relationships"
            - "Summarize the main data points and values"
            """)

# Launch the Gradio app only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    demo.launch()