ai-puppy committed on
Commit
3774bab
Β·
1 Parent(s): 2cc0a25
Files changed (2) hide show
  1. app.py +165 -81
  2. graph.py +513 -0
app.py CHANGED
@@ -11,6 +11,9 @@ from langsmith import traceable
11
  # Import the CodeAct agent functionality
12
  from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact
13
 
 
 
 
14
  # Load environment variables
15
  load_dotenv(find_dotenv())
16
 
@@ -86,91 +89,83 @@ def handle_file_upload(file):
86
  uploaded_file_path = None
87
  return "❌ No file uploaded"
88
 
89
- async def analyze_uploaded_file():
90
- """Analyze the uploaded file using CodeAct agent"""
 
 
91
  global uploaded_file_path
92
 
93
  if not uploaded_file_path or not os.path.exists(uploaded_file_path):
94
  return "❌ No file uploaded or file not found. Please upload a file first."
95
 
 
 
 
96
  try:
97
- # Create sandbox with the uploaded file
98
- sandbox = FileInjectedPyodideSandbox(
99
- file_path=uploaded_file_path,
100
- virtual_path="/uploaded_file.log",
101
- sessions_dir=None, # Will create temp directory automatically
102
- allow_net=True
103
- )
104
-
105
- eval_fn = create_pyodide_eval_fn(sandbox)
106
- code_act = create_codeact(codeact_model, [], eval_fn)
107
- agent = code_act.compile()
108
-
109
- # Create analysis query based on file type
110
- file_ext = os.path.splitext(uploaded_file_path)[1].lower()
111
-
112
- if file_ext in ['.log', '.txt']:
113
- query = """
114
- Analyze this uploaded file and provide:
115
- 1. **Content Overview** - What type of data/logs this file contains
116
- 2. **Key Patterns** - Important patterns, trends, or anomalies found
117
- 3. **Statistical Summary** - Basic statistics (line count, data distribution, etc.)
118
- 4. **Insights & Findings** - Key takeaways from the analysis
119
- 5. **Recommendations** - Suggested actions based on the analysis
120
-
121
- DATA SOURCES AVAILABLE:
122
- - `file_content`: Raw file content as a string
123
- - `log_lines`: List of individual lines
124
- - `total_lines`: Number of lines in the file
125
- - File path: `/uploaded_file.log` (can be read with open('/uploaded_file.log', 'r'))
126
-
127
- Generate Python code to analyze the file and provide comprehensive insights.
128
- """
129
- else:
130
- query = f"""
131
- Analyze this uploaded {file_ext} file and provide:
132
- 1. **File Type Analysis** - What type of file this is and its structure
133
- 2. **Content Summary** - Overview of the file contents
134
- 3. **Key Information** - Important data points or patterns found
135
- 4. **Statistical Analysis** - Basic statistics and data distribution
136
- 5. **Recommendations** - Suggested next steps or insights
137
-
138
- DATA SOURCES AVAILABLE:
139
- - `file_content`: Raw file content as a string
140
- - `log_lines`: List of individual lines
141
- - `total_lines`: Number of lines in the file
142
- - File path: `/uploaded_file.log`
143
-
144
- Generate Python code to analyze this file and provide comprehensive insights.
145
- """
146
-
147
- # Run the analysis
148
- result_parts = []
149
- async for typ, chunk in agent.astream(
150
- {"messages": query},
151
- stream_mode=["values", "messages"],
152
- ):
153
- if typ == "messages":
154
- result_parts.append(chunk[0].content)
155
- elif typ == "values":
156
- if chunk and "messages" in chunk:
157
- final_message = chunk["messages"][-1]
158
- if hasattr(final_message, 'content'):
159
- result_parts.append(f"\n\n**Final Analysis:**\n{final_message.content}")
160
-
161
- return "\n".join(result_parts) if result_parts else "Analysis completed but no output generated."
162
 
163
  except Exception as e:
164
- return f"❌ Error analyzing file: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
  def run_file_analysis():
167
  """Wrapper to run async file analysis in sync context"""
168
  return asyncio.run(analyze_uploaded_file())
169
 
 
 
 
 
 
 
170
  # Create the Gradio interface
171
- with gr.Blocks(title="DataForge - AI Assistant with File Analysis") as demo:
172
- gr.Markdown("# πŸ” DataForge - AI Assistant with File Analysis")
173
- gr.Markdown("Upload files for analysis or chat with the AI assistant.")
174
 
175
  with gr.Tab("πŸ’¬ Chat Assistant"):
176
  chat_interface = gr.ChatInterface(
@@ -194,12 +189,20 @@ with gr.Blocks(title="DataForge - AI Assistant with File Analysis") as demo:
194
  description="Ask questions or get help with any topic."
195
  )
196
 
197
- with gr.Tab("πŸ“ File Analysis"):
198
- gr.Markdown("## Upload and Analyze Files")
199
- gr.Markdown("Upload log files, text files, or other data files for comprehensive AI-powered analysis.")
 
 
 
 
 
 
200
 
201
  with gr.Row():
202
  with gr.Column(scale=1):
 
 
203
  file_upload = gr.File(
204
  label="Upload File for Analysis",
205
  file_types=[".txt", ".log", ".csv", ".json", ".xml", ".py", ".js", ".html", ".md"],
@@ -210,14 +213,40 @@ with gr.Blocks(title="DataForge - AI Assistant with File Analysis") as demo:
210
  value="No file uploaded",
211
  interactive=False
212
  )
213
- analyze_btn = gr.Button("πŸ” Analyze File", variant="primary", size="lg")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
  with gr.Column(scale=2):
216
  analysis_output = gr.Textbox(
217
- label="Analysis Results",
218
- lines=20,
219
- max_lines=30,
220
- placeholder="Upload a file and click 'Analyze File' to see detailed analysis results here...",
221
  interactive=False
222
  )
223
 
@@ -226,13 +255,68 @@ with gr.Blocks(title="DataForge - AI Assistant with File Analysis") as demo:
226
  fn=handle_file_upload,
227
  inputs=[file_upload],
228
  outputs=[upload_status]
 
 
 
 
 
 
 
 
 
 
229
  )
230
 
231
  analyze_btn.click(
232
- fn=run_file_analysis,
233
- inputs=[],
234
  outputs=[analysis_output]
235
  )
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  if __name__ == "__main__":
238
  demo.launch()
 
11
  # Import the CodeAct agent functionality
12
  from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact
13
 
14
+ # Import the new guided analysis functionality
15
+ from graph import analyze_file_with_guidance_sync, guided_analysis_graph
16
+
17
  # Load environment variables
18
  load_dotenv(find_dotenv())
19
 
 
89
  uploaded_file_path = None
90
  return "❌ No file uploaded"
91
 
92
def analyze_file_with_question(user_question):
    """
    Run the guided LangGraph analysis of the currently uploaded file.

    A blank or whitespace-only question is replaced with a broad default
    prompt. Returns the analysis text, or a "❌ ..."-prefixed error string
    when no file is available or the guided analysis raises.
    """
    global uploaded_file_path

    # Guard: a file must have been uploaded and still exist on disk.
    file_is_missing = not uploaded_file_path or not os.path.exists(uploaded_file_path)
    if file_is_missing:
        return "❌ No file uploaded or file not found. Please upload a file first."

    # Empty question -> fall back to a broad, comprehensive default.
    if not (user_question and user_question.strip()):
        user_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

    try:
        # Delegate to the guided graph (examine -> guide -> execute).
        return analyze_file_with_guidance_sync(uploaded_file_path, user_question)
    except Exception as e:
        return f"❌ Error in guided analysis: {str(e)}"
111
+
112
def get_question_suggestions(file_path):
    """
    Build a list of suggested analysis questions tailored to the file type.

    Args:
        file_path: Path of the uploaded file, or None.

    Returns:
        Extension-specific suggestions followed by generic ones; an empty
        list when the path is missing or does not exist.
    """
    if not (file_path and os.path.exists(file_path)):
        return []

    # Generic questions appended for every supported file type.
    base_suggestions = [
        "What are the main patterns in this file?",
        "Are there any security issues or anomalies?",
        "Provide a statistical summary of the data",
        "What insights can you extract from this file?"
    ]

    log_suggestions = [
        "Find any security threats or failed login attempts",
        "Identify performance bottlenecks and slow operations",
        "What errors or warnings are present?",
        "Show me time-based trends in the data",
        "Are there any suspicious IP addresses or user activities?"
    ]
    csv_suggestions = [
        "Analyze the data distribution and statistics",
        "Find correlations between columns",
        "Identify outliers or anomalies in the data",
        "What are the key insights from this dataset?"
    ]
    json_suggestions = [
        "Parse and analyze the JSON structure",
        "What are the key data fields and their values?",
        "Find any nested patterns or relationships"
    ]

    # Dispatch table: extension -> extra suggestions shown first.
    per_extension = {
        '.log': log_suggestions,
        '.txt': log_suggestions,
        '.csv': csv_suggestions,
        '.json': json_suggestions,
    }

    extension = os.path.splitext(file_path)[1].lower()
    return per_extension.get(extension, []) + base_suggestions
150
+
151
async def analyze_uploaded_file():
    """Legacy entry point retained for backward compatibility; delegates to
    the question-driven analysis with a generic default question."""
    default_question = "Provide a comprehensive analysis of this file."
    return analyze_file_with_question(default_question)
154
 
155
def run_file_analysis():
    """Bridge for synchronous Gradio callbacks: drive the async legacy
    analysis to completion on a fresh event loop."""
    pending = analyze_uploaded_file()
    return asyncio.run(pending)
158
 
159
def update_question_suggestions():
    """
    Refresh the question-suggestion dropdown after a file upload.

    Reads the module-global `uploaded_file_path`, derives suggestions for
    that file, and returns a Gradio update payload that replaces the
    dropdown's choices and pre-selects the first suggestion (empty string
    when there are none).
    """
    global uploaded_file_path
    suggestions = get_question_suggestions(uploaded_file_path)
    # Use gr.update(): the per-component classmethod gr.Dropdown.update()
    # was removed in Gradio 4, while gr.update() works on both 3.x and 4.x.
    return gr.update(choices=suggestions, value=suggestions[0] if suggestions else "")
164
+
165
  # Create the Gradio interface
166
+ with gr.Blocks(title="DataForge - AI Assistant with Advanced File Analysis") as demo:
167
+ gr.Markdown("# πŸ” DataForge - AI Assistant with Advanced File Analysis")
168
+ gr.Markdown("Upload files and ask specific questions for AI-powered guided analysis using LangGraph.")
169
 
170
  with gr.Tab("πŸ’¬ Chat Assistant"):
171
  chat_interface = gr.ChatInterface(
 
189
  description="Ask questions or get help with any topic."
190
  )
191
 
192
+ with gr.Tab("πŸ“ Advanced File Analysis"):
193
+ gr.Markdown("## πŸš€ Guided File Analysis with LangGraph")
194
+ gr.Markdown("""
195
+ Upload files and ask specific questions for targeted AI analysis. Our guided approach:
196
+
197
+ 1. πŸ“‹ **Examines** your file structure and patterns
198
+ 2. 🎯 **Generates** specific code guidance based on your question
199
+ 3. πŸš€ **Executes** enhanced analysis with improved accuracy
200
+ """)
201
 
202
  with gr.Row():
203
  with gr.Column(scale=1):
204
+ # File Upload Section
205
+ gr.Markdown("### πŸ“€ File Upload")
206
  file_upload = gr.File(
207
  label="Upload File for Analysis",
208
  file_types=[".txt", ".log", ".csv", ".json", ".xml", ".py", ".js", ".html", ".md"],
 
213
  value="No file uploaded",
214
  interactive=False
215
  )
216
+
217
+ # Question Section
218
+ gr.Markdown("### ❓ Ask Your Question")
219
+ question_suggestions = gr.Dropdown(
220
+ label="Question Suggestions (select or type your own)",
221
+ choices=[],
222
+ allow_custom_value=True,
223
+ value=""
224
+ )
225
+
226
+ user_question = gr.Textbox(
227
+ label="Your Question about the File",
228
+ placeholder="What would you like to know about this file?",
229
+ lines=3
230
+ )
231
+
232
+ analyze_btn = gr.Button("πŸ” Run Guided Analysis", variant="primary", size="lg")
233
+
234
+ # Analysis Info
235
+ gr.Markdown("### ℹ️ Analysis Method")
236
+ gr.Markdown("""
237
+ **Guided Analysis Features:**
238
+ - 🎯 Question-aware code generation
239
+ - πŸ“‹ File structure examination
240
+ - πŸš€ Dynamic prompt optimization
241
+ - βœ… Higher accuracy than generic analysis
242
+ """)
243
 
244
  with gr.Column(scale=2):
245
  analysis_output = gr.Textbox(
246
+ label="πŸ“Š Guided Analysis Results",
247
+ lines=25,
248
+ max_lines=35,
249
+ placeholder="Upload a file, ask a question, and click 'Run Guided Analysis' to see detailed results here...",
250
  interactive=False
251
  )
252
 
 
255
  fn=handle_file_upload,
256
  inputs=[file_upload],
257
  outputs=[upload_status]
258
+ ).then(
259
+ fn=update_question_suggestions,
260
+ inputs=[],
261
+ outputs=[question_suggestions]
262
+ )
263
+
264
+ question_suggestions.change(
265
+ fn=lambda x: x,
266
+ inputs=[question_suggestions],
267
+ outputs=[user_question]
268
  )
269
 
270
  analyze_btn.click(
271
+ fn=analyze_file_with_question,
272
+ inputs=[user_question],
273
  outputs=[analysis_output]
274
  )
275
 
276
+ with gr.Tab("πŸ“Š Analysis Examples"):
277
+ gr.Markdown("## πŸ’‘ Example Questions by File Type")
278
+
279
+ with gr.Accordion("πŸ” Security Analysis Questions", open=False):
280
+ gr.Markdown("""
281
+ **For Log Files:**
282
+ - "Find any failed login attempts and suspicious IP addresses"
283
+ - "Identify potential security threats or anomalies"
284
+ - "Show me authentication errors and user access patterns"
285
+ - "Are there any brute force attacks or repeated failures?"
286
+
287
+ **For Access Logs:**
288
+ - "Detect unusual access patterns or potential intrusions"
289
+ - "Find requests with suspicious user agents or payloads"
290
+ - "Identify high-frequency requests from single IPs"
291
+ """)
292
+
293
+ with gr.Accordion("⚑ Performance Analysis Questions", open=False):
294
+ gr.Markdown("""
295
+ **For Application Logs:**
296
+ - "Which API endpoints are slowest and why?"
297
+ - "Find performance bottlenecks and response time issues"
298
+ - "Show me timeout errors and failed requests"
299
+ - "What are the peak usage times and load patterns?"
300
+
301
+ **For System Logs:**
302
+ - "Identify resource usage spikes and memory issues"
303
+ - "Find database query performance problems"
304
+ - "Show me error rates and system health indicators"
305
+ """)
306
+
307
+ with gr.Accordion("πŸ“ˆ Data Analysis Questions", open=False):
308
+ gr.Markdown("""
309
+ **For CSV/Data Files:**
310
+ - "Analyze data distribution and find statistical insights"
311
+ - "Identify outliers and anomalies in the dataset"
312
+ - "What correlations exist between different columns?"
313
+ - "Generate a comprehensive data quality report"
314
+
315
+ **For JSON Files:**
316
+ - "Parse the structure and extract key information"
317
+ - "Find patterns in nested data and relationships"
318
+ - "Summarize the main data points and values"
319
+ """)
320
+
321
  if __name__ == "__main__":
322
  demo.launch()
graph.py ADDED
@@ -0,0 +1,513 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ import re
4
+ from typing import Annotated, Dict, List, Optional
5
+ from typing_extensions import TypedDict
6
+
7
+ from dotenv import find_dotenv, load_dotenv
8
+ from langchain.chat_models import init_chat_model
9
+ from langgraph.graph import END, START, StateGraph
10
+ from pydantic import BaseModel, Field
11
+
12
+ # Import your existing agent functionality
13
+ from agent import create_analysis_agent, FileInjectedPyodideSandbox, create_pyodide_eval_fn
14
+
15
+ load_dotenv(find_dotenv())
16
+
17
+ # Initialize the language model
18
+ model = init_chat_model(
19
+ model="gpt-4.1-2025-04-14",
20
+ api_key=os.getenv("OPENAI_API_KEY"),
21
+ )
22
+
23
+
24
class FileExamination(BaseModel):
    """File examination results.

    Structured-output schema filled in by the LLM in
    examine_file_structure(); consumed by generate_code_guidance() and
    execute_guided_analysis().
    """
    file_type: str = Field(description="Type of file detected (log, csv, json, etc.)")
    structure_pattern: str = Field(description="Detected structure pattern of the file")
    # Overwritten after the model call with the full ~20-line sample.
    sample_lines: List[str] = Field(description="First few lines of the file")
    key_patterns: List[str] = Field(description="Important patterns found in sample")
    data_format: str = Field(description="Format of data (structured, unstructured, mixed)")
    complexity_level: str = Field(description="Simple, Medium, or Complex")
32
+
33
+
34
class CodeGuidance(BaseModel):
    """Code generation guidance.

    Structured-output schema produced by generate_code_guidance(); its
    fields are interpolated into the prompt built by
    execute_guided_analysis().
    """
    analysis_approach: str = Field(description="Recommended analysis approach")
    required_imports: List[str] = Field(description="Python imports needed")
    code_structure: str = Field(description="Step-by-step code structure")
    specific_patterns: List[str] = Field(description="Specific regex patterns to use")
    expected_outputs: List[str] = Field(description="What outputs to generate")
    error_handling: str = Field(description="Error handling recommendations")
42
+
43
+
44
class CodeAnalysisState(TypedDict):
    """State for the code analysis workflow.

    Each node returns a partial dict with only the keys it produced;
    LangGraph merges those partials into this state.
    """
    file_path: str  # Input file path
    analysis_query: Optional[str]  # Optional custom analysis query

    # File examination results (set by examine_file_structure)
    file_examination: Optional[FileExamination]

    # Generated guidance (set by generate_code_guidance)
    code_guidance: Optional[CodeGuidance]

    # Final results (set by execute_guided_analysis)
    generated_code: Optional[str]
    execution_result: Optional[str]
    final_analysis: Optional[str]
59
+
60
+
61
def examine_file_structure(state: CodeAnalysisState) -> CodeAnalysisState:
    """
    Node 1: Examine the file structure by reading the first several lines
    and understanding the file format and patterns.

    Returns a partial state update containing only `file_examination`.
    Failures never raise: missing or unreadable files produce a sentinel
    FileExamination with file_type == "error" that downstream nodes check.
    """
    file_path = state["file_path"]

    if not os.path.exists(file_path):
        return {
            "file_examination": FileExamination(
                file_type="error",
                structure_pattern="File not found",
                sample_lines=[],
                key_patterns=[],
                data_format="unknown",
                complexity_level="Simple"
            )
        }

    try:
        # Read first 20 lines for examination. errors="replace" keeps the
        # sampling robust when an uploaded file contains non-UTF-8 bytes;
        # without it open() raises UnicodeDecodeError and the whole
        # examination collapses into the generic error path below.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            sample_lines = []
            for i, line in enumerate(f):
                if i >= 20:  # Read first 20 lines
                    break
                sample_lines.append(line.rstrip('\n\r'))

        if not sample_lines:
            sample_lines = ["<empty file>"]

        # Constrain the model to return a FileExamination instance.
        examination_model = model.with_structured_output(FileExamination)

        sample_text = '\n'.join(sample_lines[:10])  # Show first 10 lines in prompt

        message = {
            "role": "user",
            "content": f"""
Examine this file sample and determine its structure and characteristics:

FILE PATH: {file_path}
FILE EXTENSION: {os.path.splitext(file_path)[1]}

FIRST 10 LINES:
```
{sample_text}
```

TOTAL SAMPLE LINES AVAILABLE: {len(sample_lines)}

Analyze and determine:
1. What type of file this is (log file, CSV, JSON, text, etc.)
2. The structure pattern (each line format/pattern)
3. Key patterns that would be important for analysis (timestamps, IPs, error codes, etc.)
4. Data format classification (structured/unstructured/mixed)
5. Complexity level for analysis (Simple/Medium/Complex)

Be specific about patterns you detect - these will guide code generation.
"""
        }

        examination_result = examination_model.invoke([message])
        # Replace the model's echoed sample with the full 20-line sample.
        examination_result.sample_lines = sample_lines

        print(f"📋 File Examination Complete:")
        print(f"   Type: {examination_result.file_type}")
        print(f"   Structure: {examination_result.structure_pattern}")
        print(f"   Complexity: {examination_result.complexity_level}")
        print(f"   Key Patterns: {examination_result.key_patterns}")

        return {"file_examination": examination_result}

    except Exception as e:
        print(f"❌ Error examining file: {e}")
        return {
            "file_examination": FileExamination(
                file_type="error",
                structure_pattern=f"Error reading file: {str(e)}",
                sample_lines=[],
                key_patterns=[],
                data_format="unknown",
                complexity_level="Simple"
            )
        }
146
+
147
+
148
def generate_code_guidance(state: CodeAnalysisState) -> CodeAnalysisState:
    """
    Node 2: Generate specific code guidance based on both the file examination and user question.
    This creates a targeted prompt for the code generation that addresses the user's specific needs.

    Returns a partial state update containing only `code_guidance`. On any
    failure (including an upstream examination error) a generic fallback
    CodeGuidance is returned so the workflow can still proceed.
    """
    file_examination = state["file_examination"]
    analysis_query = state.get("analysis_query", "")

    # Upstream failure: skip the model call and return safe generic guidance.
    if not file_examination or file_examination.file_type == "error":
        return {
            "code_guidance": CodeGuidance(
                analysis_approach="Basic file analysis with error handling",
                required_imports=["re", "os"],
                code_structure="1. Check file exists\n2. Basic error handling\n3. Simple output",
                specific_patterns=[],
                expected_outputs=["Error message"],
                error_handling="Try-catch with informative errors"
            )
        }

    try:
        # Constrain the model to return a CodeGuidance instance.
        guidance_model = model.with_structured_output(CodeGuidance)

        sample_preview = '\n'.join(file_examination.sample_lines[:5])

        # Heuristic keyword analysis of the question (intent, focus areas,
        # analysis type, key terms) to sharpen the prompt below.
        question_analysis = analyze_user_question(analysis_query or "General comprehensive analysis")

        message = {
            "role": "user",
            "content": f"""
Generate QUESTION-SPECIFIC Python code guidance for analyzing this file:

FILE ANALYSIS RESULTS:
- File Type: {file_examination.file_type}
- Structure Pattern: {file_examination.structure_pattern}
- Data Format: {file_examination.data_format}
- Complexity: {file_examination.complexity_level}
- Key Patterns Found: {file_examination.key_patterns}

SAMPLE LINES:
```
{sample_preview}
```

USER'S SPECIFIC QUESTION: "{analysis_query or "General comprehensive analysis"}"

QUESTION ANALYSIS:
- Intent: {question_analysis['intent']}
- Focus Areas: {question_analysis['focus_areas']}
- Expected Analysis Type: {question_analysis['analysis_type']}
- Key Terms: {question_analysis['key_terms']}

Based on BOTH the file structure AND the user's specific question, provide targeted guidance:

1. **Analysis Approach**: What specific method addresses the user's question for this file type
2. **Required Imports**: Exact Python imports needed for this specific analysis
3. **Code Structure**: Step-by-step structure that answers the user's question
4. **Specific Patterns**: Exact regex patterns or operations needed for the user's query
5. **Expected Outputs**: What specific outputs will answer the user's question
6. **Error Handling**: How to handle issues specific to this analysis type

IMPORTANT - Make guidance QUESTION-SPECIFIC:
- If user asks about "security", focus on authentication, IPs, failed logins, errors
- If user asks about "performance", focus on response times, slow operations, bottlenecks
- If user asks about "patterns", focus on frequency analysis, trends, anomalies
- If user asks about "errors", focus on error extraction, categorization, root causes
- If user asks about "statistics", focus on counts, averages, distributions
- If user asks about "time trends", focus on temporal analysis, time-based patterns

Generate code guidance that directly answers their question using the detected file structure.
"""
        }

        guidance_result = guidance_model.invoke([message])

        print(f"🎯 Code Guidance Generated:")
        print(f"   Approach: {guidance_result.analysis_approach}")
        print(f"   Imports: {guidance_result.required_imports}")
        print(f"   Patterns: {len(guidance_result.specific_patterns)} specific patterns")

        return {"code_guidance": guidance_result}

    except Exception as e:
        print(f"❌ Error generating guidance: {e}")
        # Model/validation failure: degrade to generic guidance.
        return {
            "code_guidance": CodeGuidance(
                analysis_approach="Basic analysis with error recovery",
                required_imports=["re", "os"],
                code_structure="Simple analysis with error handling",
                specific_patterns=[],
                expected_outputs=["Basic file information"],
                error_handling="Comprehensive try-catch blocks"
            )
        }
243
+
244
+
245
def execute_guided_analysis(state: CodeAnalysisState) -> CodeAnalysisState:
    """
    Node 3: Execute the file analysis using the generated guidance.
    This replaces the original agent with guided code generation.

    Returns a partial state update with `execution_result` (raw streamed
    agent output) and `final_analysis` (formatted summary wrapper).
    """
    file_path = state["file_path"]
    file_examination = state["file_examination"]
    code_guidance = state["code_guidance"]
    analysis_query = state.get("analysis_query", "")

    # Both upstream nodes must have produced data before we can proceed.
    if not file_examination or not code_guidance:
        return {
            "execution_result": "❌ Missing examination or guidance data",
            "final_analysis": "Analysis failed due to missing preliminary data"
        }

    try:
        # Fold examination + guidance into one prompt for the code agent.
        guided_query = f"""
Based on the file examination and guidance, analyze this file with the following SPECIFIC instructions:

FILE CONTEXT:
- File Type: {file_examination.file_type}
- Structure: {file_examination.structure_pattern}
- Data Format: {file_examination.data_format}
- Complexity: {file_examination.complexity_level}

CODING GUIDANCE TO FOLLOW:
- Analysis Approach: {code_guidance.analysis_approach}
- Required Imports: {', '.join(code_guidance.required_imports)}
- Code Structure: {code_guidance.code_structure}
- Specific Patterns: {code_guidance.specific_patterns}
- Expected Outputs: {', '.join(code_guidance.expected_outputs)}
- Error Handling: {code_guidance.error_handling}

SAMPLE FILE STRUCTURE (first few lines):
```
{chr(10).join(file_examination.sample_lines[:5])}
```

USER REQUEST: {analysis_query or "Comprehensive analysis following the guidance above"}

INSTRUCTIONS:
1. Follow the specified analysis approach exactly
2. Import only the recommended libraries: {', '.join(code_guidance.required_imports)}
3. Use the specific patterns provided: {code_guidance.specific_patterns}
4. Structure your code following: {code_guidance.code_structure}
5. Generate the expected outputs: {', '.join(code_guidance.expected_outputs)}
6. Implement proper error handling: {code_guidance.error_handling}

Since you have detailed guidance about this specific file structure, your code should be highly accurate and efficient.
The file examination shows this is a {file_examination.file_type} with {file_examination.data_format} data format.

Write Python code that leverages this specific knowledge for optimal analysis.
"""

        print(f"🚀 Executing guided analysis...")
        print(f"   Using {len(code_guidance.required_imports)} specific imports")
        print(f"   Following {file_examination.complexity_level} complexity approach")

        # Use the existing agent with the guided query
        agent = create_analysis_agent(file_path, model)

        async def run_guided_analysis():
            # Stream both token messages and state values; collect them all.
            result_parts = []
            async for typ, chunk in agent.astream(
                {"messages": guided_query},
                stream_mode=["values", "messages"],
            ):
                if typ == "messages":
                    if hasattr(chunk[0], 'content') and chunk[0].content:
                        result_parts.append(chunk[0].content)
                elif typ == "values":
                    # Append the last message of each state snapshot.
                    if chunk and "messages" in chunk:
                        final_message = chunk["messages"][-1]
                        if hasattr(final_message, 'content') and final_message.content:
                            result_parts.append(f"\n\n=== FINAL ANALYSIS ===\n{final_message.content}")

            return "\n".join(result_parts) if result_parts else "Analysis completed but no output generated."

        # NOTE(review): asyncio.run() inside a graph node assumes no event
        # loop is running in this thread — appears to rely on LangGraph
        # running sync nodes off the main loop; confirm when invoked via
        # guided_analysis_graph.ainvoke().
        execution_result = asyncio.run(run_guided_analysis())

        # Wrap the raw output with context and metadata for display.
        final_analysis = f"""
=== GUIDED FILE ANALYSIS RESULTS ===

File: {file_path}
Type: {file_examination.file_type} ({file_examination.data_format})
Approach: {code_guidance.analysis_approach}

{execution_result}

=== ANALYSIS METADATA ===
- Examination guided approach: ✅
- Specific patterns used: {len(code_guidance.specific_patterns)} patterns
- Complexity level: {file_examination.complexity_level}
- Guided imports: {', '.join(code_guidance.required_imports)}
"""

        print(f"✅ Guided analysis completed successfully!")

        return {
            "execution_result": execution_result,
            "final_analysis": final_analysis
        }

    except Exception as e:
        error_msg = f"❌ Error in guided analysis execution: {str(e)}"
        print(error_msg)
        return {
            "execution_result": error_msg,
            "final_analysis": f"Analysis failed: {str(e)}"
        }
359
+
360
+
361
def build_guided_analysis_graph():
    """
    Compile the guided file-analysis workflow.

    Strictly linear pipeline:
      START -> examine_file_structure -> generate_code_guidance
            -> execute_guided_analysis -> END
    """
    workflow = StateGraph(CodeAnalysisState)

    # Ordered pipeline stages; wiring below follows this order.
    stages = [
        ("examine_file_structure", examine_file_structure),
        ("generate_code_guidance", generate_code_guidance),
        ("execute_guided_analysis", execute_guided_analysis),
    ]

    for stage_name, stage_fn in stages:
        workflow.add_node(stage_name, stage_fn)

    # Chain the stages linearly from START through to END.
    previous = START
    for stage_name, _ in stages:
        workflow.add_edge(previous, stage_name)
        previous = stage_name
    workflow.add_edge(previous, END)

    return workflow.compile()
384
+
385
+
386
# Module-level singleton: compiled once at import time and shared by
# analyze_file_with_guidance() below and by importers (e.g. app.py).
guided_analysis_graph = build_guided_analysis_graph()
388
+
389
+
390
def analyze_user_question(question: str) -> dict:
    """
    Analyze the user's question to understand their intent and focus areas.
    This helps generate more targeted code guidance.

    Args:
        question: Free-form user question about the uploaded file.

    Returns:
        Dict with keys "intent" (first matching category, else "general"),
        "focus_areas" (list, ["general"] when none matched), "analysis_type"
        (first matching verb category, else "comprehensive"), and
        "key_terms" (deduplicated quoted + technical terms, first-seen order).
    """
    question_lower = question.lower()

    # Determine primary intent — first matching keyword group wins.
    intent = "general"
    if any(word in question_lower for word in ["security", "threat", "attack", "login", "auth", "breach", "suspicious"]):
        intent = "security"
    elif any(word in question_lower for word in ["performance", "slow", "fast", "speed", "time", "latency", "bottleneck"]):
        intent = "performance"
    elif any(word in question_lower for word in ["error", "exception", "fail", "problem", "issue", "bug"]):
        intent = "error_analysis"
    elif any(word in question_lower for word in ["pattern", "trend", "frequent", "common", "anomal", "unusual"]):
        intent = "pattern_analysis"
    elif any(word in question_lower for word in ["statistic", "count", "average", "distribution", "summary", "metrics"]):
        intent = "statistical"
    elif any(word in question_lower for word in ["time", "temporal", "timeline", "chronological", "over time"]):
        intent = "temporal"

    # Identify focus areas (non-exclusive; substring matches).
    focus_areas = []
    if "ip" in question_lower or "address" in question_lower:
        focus_areas.append("ip_analysis")
    if "user" in question_lower or "account" in question_lower:
        focus_areas.append("user_analysis")
    if "endpoint" in question_lower or "api" in question_lower or "url" in question_lower:
        focus_areas.append("endpoint_analysis")
    if "database" in question_lower or "query" in question_lower or "db" in question_lower:
        focus_areas.append("database_analysis")
    if "network" in question_lower or "connection" in question_lower:
        focus_areas.append("network_analysis")

    # Determine analysis type — first matching verb category wins.
    analysis_type = "comprehensive"
    if any(word in question_lower for word in ["find", "identify", "detect", "search"]):
        analysis_type = "detection"
    elif any(word in question_lower for word in ["show", "list", "display", "get"]):
        analysis_type = "extraction"
    elif any(word in question_lower for word in ["analyze", "examine", "investigate"]):
        analysis_type = "deep_analysis"
    elif any(word in question_lower for word in ["count", "how many", "frequency"]):
        analysis_type = "quantitative"
    elif any(word in question_lower for word in ["compare", "correlation", "relationship"]):
        analysis_type = "comparative"

    # Extract key terms: quoted phrases (original casing) then known
    # technical vocabulary from the lowercased question. Uses the
    # module-level `re` import — no local re-import needed.
    key_terms = []
    quoted_terms = re.findall(r'"([^"]*)"', question)
    key_terms.extend(quoted_terms)

    tech_terms = re.findall(r'\b(?:login|logout|auth|api|endpoint|database|query|ip|user|error|exception|timeout|response|request|status|code)\b', question_lower)
    key_terms.extend(tech_terms)

    return {
        "intent": intent,
        "focus_areas": focus_areas if focus_areas else ["general"],
        # dict.fromkeys dedupes while keeping first-seen order, so the
        # result is deterministic (list(set(...)) was not).
        "key_terms": list(dict.fromkeys(key_terms)),
        "analysis_type": analysis_type,
    }
455
+
456
+
457
async def analyze_file_with_guidance(file_path: str, analysis_query: str = None) -> str:
    """
    Run the guided three-stage analysis workflow on a file.

    Args:
        file_path: Path to the file to analyze.
        analysis_query: Optional specific analysis request.

    Returns:
        The final analysis text, or a "❌ ..."-prefixed message on failure.
    """
    print(f"🔍 Starting guided analysis for: {file_path}")

    # Seed the graph state; nodes fill in the remaining keys.
    initial_state = {
        "file_path": file_path,
        "analysis_query": analysis_query,
    }

    try:
        final_state = await guided_analysis_graph.ainvoke(initial_state)
    except Exception as e:
        return f"❌ Guided analysis failed: {str(e)}"
    return final_state.get("final_analysis", "Analysis completed but no results generated.")
482
+
483
+
484
def analyze_file_with_guidance_sync(file_path: str, analysis_query: str = None) -> str:
    """Blocking wrapper: run analyze_file_with_guidance() to completion on a
    fresh event loop for synchronous callers (CLI, Gradio callbacks)."""
    coroutine = analyze_file_with_guidance(file_path, analysis_query)
    return asyncio.run(coroutine)
489
+
490
+
491
# Example usage and testing
# CLI: python graph.py <file_path> [analysis_query]
if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1:
        test_file_path = sys.argv[1]
        # Second positional argument (optional) is the custom question.
        test_query = sys.argv[2] if len(sys.argv) > 2 else None

        print(f"🧪 Testing guided analysis with: {test_file_path}")
        if test_query:
            print(f"📝 Custom query: {test_query}")

        result = analyze_file_with_guidance_sync(test_file_path, test_query)
        print("\n" + "="*80)
        print("GUIDED ANALYSIS RESULT:")
        print("="*80)
        print(result)
    else:
        # No file argument: print usage help instead of running.
        print("Usage: python graph.py <file_path> [analysis_query]")
        print("\nThis will run the guided analysis workflow that:")
        print("1. 📋 Examines file structure (first ~20 lines)")
        print("2. 🎯 Generates specific code guidance")
        print("3. 🚀 Executes analysis with improved context")