ai-puppy committed on
Commit
bb43287
Β·
1 Parent(s): 4be3026
Files changed (2) hide show
  1. app.py +43 -2
  2. graph_streaming.py +298 -0
app.py CHANGED
@@ -51,6 +51,7 @@ from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_cod
51
 
52
  # Import the new guided analysis functionality
53
  from graph import analyze_file_with_guidance_sync, guided_analysis_graph
 
54
 
55
  # Load environment variables
56
  load_dotenv(find_dotenv())
@@ -79,9 +80,49 @@ def handle_file_upload(file):
79
  uploaded_file_path = None
80
  return f"❌ Upload error: {str(e)}"
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  def analyze_file_with_question(user_question):
83
  """
84
- Analyze the uploaded file using the new guided approach with user question
85
  """
86
  global uploaded_file_path, deno_available
87
 
@@ -191,7 +232,7 @@ with gr.Blocks(title="DataForge - AI-Powered File Analysis") as demo:
191
  )
192
 
193
  analyze_btn.click(
194
- fn=analyze_file_with_question,
195
  inputs=[user_question],
196
  outputs=[analysis_output]
197
  )
 
51
 
52
  # Import the new guided analysis functionality
53
  from graph import analyze_file_with_guidance_sync, guided_analysis_graph
54
+ from graph_streaming import streaming_analyze_file_with_guidance
55
 
56
  # Load environment variables
57
  load_dotenv(find_dotenv())
 
80
  uploaded_file_path = None
81
  return f"❌ Upload error: {str(e)}"
82
 
83
def streaming_analyze_file_with_question(user_question):
    """
    Streaming version that yields progress updates in real-time.

    Args:
        user_question: The user's analysis question; falls back to a
            comprehensive default when empty or whitespace-only.

    Yields:
        Progress/result strings produced by the guided streaming analysis,
        or a single error message when a precondition fails.
    """
    global uploaded_file_path, deno_available

    try:
        # Precondition: a file must have been uploaded and still exist on disk.
        if not uploaded_file_path or not os.path.exists(uploaded_file_path):
            yield "❌ No file uploaded or file not found. Please upload a file first."
            return

        # Substitute a broad default question when none was provided.
        if not user_question or not user_question.strip():
            user_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."

        # Check if Deno is available for sandbox operations.
        if not deno_available:
            yield """❌ Deno runtime not available. This is required for code execution in the sandbox.

πŸ“‹ Troubleshooting:
1. This usually happens on deployment platforms that don't have Deno pre-installed
2. The app attempted to install Deno automatically but failed
3. Try restarting the space or contact support

πŸ”„ Alternative: You can still upload files, but advanced code analysis may be limited."""
            return

        # Delegate to the streaming guided analysis, forwarding every chunk.
        yield from streaming_analyze_file_with_guidance(uploaded_file_path, user_question)

    except Exception as e:
        error_msg = str(e)
        # Case-insensitive match replaces the original double check for
        # both "Deno" and "deno" (also catches "DENO" etc.).
        if "deno" in error_msg.lower():
            yield f"""❌ Deno-related error in analysis: {error_msg}

πŸ”§ This appears to be a Deno runtime issue. The sandbox requires Deno for code execution.
Try restarting the application or contact support if this persists."""
        else:
            yield f"❌ Error in guided analysis: {error_msg}"
122
+
123
  def analyze_file_with_question(user_question):
124
  """
125
+ Non-streaming version for backward compatibility
126
  """
127
  global uploaded_file_path, deno_available
128
 
 
232
  )
233
 
234
  analyze_btn.click(
235
+ fn=streaming_analyze_file_with_question,
236
  inputs=[user_question],
237
  outputs=[analysis_output]
238
  )
graph_streaming.py ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import ast
3
+ import os
4
+ import re
5
+ from typing import Annotated, Dict, List, Optional, Generator
6
+ from typing_extensions import TypedDict
7
+
8
+ from dotenv import find_dotenv, load_dotenv
9
+ from langchain.chat_models import init_chat_model
10
+ from langgraph.graph import END, START, StateGraph
11
+ from pydantic import BaseModel, Field
12
+
13
+ # Import your existing agent functionality
14
+ from agent import create_analysis_agent, FileInjectedPyodideSandbox, create_pyodide_eval_fn
15
+
16
+ load_dotenv(find_dotenv())
17
+
18
+ # Initialize the language model
19
+ model = init_chat_model(
20
+ model="gpt-4.1-2025-04-14",
21
+ api_key=os.getenv("OPENAI_API_KEY"),
22
+ )
23
+
24
+ # Import classes from original graph.py
25
+ from graph import FileExamination, CodeGuidance, CodeAnalysisState, validate_python_code, analyze_user_question
26
+
27
def streaming_analyze_file_with_guidance(file_path: str, analysis_query: str = None) -> Generator[str, None, str]:
    """
    Streaming version of guided file analysis that yields progress updates.

    Runs the three-step pipeline — file examination, code-guidance
    generation, guided execution — yielding a markdown progress chunk
    after each step, then a final combined summary.

    Args:
        file_path: Path to the file to analyze
        analysis_query: Optional specific analysis request

    Yields:
        Progress updates as strings

    Returns:
        Final analysis results
        NOTE(review): a generator's return value is only reachable via
        StopIteration.value, not by a plain `for` loop — confirm callers
        actually consume it; the final text is also yielded just before.
    """
    try:
        yield "πŸ” **Starting Guided File Analysis...**\n\n"
        yield f"πŸ“ **File:** `{os.path.basename(file_path)}`\n"
        yield f"❓ **Question:** {analysis_query or 'Comprehensive analysis'}\n\n"

        # Step 1: File Examination — cheap, local sniff of the first lines.
        yield "## πŸ“‹ Step 1: Examining File Structure\n\n"
        yield "Reading first 20 lines to understand file format and patterns...\n\n"

        file_examination = examine_file_structure_streaming(file_path)

        yield f"βœ… **File Type Detected:** `{file_examination.file_type}`\n"
        yield f"βœ… **Structure Pattern:** {file_examination.structure_pattern}\n"
        yield f"βœ… **Data Format:** {file_examination.data_format}\n"
        yield f"βœ… **Complexity:** {file_examination.complexity_level}\n"
        yield f"βœ… **Key Patterns Found:** {len(file_examination.key_patterns)} patterns\n\n"

        # Step 2: Code Guidance Generation — pick an analysis plan from the
        # examination result (no model call at this stage).
        yield "## 🎯 Step 2: Generating Analysis Strategy\n\n"
        yield "Creating specific code guidance based on file structure and your question...\n\n"

        code_guidance = generate_code_guidance_streaming(file_examination, analysis_query)

        yield f"βœ… **Analysis Approach:** {code_guidance.analysis_approach}\n"
        yield f"βœ… **Required Imports:** {', '.join(code_guidance.required_imports)}\n"
        yield f"βœ… **Specific Patterns:** {len(code_guidance.specific_patterns)} regex patterns ready\n"
        yield f"βœ… **Expected Outputs:** {len(code_guidance.expected_outputs)} result types\n\n"

        # Step 3: Code Execution — the expensive step (agent + sandbox).
        yield "## πŸš€ Step 3: Executing Analysis\n\n"
        yield "Running guided code analysis with enhanced context...\n\n"

        # Stream the execution results while also collecting them so they
        # can be embedded in the final summary below.
        execution_generator = execute_guided_analysis_streaming(file_path, file_examination, code_guidance, analysis_query)

        execution_results = []
        for chunk in execution_generator:
            yield chunk
            execution_results.append(chunk)

        # Final Summary
        yield "\n\n## βœ… Analysis Complete!\n\n"

        # NOTE(review): the summary re-emits every execution chunk already
        # yielded above — presumably intended for UIs where each yield
        # REPLACES the displayed text (e.g. a Gradio streaming output);
        # confirm against the consumer, since an appending consumer would
        # show everything twice.
        final_analysis = f"""### πŸ“Š **Analysis Summary**

**File:** `{os.path.basename(file_path)}`
**Type:** {file_examination.file_type} ({file_examination.data_format})
**Approach:** {code_guidance.analysis_approach}
**Complexity:** {file_examination.complexity_level}

**Guided Features Used:**
- βœ… Structure-aware examination
- βœ… Question-specific code generation
- βœ… {len(code_guidance.specific_patterns)} targeted patterns
- βœ… Enhanced error handling

---

{''.join(execution_results)}
"""

        yield final_analysis
        return final_analysis

    except Exception as e:
        # Any failure is surfaced as a yielded chunk so the stream always
        # ends with visible output rather than an unhandled exception.
        error_msg = f"❌ **Error in guided analysis:** {str(e)}\n\n"
        yield error_msg
        return error_msg
109
+
110
def examine_file_structure_streaming(file_path: str) -> FileExamination:
    """Examine file structure with minimal processing for streaming.

    Reads at most the first 20 lines of *file_path* and classifies the
    file from its extension plus a content sniff of the first 5 lines.

    Args:
        file_path: Path of the file to examine.

    Returns:
        A FileExamination describing the detected type, structure pattern,
        sample lines, key patterns, data format and complexity level. Any
        failure is reported as a FileExamination with file_type="error"
        instead of raising.
    """
    try:
        if not os.path.exists(file_path):
            return FileExamination(
                file_type="error",
                structure_pattern="File not found",
                sample_lines=[],
                key_patterns=[],
                data_format="unknown",
                complexity_level="Simple"
            )

        # Read first 20 lines. errors="replace" keeps the examination
        # working on files containing stray non-UTF-8 bytes; previously a
        # single bad byte raised UnicodeDecodeError and turned the whole
        # result into an "error" examination with no sample lines.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            sample_lines = []
            for i, line in enumerate(f):
                if i >= 20:
                    break
                sample_lines.append(line.rstrip('\n\r'))

        if not sample_lines:
            sample_lines = ["<empty file>"]

        # Quick analysis based on file extension and content.
        file_ext = os.path.splitext(file_path)[1].lower()
        first_lines_text = '\n'.join(sample_lines[:5])

        # Simple file type detection: extension first, then a severity-
        # keyword sniff (only the first 5 lines) for .log/.txt files.
        if file_ext in ['.log', '.txt']:
            if 'ERROR' in first_lines_text or 'WARN' in first_lines_text:
                file_type = "application_log"
                structure_pattern = "Log entries with timestamps and severity levels"
                key_patterns = ["timestamp", "log_level", "error_codes"]
            else:
                file_type = "text_log"
                structure_pattern = "Plain text with line-based entries"
                key_patterns = ["timestamps", "text_patterns"]
        elif file_ext == '.csv':
            file_type = "csv_data"
            structure_pattern = "Comma-separated values with headers"
            key_patterns = ["column_headers", "data_rows"]
        elif file_ext == '.json':
            file_type = "json_data"
            structure_pattern = "Structured JSON data"
            key_patterns = ["json_objects", "nested_data"]
        else:
            file_type = "generic_file"
            structure_pattern = "Unknown structure"
            key_patterns = ["general_patterns"]

        return FileExamination(
            file_type=file_type,
            structure_pattern=structure_pattern,
            sample_lines=sample_lines,
            key_patterns=key_patterns,
            data_format="structured" if file_ext in ['.csv', '.json'] else "unstructured",
            complexity_level="Medium"
        )

    except Exception as e:
        # Catch-all keeps the streaming pipeline alive; the error text is
        # surfaced in the structure_pattern field for the UI.
        return FileExamination(
            file_type="error",
            structure_pattern=f"Error reading file: {str(e)}",
            sample_lines=[],
            key_patterns=[],
            data_format="unknown",
            complexity_level="Simple"
        )
179
+
180
def generate_code_guidance_streaming(file_examination: FileExamination, analysis_query: str = None) -> CodeGuidance:
    """Generate code guidance with quick processing for streaming.

    Selects a canned analysis plan (approach, imports, regex patterns,
    expected outputs) keyed on the examined file type; falls back to a
    minimal defensive plan when examination failed.
    """

    # Bail out with a minimal, defensive plan when examination failed.
    if not file_examination or file_examination.file_type == "error":
        return CodeGuidance(
            analysis_approach="Basic file analysis with error handling",
            required_imports=["re", "os"],
            code_structure="1. Check file exists\n2. Basic error handling\n3. Simple output",
            specific_patterns=[],
            expected_outputs=["Error message"],
            error_handling="Try-catch with informative errors"
        )

    # Plan table: marker substring -> (approach, imports, patterns, outputs).
    # Insertion order mirrors the original if/elif priority: log, csv, json.
    plan_table = {
        "log": (
            "Log file analysis with pattern matching",
            ["re", "datetime", "collections"],
            [r'\d{4}-\d{2}-\d{2}', r'ERROR|WARN|INFO', r'\d+\.\d+\.\d+\.\d+'],
            ["Error counts", "Timeline analysis", "IP addresses"],
        ),
        "csv": (
            "CSV data analysis with statistical insights",
            ["pandas", "numpy", "re"],
            [r'^\w+,', r'\d+', r'\w+@\w+'],
            ["Data summary", "Column analysis", "Statistics"],
        ),
        "json": (
            "JSON structure analysis and data extraction",
            ["json", "re", "collections"],
            [r'"[\w]+":', r'\{.*\}', r'\[.*\]'],
            ["Structure overview", "Key extraction", "Value analysis"],
        ),
    }

    # Default: general text analysis when no marker matches.
    chosen = (
        "General text analysis with pattern detection",
        ["re", "collections", "os"],
        [r'\w+', r'\d+', r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}'],
        ["Pattern summary", "Content analysis", "Statistics"],
    )
    for marker, plan in plan_table.items():
        if marker in file_examination.file_type:
            chosen = plan
            break

    approach, imports, patterns, outputs = chosen

    return CodeGuidance(
        analysis_approach=approach,
        required_imports=imports,
        code_structure=f"1. Load and validate file\n2. Apply {len(patterns)} specific patterns\n3. Generate insights\n4. Format results",
        specific_patterns=patterns,
        expected_outputs=outputs,
        error_handling="Comprehensive error handling with informative messages"
    )
223
+
224
def execute_guided_analysis_streaming(file_path: str, file_examination: FileExamination,
                                      code_guidance: CodeGuidance, analysis_query: str = None) -> Generator[str, None, None]:
    """Execute the analysis with streaming progress updates.

    Builds an analysis agent, composes a guidance-rich prompt from the
    examination and code-guidance results, runs the agent once, and
    streams each stage (init, prompt prep, run, results) as markdown
    chunks. All failures are yielded as error chunks; nothing is raised.

    Args:
        file_path: Path of the file under analysis.
        file_examination: Structure info produced by the examination step.
        code_guidance: Analysis plan produced by the guidance step.
        analysis_query: Optional user question; a generic prompt is used
            when omitted.

    Yields:
        Markdown progress and result strings.
    """

    try:
        yield "### πŸ”„ **Initializing Analysis Environment**\n\n"

        # Create analysis agent.
        # NOTE(review): this local `model` shadows the module-level model
        # created at import time, and is built with model_provider= rather
        # than api_key= — confirm both initializations are intentional.
        try:
            model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")
            agent = create_analysis_agent(file_path, model)
            yield "βœ… Analysis agent initialized successfully\n\n"
        except Exception as e:
            # Without an agent nothing else can run; stop the stream here.
            yield f"❌ Failed to initialize agent: {str(e)}\n\n"
            return

        yield "### πŸ“ **Generating Analysis Code**\n\n"

        # Create analysis prompt from the user's question intent plus the
        # pre-computed examination and guidance data.
        user_analysis = analyze_user_question(analysis_query or "Comprehensive analysis")

        # NOTE(review): the virtual path '/uploaded_file.log' is hard-coded;
        # presumably it matches the sandbox mount point in agent.py — verify.
        analysis_prompt = f"""
Analyze the uploaded file based on this guidance:

**File Information:**
- Type: {file_examination.file_type}
- Structure: {file_examination.structure_pattern}
- Format: {file_examination.data_format}
- Complexity: {file_examination.complexity_level}

**User Question Analysis:**
- Intent: {user_analysis['intent']}
- Focus Areas: {user_analysis['focus_areas']}
- Analysis Type: {user_analysis['analysis_type']}

**Generated Guidance:**
- Approach: {code_guidance.analysis_approach}
- Required Imports: {code_guidance.required_imports}
- Patterns to Use: {code_guidance.specific_patterns}
- Expected Outputs: {code_guidance.expected_outputs}

**User's Specific Question:** {analysis_query or 'Provide comprehensive analysis'}

Please write Python code that follows this guidance and analyzes the file. The file is available at the virtual path '/uploaded_file.log'.
"""

        yield "βœ… Analysis prompt prepared\n\n"
        yield "### ⚑ **Running AI Analysis**\n\n"

        # Execute analysis.
        try:
            # Single agent invocation wrapped so asyncio.run can drive it.
            # NOTE(review): asyncio.run raises RuntimeError if called while
            # an event loop is already running in this thread — confirm the
            # caller (e.g. the web UI) invokes this from a plain sync context.
            async def run_analysis():
                result = await agent.ainvoke({"messages": [{"role": "user", "content": analysis_prompt}]})
                return result

            yield "πŸ€– AI model is analyzing your file...\n\n"
            result = asyncio.run(run_analysis())

            # Extract the final message: the last entry of the agent's
            # "messages" list is treated as the answer.
            if result and "messages" in result:
                final_message = result["messages"][-1]
                if hasattr(final_message, 'content'):
                    yield "### πŸ“Š **Analysis Results**\n\n"
                    yield final_message.content
                    yield "\n\n"
                else:
                    yield "❌ No content in analysis result\n\n"
            else:
                yield "❌ Invalid analysis result format\n\n"

        except Exception as e:
            yield f"❌ Error during analysis execution: {str(e)}\n\n"

    except Exception as e:
        # Outer guard: any setup failure becomes a final error chunk.
        yield f"❌ Error in analysis setup: {str(e)}\n\n"