Spaces: ai-puppy (Running)

Commit bb43287 · committed by ai-puppy
1 Parent(s): 4be3026

save

Browse files:
- app.py +43 -2
- graph_streaming.py +298 -0
app.py CHANGED

@@ -51,6 +51,7 @@ from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_cod
 
 # Import the new guided analysis functionality
 from graph import analyze_file_with_guidance_sync, guided_analysis_graph
+from graph_streaming import streaming_analyze_file_with_guidance
 
 # Load environment variables
 load_dotenv(find_dotenv())

@@ -79,9 +80,49 @@ def handle_file_upload(file):
         uploaded_file_path = None
         return f"❌ Upload error: {str(e)}"
 
+def streaming_analyze_file_with_question(user_question):
+    """
+    Streaming version that yields progress updates in real time
+    """
+    global uploaded_file_path, deno_available
+
+    try:
+        if not uploaded_file_path or not os.path.exists(uploaded_file_path):
+            yield "❌ No file uploaded or file not found. Please upload a file first."
+            return
+
+        if not user_question or user_question.strip() == "":
+            user_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."
+
+        # Check if Deno is available for sandbox operations
+        if not deno_available:
+            yield """❌ Deno runtime not available. This is required for code execution in the sandbox.
+
+🔍 Troubleshooting:
+1. This usually happens on deployment platforms that don't have Deno pre-installed
+2. The app attempted to install Deno automatically but failed
+3. Try restarting the space or contact support
+
+💡 Alternative: You can still upload files, but advanced code analysis may be limited."""
+            return
+
+        # Use the streaming guided analysis approach
+        for chunk in streaming_analyze_file_with_guidance(uploaded_file_path, user_question):
+            yield chunk
+
+    except Exception as e:
+        error_msg = str(e)
+        if "Deno" in error_msg or "deno" in error_msg:
+            yield f"""❌ Deno-related error in analysis: {error_msg}
+
+🔧 This appears to be a Deno runtime issue. The sandbox requires Deno for code execution.
+Try restarting the application or contact support if this persists."""
+        else:
+            yield f"❌ Error in guided analysis: {error_msg}"
+
 def analyze_file_with_question(user_question):
     """
-
+    Non-streaming version for backward compatibility
     """
     global uploaded_file_path, deno_available
 

@@ -191,7 +232,7 @@ with gr.Blocks(title="DataForge - AI-Powered File Analysis") as demo:
     )
 
     analyze_btn.click(
-        fn=analyze_file_with_question,
+        fn=streaming_analyze_file_with_question,
         inputs=[user_question],
         outputs=[analysis_output]
     )
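One wiring detail worth noting about the hunk above: Gradio treats a generator function passed to `click` as a streaming event handler, and each `yield` replaces the current value of the output component rather than appending to it; that is presumably why the pipeline ends by yielding the fully accumulated report. A minimal, self-contained sketch of the mechanism (the `slow_report` handler and component labels are illustrative, not part of this commit):

import time
import gradio as gr

def slow_report(question):
    # Accumulate text ourselves: each yield overwrites the Textbox value,
    # so yielding only the newest chunk would drop earlier chunks from view.
    text = ""
    for step in ["Examining file...", "Generating strategy...", "Running analysis..."]:
        text += step + "\n"
        time.sleep(1)  # stand-in for real work
        yield text

with gr.Blocks() as demo:
    question = gr.Textbox(label="Question")
    analysis_output = gr.Textbox(label="Analysis")
    analyze_btn = gr.Button("Analyze")
    analyze_btn.click(fn=slow_report, inputs=[question], outputs=[analysis_output])

demo.launch()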
graph_streaming.py ADDED

@@ -0,0 +1,298 @@
import asyncio
import ast
import os
import re
from typing import Annotated, Dict, List, Optional, Generator
from typing_extensions import TypedDict

from dotenv import find_dotenv, load_dotenv
from langchain.chat_models import init_chat_model
from langgraph.graph import END, START, StateGraph
from pydantic import BaseModel, Field

# Import your existing agent functionality
from agent import create_analysis_agent, FileInjectedPyodideSandbox, create_pyodide_eval_fn

load_dotenv(find_dotenv())

# Initialize the language model
model = init_chat_model(
    model="gpt-4.1-2025-04-14",
    api_key=os.getenv("OPENAI_API_KEY"),
)

# Import classes from original graph.py
from graph import FileExamination, CodeGuidance, CodeAnalysisState, validate_python_code, analyze_user_question

def streaming_analyze_file_with_guidance(file_path: str, analysis_query: str = None) -> Generator[str, None, str]:
    """
    Streaming version of guided file analysis that yields progress updates.

    Args:
        file_path: Path to the file to analyze
        analysis_query: Optional specific analysis request

    Yields:
        Progress updates as strings

    Returns:
        Final analysis results
    """
    try:
        yield "🔍 **Starting Guided File Analysis...**\n\n"
        yield f"📁 **File:** `{os.path.basename(file_path)}`\n"
        yield f"❓ **Question:** {analysis_query or 'Comprehensive analysis'}\n\n"

        # Step 1: File Examination
        yield "## 📋 Step 1: Examining File Structure\n\n"
        yield "Reading first 20 lines to understand file format and patterns...\n\n"

        file_examination = examine_file_structure_streaming(file_path)

        yield f"✅ **File Type Detected:** `{file_examination.file_type}`\n"
        yield f"✅ **Structure Pattern:** {file_examination.structure_pattern}\n"
        yield f"✅ **Data Format:** {file_examination.data_format}\n"
        yield f"✅ **Complexity:** {file_examination.complexity_level}\n"
        yield f"✅ **Key Patterns Found:** {len(file_examination.key_patterns)} patterns\n\n"

        # Step 2: Code Guidance Generation
        yield "## 🎯 Step 2: Generating Analysis Strategy\n\n"
        yield "Creating specific code guidance based on file structure and your question...\n\n"

        code_guidance = generate_code_guidance_streaming(file_examination, analysis_query)

        yield f"✅ **Analysis Approach:** {code_guidance.analysis_approach}\n"
        yield f"✅ **Required Imports:** {', '.join(code_guidance.required_imports)}\n"
        yield f"✅ **Specific Patterns:** {len(code_guidance.specific_patterns)} regex patterns ready\n"
        yield f"✅ **Expected Outputs:** {len(code_guidance.expected_outputs)} result types\n\n"

        # Step 3: Code Execution
        yield "## 🚀 Step 3: Executing Analysis\n\n"
        yield "Running guided code analysis with enhanced context...\n\n"

        # Stream the execution results
        execution_generator = execute_guided_analysis_streaming(file_path, file_examination, code_guidance, analysis_query)

        execution_results = []
        for chunk in execution_generator:
            yield chunk
            execution_results.append(chunk)

        # Final Summary
        yield "\n\n## ✅ Analysis Complete!\n\n"

        final_analysis = f"""### 📊 **Analysis Summary**

**File:** `{os.path.basename(file_path)}`
**Type:** {file_examination.file_type} ({file_examination.data_format})
**Approach:** {code_guidance.analysis_approach}
**Complexity:** {file_examination.complexity_level}

**Guided Features Used:**
- ✅ Structure-aware examination
- ✅ Question-specific code generation
- ✅ {len(code_guidance.specific_patterns)} targeted patterns
- ✅ Enhanced error handling

---

{''.join(execution_results)}
"""

        yield final_analysis
        return final_analysis

    except Exception as e:
        error_msg = f"❌ **Error in guided analysis:** {str(e)}\n\n"
        yield error_msg
        return error_msg

def examine_file_structure_streaming(file_path: str) -> FileExamination:
    """Examine file structure with minimal processing for streaming."""
    try:
        if not os.path.exists(file_path):
            return FileExamination(
                file_type="error",
                structure_pattern="File not found",
                sample_lines=[],
                key_patterns=[],
                data_format="unknown",
                complexity_level="Simple"
            )

        # Read first 20 lines
        with open(file_path, 'r', encoding='utf-8') as f:
            sample_lines = []
            for i, line in enumerate(f):
                if i >= 20:
                    break
                sample_lines.append(line.rstrip('\n\r'))

        if not sample_lines:
            sample_lines = ["<empty file>"]

        # Quick analysis based on file extension and content
        file_ext = os.path.splitext(file_path)[1].lower()
        first_lines_text = '\n'.join(sample_lines[:5])

        # Simple file type detection
        if file_ext in ['.log', '.txt']:
            if 'ERROR' in first_lines_text or 'WARN' in first_lines_text:
                file_type = "application_log"
                structure_pattern = "Log entries with timestamps and severity levels"
                key_patterns = ["timestamp", "log_level", "error_codes"]
            else:
                file_type = "text_log"
                structure_pattern = "Plain text with line-based entries"
                key_patterns = ["timestamps", "text_patterns"]
        elif file_ext == '.csv':
            file_type = "csv_data"
            structure_pattern = "Comma-separated values with headers"
            key_patterns = ["column_headers", "data_rows"]
        elif file_ext == '.json':
            file_type = "json_data"
            structure_pattern = "Structured JSON data"
            key_patterns = ["json_objects", "nested_data"]
        else:
            file_type = "generic_file"
            structure_pattern = "Unknown structure"
            key_patterns = ["general_patterns"]

        return FileExamination(
            file_type=file_type,
            structure_pattern=structure_pattern,
            sample_lines=sample_lines,
            key_patterns=key_patterns,
            data_format="structured" if file_ext in ['.csv', '.json'] else "unstructured",
            complexity_level="Medium"
        )

    except Exception as e:
        return FileExamination(
            file_type="error",
            structure_pattern=f"Error reading file: {str(e)}",
            sample_lines=[],
            key_patterns=[],
            data_format="unknown",
            complexity_level="Simple"
        )

def generate_code_guidance_streaming(file_examination: FileExamination, analysis_query: str = None) -> CodeGuidance:
    """Generate code guidance with quick processing for streaming."""

    if not file_examination or file_examination.file_type == "error":
        return CodeGuidance(
            analysis_approach="Basic file analysis with error handling",
            required_imports=["re", "os"],
            code_structure="1. Check file exists\n2. Basic error handling\n3. Simple output",
            specific_patterns=[],
            expected_outputs=["Error message"],
            error_handling="Try-catch with informative errors"
        )

    # Quick guidance based on file type
    if "log" in file_examination.file_type:
        approach = "Log file analysis with pattern matching"
        imports = ["re", "datetime", "collections"]
        patterns = [r'\d{4}-\d{2}-\d{2}', r'ERROR|WARN|INFO', r'\d+\.\d+\.\d+\.\d+']
        outputs = ["Error counts", "Timeline analysis", "IP addresses"]
    elif "csv" in file_examination.file_type:
        approach = "CSV data analysis with statistical insights"
        imports = ["pandas", "numpy", "re"]
        patterns = [r'^\w+,', r'\d+', r'\w+@\w+']
        outputs = ["Data summary", "Column analysis", "Statistics"]
    elif "json" in file_examination.file_type:
        approach = "JSON structure analysis and data extraction"
        imports = ["json", "re", "collections"]
        patterns = [r'"[\w]+":', r'\{.*\}', r'\[.*\]']
        outputs = ["Structure overview", "Key extraction", "Value analysis"]
    else:
        approach = "General text analysis with pattern detection"
        imports = ["re", "collections", "os"]
        patterns = [r'\w+', r'\d+', r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}']
        outputs = ["Pattern summary", "Content analysis", "Statistics"]

    return CodeGuidance(
        analysis_approach=approach,
        required_imports=imports,
        code_structure=f"1. Load and validate file\n2. Apply {len(patterns)} specific patterns\n3. Generate insights\n4. Format results",
        specific_patterns=patterns,
        expected_outputs=outputs,
        error_handling="Comprehensive error handling with informative messages"
    )

def execute_guided_analysis_streaming(file_path: str, file_examination: FileExamination,
                                      code_guidance: CodeGuidance, analysis_query: str = None) -> Generator[str, None, None]:
    """Execute the analysis with streaming progress updates."""

    try:
        yield "### 🔧 **Initializing Analysis Environment**\n\n"

        # Create analysis agent
        try:
            model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")
            agent = create_analysis_agent(file_path, model)
            yield "✅ Analysis agent initialized successfully\n\n"
        except Exception as e:
            yield f"❌ Failed to initialize agent: {str(e)}\n\n"
            return

        yield "### 📝 **Generating Analysis Code**\n\n"

        # Create analysis prompt
        user_analysis = analyze_user_question(analysis_query or "Comprehensive analysis")

        analysis_prompt = f"""
Analyze the uploaded file based on this guidance:

**File Information:**
- Type: {file_examination.file_type}
- Structure: {file_examination.structure_pattern}
- Format: {file_examination.data_format}
- Complexity: {file_examination.complexity_level}

**User Question Analysis:**
- Intent: {user_analysis['intent']}
- Focus Areas: {user_analysis['focus_areas']}
- Analysis Type: {user_analysis['analysis_type']}

**Generated Guidance:**
- Approach: {code_guidance.analysis_approach}
- Required Imports: {code_guidance.required_imports}
- Patterns to Use: {code_guidance.specific_patterns}
- Expected Outputs: {code_guidance.expected_outputs}

**User's Specific Question:** {analysis_query or 'Provide comprehensive analysis'}

Please write Python code that follows this guidance and analyzes the file. The file is available at the virtual path '/uploaded_file.log'.
"""

        yield "✅ Analysis prompt prepared\n\n"
        yield "### ⚡ **Running AI Analysis**\n\n"

        # Execute analysis
        try:
            async def run_analysis():
                result = await agent.ainvoke({"messages": [{"role": "user", "content": analysis_prompt}]})
                return result

            yield "🤖 AI model is analyzing your file...\n\n"
            result = asyncio.run(run_analysis())

            # Extract the final message
            if result and "messages" in result:
                final_message = result["messages"][-1]
                if hasattr(final_message, 'content'):
                    yield "### 📊 **Analysis Results**\n\n"
                    yield final_message.content
                    yield "\n\n"
                else:
                    yield "❌ No content in analysis result\n\n"
            else:
                yield "❌ Invalid analysis result format\n\n"

        except Exception as e:
            yield f"❌ Error during analysis execution: {str(e)}\n\n"

    except Exception as e:
        yield f"❌ Error in analysis setup: {str(e)}\n\n"
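A quick way to exercise the new module outside Gradio: since the generator is annotated `Generator[str, None, str]`, the final report is both yielded and returned, and the return value travels in `StopIteration.value`, which a plain `for` loop silently discards. A hypothetical smoke test, assuming a sample file exists at the path shown (neither the script nor the question is part of this commit):

# smoke_test.py - manual check for graph_streaming.py (not part of this commit)
from graph_streaming import streaming_analyze_file_with_guidance

gen = streaming_analyze_file_with_guidance("sample.log", "Count the ERROR lines")

final = None
while True:
    try:
        # Progress updates arrive as they are produced
        print(next(gen), end="", flush=True)
    except StopIteration as stop:
        final = stop.value  # the generator's `return final_analysis`
        break

print("\n--- captured return value (first 200 chars) ---")
print(final[:200] if final else "(no return value)")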