ai-puppy committed on
Commit
b8d6d6e
·
2 Parent(s): 5268062 c7ebfd3

Merge branch 'main' of https://huggingface.co/spaces/Agents-MCP-Hackathon/DataForge

Browse files
Files changed (2) hide show
  1. app.py +128 -262
  2. graph.py +73 -8
app.py CHANGED
@@ -4,9 +4,7 @@ import asyncio
4
  import tempfile
5
  from dotenv import find_dotenv, load_dotenv
6
  from langchain.chat_models import init_chat_model
7
- from langchain.schema import HumanMessage, SystemMessage
8
- from langgraph.prebuilt import create_react_agent
9
- from langsmith import traceable
10
 
11
  # Import the CodeAct agent functionality
12
  from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact
@@ -17,77 +15,29 @@ from graph import analyze_file_with_guidance_sync, guided_analysis_graph
17
  # Load environment variables
18
  load_dotenv(find_dotenv())
19
 
20
- # Initialize OpenAI model
21
- openai_model = init_chat_model(
22
- model="gpt-4.1-nano-2025-04-14",
23
- api_key=os.getenv("OPENAI_API_KEY"),
24
- )
25
-
26
- # Create the basic chat agent
27
- chat_agent = create_react_agent(openai_model, tools=[])
28
-
29
- # Initialize CodeAct model for file analysis
30
  codeact_model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")
31
 
32
  # Store uploaded file path globally
33
  uploaded_file_path = None
34
 
35
- @traceable
36
- def respond(
37
- message,
38
- history: list[tuple[str, str]],
39
- system_message,
40
- max_tokens,
41
- temperature,
42
- top_p,
43
- ):
44
- """
45
- Main chat function that processes user input and returns AI response
46
- """
47
- try:
48
- # Convert history to LangChain message format
49
- messages = [SystemMessage(content=system_message)]
50
-
51
- # Add conversation history
52
- for user_msg, assistant_msg in history:
53
- if user_msg:
54
- messages.append(HumanMessage(content=user_msg))
55
- if assistant_msg:
56
- messages.append(SystemMessage(content=assistant_msg))
57
-
58
- # Add current user message
59
- messages.append(HumanMessage(content=message))
60
-
61
- # Prepare input for the agent
62
- input_data = {"messages": messages}
63
-
64
- # Stream the response
65
- response_text = ""
66
- for chunk in chat_agent.stream(input_data, stream_mode="values"):
67
- if "messages" in chunk and chunk["messages"]:
68
- latest_message = chunk["messages"][-1]
69
- if hasattr(latest_message, 'content'):
70
- current_content = latest_message.content
71
- if current_content and len(current_content) > len(response_text):
72
- response_text = current_content
73
- yield response_text
74
-
75
- # Ensure we return something even if streaming doesn't work
76
- if not response_text:
77
- yield "I'm sorry, I couldn't process your message. Please check your OpenAI API key."
78
-
79
- except Exception as e:
80
- yield f"Error: {str(e)}. Please make sure your OpenAI API key is set correctly."
81
 
82
  def handle_file_upload(file):
83
  """Handle file upload and store the path globally"""
84
  global uploaded_file_path
85
- if file is not None:
86
- uploaded_file_path = file.name
87
- return f" File uploaded successfully: {os.path.basename(file.name)}"
88
- else:
 
 
 
 
 
 
89
  uploaded_file_path = None
90
- return "❌ No file uploaded"
91
 
92
  def analyze_file_with_question(user_question):
93
  """
@@ -95,13 +45,13 @@ def analyze_file_with_question(user_question):
95
  """
96
  global uploaded_file_path
97
 
98
- if not uploaded_file_path or not os.path.exists(uploaded_file_path):
99
- return "❌ No file uploaded or file not found. Please upload a file first."
100
-
101
- if not user_question or user_question.strip() == "":
102
- user_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."
103
-
104
  try:
 
 
 
 
 
 
105
  # Use the new guided analysis approach
106
  result = analyze_file_with_guidance_sync(uploaded_file_path, user_question)
107
  return result
@@ -109,45 +59,6 @@ def analyze_file_with_question(user_question):
109
  except Exception as e:
110
  return f"❌ Error in guided analysis: {str(e)}"
111
 
112
- def get_question_suggestions(file_path):
113
- """
114
- Generate suggested questions based on file type and structure
115
- """
116
- if not file_path or not os.path.exists(file_path):
117
- return []
118
-
119
- file_ext = os.path.splitext(file_path)[1].lower()
120
- base_suggestions = [
121
- "What are the main patterns in this file?",
122
- "Are there any security issues or anomalies?",
123
- "Provide a statistical summary of the data",
124
- "What insights can you extract from this file?"
125
- ]
126
-
127
- if file_ext in ['.log', '.txt']:
128
- return [
129
- "Find any security threats or failed login attempts",
130
- "Identify performance bottlenecks and slow operations",
131
- "What errors or warnings are present?",
132
- "Show me time-based trends in the data",
133
- "Are there any suspicious IP addresses or user activities?"
134
- ] + base_suggestions
135
- elif file_ext == '.csv':
136
- return [
137
- "Analyze the data distribution and statistics",
138
- "Find correlations between columns",
139
- "Identify outliers or anomalies in the data",
140
- "What are the key insights from this dataset?"
141
- ] + base_suggestions
142
- elif file_ext == '.json':
143
- return [
144
- "Parse and analyze the JSON structure",
145
- "What are the key data fields and their values?",
146
- "Find any nested patterns or relationships"
147
- ] + base_suggestions
148
- else:
149
- return base_suggestions
150
-
151
  async def analyze_uploaded_file():
152
  """Legacy function - kept for backward compatibility"""
153
  return analyze_file_with_question("Provide a comprehensive analysis of this file.")
@@ -156,167 +67,122 @@ def run_file_analysis():
156
  """Wrapper to run async file analysis in sync context"""
157
  return asyncio.run(analyze_uploaded_file())
158
 
159
- def update_question_suggestions():
160
- """Update question suggestions based on uploaded file"""
161
- global uploaded_file_path
162
- suggestions = get_question_suggestions(uploaded_file_path)
163
- return gr.Dropdown.update(choices=suggestions, value=suggestions[0] if suggestions else "")
164
-
165
  # Create the Gradio interface
166
- with gr.Blocks(title="DataForge - AI Assistant with Advanced File Analysis") as demo:
167
- gr.Markdown("# 🔍 DataForge - AI Assistant with Advanced File Analysis")
168
- gr.Markdown("Upload files and ask specific questions for AI-powered guided analysis using LangGraph.")
 
169
 
170
- with gr.Tab("💬 Chat Assistant"):
171
- chat_interface = gr.ChatInterface(
172
- respond,
173
- additional_inputs=[
174
- gr.Textbox(
175
- value="You are a helpful AI assistant. Be friendly, informative, and concise in your responses.",
176
- label="System message"
177
- ),
178
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
179
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
180
- gr.Slider(
181
- minimum=0.1,
182
- maximum=1.0,
183
- value=0.95,
184
- step=0.05,
185
- label="Top-p (nucleus sampling)",
186
- ),
187
- ],
188
- title="Chat with AI Assistant",
189
- description="Ask questions or get help with any topic."
190
- )
191
 
192
- with gr.Tab("📁 Advanced File Analysis"):
193
- gr.Markdown("## 🚀 Guided File Analysis with LangGraph")
194
- gr.Markdown("""
195
- Upload files and ask specific questions for targeted AI analysis. Our guided approach:
196
-
197
- 1. 📋 **Examines** your file structure and patterns
198
- 2. 🎯 **Generates** specific code guidance based on your question
199
- 3. 🚀 **Executes** enhanced analysis with improved accuracy
200
- """)
201
-
202
- with gr.Row():
203
- with gr.Column(scale=1):
204
- # File Upload Section
205
- gr.Markdown("### 📤 File Upload")
206
- file_upload = gr.File(
207
- label="Upload File for Analysis",
208
- file_types=[".txt", ".log", ".csv", ".json", ".xml", ".py", ".js", ".html", ".md"],
209
- type="filepath"
210
- )
211
- upload_status = gr.Textbox(
212
- label="Upload Status",
213
- value="No file uploaded",
214
- interactive=False
215
- )
216
-
217
- # Question Section
218
- gr.Markdown("### ❓ Ask Your Question")
219
- question_suggestions = gr.Dropdown(
220
- label="Question Suggestions (select or type your own)",
221
- choices=[],
222
- allow_custom_value=True,
223
- value=""
224
- )
225
-
226
- user_question = gr.Textbox(
227
- label="Your Question about the File",
228
- placeholder="What would you like to know about this file?",
229
- lines=3
230
- )
231
-
232
- analyze_btn = gr.Button("🔍 Run Guided Analysis", variant="primary", size="lg")
233
-
234
- # Analysis Info
235
- gr.Markdown("### ℹ️ Analysis Method")
236
- gr.Markdown("""
237
- **Guided Analysis Features:**
238
- - 🎯 Question-aware code generation
239
- - 📋 File structure examination
240
- - 🚀 Dynamic prompt optimization
241
- - ✅ Higher accuracy than generic analysis
242
- """)
243
 
244
- with gr.Column(scale=2):
245
- analysis_output = gr.Textbox(
246
- label="📊 Guided Analysis Results",
247
- lines=25,
248
- max_lines=35,
249
- placeholder="Upload a file, ask a question, and click 'Run Guided Analysis' to see detailed results here...",
250
- interactive=False
251
- )
252
-
253
- # Event handlers
254
- file_upload.change(
255
- fn=handle_file_upload,
256
- inputs=[file_upload],
257
- outputs=[upload_status]
258
- ).then(
259
- fn=update_question_suggestions,
260
- inputs=[],
261
- outputs=[question_suggestions]
262
- )
263
-
264
- question_suggestions.change(
265
- fn=lambda x: x,
266
- inputs=[question_suggestions],
267
- outputs=[user_question]
268
- )
269
-
270
- analyze_btn.click(
271
- fn=analyze_file_with_question,
272
- inputs=[user_question],
273
- outputs=[analysis_output]
274
- )
275
-
276
- with gr.Tab("📊 Analysis Examples"):
277
- gr.Markdown("## 💡 Example Questions by File Type")
278
-
279
- with gr.Accordion("🔐 Security Analysis Questions", open=False):
280
- gr.Markdown("""
281
- **For Log Files:**
282
- - "Find any failed login attempts and suspicious IP addresses"
283
- - "Identify potential security threats or anomalies"
284
- - "Show me authentication errors and user access patterns"
285
- - "Are there any brute force attacks or repeated failures?"
286
 
287
- **For Access Logs:**
288
- - "Detect unusual access patterns or potential intrusions"
289
- - "Find requests with suspicious user agents or payloads"
290
- - "Identify high-frequency requests from single IPs"
291
- """)
292
-
293
- with gr.Accordion("⚡ Performance Analysis Questions", open=False):
294
- gr.Markdown("""
295
- **For Application Logs:**
296
- - "Which API endpoints are slowest and why?"
297
- - "Find performance bottlenecks and response time issues"
298
- - "Show me timeout errors and failed requests"
299
- - "What are the peak usage times and load patterns?"
300
 
301
- **For System Logs:**
302
- - "Identify resource usage spikes and memory issues"
303
- - "Find database query performance problems"
304
- - "Show me error rates and system health indicators"
305
- """)
306
-
307
- with gr.Accordion("📈 Data Analysis Questions", open=False):
308
  gr.Markdown("""
309
- **For CSV/Data Files:**
310
- - "Analyze data distribution and find statistical insights"
311
- - "Identify outliers and anomalies in the dataset"
312
- - "What correlations exist between different columns?"
313
- - "Generate a comprehensive data quality report"
314
-
315
- **For JSON Files:**
316
- - "Parse the structure and extract key information"
317
- - "Find patterns in nested data and relationships"
318
- - "Summarize the main data points and values"
319
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
 
321
  if __name__ == "__main__":
 
322
  demo.launch()
 
4
  import tempfile
5
  from dotenv import find_dotenv, load_dotenv
6
  from langchain.chat_models import init_chat_model
7
+ # Simplified imports - focusing on file analysis
 
 
8
 
9
  # Import the CodeAct agent functionality
10
  from agent import FileInjectedPyodideSandbox, create_pyodide_eval_fn, create_codeact
 
15
  # Load environment variables
16
  load_dotenv(find_dotenv())
17
 
18
+ # Initialize model for file analysis
 
 
 
 
 
 
 
 
 
19
  codeact_model = init_chat_model("gpt-4.1-2025-04-14", model_provider="openai")
20
 
21
  # Store uploaded file path globally
22
  uploaded_file_path = None
23
 
24
+ # Chat functionality removed - focusing on file analysis
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def handle_file_upload(file):
27
  """Handle file upload and store the path globally"""
28
  global uploaded_file_path
29
+ try:
30
+ if file is not None:
31
+ # With type="filepath", Gradio returns the file path as a string
32
+ uploaded_file_path = file
33
+ filename = os.path.basename(file)
34
+ return f"✅ File uploaded successfully: {filename}"
35
+ else:
36
+ uploaded_file_path = None
37
+ return "❌ No file uploaded"
38
+ except Exception as e:
39
  uploaded_file_path = None
40
+ return f"❌ Upload error: {str(e)}"
41
 
42
  def analyze_file_with_question(user_question):
43
  """
 
45
  """
46
  global uploaded_file_path
47
 
 
 
 
 
 
 
48
  try:
49
+ if not uploaded_file_path or not os.path.exists(uploaded_file_path):
50
+ return "❌ No file uploaded or file not found. Please upload a file first."
51
+
52
+ if not user_question or user_question.strip() == "":
53
+ user_question = "Provide a comprehensive analysis of this file including security, performance, and data insights."
54
+
55
  # Use the new guided analysis approach
56
  result = analyze_file_with_guidance_sync(uploaded_file_path, user_question)
57
  return result
 
59
  except Exception as e:
60
  return f"❌ Error in guided analysis: {str(e)}"
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  async def analyze_uploaded_file():
63
  """Legacy function - kept for backward compatibility"""
64
  return analyze_file_with_question("Provide a comprehensive analysis of this file.")
 
67
  """Wrapper to run async file analysis in sync context"""
68
  return asyncio.run(analyze_uploaded_file())
69
 
 
 
 
 
 
 
70
  # Create the Gradio interface
71
+ with gr.Blocks(title="DataForge - AI-Powered File Analysis") as demo:
72
+ gr.Markdown("# 🔍 DataForge - AI-Powered File Analysis")
73
+ gr.Markdown("""
74
+ Upload any file and ask specific questions for targeted AI analysis. Our guided approach:
75
 
76
+ 1. 📋 **Examines** your file structure and patterns automatically
77
+ 2. 🎯 **Generates** specific code guidance based on your question
78
+ 3. 🚀 **Executes** enhanced analysis with improved accuracy
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
+ **Simply upload a file and ask any question you want!**
81
+ """)
82
+
83
+ with gr.Row():
84
+ with gr.Column(scale=1):
85
+ # File Upload Section
86
+ gr.Markdown("### 📤 File Upload")
87
+ file_upload = gr.File(
88
+ label="Upload File for Analysis",
89
+ type="filepath"
90
+ )
91
+ upload_status = gr.Textbox(
92
+ label="Upload Status",
93
+ value="No file uploaded",
94
+ interactive=False
95
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
+ # Question Section
98
+ gr.Markdown("### ❓ Ask Your Question")
99
+ user_question = gr.Textbox(
100
+ label="Your Question about the File",
101
+ placeholder="What would you like to know about this file? (e.g., 'Find security threats', 'Show performance issues', 'What errors are present?')",
102
+ lines=4,
103
+ value=""
104
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
+ analyze_btn = gr.Button("🔍 Run Guided Analysis", variant="primary", size="lg")
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
+ # Analysis Info
109
+ gr.Markdown("### ℹ️ How It Works")
 
 
 
 
 
110
  gr.Markdown("""
111
+ **Guided Analysis Process:**
112
+ - 🎯 **Question-aware**: Code generation tailored to your specific question
113
+ - 📋 **Smart examination**: Automatically detects file structure and patterns
114
+ - 🚀 **Dynamic optimization**: Creates targeted analysis approach
115
+ - **Higher accuracy**: Prevents common code generation errors
116
+ - 🔧 **Quality control**: Built-in validation to avoid syntax issues
 
 
 
 
117
  """)
118
+
119
+ with gr.Column(scale=2):
120
+ analysis_output = gr.Textbox(
121
+ label="📊 Guided Analysis Results",
122
+ lines=25,
123
+ max_lines=35,
124
+ placeholder="Upload a file, type your question, and click 'Run Guided Analysis' to see detailed results here...",
125
+ interactive=False
126
+ )
127
+
128
+ # Event handlers
129
+ file_upload.change(
130
+ fn=handle_file_upload,
131
+ inputs=[file_upload],
132
+ outputs=[upload_status]
133
+ )
134
+
135
+ analyze_btn.click(
136
+ fn=analyze_file_with_question,
137
+ inputs=[user_question],
138
+ outputs=[analysis_output]
139
+ )
140
+
141
+ gr.Markdown("---")
142
+ gr.Markdown("## 💡 Example Questions by File Type")
143
+
144
+ with gr.Accordion("🔐 Security Analysis Questions", open=False):
145
+ gr.Markdown("""
146
+ **For Log Files:**
147
+ - "Find any failed login attempts and suspicious IP addresses"
148
+ - "Identify potential security threats or anomalies"
149
+ - "Show me authentication errors and user access patterns"
150
+ - "Are there any brute force attacks or repeated failures?"
151
+
152
+ **For Access Logs:**
153
+ - "Detect unusual access patterns or potential intrusions"
154
+ - "Find requests with suspicious user agents or payloads"
155
+ - "Identify high-frequency requests from single IPs"
156
+ """)
157
+
158
+ with gr.Accordion("⚡ Performance Analysis Questions", open=False):
159
+ gr.Markdown("""
160
+ **For Application Logs:**
161
+ - "Which API endpoints are slowest and why?"
162
+ - "Find performance bottlenecks and response time issues"
163
+ - "Show me timeout errors and failed requests"
164
+ - "What are the peak usage times and load patterns?"
165
+
166
+ **For System Logs:**
167
+ - "Identify resource usage spikes and memory issues"
168
+ - "Find database query performance problems"
169
+ - "Show me error rates and system health indicators"
170
+ """)
171
+
172
+ with gr.Accordion("📈 Data Analysis Questions", open=False):
173
+ gr.Markdown("""
174
+ **For CSV/Data Files:**
175
+ - "Analyze data distribution and find statistical insights"
176
+ - "Identify outliers and anomalies in the dataset"
177
+ - "What correlations exist between different columns?"
178
+ - "Generate a comprehensive data quality report"
179
+
180
+ **For JSON Files:**
181
+ - "Parse the structure and extract key information"
182
+ - "Find patterns in nested data and relationships"
183
+ - "Summarize the main data points and values"
184
+ """)
185
 
186
  if __name__ == "__main__":
187
+ print("Starting DataForge application...")
188
  demo.launch()
graph.py CHANGED
@@ -1,4 +1,5 @@
1
  import asyncio
 
2
  import os
3
  import re
4
  from typing import Annotated, Dict, List, Optional
@@ -58,6 +59,37 @@ class CodeAnalysisState(TypedDict):
58
  final_analysis: Optional[str]
59
 
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  def examine_file_structure(state: CodeAnalysisState) -> CodeAnalysisState:
62
  """
63
  Node 1: Examine the file structure by reading the first several lines
@@ -244,8 +276,7 @@ def generate_code_guidance(state: CodeAnalysisState) -> CodeAnalysisState:
244
 
245
  def execute_guided_analysis(state: CodeAnalysisState) -> CodeAnalysisState:
246
  """
247
- Node 3: Execute the file analysis using the generated guidance.
248
- This replaces the original agent with guided code generation.
249
  """
250
  file_path = state["file_path"]
251
  file_examination = state["file_examination"]
@@ -259,9 +290,8 @@ def execute_guided_analysis(state: CodeAnalysisState) -> CodeAnalysisState:
259
  }
260
 
261
  try:
262
- # Create the guided analysis query
263
- guided_query = f"""
264
- Based on the file examination and guidance, analyze this file with the following SPECIFIC instructions:
265
 
266
  FILE CONTEXT:
267
  - File Type: {file_examination.file_type}
@@ -284,6 +314,41 @@ SAMPLE FILE STRUCTURE (first few lines):
284
 
285
  USER REQUEST: {analysis_query or "Comprehensive analysis following the guidance above"}
286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  INSTRUCTIONS:
288
  1. Follow the specified analysis approach exactly
289
  2. Import only the recommended libraries: {', '.join(code_guidance.required_imports)}
@@ -291,11 +356,12 @@ INSTRUCTIONS:
291
  4. Structure your code following: {code_guidance.code_structure}
292
  5. Generate the expected outputs: {', '.join(code_guidance.expected_outputs)}
293
  6. Implement proper error handling: {code_guidance.error_handling}
 
294
 
295
  Since you have detailed guidance about this specific file structure, your code should be highly accurate and efficient.
296
  The file examination shows this is a {file_examination.file_type} with {file_examination.data_format} data format.
297
 
298
- Write Python code that leverages this specific knowledge for optimal analysis.
299
  """
300
 
301
  print(f"🚀 Executing guided analysis...")
@@ -326,8 +392,7 @@ Write Python code that leverages this specific knowledge for optimal analysis.
326
  execution_result = asyncio.run(run_guided_analysis())
327
 
328
  # Create final analysis summary
329
- final_analysis = f"""
330
- === GUIDED FILE ANALYSIS RESULTS ===
331
 
332
  File: {file_path}
333
  Type: {file_examination.file_type} ({file_examination.data_format})
 
1
  import asyncio
2
+ import ast
3
  import os
4
  import re
5
  from typing import Annotated, Dict, List, Optional
 
59
  final_analysis: Optional[str]
60
 
61
 
62
def validate_python_code(code: str, *, max_line_length: int = 100) -> tuple[bool, str]:
    """
    Validate Python code for syntax errors and potential issues.

    The code must parse with ``ast.parse``. Each line is then checked for two
    patterns that are treated as risky: a ``print(`` line that is left open
    and contains an odd number of quote characters, and a line longer than
    ``max_line_length`` characters.

    Args:
        code: Python source text to check.
        max_line_length: Longest line accepted before the code is rejected.
            Defaults to 100.

    Returns:
        ``(is_valid, message)``. ``message`` is "Code validation passed" on
        success, otherwise a description of the first problem found.
    """
    try:
        # A successful parse rules out real syntax errors; the checks below
        # are extra heuristics on top of that.
        ast.parse(code)

        for line_no, line in enumerate(code.split('\n'), 1):
            stripped = line.strip()

            # A print call left open on this line with an odd number of
            # quotes is flagged as a potentially broken string.
            opens_print = stripped.startswith('print(') and not stripped.endswith(')')
            if opens_print:
                odd_double = stripped.count('"') % 2 != 0
                odd_single = stripped.count("'") % 2 != 0
                if odd_double or odd_single:
                    return False, f"Line {line_no}: Potentially unterminated string in print statement"

            # Very long lines are rejected because they might get truncated.
            if len(line) > max_line_length:
                return False, f"Line {line_no}: Line too long ({len(line)} chars) - may cause truncation"

        return True, "Code validation passed"

    except SyntaxError as e:
        # lineno can be None for some syntax errors; omit the location then
        # instead of reporting "at line None".
        location = f" at line {e.lineno}" if e.lineno is not None else ""
        return False, f"Syntax error: {e.msg}{location}"
    except Exception as e:
        # Anything else (e.g. a non-string argument) is reported as invalid.
        return False, f"Validation error: {str(e)}"
91
+
92
+
93
  def examine_file_structure(state: CodeAnalysisState) -> CodeAnalysisState:
94
  """
95
  Node 1: Examine the file structure by reading the first several lines
 
276
 
277
  def execute_guided_analysis(state: CodeAnalysisState) -> CodeAnalysisState:
278
  """
279
+ Node 3: Execute the file analysis using the generated guidance with code quality validation.
 
280
  """
281
  file_path = state["file_path"]
282
  file_examination = state["file_examination"]
 
290
  }
291
 
292
  try:
293
+ # Create the guided analysis query with strict code quality requirements
294
+ guided_query = f"""Based on the file examination and guidance, analyze this file with the following SPECIFIC instructions:
 
295
 
296
  FILE CONTEXT:
297
  - File Type: {file_examination.file_type}
 
314
 
315
  USER REQUEST: {analysis_query or "Comprehensive analysis following the guidance above"}
316
 
317
+ CRITICAL CODE QUALITY REQUIREMENTS:
318
+ 1. ALL print statements MUST be on single lines with properly closed quotes
319
+ 2. NO multi-line strings or f-strings that span multiple lines
320
+ 3. NO print statements longer than 80 characters - break into multiple prints instead
321
+ 4. ALL strings must be properly terminated with matching quotes
322
+ 5. Use short variable names and concise output formatting
323
+ 6. If you need to print long text, use multiple short print() calls
324
+ 7. Always close parentheses, brackets, and quotes on the same line they open
325
+ 8. Use simple string concatenation instead of complex f-strings for long output
326
+ 9. NEVER use triple quotes for multi-line strings in limited execution environments
327
+ 10. Test each print statement individually to ensure it executes without truncation
328
+
329
+ EXAMPLE OF SAFE CODING PRACTICES:
330
+ ```python
331
+ # GOOD - Short, single-line prints
332
+ print("=== Results ===")
333
+ print(f"Count: {{count}}")
334
+ print(f"User: {{user}}")
335
+
336
+ # BAD - Long print that could be truncated
337
+ print(f"This is a very long print statement that could get truncated...")
338
+
339
+ # GOOD - Break long output into multiple prints
340
+ print("Analysis complete:")
341
+ print(f"Found {{count}} items")
342
+ print(f"Top user: {{user}}")
343
+ ```
344
+
345
+ MANDATORY CODE GENERATION PROCESS:
346
+ 1. Generate your analysis code following the above requirements
347
+ 2. Before presenting the code, internally validate each line for potential issues
348
+ 3. Ensure ALL print statements are under 80 characters
349
+ 4. Verify all quotes and parentheses are properly closed
350
+ 5. If any line might cause issues, rewrite it using multiple shorter statements
351
+
352
  INSTRUCTIONS:
353
  1. Follow the specified analysis approach exactly
354
  2. Import only the recommended libraries: {', '.join(code_guidance.required_imports)}
 
356
  4. Structure your code following: {code_guidance.code_structure}
357
  5. Generate the expected outputs: {', '.join(code_guidance.expected_outputs)}
358
  6. Implement proper error handling: {code_guidance.error_handling}
359
+ 7. ENSURE ALL CODE FOLLOWS THE QUALITY REQUIREMENTS ABOVE
360
 
361
  Since you have detailed guidance about this specific file structure, your code should be highly accurate and efficient.
362
  The file examination shows this is a {file_examination.file_type} with {file_examination.data_format} data format.
363
 
364
+ Write Python code that leverages this specific knowledge for optimal analysis and follows strict code quality standards.
365
  """
366
 
367
  print(f"🚀 Executing guided analysis...")
 
392
  execution_result = asyncio.run(run_guided_analysis())
393
 
394
  # Create final analysis summary
395
+ final_analysis = f"""=== GUIDED FILE ANALYSIS RESULTS ===
 
396
 
397
  File: {file_path}
398
  Type: {file_examination.file_type} ({file_examination.data_format})