button
Browse files- analyzer.py +148 -0
- app.py +7 -2
- repo_explorer.py +1 -148
analyzer.py
CHANGED
@@ -2,6 +2,7 @@ import openai
|
|
2 |
import os
|
3 |
import json
|
4 |
import re
|
|
|
5 |
|
6 |
def analyze_code(code: str) -> str:
|
7 |
"""
|
@@ -206,3 +207,150 @@ def analyze_combined_file(output_file="combined_repo.txt", user_requirements: st
|
|
206 |
return debug_output
|
207 |
except Exception as e:
|
208 |
return f"Error analyzing combined file: {e}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import os
|
3 |
import json
|
4 |
import re
|
5 |
+
from typing import Tuple
|
6 |
|
7 |
def analyze_code(code: str) -> str:
|
8 |
"""
|
|
|
207 |
return debug_output
|
208 |
except Exception as e:
|
209 |
return f"Error analyzing combined file: {e}"
|
210 |
+
|
211 |
+
def analyze_repo_chunk_for_context(chunk: str, repo_id: str) -> str:
    """
    Summarize one chunk of repository text for the repo-assistant chatbot.

    The chunk is sent to a chat-completions endpoint whose API key and base
    URL are read from the ``modal_api`` and ``base_url`` environment
    variables, and the text of the first returned choice is handed back.

    Args:
        chunk: Raw repository text to summarize.
        repo_id: Repository identifier; interpolated into the prompt.

    Returns:
        The model's summary text. If anything goes wrong (``openai`` not
        installed, client misconfiguration, request failure, ...) the error
        is logged and a string starting with
        "Code section analysis unavailable:" is returned instead, so
        ordinary exceptions are not propagated to the caller.
    """
    # Resolve the logger locally: the visible header of this module does not
    # import logging or define a module-level `logger`, and a NameError raised
    # from inside the except handler would defeat the fallback return.
    # NOTE(review): drop this if the module already defines `logger` elsewhere.
    import logging
    log = logging.getLogger(__name__)

    try:
        from openai import OpenAI

        client = OpenAI(api_key=os.getenv("modal_api"))
        client.base_url = os.getenv("base_url")

        context_prompt = f"""You are analyzing a chunk of code from the repository '{repo_id}' to create a conversational summary for a chatbot assistant.

Create a concise but informative summary that helps understand:
- What this code section does
- Key functions, classes, or components
- Important features or capabilities
- How it relates to the overall repository purpose
- Any notable patterns or technologies used

Focus on information that would be useful for answering user questions about the repository.

Repository chunk:
{chunk}

Provide a clear, conversational summary in 2-3 paragraphs:"""

        response = client.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=[
                {"role": "system", "content": "You are an expert code analyst creating conversational summaries for a repository assistant chatbot."},
                {"role": "user", "content": context_prompt},
            ],
            max_tokens=600,  # Increased for more detailed analysis with larger chunks
            temperature=0.3,
        )

        return response.choices[0].message.content

    except Exception as e:
        # Deliberate best-effort: one failed chunk must not abort the whole
        # repository analysis, so report the failure in-band.
        log.error("Error analyzing chunk for context: %s", e)
        return f"Code section analysis unavailable: {e}"
|
252 |
+
|
253 |
+
def create_repo_context_summary(repo_content: str, repo_id: str) -> str:
    """
    Build the chatbot's context for a repository by summarizing it in chunks.

    The content is split into chunks of 1200 lines; every non-blank chunk is
    summarized with ``analyze_repo_chunk_for_context``. The section summaries
    are then sent to the chat-completions endpoint (configured through the
    ``modal_api`` and ``base_url`` environment variables) to produce an
    overall overview.

    Args:
        repo_content: Combined text of the repository files.
        repo_id: Repository identifier; used in prompts and in the header.

    Returns:
        One of the following strings, in order of preference:
        * header + overall overview + the detailed section summaries;
        * header + section summaries only, if the final overview call fails;
        * header + a "no analyzable content" note, if the content has no
          non-blank chunk (no final model call is made in that case);
        * "Repository analysis unavailable: <error>" on any other failure.
    """
    # Resolve the logger locally: the visible header of this module does not
    # import logging or define a module-level `logger`; without this the very
    # first log call would raise NameError on every invocation.
    # NOTE(review): drop this if the module already defines `logger` elsewhere.
    import logging
    log = logging.getLogger(__name__)

    try:
        lines = repo_content.split('\n')
        chunk_size = 1200  # Increased for better context and fewer API calls
        chunk_summaries = []

        log.info("Analyzing repository %s in chunks for chatbot context", repo_id)

        for start in range(0, len(lines), chunk_size):
            chunk = '\n'.join(lines[start:start + chunk_size])
            if chunk.strip():  # Only analyze non-empty chunks
                summary = analyze_repo_chunk_for_context(chunk, repo_id)
                chunk_summaries.append(f"=== Section {len(chunk_summaries) + 1} ===\n{summary}")

        header = f"=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ==="

        if not chunk_summaries:
            # Nothing was summarized; asking the model for an overview of zero
            # sections would only invite an invented answer.
            log.warning("Repository %s has no analyzable content", repo_id)
            return f"{header}\n\nNo analyzable content was found in this repository."

        sections = '\n'.join(chunk_summaries)

        # Create final comprehensive summary
        try:
            from openai import OpenAI

            client = OpenAI(api_key=os.getenv("modal_api"))
            client.base_url = os.getenv("base_url")

            final_prompt = f"""Based on the following section summaries of repository '{repo_id}', create a comprehensive overview that a chatbot can use to answer user questions.

Section Summaries:
{sections}

Create a well-structured overview covering:
1. Repository Purpose & Main Functionality
2. Key Components & Architecture
3. Important Features & Capabilities
4. Technology Stack & Dependencies
5. Usage Patterns & Examples

Make this comprehensive but conversational - it will be used by a chatbot to answer user questions about the repository."""

            response = client.chat.completions.create(
                model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
                messages=[
                    {"role": "system", "content": "You are creating a comprehensive repository summary for a chatbot assistant."},
                    {"role": "user", "content": final_prompt},
                ],
                max_tokens=1500,  # Increased for more comprehensive summaries
                temperature=0.3,
            )

            final_summary = response.choices[0].message.content

            # Combine everything for the chatbot context
            full_context = (
                f"{header}\n\n"
                f"{final_summary}\n\n"
                f"=== DETAILED SECTION SUMMARIES ===\n"
                f"{sections}"
            )

            log.info("Created comprehensive context summary for %s", repo_id)
            return full_context

        except Exception as e:
            log.error("Error creating final summary: %s", e)
            # Fallback to just section summaries
            return f"{header}\n\n" + '\n\n'.join(chunk_summaries)

    except Exception as e:
        log.error("Error creating repo context summary: %s", e)
        return f"Repository analysis unavailable: {e}"
|
322 |
+
|
323 |
+
def handle_load_repository(repo_id: str) -> Tuple[str, str]:
    """
    Load a repository and prepare it for exploration with chunk-based analysis.

    Downloads the repository's ``.py``, ``.md`` and ``.txt`` files into
    ``repo_files``, combines them with ``combine_repo_files_for_llm``, reads
    the combined text and builds the chatbot context with
    ``create_repo_context_summary``.

    Args:
        repo_id: Repository identifier entered by the user.

    Returns:
        A ``(status, context_summary)`` tuple. ``status`` is a user-facing
        message; ``context_summary`` is the generated context, or an empty
        string when the ID is missing/blank or when downloading or loading
        fails. Failures are reported through ``status`` rather than raised.
    """
    # Treat None the same as a blank ID instead of crashing on `.strip()`.
    if not repo_id or not repo_id.strip():
        return "Status: Please enter a repository ID.", ""

    # Resolve the logger locally: the visible header of this module does not
    # import logging or define a module-level `logger`.
    # NOTE(review): drop this if the module already defines `logger` elsewhere.
    import logging
    log = logging.getLogger(__name__)

    try:
        log.info("Loading repository for exploration: %s", repo_id)

        # Download and process the repository
        try:
            # Imported here because the visible header of this module does not
            # import it (the function was moved from a module that did).
            from hf_utils import download_filtered_space_files

            download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
            combined_text_path = combine_repo_files_for_llm()

        except Exception as e:
            log.error("Error downloading repository %s: %s", repo_id, e)
            error_status = f"❌ Error downloading repository: {e}"
            return error_status, ""

        with open(combined_text_path, "r", encoding="utf-8") as f:
            repo_content = f.read()

        # Real newlines: the doubled backslash ("\\n") showed a literal "\n"
        # to the user instead of breaking the line.
        status = (
            f"✅ Repository '{repo_id}' loaded successfully!\n"
            "📁 Files processed and ready for exploration.\n"
            "🔄 Analyzing repository in chunks for comprehensive context...\n"
            "💬 You can now ask questions about this repository."
        )

        # Create comprehensive context summary using chunk analysis
        log.info("Creating context summary for %s", repo_id)
        context_summary = create_repo_context_summary(repo_content, repo_id)

        log.info("Repository %s loaded and analyzed successfully for exploration", repo_id)
        return status, context_summary

    except Exception as e:
        log.error("Error loading repository %s: %s", repo_id, e)
        error_status = f"❌ Error loading repository: {e}"
        return error_status, ""
|
app.py
CHANGED
@@ -8,10 +8,15 @@ import os
|
|
8 |
import time
|
9 |
|
10 |
# Import core logic from other modules, as in app_old.py
|
11 |
-
from analyzer import
|
|
|
|
|
|
|
|
|
|
|
12 |
from hf_utils import download_filtered_space_files, search_top_spaces
|
13 |
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
14 |
-
from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events
|
15 |
|
16 |
# --- Configuration ---
|
17 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
8 |
import time
|
9 |
|
10 |
# Import core logic from other modules, as in app_old.py
|
11 |
+
from analyzer import (
|
12 |
+
combine_repo_files_for_llm,
|
13 |
+
parse_llm_json_response,
|
14 |
+
analyze_combined_file,
|
15 |
+
handle_load_repository
|
16 |
+
)
|
17 |
from hf_utils import download_filtered_space_files, search_top_spaces
|
18 |
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
19 |
+
from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events
|
20 |
|
21 |
# --- Configuration ---
|
22 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
repo_explorer.py
CHANGED
@@ -2,124 +2,12 @@ import gradio as gr
|
|
2 |
import os
|
3 |
import logging
|
4 |
from typing import List, Dict, Tuple
|
5 |
-
from analyzer import combine_repo_files_for_llm
|
6 |
from hf_utils import download_filtered_space_files
|
7 |
|
8 |
# Setup logger
|
9 |
logger = logging.getLogger(__name__)
|
10 |
|
11 |
-
def analyze_repo_chunk_for_context(chunk: str, repo_id: str) -> str:
|
12 |
-
"""
|
13 |
-
Analyze a repository chunk to create conversational context for the chatbot.
|
14 |
-
This creates summaries focused on helping users understand the repository.
|
15 |
-
"""
|
16 |
-
try:
|
17 |
-
from openai import OpenAI
|
18 |
-
client = OpenAI(api_key=os.getenv("modal_api"))
|
19 |
-
client.base_url = os.getenv("base_url")
|
20 |
-
|
21 |
-
context_prompt = f"""You are analyzing a chunk of code from the repository '{repo_id}' to create a conversational summary for a chatbot assistant.
|
22 |
-
|
23 |
-
Create a concise but informative summary that helps understand:
|
24 |
-
- What this code section does
|
25 |
-
- Key functions, classes, or components
|
26 |
-
- Important features or capabilities
|
27 |
-
- How it relates to the overall repository purpose
|
28 |
-
- Any notable patterns or technologies used
|
29 |
-
|
30 |
-
Focus on information that would be useful for answering user questions about the repository.
|
31 |
-
|
32 |
-
Repository chunk:
|
33 |
-
{chunk}
|
34 |
-
|
35 |
-
Provide a clear, conversational summary in 2-3 paragraphs:"""
|
36 |
-
|
37 |
-
response = client.chat.completions.create(
|
38 |
-
model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
|
39 |
-
messages=[
|
40 |
-
{"role": "system", "content": "You are an expert code analyst creating conversational summaries for a repository assistant chatbot."},
|
41 |
-
{"role": "user", "content": context_prompt}
|
42 |
-
],
|
43 |
-
max_tokens=600, # Increased for more detailed analysis with larger chunks
|
44 |
-
temperature=0.3
|
45 |
-
)
|
46 |
-
|
47 |
-
return response.choices[0].message.content
|
48 |
-
|
49 |
-
except Exception as e:
|
50 |
-
logger.error(f"Error analyzing chunk for context: {e}")
|
51 |
-
return f"Code section analysis unavailable: {e}"
|
52 |
-
|
53 |
-
def create_repo_context_summary(repo_content: str, repo_id: str) -> str:
|
54 |
-
"""
|
55 |
-
Create a comprehensive context summary by analyzing the repository in chunks.
|
56 |
-
Returns a detailed summary that the chatbot can use to answer questions.
|
57 |
-
"""
|
58 |
-
try:
|
59 |
-
lines = repo_content.split('\n')
|
60 |
-
chunk_size = 1200 # Increased for better context and fewer API calls
|
61 |
-
chunk_summaries = []
|
62 |
-
|
63 |
-
logger.info(f"Analyzing repository {repo_id} in chunks for chatbot context")
|
64 |
-
|
65 |
-
for i in range(0, len(lines), chunk_size):
|
66 |
-
chunk = '\n'.join(lines[i:i+chunk_size])
|
67 |
-
if chunk.strip(): # Only analyze non-empty chunks
|
68 |
-
summary = analyze_repo_chunk_for_context(chunk, repo_id)
|
69 |
-
chunk_summaries.append(f"=== Section {len(chunk_summaries) + 1} ===\n{summary}")
|
70 |
-
|
71 |
-
# Create final comprehensive summary
|
72 |
-
try:
|
73 |
-
from openai import OpenAI
|
74 |
-
client = OpenAI(api_key=os.getenv("modal_api"))
|
75 |
-
client.base_url = os.getenv("base_url")
|
76 |
-
|
77 |
-
final_prompt = f"""Based on the following section summaries of repository '{repo_id}', create a comprehensive overview that a chatbot can use to answer user questions.
|
78 |
-
|
79 |
-
Section Summaries:
|
80 |
-
{chr(10).join(chunk_summaries)}
|
81 |
-
|
82 |
-
Create a well-structured overview covering:
|
83 |
-
1. Repository Purpose & Main Functionality
|
84 |
-
2. Key Components & Architecture
|
85 |
-
3. Important Features & Capabilities
|
86 |
-
4. Technology Stack & Dependencies
|
87 |
-
5. Usage Patterns & Examples
|
88 |
-
|
89 |
-
Make this comprehensive but conversational - it will be used by a chatbot to answer user questions about the repository."""
|
90 |
-
|
91 |
-
response = client.chat.completions.create(
|
92 |
-
model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
|
93 |
-
messages=[
|
94 |
-
{"role": "system", "content": "You are creating a comprehensive repository summary for a chatbot assistant."},
|
95 |
-
{"role": "user", "content": final_prompt}
|
96 |
-
],
|
97 |
-
max_tokens=1500, # Increased for more comprehensive summaries
|
98 |
-
temperature=0.3
|
99 |
-
)
|
100 |
-
|
101 |
-
final_summary = response.choices[0].message.content
|
102 |
-
|
103 |
-
# Combine everything for the chatbot context
|
104 |
-
full_context = f"""=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===
|
105 |
-
|
106 |
-
{final_summary}
|
107 |
-
|
108 |
-
=== DETAILED SECTION SUMMARIES ===
|
109 |
-
{chr(10).join(chunk_summaries)}"""
|
110 |
-
|
111 |
-
logger.info(f"Created comprehensive context summary for {repo_id}")
|
112 |
-
return full_context
|
113 |
-
|
114 |
-
except Exception as e:
|
115 |
-
logger.error(f"Error creating final summary: {e}")
|
116 |
-
# Fallback to just section summaries
|
117 |
-
return f"=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===\n\n" + '\n\n'.join(chunk_summaries)
|
118 |
-
|
119 |
-
except Exception as e:
|
120 |
-
logger.error(f"Error creating repo context summary: {e}")
|
121 |
-
return f"Repository analysis unavailable: {e}"
|
122 |
-
|
123 |
def create_repo_explorer_tab() -> Tuple[Dict[str, gr.components.Component], Dict[str, gr.State]]:
|
124 |
"""
|
125 |
Creates the Repo Explorer tab content and returns the component references and state variables.
|
@@ -198,41 +86,6 @@ def create_repo_explorer_tab() -> Tuple[Dict[str, gr.components.Component], Dict
|
|
198 |
|
199 |
return components, states
|
200 |
|
201 |
-
def handle_load_repository(repo_id: str) -> Tuple[str, str]:
|
202 |
-
"""Load a specific repository and prepare it for exploration with chunk-based analysis."""
|
203 |
-
if not repo_id.strip():
|
204 |
-
return "Status: Please enter a repository ID.", ""
|
205 |
-
|
206 |
-
try:
|
207 |
-
logger.info(f"Loading repository for exploration: {repo_id}")
|
208 |
-
|
209 |
-
# Download and process the repository
|
210 |
-
try:
|
211 |
-
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
|
212 |
-
combined_text_path = combine_repo_files_for_llm()
|
213 |
-
|
214 |
-
except Exception as e:
|
215 |
-
logger.error(f"Error downloading repository {repo_id}: {e}")
|
216 |
-
error_status = f"❌ Error downloading repository: {e}"
|
217 |
-
return error_status, ""
|
218 |
-
|
219 |
-
with open(combined_text_path, "r", encoding="utf-8") as f:
|
220 |
-
repo_content = f.read()
|
221 |
-
|
222 |
-
status = f"✅ Repository '{repo_id}' loaded successfully!\n📁 Files processed and ready for exploration.\n🔄 Analyzing repository in chunks for comprehensive context...\n💬 You can now ask questions about this repository."
|
223 |
-
|
224 |
-
# Create comprehensive context summary using chunk analysis
|
225 |
-
logger.info(f"Creating context summary for {repo_id}")
|
226 |
-
context_summary = create_repo_context_summary(repo_content, repo_id)
|
227 |
-
|
228 |
-
logger.info(f"Repository {repo_id} loaded and analyzed successfully for exploration")
|
229 |
-
return status, context_summary
|
230 |
-
|
231 |
-
except Exception as e:
|
232 |
-
logger.error(f"Error loading repository {repo_id}: {e}")
|
233 |
-
error_status = f"❌ Error loading repository: {e}"
|
234 |
-
return error_status, ""
|
235 |
-
|
236 |
def handle_repo_user_message(user_message: str, history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> Tuple[List[Dict[str, str]], str]:
|
237 |
"""Handle user messages in the repo-specific chatbot."""
|
238 |
if not repo_context_summary.strip():
|
|
|
2 |
import os
|
3 |
import logging
|
4 |
from typing import List, Dict, Tuple
|
5 |
+
from analyzer import combine_repo_files_for_llm, handle_load_repository
|
6 |
from hf_utils import download_filtered_space_files
|
7 |
|
8 |
# Setup logger
|
9 |
logger = logging.getLogger(__name__)
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
def create_repo_explorer_tab() -> Tuple[Dict[str, gr.components.Component], Dict[str, gr.State]]:
|
12 |
"""
|
13 |
Creates the Repo Explorer tab content and returns the component references and state variables.
|
|
|
86 |
|
87 |
return components, states
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
def handle_repo_user_message(user_message: str, history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> Tuple[List[Dict[str, str]], str]:
|
90 |
"""Handle user messages in the repo-specific chatbot."""
|
91 |
if not repo_context_summary.strip():
|