naman1102 committed on
Commit 7209842 · 1 Parent(s): d85f92d
Files changed (3):
  1. analyzer.py +148 -0
  2. app.py +7 -2
  3. repo_explorer.py +1 -148
analyzer.py CHANGED
@@ -2,6 +2,7 @@ import openai
 import os
 import json
 import re
+from typing import Tuple
 
 def analyze_code(code: str) -> str:
     """
@@ -206,3 +207,150 @@ def analyze_combined_file(output_file="combined_repo.txt", user_requirements: st
         return debug_output
     except Exception as e:
         return f"Error analyzing combined file: {e}"
+
+def analyze_repo_chunk_for_context(chunk: str, repo_id: str) -> str:
+    """
+    Analyze a repository chunk to create conversational context for the chatbot.
+    This creates summaries focused on helping users understand the repository.
+    """
+    try:
+        from openai import OpenAI
+        client = OpenAI(api_key=os.getenv("modal_api"))
+        client.base_url = os.getenv("base_url")
+
+        context_prompt = f"""You are analyzing a chunk of code from the repository '{repo_id}' to create a conversational summary for a chatbot assistant.
+
+Create a concise but informative summary that helps understand:
+- What this code section does
+- Key functions, classes, or components
+- Important features or capabilities
+- How it relates to the overall repository purpose
+- Any notable patterns or technologies used
+
+Focus on information that would be useful for answering user questions about the repository.
+
+Repository chunk:
+{chunk}
+
+Provide a clear, conversational summary in 2-3 paragraphs:"""
+
+        response = client.chat.completions.create(
+            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
+            messages=[
+                {"role": "system", "content": "You are an expert code analyst creating conversational summaries for a repository assistant chatbot."},
+                {"role": "user", "content": context_prompt}
+            ],
+            max_tokens=600,  # Increased for more detailed analysis with larger chunks
+            temperature=0.3
+        )
+
+        return response.choices[0].message.content
+
+    except Exception as e:
+        logger.error(f"Error analyzing chunk for context: {e}")
+        return f"Code section analysis unavailable: {e}"
+
+def create_repo_context_summary(repo_content: str, repo_id: str) -> str:
+    """
+    Create a comprehensive context summary by analyzing the repository in chunks.
+    Returns a detailed summary that the chatbot can use to answer questions.
+    """
+    try:
+        lines = repo_content.split('\n')
+        chunk_size = 1200  # Increased for better context and fewer API calls
+        chunk_summaries = []
+
+        logger.info(f"Analyzing repository {repo_id} in chunks for chatbot context")
+
+        for i in range(0, len(lines), chunk_size):
+            chunk = '\n'.join(lines[i:i+chunk_size])
+            if chunk.strip():  # Only analyze non-empty chunks
+                summary = analyze_repo_chunk_for_context(chunk, repo_id)
+                chunk_summaries.append(f"=== Section {len(chunk_summaries) + 1} ===\n{summary}")
+
+        # Create final comprehensive summary
+        try:
+            from openai import OpenAI
+            client = OpenAI(api_key=os.getenv("modal_api"))
+            client.base_url = os.getenv("base_url")
+
+            final_prompt = f"""Based on the following section summaries of repository '{repo_id}', create a comprehensive overview that a chatbot can use to answer user questions.
+
+Section Summaries:
+{chr(10).join(chunk_summaries)}
+
+Create a well-structured overview covering:
+1. Repository Purpose & Main Functionality
+2. Key Components & Architecture
+3. Important Features & Capabilities
+4. Technology Stack & Dependencies
+5. Usage Patterns & Examples
+
+Make this comprehensive but conversational - it will be used by a chatbot to answer user questions about the repository."""
+
+            response = client.chat.completions.create(
+                model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
+                messages=[
+                    {"role": "system", "content": "You are creating a comprehensive repository summary for a chatbot assistant."},
+                    {"role": "user", "content": final_prompt}
+                ],
+                max_tokens=1500,  # Increased for more comprehensive summaries
+                temperature=0.3
+            )
+
+            final_summary = response.choices[0].message.content
+
+            # Combine everything for the chatbot context
+            full_context = f"""=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===
+
+{final_summary}
+
+=== DETAILED SECTION SUMMARIES ===
+{chr(10).join(chunk_summaries)}"""
+
+            logger.info(f"Created comprehensive context summary for {repo_id}")
+            return full_context
+
+        except Exception as e:
+            logger.error(f"Error creating final summary: {e}")
+            # Fallback to just section summaries
+            return f"=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===\n\n" + '\n\n'.join(chunk_summaries)
+
+    except Exception as e:
+        logger.error(f"Error creating repo context summary: {e}")
+        return f"Repository analysis unavailable: {e}"
+
+def handle_load_repository(repo_id: str) -> Tuple[str, str]:
+    """Load a specific repository and prepare it for exploration with chunk-based analysis."""
+    if not repo_id.strip():
+        return "Status: Please enter a repository ID.", ""
+
+    try:
+        logger.info(f"Loading repository for exploration: {repo_id}")
+
+        # Download and process the repository
+        try:
+            download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
+            combined_text_path = combine_repo_files_for_llm()
+
+        except Exception as e:
+            logger.error(f"Error downloading repository {repo_id}: {e}")
+            error_status = f"❌ Error downloading repository: {e}"
+            return error_status, ""
+
+        with open(combined_text_path, "r", encoding="utf-8") as f:
+            repo_content = f.read()
+
+        status = f"✅ Repository '{repo_id}' loaded successfully!\n📁 Files processed and ready for exploration.\n🔄 Analyzing repository in chunks for comprehensive context...\n💬 You can now ask questions about this repository."
+
+        # Create comprehensive context summary using chunk analysis
+        logger.info(f"Creating context summary for {repo_id}")
+        context_summary = create_repo_context_summary(repo_content, repo_id)
+
+        logger.info(f"Repository {repo_id} loaded and analyzed successfully for exploration")
+        return status, context_summary
+
+    except Exception as e:
+        logger.error(f"Error loading repository {repo_id}: {e}")
+        error_status = f"❌ Error loading repository: {e}"
+        return error_status, ""
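For reference, a minimal sketch of how the relocated helpers can be driven end to end, assuming the modal_api and base_url environment variables point at the OpenAI-compatible endpoint used above and that logger, download_filtered_space_files, and combine_repo_files_for_llm are available in analyzer's module scope (their imports sit outside this hunk); the repository ID is a placeholder:

from analyzer import handle_load_repository, create_repo_context_summary, combine_repo_files_for_llm

# One-call path: download, combine, and summarize a Space; returns (status, context_summary).
status, context_summary = handle_load_repository("user/example-space")  # placeholder repo ID
print(status)

# Direct path: combine previously downloaded files and feed the text
# straight to the chunk-based summarizer.
combined_path = combine_repo_files_for_llm()
with open(combined_path, "r", encoding="utf-8") as f:
    repo_content = f.read()
context_summary = create_repo_context_summary(repo_content, "user/example-space")  # placeholder repo ID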
app.py CHANGED
@@ -8,10 +8,15 @@ import os
 import time
 
 # Import core logic from other modules, as in app_old.py
-from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
+from analyzer import (
+    combine_repo_files_for_llm,
+    parse_llm_json_response,
+    analyze_combined_file,
+    handle_load_repository
+)
 from hf_utils import download_filtered_space_files, search_top_spaces
 from chatbot_page import chat_with_user, extract_keywords_from_conversation
-from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events, handle_load_repository
+from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events
 
 # --- Configuration ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
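With the regrouped imports, app.py now takes handle_load_repository from analyzer rather than repo_explorer. A minimal sketch of how the handler could be wired to a Gradio event under the new import; the component names are assumptions, since the actual wiring is outside this hunk:

import gradio as gr
from analyzer import handle_load_repository

with gr.Blocks() as demo:
    repo_id_box = gr.Textbox(label="Repository ID")   # assumed component names
    load_btn = gr.Button("Load Repository")
    status_box = gr.Textbox(label="Status")
    context_state = gr.State("")

    # handle_load_repository returns (status, context_summary), so it maps onto two outputs.
    load_btn.click(
        fn=handle_load_repository,
        inputs=[repo_id_box],
        outputs=[status_box, context_state],
    )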
repo_explorer.py CHANGED
@@ -2,124 +2,12 @@ import gradio as gr
 import os
 import logging
 from typing import List, Dict, Tuple
-from analyzer import combine_repo_files_for_llm
+from analyzer import combine_repo_files_for_llm, handle_load_repository
 from hf_utils import download_filtered_space_files
 
 # Setup logger
 logger = logging.getLogger(__name__)
 
-def analyze_repo_chunk_for_context(chunk: str, repo_id: str) -> str:
-    """
-    Analyze a repository chunk to create conversational context for the chatbot.
-    This creates summaries focused on helping users understand the repository.
-    """
-    try:
-        from openai import OpenAI
-        client = OpenAI(api_key=os.getenv("modal_api"))
-        client.base_url = os.getenv("base_url")
-
-        context_prompt = f"""You are analyzing a chunk of code from the repository '{repo_id}' to create a conversational summary for a chatbot assistant.
-
-Create a concise but informative summary that helps understand:
-- What this code section does
-- Key functions, classes, or components
-- Important features or capabilities
-- How it relates to the overall repository purpose
-- Any notable patterns or technologies used
-
-Focus on information that would be useful for answering user questions about the repository.
-
-Repository chunk:
-{chunk}
-
-Provide a clear, conversational summary in 2-3 paragraphs:"""
-
-        response = client.chat.completions.create(
-            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
-            messages=[
-                {"role": "system", "content": "You are an expert code analyst creating conversational summaries for a repository assistant chatbot."},
-                {"role": "user", "content": context_prompt}
-            ],
-            max_tokens=600,  # Increased for more detailed analysis with larger chunks
-            temperature=0.3
-        )
-
-        return response.choices[0].message.content
-
-    except Exception as e:
-        logger.error(f"Error analyzing chunk for context: {e}")
-        return f"Code section analysis unavailable: {e}"
-
-def create_repo_context_summary(repo_content: str, repo_id: str) -> str:
-    """
-    Create a comprehensive context summary by analyzing the repository in chunks.
-    Returns a detailed summary that the chatbot can use to answer questions.
-    """
-    try:
-        lines = repo_content.split('\n')
-        chunk_size = 1200  # Increased for better context and fewer API calls
-        chunk_summaries = []
-
-        logger.info(f"Analyzing repository {repo_id} in chunks for chatbot context")
-
-        for i in range(0, len(lines), chunk_size):
-            chunk = '\n'.join(lines[i:i+chunk_size])
-            if chunk.strip():  # Only analyze non-empty chunks
-                summary = analyze_repo_chunk_for_context(chunk, repo_id)
-                chunk_summaries.append(f"=== Section {len(chunk_summaries) + 1} ===\n{summary}")
-
-        # Create final comprehensive summary
-        try:
-            from openai import OpenAI
-            client = OpenAI(api_key=os.getenv("modal_api"))
-            client.base_url = os.getenv("base_url")
-
-            final_prompt = f"""Based on the following section summaries of repository '{repo_id}', create a comprehensive overview that a chatbot can use to answer user questions.
-
-Section Summaries:
-{chr(10).join(chunk_summaries)}
-
-Create a well-structured overview covering:
-1. Repository Purpose & Main Functionality
-2. Key Components & Architecture
-3. Important Features & Capabilities
-4. Technology Stack & Dependencies
-5. Usage Patterns & Examples
-
-Make this comprehensive but conversational - it will be used by a chatbot to answer user questions about the repository."""
-
-            response = client.chat.completions.create(
-                model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
-                messages=[
-                    {"role": "system", "content": "You are creating a comprehensive repository summary for a chatbot assistant."},
-                    {"role": "user", "content": final_prompt}
-                ],
-                max_tokens=1500,  # Increased for more comprehensive summaries
-                temperature=0.3
-            )
-
-            final_summary = response.choices[0].message.content
-
-            # Combine everything for the chatbot context
-            full_context = f"""=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===
-
-{final_summary}
-
-=== DETAILED SECTION SUMMARIES ===
-{chr(10).join(chunk_summaries)}"""
-
-            logger.info(f"Created comprehensive context summary for {repo_id}")
-            return full_context
-
-        except Exception as e:
-            logger.error(f"Error creating final summary: {e}")
-            # Fallback to just section summaries
-            return f"=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===\n\n" + '\n\n'.join(chunk_summaries)
-
-    except Exception as e:
-        logger.error(f"Error creating repo context summary: {e}")
-        return f"Repository analysis unavailable: {e}"
-
 def create_repo_explorer_tab() -> Tuple[Dict[str, gr.components.Component], Dict[str, gr.State]]:
     """
     Creates the Repo Explorer tab content and returns the component references and state variables.
@@ -198,41 +86,6 @@ def create_repo_explorer_tab() -> Tuple[Dict[str, gr.components.Component], Dict
 
     return components, states
 
-def handle_load_repository(repo_id: str) -> Tuple[str, str]:
-    """Load a specific repository and prepare it for exploration with chunk-based analysis."""
-    if not repo_id.strip():
-        return "Status: Please enter a repository ID.", ""
-
-    try:
-        logger.info(f"Loading repository for exploration: {repo_id}")
-
-        # Download and process the repository
-        try:
-            download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
-            combined_text_path = combine_repo_files_for_llm()
-
-        except Exception as e:
-            logger.error(f"Error downloading repository {repo_id}: {e}")
-            error_status = f"❌ Error downloading repository: {e}"
-            return error_status, ""
-
-        with open(combined_text_path, "r", encoding="utf-8") as f:
-            repo_content = f.read()
-
-        status = f"✅ Repository '{repo_id}' loaded successfully!\n📁 Files processed and ready for exploration.\n🔄 Analyzing repository in chunks for comprehensive context...\n💬 You can now ask questions about this repository."
-
-        # Create comprehensive context summary using chunk analysis
-        logger.info(f"Creating context summary for {repo_id}")
-        context_summary = create_repo_context_summary(repo_content, repo_id)
-
-        logger.info(f"Repository {repo_id} loaded and analyzed successfully for exploration")
-        return status, context_summary
-
-    except Exception as e:
-        logger.error(f"Error loading repository {repo_id}: {e}")
-        error_status = f"❌ Error loading repository: {e}"
-        return error_status, ""
-
 def handle_repo_user_message(user_message: str, history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> Tuple[List[Dict[str, str]], str]:
     """Handle user messages in the repo-specific chatbot."""
     if not repo_context_summary.strip():