naman1102 committed on
Commit
fd7c5f8
·
1 Parent(s): 3330689
Files changed (3) hide show
  1. analyzer.py +2 -2
  2. app.py +9 -0
  3. repo_explorer.py +331 -0
analyzer.py CHANGED
@@ -140,7 +140,7 @@ def analyze_code_chunk(code: str, user_requirements: str = "") -> str:
140
  {"role": "system", "content": chunk_prompt},
141
  {"role": "user", "content": code}
142
  ],
143
- max_tokens=512,
144
  temperature=0.4
145
  )
146
  return response.choices[0].message.content
@@ -190,7 +190,7 @@ def analyze_combined_file(output_file="combined_repo.txt", user_requirements: st
190
  try:
191
  with open(output_file, "r", encoding="utf-8") as f:
192
  lines = f.readlines()
193
- chunk_size = 500
194
  chunk_jsons = []
195
  for i in range(0, len(lines), chunk_size):
196
  chunk = "".join(lines[i:i+chunk_size])
 
140
  {"role": "system", "content": chunk_prompt},
141
  {"role": "user", "content": code}
142
  ],
143
+
144
  temperature=0.4
145
  )
146
  return response.choices[0].message.content
 
190
  try:
191
  with open(output_file, "r", encoding="utf-8") as f:
192
  lines = f.readlines()
193
+ chunk_size = 1200
194
  chunk_jsons = []
195
  for i in range(0, len(lines), chunk_size):
196
  chunk = "".join(lines[i:i+chunk_size])
app.py CHANGED
@@ -10,6 +10,7 @@ import os
10
  from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
11
  from hf_utils import download_space_repo, search_top_spaces
12
  from chatbot_page import chat_with_user, extract_keywords_from_conversation
 
13
 
14
  # --- Configuration ---
15
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -242,6 +243,8 @@ def create_ui() -> gr.Blocks:
242
  repo_ids_state = gr.State([])
243
  current_repo_idx_state = gr.State(0)
244
  user_requirements_state = gr.State("") # Store user requirements from chatbot
 
 
245
 
246
  gr.Markdown(
247
  """
@@ -365,6 +368,9 @@ def create_ui() -> gr.Blocks:
365
  interactive=False,
366
  info="Current conversation status"
367
  )
 
 
 
368
 
369
  # --- Footer ---
370
  gr.Markdown(
@@ -561,6 +567,9 @@ def create_ui() -> gr.Blocks:
561
  outputs=[repo_ids_state, current_repo_idx_state, df_output, status_box_analysis, tabs]
562
  )
563
 
 
 
 
564
  return app
565
 
566
  if __name__ == "__main__":
 
10
  from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
11
  from hf_utils import download_space_repo, search_top_spaces
12
  from chatbot_page import chat_with_user, extract_keywords_from_conversation
13
+ from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events
14
 
15
  # --- Configuration ---
16
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
243
  repo_ids_state = gr.State([])
244
  current_repo_idx_state = gr.State(0)
245
  user_requirements_state = gr.State("") # Store user requirements from chatbot
246
+ loaded_repo_content_state = gr.State("") # Store loaded repository content
247
+ current_repo_id_state = gr.State("") # Store current repository ID
248
 
249
  gr.Markdown(
250
  """
 
368
  interactive=False,
369
  info="Current conversation status"
370
  )
371
+
372
+ # --- Repo Explorer Tab ---
373
+ repo_explorer_tab, repo_components, repo_states = create_repo_explorer_tab()
374
 
375
  # --- Footer ---
376
  gr.Markdown(
 
567
  outputs=[repo_ids_state, current_repo_idx_state, df_output, status_box_analysis, tabs]
568
  )
569
 
570
+ # Repo Explorer Tab
571
+ setup_repo_explorer_events(repo_components, repo_states)
572
+
573
  return app
574
 
575
  if __name__ == "__main__":
repo_explorer.py ADDED
@@ -0,0 +1,331 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import logging
4
+ from typing import List, Dict, Tuple
5
+ from analyzer import combine_repo_files_for_llm
6
+ from hf_utils import download_space_repo
7
+
8
+ # Setup logger
9
+ logger = logging.getLogger(__name__)
10
+
11
def analyze_repo_chunk_for_context(chunk: str, repo_id: str) -> str:
    """Summarize one chunk of repository text for use as chatbot context.

    The chunk is sent to a chat-completions endpoint whose API key and base
    URL are read from the ``modal_api`` and ``base_url`` environment
    variables.

    Args:
        chunk: Text of the repository section to summarize.
        repo_id: Repository identifier; interpolated into the prompt.

    Returns:
        The model's summary text. If anything fails (including the model
        returning no text), a string starting with
        ``"Code section analysis unavailable:"`` is returned instead; the
        exception is logged and not propagated.
    """
    try:
        # The import lives inside the try block, so a missing `openai`
        # package is also reported through the fallback string below.
        from openai import OpenAI
        client = OpenAI(api_key=os.getenv("modal_api"))
        client.base_url = os.getenv("base_url")

        context_prompt = f"""You are analyzing a chunk of code from the repository '{repo_id}' to create a conversational summary for a chatbot assistant.

Create a concise but informative summary that helps understand:
- What this code section does
- Key functions, classes, or components
- Important features or capabilities
- How it relates to the overall repository purpose
- Any notable patterns or technologies used

Focus on information that would be useful for answering user questions about the repository.

Repository chunk:
{chunk}

Provide a clear, conversational summary in 2-3 paragraphs:"""

        response = client.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=[
                {"role": "system", "content": "You are an expert code analyst creating conversational summaries for a repository assistant chatbot."},
                {"role": "user", "content": context_prompt}
            ],
            max_tokens=600,  # Increased for more detailed analysis with larger chunks
            temperature=0.3
        )

        summary = response.choices[0].message.content
        if summary is None:
            # The API allows a message without text content; route that
            # case to the fallback so the declared `str` return type holds.
            raise ValueError("model returned no text content")
        return summary

    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception("Error analyzing chunk for context: %s", e)
        return f"Code section analysis unavailable: {e}"
52
+
53
def create_repo_context_summary(repo_content: str, repo_id: str) -> str:
    """Build the chatbot's context for a repository by summarizing it in chunks.

    The content is split into chunks of 1200 lines; each non-blank chunk is
    summarized with ``analyze_repo_chunk_for_context``. The section summaries
    are then condensed into one overview by a further chat-completions call.

    Args:
        repo_content: Combined text of the repository files.
        repo_id: Repository identifier, used in prompts and headings.

    Returns:
        The overview followed by the per-section summaries. If the final
        overview call fails, only the section summaries are returned. If the
        content is empty or an unexpected error occurs, a string starting
        with ``"Repository analysis unavailable:"`` is returned. Exceptions
        are logged and not propagated.
    """
    # With no content there are no section summaries, and asking the model
    # to condense nothing would only invite an invented overview.
    if not repo_content or not repo_content.strip():
        return "Repository analysis unavailable: repository content is empty."

    try:
        lines = repo_content.split('\n')
        chunk_size = 1200  # Increased for better context and fewer API calls
        chunk_summaries = []

        logger.info(f"Analyzing repository {repo_id} in chunks for chatbot context")

        for start in range(0, len(lines), chunk_size):
            chunk = '\n'.join(lines[start:start + chunk_size])
            if chunk.strip():  # Only analyze non-empty chunks
                summary = analyze_repo_chunk_for_context(chunk, repo_id)
                chunk_summaries.append(f"=== Section {len(chunk_summaries) + 1} ===\n{summary}")

        # Joined once and reused in both the prompt and the final context.
        sections_text = '\n'.join(chunk_summaries)

        # Create final comprehensive summary
        try:
            from openai import OpenAI
            client = OpenAI(api_key=os.getenv("modal_api"))
            client.base_url = os.getenv("base_url")

            final_prompt = f"""Based on the following section summaries of repository '{repo_id}', create a comprehensive overview that a chatbot can use to answer user questions.

Section Summaries:
{sections_text}

Create a well-structured overview covering:
1. Repository Purpose & Main Functionality
2. Key Components & Architecture
3. Important Features & Capabilities
4. Technology Stack & Dependencies
5. Usage Patterns & Examples

Make this comprehensive but conversational - it will be used by a chatbot to answer user questions about the repository."""

            response = client.chat.completions.create(
                model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
                messages=[
                    {"role": "system", "content": "You are creating a comprehensive repository summary for a chatbot assistant."},
                    {"role": "user", "content": final_prompt}
                ],
                max_tokens=1500,  # Increased for more comprehensive summaries
                temperature=0.3
            )

            final_summary = response.choices[0].message.content
            if final_summary is None:
                # No text came back: fall through to the section-only fallback
                # rather than embedding the literal "None" in the context.
                raise ValueError("model returned no text content")

            # Combine everything for the chatbot context
            full_context = f"""=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===

{final_summary}

=== DETAILED SECTION SUMMARIES ===
{sections_text}"""

            logger.info(f"Created comprehensive context summary for {repo_id}")
            return full_context

        except Exception as e:
            logger.exception("Error creating final summary: %s", e)
            # Fallback to just section summaries
            return f"=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===\n\n" + '\n\n'.join(chunk_summaries)

    except Exception as e:
        logger.exception("Error creating repo context summary: %s", e)
        return f"Repository analysis unavailable: {e}"
122
+
123
def create_repo_explorer_tab() -> Tuple[gr.TabItem, Dict[str, gr.components.Component], Dict[str, gr.State]]:
    """Build the Repo Explorer tab.

    Creates two ``gr.State`` holders and a ``gr.TabItem`` containing the
    repository-ID input, load button, status box, chatbot, message input,
    send button and content preview.

    NOTE(review): presumably this must be called while a ``gr.Blocks`` /
    ``gr.Tabs`` context is active so the tab attaches to the app; confirm
    against the caller.

    Returns:
        A ``(tab, components, states)`` tuple: the tab item, a dict of the
        interactive components keyed by name, and a dict of the state
        holders keyed by ``"repo_context_summary"`` and ``"current_repo_id"``.
    """

    # State variables for repo explorer
    states = {
        "repo_context_summary": gr.State(""),
        "current_repo_id": gr.State("")
    }

    with gr.TabItem("πŸ” Repo Explorer", id="repo_explorer_tab") as tab:
        gr.Markdown("### πŸ—‚οΈ Deep Dive into a Specific Repository")

        with gr.Row():
            with gr.Column(scale=2):
                repo_explorer_input = gr.Textbox(
                    label="πŸ“ Repository ID",
                    placeholder="microsoft/DialoGPT-medium",
                    info="Enter a Hugging Face repository ID to explore"
                )
            with gr.Column(scale=1):
                load_repo_btn = gr.Button("πŸš€ Load Repository", variant="primary", size="lg")

        with gr.Row():
            repo_status_display = gr.Textbox(
                label="πŸ“Š Repository Status",
                interactive=False,
                lines=3,
                info="Current repository loading status and basic info"
            )

        with gr.Row():
            with gr.Column(scale=2):
                # `info` is deliberately not passed here: unlike gr.Textbox,
                # gr.Chatbot does not appear to accept an `info` keyword, so
                # passing it would raise TypeError while building the UI.
                # NOTE(review): confirm against the pinned Gradio version.
                repo_chatbot = gr.Chatbot(
                    label="πŸ€– Repository Assistant",
                    height=500,
                    type="messages",
                    avatar_images=(
                        "https://cdn-icons-png.flaticon.com/512/149/149071.png",
                        "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"
                    ),
                    show_copy_button=True
                )

                with gr.Row():
                    repo_msg_input = gr.Textbox(
                        label="πŸ’­ Ask about this repository",
                        placeholder="What does this repository do? How do I use it?",
                        lines=1,
                        scale=4,
                        info="Ask anything about the loaded repository"
                    )
                    repo_send_btn = gr.Button("πŸ“€ Send", variant="primary", scale=1)

            with gr.Column(scale=1):
                repo_content_display = gr.Textbox(
                    label="πŸ“„ Repository Content Preview",
                    lines=25,
                    interactive=False,
                    show_copy_button=True,
                    info="Preview of the repository files and content"
                )

    # Component references
    components = {
        "repo_explorer_input": repo_explorer_input,
        "load_repo_btn": load_repo_btn,
        "repo_status_display": repo_status_display,
        "repo_chatbot": repo_chatbot,
        "repo_msg_input": repo_msg_input,
        "repo_send_btn": repo_send_btn,
        "repo_content_display": repo_content_display
    }

    return tab, components, states
201
+
202
def handle_load_repository(repo_id: str) -> Tuple[str, str, str]:
    """Download a repository, build a preview, and analyze it for the chatbot.

    Args:
        repo_id: Repository identifier typed by the user. Surrounding
            whitespace is ignored.

    Returns:
        A ``(status, preview, context_summary)`` tuple, in the order the
        event wiring maps onto the status box, the content preview and the
        context-summary state. On empty input or on failure, ``preview`` and
        ``context_summary`` are empty strings and ``status`` explains why.
        Exceptions are logged and not propagated.
    """
    repo_id = (repo_id or "").strip()
    if not repo_id:
        # The status message goes first: every return path must use the
        # same (status, preview, context_summary) order.
        return "Status: Please enter a repository ID.", "", ""

    try:
        logger.info(f"Loading repository for exploration: {repo_id}")

        # Download and combine repository files
        download_space_repo(repo_id, local_dir="repo_files")
        txt_path = combine_repo_files_for_llm()

        with open(txt_path, "r", encoding="utf-8") as f:
            repo_content = f.read()

        # Create a preview (first 2000 characters)
        if len(repo_content) > 2000:
            preview = repo_content[:2000] + "..."
        else:
            preview = repo_content

        status = f"βœ… Repository '{repo_id}' loaded successfully!\nπŸ“ Files processed and ready for exploration.\nπŸ”„ Analyzing repository in chunks for comprehensive context...\nπŸ’¬ You can now ask questions about this repository."

        # Create comprehensive context summary using chunk analysis
        logger.info(f"Creating context summary for {repo_id}")
        context_summary = create_repo_context_summary(repo_content, repo_id)

        logger.info(f"Repository {repo_id} loaded and analyzed successfully for exploration")
        return status, preview, context_summary

    except Exception as e:
        logger.exception("Error loading repository %s: %s", repo_id, e)
        error_status = f"❌ Error loading repository: {e}"
        return error_status, "", ""
233
+
234
def handle_repo_user_message(user_message: str, history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> Tuple[List[Dict[str, str]], str]:
    """Append the user's message to the repo chatbot history.

    Args:
        user_message: Text from the message box.
        history: Current chat history as ``{"role", "content"}`` dicts.
        repo_context_summary: Analysis of the loaded repository; when blank,
            no repository is considered loaded and the history is returned
            unchanged.
        repo_id: Repository identifier, used in the welcome message.

    Returns:
        ``(new_history, "")``. The empty string is the second output so the
        message box is cleared. ``new_history`` is a new list; the caller's
        list is not modified.
    """
    # Copy so the caller's list is never mutated; also tolerates None.
    updated_history = list(history or [])

    if not repo_context_summary or not repo_context_summary.strip():
        return updated_history, ""

    # Initialize with repository-specific welcome message if empty
    if not updated_history:
        welcome_msg = f"Hello! I'm your assistant for the '{repo_id}' repository. I have analyzed all the files and created a comprehensive understanding of this repository. I'm ready to answer any questions about its functionality, usage, architecture, and more. What would you like to know?"
        updated_history.append({"role": "assistant", "content": welcome_msg})

    # Whitespace-only input carries no question, so it is not recorded.
    if user_message and user_message.strip():
        updated_history.append({"role": "user", "content": user_message})

    return updated_history, ""
247
+
248
def handle_repo_bot_response(history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> List[Dict[str, str]]:
    """Generate the assistant's reply to the latest user message.

    The repository analysis is placed in the system prompt. Recent prior
    turns are sent along with the latest user message so follow-up questions
    keep their conversational context.

    Args:
        history: Chat history as ``{"role", "content"}`` dicts; a reply is
            generated only when the last entry has role ``"user"``.
        repo_context_summary: Analysis of the loaded repository; when blank,
            the history is returned unchanged.
        repo_id: Repository identifier, interpolated into the system prompt.

    Returns:
        The history with one assistant message appended: the model's reply,
        or an apology containing the error if the request fails. When no
        reply is due, the history is returned as given. Exceptions are
        logged and not propagated.
    """
    if not history or history[-1].get("role") != "user":
        return history
    if not repo_context_summary or not repo_context_summary.strip():
        return history

    user_message = history[-1]["content"]

    # Create a specialized prompt using the comprehensive context summary
    repo_system_prompt = f"""You are an expert assistant for the Hugging Face repository '{repo_id}'.
You have comprehensive knowledge about this repository based on detailed analysis of all its files and components.

Use the following comprehensive analysis to answer user questions accurately and helpfully:

{repo_context_summary}

Instructions:
- Answer questions clearly and conversationally about this specific repository
- Reference specific components, functions, or features when relevant
- Provide practical guidance on installation, usage, and implementation
- If asked about code details, refer to the analysis above
- Be helpful and informative while staying focused on this repository
- If something isn't covered in the analysis, acknowledge the limitation

Answer the user's question based on your comprehensive knowledge of this repository."""

    # Earlier turns give the model context for follow-up questions. Only
    # plain user/assistant text is forwarded, and the window is bounded so a
    # long conversation cannot grow the request without limit.
    max_prior_messages = 10
    prior_turns = [
        {"role": entry["role"], "content": entry["content"]}
        for entry in history[:-1]
        if entry.get("role") in ("user", "assistant")
        and isinstance(entry.get("content"), str)
    ][-max_prior_messages:]
    # Skip leading assistant messages (e.g. the welcome text) so the
    # dialogue sent to the model starts with a user turn.
    first_user = next(
        (idx for idx, entry in enumerate(prior_turns) if entry["role"] == "user"),
        len(prior_turns),
    )
    prior_turns = prior_turns[first_user:]

    try:
        from openai import OpenAI
        client = OpenAI(api_key=os.getenv("modal_api"))
        client.base_url = os.getenv("base_url")

        response = client.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=[
                {"role": "system", "content": repo_system_prompt},
                *prior_turns,
                {"role": "user", "content": user_message}
            ],
            max_tokens=1024,
            temperature=0.7
        )

        bot_response = response.choices[0].message.content
        if bot_response is None:
            # A reply without text is reported through the error path
            # rather than shown as an empty assistant bubble.
            raise ValueError("model returned no text content")
        reply = bot_response

    except Exception as e:
        logger.exception("Error generating repo bot response: %s", e)
        reply = f"I apologize, but I encountered an error while processing your question: {e}"

    # Return a new list instead of appending to the caller's list.
    return [*history, {"role": "assistant", "content": reply}]
297
+
298
def setup_repo_explorer_events(components: Dict[str, gr.components.Component], states: Dict[str, gr.State]):
    """Attach the Repo Explorer event handlers.

    Args:
        components: Component dict as returned by ``create_repo_explorer_tab``.
        states: State dict as returned by ``create_repo_explorer_tab``.
    """
    repo_input = components["repo_explorer_input"]
    chatbot = components["repo_chatbot"]
    message_box = components["repo_msg_input"]
    summary_state = states["repo_context_summary"]
    repo_id_state = states["current_repo_id"]

    # Load button: run the load handler, then record which repository
    # was requested.
    load_event = components["load_repo_btn"].click(
        fn=handle_load_repository,
        inputs=[repo_input],
        outputs=[
            components["repo_status_display"],
            components["repo_content_display"],
            summary_state,
        ],
    )
    load_event.then(
        fn=lambda repo_id: repo_id,
        inputs=[repo_input],
        outputs=[repo_id_state],
    )

    # Pressing Enter in the message box and clicking Send run the same
    # two-step chain: record the user message, then generate the reply.
    for register in (message_box.submit, components["repo_send_btn"].click):
        user_step = register(
            fn=handle_repo_user_message,
            inputs=[message_box, chatbot, summary_state, repo_id_state],
            outputs=[chatbot, message_box],
        )
        user_step.then(
            fn=handle_repo_bot_response,
            inputs=[chatbot, summary_state, repo_id_state],
            outputs=[chatbot],
        )