naman1102 committed
Commit 274a509 · Parent(s): f3ed537

Update app.py

Files changed (1):
  1. app.py +211 -205
app.py CHANGED
@@ -2,20 +2,19 @@ import gradio as gr
  import regex as re
  import csv
  import pandas as pd
- from typing import List, Dict, Tuple, Optional
  import logging
- from datetime import datetime
  import os
- from huggingface_hub import HfApi, SpaceCard
  from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
  from hf_utils import download_space_repo, search_top_spaces
  from chatbot_page import chat_with_user, extract_keywords_from_conversation

- # Configure logging
- logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

- # Constants
  CSV_FILE = "repo_ids.csv"
  CHATBOT_SYSTEM_PROMPT = (
      "You are a helpful assistant. Your goal is to help the user describe their ideal open-source repo. "
@@ -23,268 +22,275 @@ CHATBOT_SYSTEM_PROMPT = (
      "When the user clicks 'End Chat', analyze the conversation and return about 5 keywords for repo search. "
      "Return only the keywords as a comma-separated list."
  )

- class AppState:
-     """State management for the application."""
-     def __init__(self):
-         self.repo_ids: List[str] = []
-         self.current_repo_idx: int = 0
-         self.generated_keywords: List[str] = []
-         self.chat_history: List[Dict[str, str]] = []
-
- def read_csv_as_text(filename: str) -> pd.DataFrame:
-     """Read CSV file and return as DataFrame."""
-     try:
-         return pd.read_csv(filename, dtype=str)
-     except Exception as e:
-         logger.error(f"Error reading CSV: {e}")
-         return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])

  def write_repos_to_csv(repo_ids: List[str]) -> None:
-     """Write repository IDs to CSV file."""
      try:
-         with open(CSV_FILE, 'w', newline='', encoding="utf-8") as f:
-             writer = csv.writer(f)
              writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
              for repo_id in repo_ids:
                  writer.writerow([repo_id, "", "", "", ""])
      except Exception as e:
          logger.error(f"Error writing to CSV: {e}")

- def process_repo_input(text: str, state: AppState) -> pd.DataFrame:
-     """Process repository IDs input."""
-     if not text:
-         state.repo_ids = []
-         state.current_repo_idx = 0
-         return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-
-     repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
-     state.repo_ids = repo_ids
-     state.current_repo_idx = 0
-
-     write_repos_to_csv(repo_ids)
-     return read_csv_as_text(CSV_FILE)
-
- def keyword_search_and_update(keyword: str, state: AppState) -> pd.DataFrame:
-     """Search for repositories by keywords."""
-     if not keyword:
          return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-
-     keyword_list = [k.strip() for k in re.split(r'[\n,]+', keyword) if k.strip()]
-     repo_ids = []
-
-     for kw in keyword_list:
-         repo_ids.extend(search_top_spaces(kw, limit=5))
-
-     # Remove duplicates while preserving order
-     seen = set()
-     unique_repo_ids = []
-     for rid in repo_ids:
-         if rid not in seen:
-             unique_repo_ids.append(rid)
-             seen.add(rid)
-
-     state.repo_ids = unique_repo_ids
-     state.current_repo_idx = 0
-
-     write_repos_to_csv(unique_repo_ids)
-     return read_csv_as_text(CSV_FILE)

- def analyze_single_repo(repo_id: str) -> Tuple[str, str, Dict]:
-     """Analyze a single repository."""
      try:
          download_space_repo(repo_id, local_dir="repo_files")
          txt_path = combine_repo_files_for_llm()

          with open(txt_path, "r", encoding="utf-8") as f:
              combined_content = f.read()
-
          llm_output = analyze_combined_file(txt_path)
          last_start = llm_output.rfind('{')
          last_end = llm_output.rfind('}')

-         final_json_str = llm_output[last_start:last_end+1] if last_start != -1 and last_end != -1 and last_end > last_start else llm_output
          llm_json = parse_llm_json_response(final_json_str)

          if isinstance(llm_json, dict) and "error" not in llm_json:
-             strengths = llm_json.get("strength", "")
-             weaknesses = llm_json.get("weaknesses", "")
              summary = f"JSON extraction: SUCCESS\n\nStrengths:\n{strengths}\n\nWeaknesses:\n{weaknesses}"
          else:
-             summary = f"JSON extraction: FAILED\nRaw: {llm_json.get('raw', '') if isinstance(llm_json, dict) else llm_json}"
-
-         return combined_content, summary, llm_json
-
-     except Exception as e:
-         logger.error(f"Error analyzing repo {repo_id}: {e}")
-         return f"Error analyzing {repo_id}", f"Error: {str(e)}", {"error": str(e)}

- def update_csv_with_analysis(repo_id: str, analysis_results: Dict) -> pd.DataFrame:
-     """Update CSV file with analysis results."""
-     try:
-         df = read_csv_as_text(CSV_FILE)
-         updated = False
-
          for idx, row in df.iterrows():
              if row["repo id"] == repo_id:
-                 if isinstance(analysis_results, dict) and "error" not in analysis_results:
-                     df.at[idx, "strength"] = analysis_results.get("strength", "")
-                     df.at[idx, "weaknesses"] = analysis_results.get("weaknesses", "")
-                     df.at[idx, "speciality"] = analysis_results.get("speciality", "")
-                     df.at[idx, "relevance rating"] = analysis_results.get("relevance rating", "")
-                 updated = True
                  break

-         if not updated and isinstance(analysis_results, dict) and "error" not in analysis_results:
-             new_row = {
-                 "repo id": repo_id,
-                 "strength": analysis_results.get("strength", ""),
-                 "weaknesses": analysis_results.get("weaknesses", ""),
-                 "speciality": analysis_results.get("speciality", ""),
-                 "relevance rating": analysis_results.get("relevance rating", "")
-             }
-             df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
-
          df.to_csv(CSV_FILE, index=False)
-         return df
-
      except Exception as e:
-         logger.error(f"Error updating CSV: {e}")
-         return read_csv_as_text(CSV_FILE)

- def show_combined_repo_and_llm(state: AppState) -> Tuple[str, str, pd.DataFrame]:
-     """Show combined repo content and LLM analysis."""
-     if not state.repo_ids:
-         return "No repo ID available. Please submit repo IDs first.", "", pd.DataFrame()
-
-     if state.current_repo_idx >= len(state.repo_ids):
-         return "All repo IDs have been processed.", "", read_csv_as_text(CSV_FILE)
-
-     repo_id = state.repo_ids[state.current_repo_idx]
-     combined_content, summary, analysis_results = analyze_single_repo(repo_id)
-     df = update_csv_with_analysis(repo_id, analysis_results)
-
-     state.current_repo_idx += 1
-     return combined_content, summary, df

  def create_ui() -> gr.Blocks:
-     """Create the Gradio interface."""
-     state = gr.State(AppState())
-
-     with gr.Blocks(title="Hugging Face Repo Analyzer", theme=gr.themes.Soft()) as app:
-         gr.Markdown("# Hugging Face Repository Analyzer")

-         with gr.Row():
-             with gr.Column():
-                 # Input Section
-                 gr.Markdown("### Enter Repository IDs")
-                 repo_id_input = gr.Textbox(
-                     label="Enter repo IDs (comma or newline separated)",
-                     lines=5,
-                     placeholder="repo1, repo2\nrepo3"
-                 )
-                 submit_btn = gr.Button("Submit Repository IDs", variant="primary")
-
-                 gr.Markdown("### Or Search by Keywords")
-                 keyword_input = gr.Textbox(
-                     label="Enter keywords to search",
-                     lines=3,
-                     placeholder="Enter keywords separated by commas"
-                 )
-                 search_btn = gr.Button("Search by Keywords", variant="primary")
-
-                 status = gr.Textbox(label="Status", visible=True)
-
-                 # Results Section
-                 df_output = gr.Dataframe(
-                     headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"],
-                     datatype=["str", "str", "str", "str", "str"]
-                 )

-             # Analysis Section
-             content_output = gr.Textbox(label="Repository Content", lines=10)
-             summary_output = gr.Textbox(label="Analysis Summary", lines=5)

          with gr.Row():
-             analyze_btn = gr.Button("Analyze Next Repository", variant="primary")
-             finish_btn = gr.Button("Finish Analysis", variant="secondary")
-
-         # Chat Section
          chatbot = gr.Chatbot(
              label="Chat with Assistant",
              height=400,
              type="messages"
          )
-         msg = gr.Textbox(label="Message", placeholder="Ask about the repository...")
          with gr.Row():
              send_btn = gr.Button("Send", variant="primary")
-             end_chat_btn = gr.Button("End Chat", variant="secondary")
-
-         def process_repo_input_with_status(text: str, state: AppState) -> Tuple[pd.DataFrame, str]:
-             """Process repo input with status update."""
-             df = process_repo_input(text, state)
-             return df, f"Found {len(state.repo_ids)} repositories"
-
-         def keyword_search_with_status(keyword: str, state: AppState) -> Tuple[pd.DataFrame, str]:
-             """Search keywords with status update."""
-             df = keyword_search_and_update(keyword, state)
-             return df, f"Found {len(state.repo_ids)} repositories"
-
-         def analyze_with_status(state: AppState) -> Tuple[str, str, pd.DataFrame, str]:
-             """Analyze with status update."""
-             content, summary, df = show_combined_repo_and_llm(state)
-             return content, summary, df, f"Analyzing repository {state.current_repo_idx} of {len(state.repo_ids)}"

-         def send_message_with_status(message: str, history: List[Dict[str, str]], state: AppState) -> Tuple[List[Dict[str, str]], str]:
-             """Send message with status update."""
-             if not message:
-                 return history, ""
-             history.append({"role": "user", "content": message})
-             response = chat_with_user(message, history, CHATBOT_SYSTEM_PROMPT)
-             history.append({"role": "assistant", "content": response})
              return history, ""
-
-         def end_chat_with_status(history: List[Dict[str, str]], state: AppState) -> Tuple[List[str], str]:
-             """End chat and extract keywords."""
              if not history:
-                 return [], "No chat history to analyze"
-             keywords = extract_keywords_from_conversation(history)
-             state.generated_keywords = keywords
-             return keywords, "Keywords extracted from conversation"

-         # Event handlers
-         submit_btn.click(
-             fn=process_repo_input_with_status,
-             inputs=[repo_id_input, state],
-             outputs=[df_output, status]
          )
-
          search_btn.click(
-             fn=keyword_search_with_status,
-             inputs=[keyword_input, state],
-             outputs=[df_output, status]
          )

-         analyze_btn.click(
-             fn=analyze_with_status,
-             inputs=[state],
-             outputs=[content_output, summary_output, df_output, status]
          )

          send_btn.click(
-             fn=send_message_with_status,
-             inputs=[msg, chatbot, state],
-             outputs=[chatbot, msg]
          )
-
          end_chat_btn.click(
-             fn=end_chat_with_status,
-             inputs=[chatbot, state],
-             outputs=[gr.Textbox(label="Extracted Keywords"), status]
          )
-
      return app

  if __name__ == "__main__":
      app = create_ui()
-     app.launch()
 
  import regex as re
  import csv
  import pandas as pd
+ from typing import List, Dict, Tuple, Any
  import logging
  import os
+
+ # Import core logic from other modules, as in app_old.py
  from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
  from hf_utils import download_space_repo, search_top_spaces
  from chatbot_page import chat_with_user, extract_keywords_from_conversation

+ # --- Configuration ---
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  logger = logging.getLogger(__name__)

  CSV_FILE = "repo_ids.csv"
  CHATBOT_SYSTEM_PROMPT = (
      "You are a helpful assistant. Your goal is to help the user describe their ideal open-source repo. "
      "When the user clicks 'End Chat', analyze the conversation and return about 5 keywords for repo search. "
      "Return only the keywords as a comma-separated list."
  )
+ CHATBOT_INITIAL_MESSAGE = "Hello! Please tell me about your ideal Hugging Face repo. What use case, preferred language, or features are you looking for?"

+ # --- Helper Functions (Logic) ---

  def write_repos_to_csv(repo_ids: List[str]) -> None:
+     """Writes a list of repo IDs to the CSV file, overwriting the previous content."""
      try:
+         with open(CSV_FILE, mode="w", newline='', encoding="utf-8") as csvfile:
+             writer = csv.writer(csvfile)
              writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
              for repo_id in repo_ids:
                  writer.writerow([repo_id, "", "", "", ""])
+         logger.info(f"Wrote {len(repo_ids)} repo IDs to {CSV_FILE}")
      except Exception as e:
          logger.error(f"Error writing to CSV: {e}")

+ def read_csv_to_dataframe() -> pd.DataFrame:
+     """Reads the CSV file into a pandas DataFrame."""
+     try:
+         return pd.read_csv(CSV_FILE, dtype=str).fillna('')
+     except FileNotFoundError:
          return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
+     except Exception as e:
+         logger.error(f"Error reading CSV: {e}")
+         return pd.DataFrame()

+ def analyze_and_update_single_repo(repo_id: str) -> Tuple[str, str, pd.DataFrame]:
+     """
+     Downloads, analyzes a single repo, updates the CSV, and returns results.
+     This function combines the logic of downloading, analyzing, and updating the CSV for one repo.
+     """
      try:
+         logger.info(f"Starting analysis for repo: {repo_id}")
          download_space_repo(repo_id, local_dir="repo_files")
          txt_path = combine_repo_files_for_llm()

          with open(txt_path, "r", encoding="utf-8") as f:
              combined_content = f.read()
+
          llm_output = analyze_combined_file(txt_path)
+
          last_start = llm_output.rfind('{')
          last_end = llm_output.rfind('}')
+         final_json_str = llm_output[last_start:last_end+1] if last_start != -1 and last_end != -1 else "{}"

          llm_json = parse_llm_json_response(final_json_str)

+         summary = ""
          if isinstance(llm_json, dict) and "error" not in llm_json:
+             strengths = llm_json.get("strength", "N/A")
+             weaknesses = llm_json.get("weaknesses", "N/A")
              summary = f"JSON extraction: SUCCESS\n\nStrengths:\n{strengths}\n\nWeaknesses:\n{weaknesses}"
          else:
+             summary = f"JSON extraction: FAILED\nRaw response might not be valid JSON."

+         # Update CSV
+         df = read_csv_to_dataframe()
+         repo_found_in_df = False
          for idx, row in df.iterrows():
              if row["repo id"] == repo_id:
+                 if isinstance(llm_json, dict):
+                     df.at[idx, "strength"] = llm_json.get("strength", "")
+                     df.at[idx, "weaknesses"] = llm_json.get("weaknesses", "")
+                     df.at[idx, "speciality"] = llm_json.get("speciality", "")
+                     df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
+                 repo_found_in_df = True
                  break

+         if not repo_found_in_df:
+             logger.warning(f"Repo ID {repo_id} not found in CSV for updating.")
+
          df.to_csv(CSV_FILE, index=False)
+         logger.info(f"Successfully analyzed and updated CSV for {repo_id}")
+         return combined_content, summary, df
+
      except Exception as e:
+         logger.error(f"An error occurred during analysis of {repo_id}: {e}")
+         error_summary = f"Error analyzing repo: {e}"
+         return "", error_summary, read_csv_to_dataframe()

+ # --- Gradio UI ---

  def create_ui() -> gr.Blocks:
+     """Creates and configures the entire Gradio interface."""
+
+     with gr.Blocks(theme=gr.themes.Soft(), title="Hugging Face Repo Analyzer") as app:

+         # --- State Management ---
+         # Using simple, separate state objects for robustness.
+         repo_ids_state = gr.State([])
+         current_repo_idx_state = gr.State(0)
+
+         gr.Markdown("# Hugging Face Repository Analyzer")
+
+         with gr.Tabs() as tabs:
+             # --- Input Tab ---
+             with gr.TabItem("1. Input Repositories", id="input_tab"):
+                 with gr.Row():
+                     with gr.Column():
+                         gr.Markdown("## Enter Repository IDs")
+                         repo_id_input = gr.Textbox(
+                             label="Enter repo IDs (comma or newline separated)",
+                             lines=8,
+                             placeholder="org/repo1, org/repo2"
+                         )
+                         submit_repo_btn = gr.Button("Submit Repository IDs", variant="primary")
+                     with gr.Column():
+                         gr.Markdown("## Or Search by Keywords")
+                         keyword_input = gr.Textbox(
+                             label="Enter keywords to search",
+                             lines=8,
+                             placeholder="e.g., text generation, image classification"
+                         )
+                         search_btn = gr.Button("Search by Keywords", variant="primary")

+                 status_box_input = gr.Textbox(label="Status", interactive=False)
+
+             # --- Analysis Tab ---
+             with gr.TabItem("2. Analyze Repositories", id="analysis_tab"):
+                 gr.Markdown("## Repository Analysis")
+                 analyze_next_btn = gr.Button("Analyze Next Repository", variant="primary")
+                 status_box_analysis = gr.Textbox(label="Status", interactive=False)

                  with gr.Row():
+                     content_output = gr.Textbox(label="Repository Content", lines=20)
+                     summary_output = gr.Textbox(label="Analysis Summary", lines=20)
+
+                 gr.Markdown("### Analysis Results Table")
+                 df_output = gr.Dataframe(headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
+
+             # --- Chatbot Tab ---
+             with gr.TabItem("3. Find Repos with AI", id="chatbot_tab"):
+                 gr.Markdown("## Chat with an Assistant to Find Repositories")
                  chatbot = gr.Chatbot(
+                     value=[(None, CHATBOT_INITIAL_MESSAGE)],
                      label="Chat with Assistant",
                      height=400,
                      type="messages"
                  )
+                 msg_input = gr.Textbox(label="Your Message", placeholder="Type your message here...", lines=2)
                  with gr.Row():
                      send_btn = gr.Button("Send", variant="primary")
+                     end_chat_btn = gr.Button("End Chat & Get Keywords")
+
+                 gr.Markdown("### Extracted Keywords")
+                 extracted_keywords_output = gr.Textbox(label="Keywords", interactive=False)
+                 use_keywords_btn = gr.Button("Use These Keywords to Search", variant="primary")
+                 status_box_chatbot = gr.Textbox(label="Status", interactive=False)

+         # --- Event Handler Functions ---
+
+         def handle_repo_id_submission(text: str) -> Tuple[List[str], int, pd.DataFrame, str, Any]:
+             """Processes submitted repo IDs, updates state, and prepares for analysis."""
+             if not text:
+                 return [], 0, pd.DataFrame(), "Status: Please enter repository IDs.", gr.update(selected="input_tab")
+
+             repo_ids = list(dict.fromkeys([repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]))
+             write_repos_to_csv(repo_ids)
+             df = read_csv_to_dataframe()
+             status = f"Status: {len(repo_ids)} repositories submitted. Ready for analysis."
+             return repo_ids, 0, df, status, gr.update(selected="analysis_tab")
+
+         def handle_keyword_search(keywords: str) -> Tuple[List[str], int, pd.DataFrame, str, Any]:
+             """Processes submitted keywords, finds repos, updates state, and prepares for analysis."""
+             if not keywords:
+                 return [], 0, pd.DataFrame(), "Status: Please enter keywords.", gr.update(selected="input_tab")
+
+             keyword_list = [k.strip() for k in re.split(r'[\n,]+', keywords) if k.strip()]
+             repo_ids = []
+             for kw in keyword_list:
+                 repo_ids.extend(search_top_spaces(kw, limit=5))
+
+             unique_repo_ids = list(dict.fromkeys(repo_ids))
+             write_repos_to_csv(unique_repo_ids)
+             df = read_csv_to_dataframe()
+             status = f"Status: Found {len(unique_repo_ids)} repositories. Ready for analysis."
+             return unique_repo_ids, 0, df, status, gr.update(selected="analysis_tab")
+
+         def handle_analyze_next(repo_ids: List[str], current_idx: int) -> Tuple[str, str, pd.DataFrame, int, str]:
+             """Analyzes the next repository in the list."""
+             if not repo_ids:
+                 return "", "", pd.DataFrame(), 0, "Status: No repositories to analyze. Please submit repo IDs first."
+             if current_idx >= len(repo_ids):
+                 return "", "", read_csv_to_dataframe(), current_idx, "Status: All repositories have been analyzed."
+
+             repo_id_to_analyze = repo_ids[current_idx]
+             status = f"Status: Analyzing repository {current_idx + 1}/{len(repo_ids)}: {repo_id_to_analyze}"
+
+             content, summary, df = analyze_and_update_single_repo(repo_id_to_analyze)
+
+             next_idx = current_idx + 1
+             if next_idx >= len(repo_ids):
+                 status += "\n\nFinished all analyses."
+
+             return content, summary, df, next_idx, status
+
+         def handle_user_message(user_message: str, history: List[List[str]]) -> Tuple[List[List[str]], str]:
+             """Handles sending a user message to the chatbot."""
+             history.append([user_message, None])
              return history, ""
+
+         def handle_bot_response(history: List[List[str]]) -> List[List[str]]:
+             """Generates and displays the bot's response."""
+             user_message = history[-1][0]
+             response = chat_with_user(user_message, history[:-1], CHATBOT_SYSTEM_PROMPT)
+             history[-1][1] = response
+             return history
+
+         def handle_end_chat(history: List[List[str]]) -> Tuple[str, str]:
+             """Ends the chat and extracts keywords from the conversation."""
              if not history:
+                 return "", "Status: Chat is empty, nothing to analyze."
+             keywords_str = extract_keywords_from_conversation(history)
+             status = "Status: Keywords extracted. You can now use them to search."
+             return keywords_str, status
+
+         # --- Component Event Wiring ---

+         # Input Tab
+         submit_repo_btn.click(
+             fn=handle_repo_id_submission,
+             inputs=[repo_id_input],
+             outputs=[repo_ids_state, current_repo_idx_state, df_output, status_box_analysis, tabs]
          )
          search_btn.click(
+             fn=handle_keyword_search,
+             inputs=[keyword_input],
+             outputs=[repo_ids_state, current_repo_idx_state, df_output, status_box_analysis, tabs]
          )

+         # Analysis Tab
+         analyze_next_btn.click(
+             fn=handle_analyze_next,
+             inputs=[repo_ids_state, current_repo_idx_state],
+             outputs=[content_output, summary_output, df_output, current_repo_idx_state, status_box_analysis]
          )

+         # Chatbot Tab
+         msg_input.submit(
+             fn=handle_user_message,
+             inputs=[msg_input, chatbot],
+             outputs=[chatbot, msg_input]
+         ).then(
+             fn=handle_bot_response,
+             inputs=[chatbot],
+             outputs=[chatbot]
+         )
          send_btn.click(
+             fn=handle_user_message,
+             inputs=[msg_input, chatbot],
+             outputs=[chatbot, msg_input]
+         ).then(
+             fn=handle_bot_response,
+             inputs=[chatbot],
+             outputs=[chatbot]
          )
          end_chat_btn.click(
+             fn=handle_end_chat,
+             inputs=[chatbot],
+             outputs=[extracted_keywords_output, status_box_chatbot]
+         )
+         use_keywords_btn.click(
+             fn=handle_keyword_search,
+             inputs=[extracted_keywords_output],
+             outputs=[repo_ids_state, current_repo_idx_state, df_output, status_box_analysis, tabs]
          )
+
      return app

  if __name__ == "__main__":
      app = create_ui()
+     app.launch(debug=True)
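
The new create_ui keeps per-session values in gr.State components and chains a follow-up callback onto each event with .then(). The sketch below is not part of the commit; it is a minimal, hedged illustration of that state-plus-chained-event pattern, with made-up names (advance, log_progress, items_state, idx_state).

import gradio as gr

# Minimal sketch (illustrative only) of the gr.State + chained-event pattern
# the new app.py relies on; none of these names come from the commit.
def advance(items, idx):
    # Return the updated cursor plus a status string; Gradio writes the first
    # return value back into the State component listed in `outputs`.
    if idx >= len(items):
        return idx, "Done."
    return idx + 1, f"Processing {items[idx]} ({idx + 1}/{len(items)})"

def log_progress(idx):
    # Runs as the chained `.then()` step, after the State has been updated.
    return f"State now holds index {idx}"

with gr.Blocks() as demo:
    items_state = gr.State(["repo-a", "repo-b"])  # per-session list
    idx_state = gr.State(0)                       # per-session cursor
    status = gr.Textbox(label="Status")
    log = gr.Textbox(label="Log")
    next_btn = gr.Button("Analyze Next")

    next_btn.click(
        advance, inputs=[items_state, idx_state], outputs=[idx_state, status]
    ).then(log_progress, inputs=[idx_state], outputs=[log])

if __name__ == "__main__":
    demo.launch()

Passing idx_state as both an input and an output is the same move handle_analyze_next makes with current_repo_idx_state to walk the repo list one click at a time.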