csv

- app.py +10 -1
- app_old.py +0 -481
- hf_utils.py +4 -0
app.py
CHANGED
@@ -977,6 +977,15 @@ def create_ui() -> gr.Blocks:
             # Get final updated dataframe
             updated_df = read_csv_to_dataframe()
 
+            # Filter out rows with no analysis data for consistent display with top 3
+            analyzed_df = updated_df.copy()
+            analyzed_df = analyzed_df[
+                (analyzed_df['strength'].str.strip() != '') |
+                (analyzed_df['weaknesses'].str.strip() != '') |
+                (analyzed_df['speciality'].str.strip() != '') |
+                (analyzed_df['relevance rating'].str.strip() != '')
+            ]
+
             # Get top 3 most relevant repositories using full data
             top_repos = get_top_relevant_repos(updated_df, user_requirements, top_n=3)
 
@@ -993,7 +1002,7 @@ def create_ui() -> gr.Blocks:
             show_top_section = gr.update(visible=not top_repos.empty)
 
             logger.info(f"Batch analysis completed: {successful_analyses} successful, {failed_analyses} failed, {csv_update_failures} CSV update issues")
-            return format_dataframe_for_display(
+            return format_dataframe_for_display(analyzed_df), final_status, format_dataframe_for_display(top_repos), show_top_section
 
         except Exception as e:
             logger.error(f"Error in batch analysis: {e}")
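Note on the app.py change above: the new block keeps only rows where at least one of the four analysis columns is non-empty after stripping whitespace, so the main table shows the same kind of rows as the top-3 table. A minimal, self-contained sketch of that boolean-mask filter with invented sample data (only the column names come from the diff):

import pandas as pd

# Hypothetical analysis table: 'c/z' has only blank analysis fields.
updated_df = pd.DataFrame({
    "repo id": ["a/x", "b/y", "c/z"],
    "strength": ["fast", "", " "],
    "weaknesses": ["", "", ""],
    "speciality": ["", "audio", ""],
    "relevance rating": ["", "", ""],
})

# Keep a row if any analysis column is non-empty once whitespace is stripped.
analyzed_df = updated_df.copy()
analyzed_df = analyzed_df[
    (analyzed_df["strength"].str.strip() != "") |
    (analyzed_df["weaknesses"].str.strip() != "") |
    (analyzed_df["speciality"].str.strip() != "") |
    (analyzed_df["relevance rating"].str.strip() != "")
]
print(analyzed_df["repo id"].tolist())  # ['a/x', 'b/y']; 'c/z' had only blanks

Note that get_top_relevant_repos still receives the unfiltered updated_df, so unanalyzed rows are hidden from the display table but remain visible to the top-3 selection.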
app_old.py
DELETED
@@ -1,481 +0,0 @@
-import gradio as gr
-import regex as re
-import csv
-import pandas as pd
-from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
-from hf_utils import download_filtered_space_files, search_top_spaces
-from chatbot_page import chat_with_user, extract_keywords_from_conversation
-# Import chatbot logic
-from analyzer import analyze_code
-
-# Chatbot system prompt
-CHATBOT_SYSTEM_PROMPT = (
-    "You are a helpful assistant. Your goal is to help the user describe their ideal open-source repo. "
-    "Ask questions to clarify what they want, their use case, preferred language, features, etc. "
-    "When the user clicks 'End Chat', analyze the conversation and return about 5 keywords for repo search. "
-    "Return only the keywords as a comma-separated list."
-)
-
-# Initial assistant message for chatbot
-CHATBOT_INITIAL_MESSAGE = "Hello! Please tell me about your ideal Hugging Face repo. What use case, preferred language, or features are you looking for?"
-
-def read_csv_as_text(csv_filename):
-    return pd.read_csv(csv_filename, dtype=str)
-
-def process_repo_input(text):
-    if not text:
-        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-    # Split by newlines and commas, strip whitespace
-    repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
-    # Write to CSV
-    csv_filename = "repo_ids.csv"
-    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
-        writer = csv.writer(csvfile)
-        writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-        for repo_id in repo_ids:
-            writer.writerow([repo_id, "", "", "", ""])
-    # Read the CSV into a DataFrame to display
-    df = read_csv_as_text(csv_filename)
-    return df
-
-# Store the last entered repo ids and the current index in global variables for button access
-last_repo_ids = []
-current_repo_idx = 0
-
-# Store extracted keywords for the chatbot flow
-generated_keywords = []
-
-def process_repo_input_and_store(text):
-    global last_repo_ids, current_repo_idx
-    if not text:
-        last_repo_ids = []
-        current_repo_idx = 0
-        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-    repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
-    last_repo_ids = repo_ids
-    current_repo_idx = 0
-    csv_filename = "repo_ids.csv"
-    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
-        writer = csv.writer(csvfile)
-        writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-        for repo_id in repo_ids:
-            writer.writerow([repo_id, "", "", "", ""])
-    df = read_csv_as_text(csv_filename)
-    return df
-
-def keyword_search_and_update(keyword):
-    global last_repo_ids, current_repo_idx
-    if not keyword:
-        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-    # Accept multiple keywords, comma or newline separated
-    keyword_list = [k.strip() for k in re.split(r'[\n,]+', keyword) if k.strip()]
-    repo_ids = []
-    for kw in keyword_list:
-        repo_ids.extend(search_top_spaces(kw, limit=5))
-    # Remove duplicates while preserving order
-    seen = set()
-    unique_repo_ids = []
-    for rid in repo_ids:
-        if rid not in seen:
-            unique_repo_ids.append(rid)
-            seen.add(rid)
-    last_repo_ids = unique_repo_ids
-    current_repo_idx = 0
-    csv_filename = "repo_ids.csv"
-    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
-        writer = csv.writer(csvfile)
-        writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-        for repo_id in unique_repo_ids:
-            writer.writerow([repo_id, "", "", "", ""])
-    df = read_csv_as_text(csv_filename)
-    return df
-
-def show_combined_repo_and_llm():
-    global current_repo_idx
-    if not last_repo_ids:
-        return "No repo ID available. Please submit repo IDs first.", "", pd.DataFrame()
-    if current_repo_idx >= len(last_repo_ids):
-        return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")
-    repo_id = last_repo_ids[current_repo_idx]
-    try:
-        download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
-    except Exception as e:
-        return f"Error downloading repo: {e}", "", read_csv_as_text("repo_ids.csv")
-    txt_path = combine_repo_files_for_llm()
-    try:
-        with open(txt_path, "r", encoding="utf-8") as f:
-            combined_content = f.read()
-    except Exception as e:
-        return f"Error reading {txt_path}: {e}", "", read_csv_as_text("repo_ids.csv")
-    llm_output = analyze_combined_file(txt_path)
-    # Extract only the last JSON object (final summary) for CSV writing
-    last_start = llm_output.rfind('{')
-    last_end = llm_output.rfind('}')
-    if last_start != -1 and last_end != -1 and last_end > last_start:
-        final_json_str = llm_output[last_start:last_end+1]
-    else:
-        final_json_str = llm_output
-    llm_json = parse_llm_json_response(final_json_str)
-    # Update CSV for the current repo id
-    csv_filename = "repo_ids.csv"
-    extraction_status = ""
-    strengths = ""
-    weaknesses = ""
-    try:
-        df = read_csv_as_text(csv_filename)
-        for col in ["strength", "weaknesses", "speciality", "relevance rating"]:
-            df[col] = df[col].astype(str)
-        updated = False
-        for idx, row in df.iterrows():
-            if row["repo id"] == repo_id:
-                if isinstance(llm_json, dict) and "error" not in llm_json:
-                    extraction_status = "JSON extraction: SUCCESS"
-                    strengths = llm_json.get("strength", "")
-                    weaknesses = llm_json.get("weaknesses", "")
-                    df.at[idx, "strength"] = strengths
-                    df.at[idx, "weaknesses"] = weaknesses
-                    df.at[idx, "speciality"] = llm_json.get("speciality", "")
-                    df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
-                    updated = True
-                else:
-                    extraction_status = f"JSON extraction: FAILED\nRaw: {llm_json.get('raw', '') if isinstance(llm_json, dict) else llm_json}"
-                break
-        # If not updated (repo_id not found), append a new row
-        if not updated and isinstance(llm_json, dict) and "error" not in llm_json:
-            extraction_status = "JSON extraction: SUCCESS (new row)"
-            strengths = llm_json.get("strength", "")
-            weaknesses = llm_json.get("weaknesses", "")
-            new_row = {
-                "repo id": repo_id,
-                "strength": strengths,
-                "weaknesses": weaknesses,
-                "speciality": llm_json.get("speciality", ""),
-                "relevance rating": llm_json.get("relevance rating", "")
-            }
-            df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
-        df.to_csv(csv_filename, index=False)
-    except Exception as e:
-        df = read_csv_as_text(csv_filename)
-        extraction_status = f"CSV update error: {e}"
-    # Move to next repo for next click
-    current_repo_idx += 1
-    summary = f"{extraction_status}\n\nStrengths:\n{strengths}\n\nWeaknesses:\n{weaknesses}"
-    return combined_content, summary, df
-
-def go_to_analysis():
-    return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
-
-def go_to_input():
-    return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
-
-def go_to_chatbot():
-    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
-
-def go_to_start():
-    return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
-
-def go_to_results():
-    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
-
-repo_id_input = gr.Textbox(label="Enter repo IDs (comma or newline separated)", lines=5, placeholder="repo1, repo2\nrepo3")
-df_output = gr.Dataframe(headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating", "Usecase"],
-                         datatype=["str", "str", "str", "str", "str", "str"]
-)
-
-
-
-
-def use_keywords_to_search_and_update_csv(keywords):
-    global last_repo_ids, current_repo_idx
-    if not keywords:
-        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-    # Split keywords and search for each
-    keyword_list = [k.strip() for k in keywords.split(",") if k.strip()]
-    repo_ids = []
-    for kw in keyword_list:
-        repo_ids.extend(search_top_spaces(kw, limit=3))  # limit=3 per keyword
-    # Remove duplicates while preserving order
-    seen = set()
-    unique_repo_ids = []
-    for rid in repo_ids:
-        if rid not in seen:
-            unique_repo_ids.append(rid)
-            seen.add(rid)
-    last_repo_ids = unique_repo_ids
-    current_repo_idx = 0
-    csv_filename = "repo_ids.csv"
-    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
-        writer = csv.writer(csvfile)
-        writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-        for repo_id in unique_repo_ids:
-            writer.writerow([repo_id, "", "", "", ""])
-    df = read_csv_as_text(csv_filename)
-    return df
-
-def batch_analyze_and_select_top():
-    csv_filename = "repo_ids.csv"
-    try:
-        df = read_csv_as_text(csv_filename)
-        all_infos = []
-        # Analyze each repo and update CSV
-        for idx, row in df.iterrows():
-            repo_id = row["repo id"]
-            try:
-                download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
-                txt_path = combine_repo_files_for_llm()
-                llm_output = analyze_combined_file(txt_path)
-                last_start = llm_output.rfind('{')
-                last_end = llm_output.rfind('}')
-                if last_start != -1 and last_end != -1 and last_end > last_start:
-                    final_json_str = llm_output[last_start:last_end+1]
-                else:
-                    final_json_str = llm_output
-                llm_json = parse_llm_json_response(final_json_str)
-                if isinstance(llm_json, dict) and "error" not in llm_json:
-                    df.at[idx, "strength"] = llm_json.get("strength", "")
-                    df.at[idx, "weaknesses"] = llm_json.get("weaknesses", "")
-                    df.at[idx, "speciality"] = llm_json.get("speciality", "")
-                    df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
-                    all_infos.append({"repo id": repo_id, **llm_json})
-            except Exception as e:
-                all_infos.append({"repo id": repo_id, "error": str(e)})
-        df.to_csv(csv_filename, index=False)
-        # Display all info
-        all_info_str = "\n\n".join([str(info) for info in all_infos])
-        # Let LLM choose the best 3
-        from openai import OpenAI
-        import os
-        client = OpenAI(api_key=os.getenv("modal_api"))
-        client.base_url = os.getenv("base_url")
-        selection_prompt = (
-            "You are a helpful assistant. You are given a list of repo analyses in JSON format. "
-            "Choose the 3 repos that are the most impressive, relevant, or useful. "
-            "Return ONLY a JSON array of the 3 best repo ids, in order of preference, under the key 'top_repos'. "
-            "Example: {\"top_repos\": [\"repo1\", \"repo2\", \"repo3\"]}"
-        )
-        user_content = "Here are the repo analyses:\n" + all_info_str
-        response = client.chat.completions.create(
-            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
-            messages=[
-                {"role": "system", "content": selection_prompt},
-                {"role": "user", "content": user_content}
-            ],
-            max_tokens=256,
-            temperature=0.3
-        )
-        selection_json = parse_llm_json_response(response.choices[0].message.content)
-        top_repos = selection_json.get("top_repos", [])
-        return all_info_str, str(top_repos), df
-    except Exception as e:
-        return f"Error in batch analysis: {e}", "", pd.DataFrame()
-
-def batch_analyze_and_select_top_for_chat(state):
-    csv_filename = "repo_ids.csv"
-    try:
-        df = read_csv_as_text(csv_filename)
-        all_infos = []
-        for idx, row in df.iterrows():
-            repo_id = row["repo id"]
-            try:
-                download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
-                txt_path = combine_repo_files_for_llm()
-                llm_output = analyze_combined_file(txt_path)
-                last_start = llm_output.rfind('{')
-                last_end = llm_output.rfind('}')
-                if last_start != -1 and last_end != -1 and last_end > last_start:
-                    final_json_str = llm_output[last_start:last_end+1]
-                else:
-                    final_json_str = llm_output
-                llm_json = parse_llm_json_response(final_json_str)
-                if isinstance(llm_json, dict) and "error" not in llm_json:
-                    df.at[idx, "strength"] = llm_json.get("strength", "")
-                    df.at[idx, "weaknesses"] = llm_json.get("weaknesses", "")
-                    df.at[idx, "speciality"] = llm_json.get("speciality", "")
-                    df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
-                    all_infos.append({"repo id": repo_id, **llm_json})
-            except Exception as e:
-                all_infos.append({"repo id": repo_id, "error": str(e)})
-        df.to_csv(csv_filename, index=False)
-        all_info_str = "\n\n".join([str(info) for info in all_infos])
-        from openai import OpenAI
-        import os
-        client = OpenAI(api_key=os.getenv("modal_api"))
-        client.base_url = os.getenv("base_url")
-        selection_prompt = (
-            "You are a helpful assistant. You are given a list of repo analyses in JSON format. "
-            "Choose the 3 repos that are the most impressive, relevant, or useful. "
-            "Return ONLY a JSON array of the 3 best repo ids, in order of preference, under the key 'top_repos'. "
-            "Example: {\"top_repos\": [\"repo1\", \"repo2\", \"repo3\"]}"
-        )
-        user_content = "Here are the repo analyses:\n" + all_info_str
-        response = client.chat.completions.create(
-            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
-            messages=[
-                {"role": "system", "content": selection_prompt},
-                {"role": "user", "content": user_content}
-            ],
-            max_tokens=256,
-            temperature=0.3
-        )
-        selection_json = parse_llm_json_response(response.choices[0].message.content)
-        top_repos = selection_json.get("top_repos", [])
-        # Add a new assistant message to the chat state
-        new_message = ("", f"The top 3 repo IDs are: {', '.join(top_repos)}")
-        if state is None:
-            state = []
-        state = state + [list(new_message)]
-        return state
-    except Exception as e:
-        new_message = ("", f"Error in batch analysis: {e}")
-        if state is None:
-            state = []
-        state = state + [list(new_message)]
-        return state
-
-with gr.Blocks() as demo:
-    page_state = gr.State(0)
-
-    # --- Start Page: Option Selection ---
-    with gr.Column(visible=True) as start_page:
-        gr.Markdown("## Welcome! How would you like to proceed?")
-        option_a_btn = gr.Button("A) I know which repos I want to search and research about")
-        option_b_btn = gr.Button("B) I don't know exactly what I want (Chatbot)")
-
-    # --- Page 1: Input ---
-    with gr.Column(visible=False) as input_page:
-        gr.Markdown("## Enter Keyword or Repo IDs")
-        keyword_input = gr.Textbox(label="Enter keywords to search repos (comma or newline separated)", lines=2, placeholder="e.g. audio, vision\ntext")
-        keyword_btn = gr.Button("Search and Update Repo List")
-        repo_id_box = repo_id_input.render()
-        df_box = df_output.render()
-        submit_btn = gr.Button("Submit Repo IDs")
-        next_btn = gr.Button("Next: Go to Analysis")
-        back_to_start_btn = gr.Button("Back to Start")
-
-    # --- Page 2: Analysis ---
-    with gr.Column(visible=False) as analysis_page:
-        gr.Markdown("## Combine and Display Repo Files")
-        combine_btn = gr.Button("Download, Combine & Show .py/.md Files from Next Repo and Analyze")
-        combined_txt = gr.Textbox(label="Combined Repo Files", lines=20)
-        llm_output_txt = gr.Textbox(label="LLM Analysis Output", lines=10)
-        df_display = gr.Dataframe(
-            headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating", "Usecase"],
-            datatype=["str", "str", "str", "str", "str", "str"]
-        )
-        back_btn = gr.Button("Back to Input")
-        back_to_start_btn2 = gr.Button("Back to Start")
-
-    # --- Page 3: Chatbot ---
-    with gr.Column(visible=False) as chatbot_page:
-        gr.Markdown("## Repo Recommendation Chatbot")
-        chatbot = gr.Chatbot()
-        state = gr.State([])
-        user_input = gr.Textbox(label="Your message", placeholder="Describe your ideal repo or answer the assistant's questions...")
-        send_btn = gr.Button("Send")
-        end_btn = gr.Button("End Chat and Extract Keywords")
-        keywords_output = gr.Textbox(label="Extracted Keywords for Repo Search", interactive=False)
-        go_to_results_btn = gr.Button("Find Repos with These Keywords")
-        back_to_start_btn3 = gr.Button("Back to Start")
-
-    # --- Page 4: Results after Chatbot ---
-    with gr.Column(visible=False) as results_page:
-        gr.Markdown("## Repo Results Based on Your Conversation")
-        results_df = gr.Dataframe(
-            headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating", "Usecase"],
-            datatype=["str", "str", "str", "str", "str", "str"]
-        )
-        analyze_next_btn = gr.Button("Download, Combine & Analyze Next Repo")
-        combined_txt_results = gr.Textbox(label="Combined Repo Files", lines=20)
-        llm_output_txt_results = gr.Textbox(label="LLM Analysis Output", lines=10)
-        back_to_start_btn4 = gr.Button("Back to Start")
-        go_to_batch_btn = gr.Button("Go to Batch Analysis Page", visible=True)
-
-    # --- Page 5: Batch Analysis Page ---
-    with gr.Column(visible=False) as batch_page:
-        gr.Markdown("## Batch Analysis & Top 3 Selection")
-        batch_btn = gr.Button("Batch Analyze All & Select Top 3", visible=True)
-        batch_info_txt = gr.Textbox(label="All Repo Analyses", lines=10)
-        top3_txt = gr.Textbox(label="Top 3 Repo IDs", lines=1)
-        show_top3_chat_btn = gr.Button("Show Top 3 Repo IDs in Chat", visible=True)
-        show_top3_page_btn = gr.Button("Show Top 3 Repos on New Page", visible=True)
-        back_to_results_from_batch_btn = gr.Button("Back to Results")
-
-    # --- Page 6: Top 3 Repos Page ---
-    with gr.Column(visible=False) as top3_page:
-        gr.Markdown("## Top 3 Recommended Repos")
-        top3_df = gr.Dataframe(headers=["repo id"], datatype=["str"])
-        back_to_results_btn = gr.Button("Back to Results")
-
-    # Navigation logic
-    option_a_btn.click(go_to_input, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-    option_b_btn.click(
-        lambda: (gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), [["", CHATBOT_INITIAL_MESSAGE]]),
-        inputs=None,
-        outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page, state]
-    )
-    next_btn.click(go_to_analysis, inputs=None, outputs=[input_page, analysis_page, chatbot_page, results_page, batch_page, top3_page])
-    back_btn.click(go_to_input, inputs=None, outputs=[input_page, analysis_page, chatbot_page, results_page, batch_page, top3_page])
-    back_to_start_btn.click(go_to_start, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-    back_to_start_btn2.click(go_to_start, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-    back_to_start_btn3.click(go_to_start, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-    back_to_start_btn4.click(go_to_start, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-    go_to_batch_btn.click(lambda: (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)), inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-    back_to_results_from_batch_btn.click(lambda: (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)), inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-    back_to_results_btn.click(lambda: (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)), inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-
-    # Keyword and repo input logic
-    keyword_btn.click(keyword_search_and_update, inputs=keyword_input, outputs=df_box)
-    submit_btn.click(process_repo_input_and_store, inputs=repo_id_box, outputs=df_box)
-
-    # Analysis logic
-    combine_btn.click(show_combined_repo_and_llm, inputs=None, outputs=[combined_txt, llm_output_txt, df_display])
-
-    # Chatbot logic
-    def user_send(user_message, history):
-        assistant_reply = chat_with_user(user_message, history)
-        history = history + [[user_message, assistant_reply]]
-        return history, history, ""
-
-    def end_chat(history):
-        keywords = extract_keywords_from_conversation(history)
-        global generated_keywords
-        generated_keywords.clear()
-        generated_keywords.extend([k.strip() for k in keywords.split(",") if k.strip()])
-        return keywords
-
-    def go_to_results_from_chatbot(keywords):
-        # Use the keywords to search and update the CSV, then display the DataFrame
-        df = use_keywords_to_search_and_update_csv(keywords)
-        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), df
-
-    send_btn.click(user_send, inputs=[user_input, state], outputs=[chatbot, state, user_input])
-    end_btn.click(end_chat, inputs=state, outputs=keywords_output)
-    go_to_results_btn.click(
-        go_to_results_from_chatbot,
-        inputs=keywords_output,
-        outputs=[chatbot_page, input_page, analysis_page, results_page, batch_page, top3_page, results_df]
-    )
-
-    # Add logic for the new button on results_page
-    analyze_next_btn.click(show_combined_repo_and_llm, inputs=None, outputs=[combined_txt_results, llm_output_txt_results, results_df])
-    batch_btn.click(batch_analyze_and_select_top, inputs=None, outputs=[batch_info_txt, top3_txt, df_output])
-    show_top3_chat_btn.click(batch_analyze_and_select_top_for_chat, inputs=[state], outputs=[state])
-
-    def show_top3_page():
-        # Run batch analysis, get top 3, save to CSV, and return DataFrame
-        all_info_str, top3_str, df = batch_analyze_and_select_top()
-        import pandas as pd
-        import ast
-        try:
-            top3_ids = ast.literal_eval(top3_str)
-            if isinstance(top3_ids, str):
-                top3_ids = [top3_ids]
-        except Exception:
-            top3_ids = []
-        top3_df_data = pd.DataFrame({"repo id": top3_ids})
-        top3_df_data.to_csv("top3_repos.csv", index=False)
-        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), top3_df_data
-
-    show_top3_page_btn.click(show_top3_page, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page, top3_df])
-
-demo.launch()
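One pattern from the deleted file worth a note: the analysis helpers pulled the final summary out of a chatty LLM response by slicing between the last '{' and the last '}'. A minimal sketch of that extraction, using json.loads as a stand-in for the repo's parse_llm_json_response and an invented sample string:

import json

llm_output = 'Reasoning text... {"draft": 1} more text {"strength": "clean API", "relevance rating": "4/5"}'

# Slice from the last '{' to the last '}'; fall back to the raw output
# if no well-ordered brace pair is found.
last_start = llm_output.rfind('{')
last_end = llm_output.rfind('}')
if last_start != -1 and last_end != -1 and last_end > last_start:
    final_json_str = llm_output[last_start:last_end + 1]
else:
    final_json_str = llm_output

print(json.loads(final_json_str)["strength"])  # clean API

The slice breaks on nested objects: rfind('{') lands on the innermost opening brace, so a response ending in a nested dict would need a real JSON scanner instead.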
hf_utils.py
CHANGED
@@ -35,6 +35,10 @@ def download_filtered_space_files(space_id: str, local_dir: str = "repo_files",
         rel_path = os.path.relpath(src_file, repo_path)
         dest_file = os.path.join(local_dir, rel_path)
         os.makedirs(os.path.dirname(dest_file), exist_ok=True)
+
+        # Debug: Show exactly which file is being downloaded
+        print(f"DEBUG: Downloading file: {rel_path}")
+
         shutil.copy2(src_file, dest_file)
         copied_files += 1
 
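For context, a runnable sketch of the copy loop that the hf_utils.py debug print lands in; the helper name and the way src_files is obtained are hypothetical, and only the loop body mirrors the diff:

import os
import shutil

def copy_files_with_debug(src_files, repo_path, local_dir):
    copied_files = 0
    for src_file in src_files:
        # Recreate the file's path relative to the snapshot root under local_dir.
        rel_path = os.path.relpath(src_file, repo_path)
        dest_file = os.path.join(local_dir, rel_path)
        os.makedirs(os.path.dirname(dest_file), exist_ok=True)

        # Debug: Show exactly which file is being downloaded
        print(f"DEBUG: Downloading file: {rel_path}")

        shutil.copy2(src_file, dest_file)
        copied_files += 1
    return copied_files

A plain print shows up directly in a Space's container logs, which is presumably why it was chosen over the logging module here.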