naman1102 committed on
Commit
bc78434
·
1 Parent(s): f03a154

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -0
app.py CHANGED
@@ -209,6 +209,126 @@ def use_keywords_to_search_and_update_csv(keywords):
209
  df = read_csv_as_text(csv_filename)
210
  return df
211
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  with gr.Blocks() as demo:
213
  page_state = gr.State(0)
214
 
@@ -266,6 +386,14 @@ with gr.Blocks() as demo:
266
  llm_output_txt_results = gr.Textbox(label="LLM Analysis Output", lines=10)
267
  back_to_start_btn4 = gr.Button("Back to Start")
268
 
 
 
 
 
 
 
 
 
269
  # Navigation logic
270
  option_a_btn.click(go_to_input, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page])
271
  option_b_btn.click(go_to_chatbot, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page])
@@ -312,4 +440,10 @@ with gr.Blocks() as demo:
312
  # Add logic for the new button on results_page
313
  analyze_next_btn.click(show_combined_repo_and_llm, inputs=None, outputs=[combined_txt_results, llm_output_txt_results, results_df])
314
 
 
 
 
 
 
 
315
  demo.launch()
 
209
  df = read_csv_as_text(csv_filename)
210
  return df
211
 
212
def batch_analyze_and_select_top():
    """Analyze every repo listed in repo_ids.csv, persist results, and pick the top 3 via the LLM.

    For each row of the CSV the repo is downloaded, its files are combined and
    analyzed, and the extracted fields ("strength", "weaknesses", "speciality",
    "relevance rating") are written back into the DataFrame, which is then saved
    to the same CSV. A second LLM call ranks all analyses and returns the best 3
    repo ids.

    Returns:
        tuple: (all analyses as a text blob, the top-3 repo ids as a string,
        the updated DataFrame). On any top-level failure returns an error
        message, "", and an empty DataFrame so the Gradio outputs always
        receive valid values.
    """
    csv_filename = "repo_ids.csv"
    try:
        df = read_csv_as_text(csv_filename)
        all_infos = []
        # Analyze each repo; per-repo failures are recorded and must not abort the batch.
        for idx, row in df.iterrows():
            repo_id = row["repo id"]
            try:
                download_space_repo(repo_id, local_dir="repo_files")
                txt_path = combine_repo_files_for_llm()
                llm_output = analyze_combined_file(txt_path)
                # The model may wrap the JSON in prose; keep only the last {...} span.
                last_start = llm_output.rfind('{')
                last_end = llm_output.rfind('}')
                if last_start != -1 and last_end != -1 and last_end > last_start:
                    final_json_str = llm_output[last_start:last_end + 1]
                else:
                    final_json_str = llm_output
                llm_json = parse_llm_json_response(final_json_str)
                if isinstance(llm_json, dict) and "error" not in llm_json:
                    df.at[idx, "strength"] = llm_json.get("strength", "")
                    df.at[idx, "weaknesses"] = llm_json.get("weaknesses", "")
                    df.at[idx, "speciality"] = llm_json.get("speciality", "")
                    df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
                # Guard: a non-dict parse result would make {**llm_json} raise TypeError.
                if isinstance(llm_json, dict):
                    all_infos.append({"repo id": repo_id, **llm_json})
                else:
                    all_infos.append({"repo id": repo_id, "error": str(llm_json)})
            except Exception as e:
                # Best-effort: record the failure and keep analyzing remaining repos.
                all_infos.append({"repo id": repo_id, "error": str(e)})
        df.to_csv(csv_filename, index=False)
        # Display all info as one readable text blob.
        all_info_str = "\n\n".join(str(info) for info in all_infos)
        # Ask the LLM to choose the 3 best repos from the collected analyses.
        from openai import OpenAI
        import os
        client = OpenAI(api_key=os.getenv("modal_api"))
        client.base_url = os.getenv("base_url")
        selection_prompt = (
            "You are a helpful assistant. You are given a list of repo analyses in JSON format. "
            "Choose the 3 repos that are the most impressive, relevant, or useful. "
            "Return ONLY a JSON array of the 3 best repo ids, in order of preference, under the key 'top_repos'. "
            "Example: {\"top_repos\": [\"repo1\", \"repo2\", \"repo3\"]}"
        )
        user_content = "Here are the repo analyses:\n" + all_info_str
        response = client.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=[
                {"role": "system", "content": selection_prompt},
                {"role": "user", "content": user_content},
            ],
            max_tokens=256,
            temperature=0.3,
        )
        selection_json = parse_llm_json_response(response.choices[0].message.content)
        # The parser may legitimately return a list or scalar; .get only exists on dicts.
        if isinstance(selection_json, dict):
            top_repos = selection_json.get("top_repos", [])
        else:
            top_repos = []
        if not isinstance(top_repos, list):
            # Model returned a scalar instead of an array — normalize to a list.
            top_repos = [top_repos]
        return all_info_str, str(top_repos), df
    except Exception as e:
        return f"Error in batch analysis: {e}", "", pd.DataFrame()
268
+
269
def batch_analyze_and_select_top_for_chat(state):
    """Run the full batch analysis and append the top-3 result as a chat message.

    Same pipeline as the non-chat variant: analyze every repo in repo_ids.csv,
    write the extracted fields back to the CSV, then ask the LLM to rank the
    analyses. Instead of returning text/DataFrame outputs, the result (or an
    error) is appended to the Gradio chat history.

    Args:
        state: Gradio chat history as a list of [user, assistant] pairs, or
            None when the chat has not started yet.

    Returns:
        list: a new history list with one extra ["", assistant-text] pair
        appended; the incoming ``state`` is not mutated.
    """
    csv_filename = "repo_ids.csv"
    try:
        df = read_csv_as_text(csv_filename)
        all_infos = []
        # Per-repo failures are recorded and must not abort the batch.
        for idx, row in df.iterrows():
            repo_id = row["repo id"]
            try:
                download_space_repo(repo_id, local_dir="repo_files")
                txt_path = combine_repo_files_for_llm()
                llm_output = analyze_combined_file(txt_path)
                # The model may wrap the JSON in prose; keep only the last {...} span.
                last_start = llm_output.rfind('{')
                last_end = llm_output.rfind('}')
                if last_start != -1 and last_end != -1 and last_end > last_start:
                    final_json_str = llm_output[last_start:last_end + 1]
                else:
                    final_json_str = llm_output
                llm_json = parse_llm_json_response(final_json_str)
                if isinstance(llm_json, dict) and "error" not in llm_json:
                    df.at[idx, "strength"] = llm_json.get("strength", "")
                    df.at[idx, "weaknesses"] = llm_json.get("weaknesses", "")
                    df.at[idx, "speciality"] = llm_json.get("speciality", "")
                    df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
                # Guard: a non-dict parse result would make {**llm_json} raise TypeError.
                if isinstance(llm_json, dict):
                    all_infos.append({"repo id": repo_id, **llm_json})
                else:
                    all_infos.append({"repo id": repo_id, "error": str(llm_json)})
            except Exception as e:
                all_infos.append({"repo id": repo_id, "error": str(e)})
        df.to_csv(csv_filename, index=False)
        all_info_str = "\n\n".join(str(info) for info in all_infos)
        # Ask the LLM to choose the 3 best repos from the collected analyses.
        from openai import OpenAI
        import os
        client = OpenAI(api_key=os.getenv("modal_api"))
        client.base_url = os.getenv("base_url")
        selection_prompt = (
            "You are a helpful assistant. You are given a list of repo analyses in JSON format. "
            "Choose the 3 repos that are the most impressive, relevant, or useful. "
            "Return ONLY a JSON array of the 3 best repo ids, in order of preference, under the key 'top_repos'. "
            "Example: {\"top_repos\": [\"repo1\", \"repo2\", \"repo3\"]}"
        )
        user_content = "Here are the repo analyses:\n" + all_info_str
        response = client.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=[
                {"role": "system", "content": selection_prompt},
                {"role": "user", "content": user_content},
            ],
            max_tokens=256,
            temperature=0.3,
        )
        selection_json = parse_llm_json_response(response.choices[0].message.content)
        # The parser may legitimately return a list or scalar; .get only exists on dicts.
        if isinstance(selection_json, dict):
            top_repos = selection_json.get("top_repos", [])
        else:
            top_repos = []
        if not isinstance(top_repos, list):
            top_repos = [top_repos]
        # str() each id: join() raises TypeError on non-string items, which would
        # otherwise convert a successful run into the generic error message below.
        joined_ids = ", ".join(str(r) for r in top_repos)
        new_message = ("", f"The top 3 repo IDs are: {joined_ids}")
        if state is None:
            state = []
        return state + [list(new_message)]
    except Exception as e:
        new_message = ("", f"Error in batch analysis: {e}")
        if state is None:
            state = []
        return state + [list(new_message)]
331
+
332
  with gr.Blocks() as demo:
333
  page_state = gr.State(0)
334
 
 
386
  llm_output_txt_results = gr.Textbox(label="LLM Analysis Output", lines=10)
387
  back_to_start_btn4 = gr.Button("Back to Start")
388
 
389
+ # Add at the end, after results_page
390
+ batch_btn = gr.Button("Batch Analyze All & Select Top 3", visible=True)
391
+ batch_info_txt = gr.Textbox(label="All Repo Analyses", lines=10)
392
+ top3_txt = gr.Textbox(label="Top 3 Repo IDs", lines=1)
393
+
394
+ # Add a button to show top 3 in chat
395
+ show_top3_chat_btn = gr.Button("Show Top 3 Repo IDs in Chat", visible=True)
396
+
397
  # Navigation logic
398
  option_a_btn.click(go_to_input, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page])
399
  option_b_btn.click(go_to_chatbot, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page])
 
440
  # Add logic for the new button on results_page
441
  analyze_next_btn.click(show_combined_repo_and_llm, inputs=None, outputs=[combined_txt_results, llm_output_txt_results, results_df])
442
 
443
+ # Add logic for the batch button
444
+ batch_btn.click(batch_analyze_and_select_top, inputs=None, outputs=[batch_info_txt, top3_txt, df_output])
445
+
446
+ # Add logic for showing top 3 in chat
447
+ show_top3_chat_btn.click(batch_analyze_and_select_top_for_chat, inputs=[state], outputs=[state])
448
+
449
  demo.launch()