naman1102 committed on
Commit
39207e4
·
1 Parent(s): dbb5879

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +144 -9
app.py CHANGED
@@ -28,6 +28,110 @@ CHATBOT_INITIAL_MESSAGE = "Hello! Please tell me about your ideal Hugging Face r
28
 
29
  # --- Helper Functions (Logic) ---
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  def write_repos_to_csv(repo_ids: List[str]) -> None:
32
  """Writes a list of repo IDs to the CSV file, overwriting the previous content."""
33
  try:
@@ -124,7 +228,7 @@ def analyze_and_update_single_repo(repo_id: str, user_requirements: str = "") ->
124
  if not repo_found_in_df:
125
  logger.warning(f"Repo ID {repo_id} not found in CSV for updating.")
126
 
127
- # Write CSV with better error handling and flushing
128
  try:
129
  df.to_csv(CSV_FILE, index=False)
130
  # Force file system flush
@@ -432,6 +536,19 @@ def create_ui() -> gr.Blocks:
432
  pass
433
 
434
  gr.Markdown("### πŸ“Š Results Dashboard")
 
 
 
 
 
 
 
 
 
 
 
 
 
435
  gr.Markdown("πŸ’‘ **Tip:** Click on any repository name to explore it in detail!")
436
 
437
  # Modal popup for repository action selection
@@ -450,6 +567,7 @@ def create_ui() -> gr.Blocks:
450
  explore_repo_btn = gr.Button("πŸ” Open in Repo Explorer", variant="secondary", size="lg")
451
  cancel_modal_btn = gr.Button("❌ Cancel", size="lg")
452
 
 
453
  df_output = gr.Dataframe(
454
  headers=["Repository", "Strengths", "Weaknesses", "Speciality", "Relevance"],
455
  wrap=True,
@@ -514,7 +632,7 @@ def create_ui() -> gr.Blocks:
514
  </div>
515
  """
516
  )
517
-
518
  # --- Event Handler Functions ---
519
 
520
  def handle_repo_id_submission(text: str) -> Tuple[List[str], int, pd.DataFrame, str, Any]:
@@ -677,10 +795,10 @@ def create_ui() -> gr.Blocks:
677
 
678
  return "", gr.update(visible=False), gr.update()
679
 
680
- def handle_analyze_all_repos(repo_ids: List[str], user_requirements: str, progress=gr.Progress()) -> Tuple[pd.DataFrame, str]:
681
  """Analyzes all repositories in the CSV file with progress tracking."""
682
  if not repo_ids:
683
- return pd.DataFrame(), "Status: No repositories to analyze. Please submit repo IDs first."
684
 
685
  total_repos = len(repo_ids)
686
 
@@ -762,21 +880,31 @@ def create_ui() -> gr.Blocks:
762
  # Complete the progress
763
  progress(1.0, desc="Batch analysis completed!")
764
 
 
 
 
 
 
 
765
  # Final status with detailed breakdown
766
  final_status = f"πŸŽ‰ Batch Analysis Complete!\nβœ… Successful: {successful_analyses}/{total_repos}\n❌ Failed: {failed_analyses}/{total_repos}"
767
  if csv_update_failures > 0:
768
  final_status += f"\n⚠️ CSV Update Issues: {csv_update_failures}/{total_repos}"
769
 
770
- # Get final updated dataframe
771
- updated_df = read_csv_to_dataframe()
 
 
 
 
772
 
773
  logger.info(f"Batch analysis completed: {successful_analyses} successful, {failed_analyses} failed, {csv_update_failures} CSV update issues")
774
- return updated_df, final_status
775
 
776
  except Exception as e:
777
  logger.error(f"Error in batch analysis: {e}")
778
  error_status = f"❌ Batch analysis failed: {e}"
779
- return read_csv_to_dataframe(), error_status
780
 
781
  def handle_visit_repo(repo_id: str) -> Tuple[Any, str]:
782
  """Handle visiting the Hugging Face Space for the repository."""
@@ -829,7 +957,7 @@ def create_ui() -> gr.Blocks:
829
  ).then(
830
  fn=handle_analyze_all_repos,
831
  inputs=[repo_ids_state, user_requirements_state],
832
- outputs=[df_output, status_box_analysis]
833
  )
834
 
835
  # Chatbot Tab
@@ -893,6 +1021,13 @@ def create_ui() -> gr.Blocks:
893
  outputs=[selected_repo_display, repo_action_modal, tabs]
894
  )
895
 
 
 
 
 
 
 
 
896
  return app
897
 
898
  if __name__ == "__main__":
 
28
 
29
  # --- Helper Functions (Logic) ---
30
 
31
def get_top_relevant_repos(df: pd.DataFrame, user_requirements: str, top_n: int = 3) -> pd.DataFrame:
    """
    Uses LLM to select the top N most relevant repositories based on user requirements and analysis data.

    Args:
        df: Analysis table with the columns "repo id", "strength", "weaknesses",
            "speciality" and "relevance rating".
        user_requirements: Free-text description of what the user is looking for.
            Blank (or None) falls back to a generic recommendation request.
        top_n: Maximum number of repositories to return.

    Returns:
        The rows of ``df`` chosen by the LLM, in the order the LLM ranked them.
        If the LLM call fails or its reply cannot be parsed, the first ``top_n``
        rows that contain any analysis data are returned instead. If ``df`` is
        empty, contains no analysed rows, or an unexpected error occurs, an
        empty frame with the expected columns is returned.
    """
    # Kept local, as in the original, so the function does not depend on
    # file-level imports of these two modules.
    import json
    import re

    columns = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
    analysis_columns = columns[1:]

    def _cell_text(value) -> str:
        # Normalise any cell (NaN, None, number, string) to stripped text so
        # the filter below works regardless of the column dtype.
        return "" if pd.isna(value) else str(value).strip()

    try:
        if df.empty:
            return pd.DataFrame(columns=columns)

        # Text view of the relevant columns. Using `.str` directly on the raw
        # columns fails for numeric dtypes and treats NaN as "not empty".
        text_df = pd.DataFrame(
            {name: df[name].map(_cell_text) for name in columns},
            index=df.index,
        )

        # Filter out rows with no analysis data
        has_analysis = (text_df[analysis_columns] != "").any(axis=1)
        analyzed_df = df[has_analysis].copy()
        analyzed_text = text_df[has_analysis]

        if analyzed_df.empty:
            logger.warning("No analyzed repositories found for LLM selection")
            return pd.DataFrame(columns=columns)

        # Create a prompt for the LLM
        csv_data = "".join(
            f"Repository: {row['repo id']}\n"
            f"Strengths: {row['strength']}\n"
            f"Weaknesses: {row['weaknesses']}\n"
            f"Speciality: {row['speciality']}\n"
            f"Relevance: {row['relevance rating']}\n\n"
            for row in analyzed_text.to_dict("records")
        )

        user_context = (
            user_requirements
            if (user_requirements or "").strip()
            else "General repository recommendation"
        )

        prompt = f"""Based on the user's requirements and the analysis of repositories below, select the top {top_n} most relevant repositories.

User Requirements:
{user_context}

Repository Analysis Data:
{csv_data}

Please analyze all repositories and select the {top_n} most relevant ones based on:
1. How well they match the user's specific requirements
2. Their strengths and capabilities
3. Their relevance rating
4. Their speciality alignment with user needs

Return ONLY a JSON list of the repository IDs in order of relevance (most relevant first). Example format:
["repo1", "repo2", "repo3"]

Selected repositories:"""

        try:
            from openai import OpenAI
            client = OpenAI(api_key=os.getenv("modal_api"))
            client.base_url = os.getenv("base_url")

            response = client.chat.completions.create(
                model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
                messages=[
                    {"role": "system", "content": "You are an expert at analyzing and ranking repositories based on user requirements. Always return valid JSON."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=200,
                temperature=0.3
            )

            # `content` may be None for an empty completion.
            llm_response = (response.choices[0].message.content or "").strip()
            logger.info(f"LLM response for top repos: {llm_response}")

            # Extract the first flat JSON array from the response. The negated
            # character class also matches newlines, so a pretty-printed
            # (multi-line) array is found, and it cannot over-capture when the
            # reply contains more than one pair of brackets.
            selected_repos = []
            json_match = re.search(r'\[[^\[\]]*\]', llm_response)
            if json_match:
                try:
                    parsed = json.loads(json_match.group())
                except json.JSONDecodeError:
                    parsed = []
                if isinstance(parsed, list):
                    selected_repos = [str(item).strip() for item in parsed]
                    logger.info(f"LLM selected repositories: {selected_repos}")

            # Map each known repo id to the position of its first row.
            first_position = {}
            for position, known_id in enumerate(analyzed_text["repo id"]):
                first_position.setdefault(known_id, position)

            # Keep the LLM's ranking order; skip unknown (hallucinated) and
            # duplicate ids so they do not use up one of the top_n slots.
            picked_positions = []
            for repo_id in selected_repos:
                if len(picked_positions) >= top_n:
                    break
                position = first_position.get(repo_id)
                if position is not None and position not in picked_positions:
                    picked_positions.append(position)

            if picked_positions:
                top_repos = analyzed_df.iloc[picked_positions]
                logger.info(f"Successfully selected {len(top_repos)} repositories using LLM")
                return top_repos

            # Fallback: if LLM response parsing fails, use first N analyzed repos
            logger.warning("Failed to parse LLM response, using fallback selection")
            return analyzed_df.head(top_n)

        except Exception as llm_error:
            logger.error(f"LLM selection failed: {llm_error}")
            # Fallback: return first N repositories with analysis data
            return analyzed_df.head(top_n)

    except Exception as e:
        logger.error(f"Error in LLM-based repo selection: {e}")
        return pd.DataFrame(columns=columns)
134
+
135
  def write_repos_to_csv(repo_ids: List[str]) -> None:
136
  """Writes a list of repo IDs to the CSV file, overwriting the previous content."""
137
  try:
 
228
  if not repo_found_in_df:
229
  logger.warning(f"Repo ID {repo_id} not found in CSV for updating.")
230
 
231
+ # Write CSV with better error handling and flushing
232
  try:
233
  df.to_csv(CSV_FILE, index=False)
234
  # Force file system flush
 
536
  pass
537
 
538
  gr.Markdown("### πŸ“Š Results Dashboard")
539
+
540
+ # Top 3 Most Relevant Repositories (initially hidden)
541
+ with gr.Column(visible=False) as top_repos_section:
542
+ gr.Markdown("### πŸ† Top 3 Most Relevant Repositories")
543
+ gr.Markdown("🎯 **These are the highest-rated repositories based on your requirements:**")
544
+ top_repos_df = gr.Dataframe(
545
+ headers=["Repository", "Strengths", "Weaknesses", "Speciality", "Relevance"],
546
+ wrap=True,
547
+ interactive=False,
548
+ height=200,
549
+ info="Click on any repository name to explore or visit"
550
+ )
551
+
552
  gr.Markdown("πŸ’‘ **Tip:** Click on any repository name to explore it in detail!")
553
 
554
  # Modal popup for repository action selection
 
567
  explore_repo_btn = gr.Button("πŸ” Open in Repo Explorer", variant="secondary", size="lg")
568
  cancel_modal_btn = gr.Button("❌ Cancel", size="lg")
569
 
570
+ gr.Markdown("### πŸ“‹ All Analysis Results")
571
  df_output = gr.Dataframe(
572
  headers=["Repository", "Strengths", "Weaknesses", "Speciality", "Relevance"],
573
  wrap=True,
 
632
  </div>
633
  """
634
  )
635
+
636
  # --- Event Handler Functions ---
637
 
638
  def handle_repo_id_submission(text: str) -> Tuple[List[str], int, pd.DataFrame, str, Any]:
 
795
 
796
  return "", gr.update(visible=False), gr.update()
797
 
798
+ def handle_analyze_all_repos(repo_ids: List[str], user_requirements: str, progress=gr.Progress()) -> Tuple[pd.DataFrame, str, pd.DataFrame, Any]:
799
  """Analyzes all repositories in the CSV file with progress tracking."""
800
  if not repo_ids:
801
+ return pd.DataFrame(), "Status: No repositories to analyze. Please submit repo IDs first.", pd.DataFrame(), gr.update(visible=False)
802
 
803
  total_repos = len(repo_ids)
804
 
 
880
  # Complete the progress
881
  progress(1.0, desc="Batch analysis completed!")
882
 
883
+ # Get final updated dataframe
884
+ updated_df = read_csv_to_dataframe()
885
+
886
+ # Get top 3 most relevant repositories
887
+ top_repos = get_top_relevant_repos(updated_df, user_requirements, top_n=3)
888
+
889
  # Final status with detailed breakdown
890
  final_status = f"πŸŽ‰ Batch Analysis Complete!\nβœ… Successful: {successful_analyses}/{total_repos}\n❌ Failed: {failed_analyses}/{total_repos}"
891
  if csv_update_failures > 0:
892
  final_status += f"\n⚠️ CSV Update Issues: {csv_update_failures}/{total_repos}"
893
 
894
+ # Add top repos info if available
895
+ if not top_repos.empty:
896
+ final_status += f"\n\nπŸ† Top {len(top_repos)} most relevant repositories selected!"
897
+
898
+ # Show top repos section if we have results
899
+ show_top_section = gr.update(visible=not top_repos.empty)
900
 
901
  logger.info(f"Batch analysis completed: {successful_analyses} successful, {failed_analyses} failed, {csv_update_failures} CSV update issues")
902
+ return updated_df, final_status, top_repos, show_top_section
903
 
904
  except Exception as e:
905
  logger.error(f"Error in batch analysis: {e}")
906
  error_status = f"❌ Batch analysis failed: {e}"
907
+ return read_csv_to_dataframe(), error_status, pd.DataFrame(), gr.update(visible=False)
908
 
909
  def handle_visit_repo(repo_id: str) -> Tuple[Any, str]:
910
  """Handle visiting the Hugging Face Space for the repository."""
 
957
  ).then(
958
  fn=handle_analyze_all_repos,
959
  inputs=[repo_ids_state, user_requirements_state],
960
+ outputs=[df_output, status_box_analysis, top_repos_df, top_repos_section]
961
  )
962
 
963
  # Chatbot Tab
 
1021
  outputs=[selected_repo_display, repo_action_modal, tabs]
1022
  )
1023
 
1024
+ # Add selection event for top repositories dataframe too
1025
+ top_repos_df.select(
1026
+ fn=handle_dataframe_select,
1027
+ inputs=[top_repos_df],
1028
+ outputs=[selected_repo_display, repo_action_modal, tabs]
1029
+ )
1030
+
1031
  return app
1032
 
1033
  if __name__ == "__main__":