naman1102 committed on
Commit
123c678
·
1 Parent(s): 77de677

abstraction

Browse files
Files changed (2) hide show
  1. app.py +230 -326
  2. old_app2.py +1253 -0
app.py CHANGED
@@ -29,13 +29,47 @@ CHATBOT_SYSTEM_PROMPT = (
29
  "Your role is to ask clarifying questions to understand exactly what the user is looking for. "
30
  "Ask about their use case, preferred programming language, specific features needed, project type, etc. "
31
  "When you feel you have gathered enough detailed information about their requirements, "
32
- "tell the user: 'I think I have enough information about your requirements. Please click the Extract Keywords button to search for repositories.' "
33
  "Focus on understanding their needs, not providing solutions."
34
  )
35
- CHATBOT_INITIAL_MESSAGE = "Hello! I'm here to help you define your ideal Hugging Face repository requirements. I won't suggest specific repos - my job is to understand exactly what you're looking for. Tell me about your project: What type of application are you building? What's your use case?"
36
 
37
  # --- Helper Functions (Logic) ---
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def get_top_relevant_repos(df: pd.DataFrame, user_requirements: str, top_n: int = 3) -> pd.DataFrame:
40
  """
41
  Uses LLM to select the top N most relevant repositories based on user requirements and analysis data.
@@ -580,82 +614,54 @@ def create_ui() -> gr.Blocks:
580
 
581
  with gr.Tabs() as tabs:
582
  # --- Input Tab ---
583
- with gr.TabItem("📝 Input & Search", id="input_tab"):
584
- with gr.Row(equal_height=True):
585
- with gr.Column(scale=1):
586
- gr.Markdown("### 📁 Repository IDs")
587
- repo_id_input = gr.Textbox(
588
- label="Repository IDs",
589
- lines=8,
590
- placeholder="microsoft/DialoGPT-medium\nopenai/whisper\nhuggingface/transformers",
591
- info="Enter repo IDs separated by commas or new lines"
592
- )
593
- submit_repo_btn = gr.Button("🚀 Submit Repositories", variant="primary", size="lg")
594
-
595
- with gr.Column(scale=1):
596
- gr.Markdown("### 🔍 Keyword Search")
597
- keyword_input = gr.Textbox(
598
- label="Search Keywords",
599
- lines=8,
600
- placeholder="text generation\nimage classification\nsentiment analysis",
601
- info="Enter keywords to find relevant repositories"
602
- )
603
- search_btn = gr.Button("🔎 Search Repositories", variant="primary", size="lg")
604
 
605
  status_box_input = gr.Textbox(label="📊 Status", interactive=False, lines=2)
606
 
607
  # --- Analysis Tab ---
608
- with gr.TabItem("🔬 Analysis", id="analysis_tab"):
609
- gr.Markdown("### 🧪 Repository Analysis Engine")
610
 
611
  # Display current user requirements
612
  with gr.Row():
613
  current_requirements_display = gr.Textbox(
614
- label="📋 Current User Requirements",
615
  interactive=False,
616
- lines=3,
617
- info="Requirements extracted from AI chat conversation for relevance rating"
618
  )
619
 
620
- with gr.Row():
621
- analyze_all_btn = gr.Button("🚀 Analyze All Repositories", variant="primary", size="lg", scale=1)
622
- with gr.Column(scale=2):
623
- status_box_analysis = gr.Textbox(label="📈 Analysis Status", interactive=False, lines=2)
624
 
625
  # Progress bar for batch analysis
626
- with gr.Row():
627
- analysis_progress = gr.Progress()
628
- # progress_display = gr.Textbox(
629
- # label="📊 Batch Analysis Progress",
630
- # interactive=False,
631
- # lines=2,
632
- # visible=False,
633
- # info="Shows progress when analyzing all repositories"
634
- # )
635
-
636
- with gr.Row(equal_height=True):
637
- # with gr.Column():
638
- # content_output = gr.Textbox(
639
- # label="📄 Repository Content",
640
- # lines=20,
641
- # show_copy_button=True,
642
- # info="Raw content extracted from the repository"
643
- # )
644
- # with gr.Column():
645
- # summary_output = gr.Textbox(
646
- # label="🎯 AI Analysis Summary",
647
- # lines=20,
648
- # show_copy_button=True,
649
- # info="Detailed analysis and insights from AI"
650
- # )
651
- pass
652
 
653
  gr.Markdown("### 📊 Results Dashboard")
654
 
655
  # Top 3 Most Relevant Repositories (initially hidden)
656
  with gr.Column(visible=False) as top_repos_section:
657
  gr.Markdown("### 🏆 Top 3 Most Relevant Repositories")
658
- gr.Markdown("🎯 **These are the highest-rated repositories based on your requirements:**")
659
  top_repos_df = gr.Dataframe(
660
  headers=["Repository", "Strengths", "Weaknesses", "Speciality", "Relevance"],
661
  column_widths=["16.67%", "25%", "25%", "20.83%", "12.5%"],
@@ -663,45 +669,8 @@ def create_ui() -> gr.Blocks:
663
  interactive=False
664
  )
665
 
666
- gr.Markdown("💡 **Tip:** Full text is displayed directly in the table. Click on repository names to explore or visit them!")
667
-
668
- # Text expansion modal for showing full content (kept for backwards compatibility)
669
- with gr.Row():
670
- with gr.Column():
671
- text_expansion_modal = gr.Column(visible=False)
672
- with text_expansion_modal:
673
- gr.Markdown("### 📄 Full Content View")
674
- expanded_content_title = gr.Textbox(
675
- label="Content Type",
676
- interactive=False,
677
- info="Full text content for the selected field"
678
- )
679
- expanded_content_text = gr.Textbox(
680
- label="Full Text",
681
- lines=10,
682
- interactive=False,
683
- show_copy_button=True,
684
- info="Complete untruncated content"
685
- )
686
- close_text_modal_btn = gr.Button("❌ Close", size="lg")
687
-
688
- # Modal popup for repository action selection
689
- with gr.Row():
690
- with gr.Column():
691
- repo_action_modal = gr.Column(visible=False)
692
- with repo_action_modal:
693
- gr.Markdown("### 🔗 Repository Actions")
694
- selected_repo_display = gr.Textbox(
695
- label="Selected Repository",
696
- interactive=False,
697
- info="Choose what you'd like to do with this repository"
698
- )
699
- with gr.Row():
700
- visit_repo_btn = gr.Button("🌐 Visit Hugging Face Space", variant="primary", size="lg")
701
- explore_repo_btn = gr.Button("🔍 Open in Repo Explorer", variant="secondary", size="lg")
702
- cancel_modal_btn = gr.Button("❌ Cancel", size="lg")
703
-
704
  gr.Markdown("### 📋 All Analysis Results")
 
705
  df_output = gr.Dataframe(
706
  headers=["Repository", "Strengths", "Weaknesses", "Speciality", "Relevance"],
707
  column_widths=["16.67%", "25%", "25%", "20.83%", "12.5%"],
@@ -711,11 +680,12 @@ def create_ui() -> gr.Blocks:
711
 
712
  # --- Chatbot Tab ---
713
  with gr.TabItem("🤖 AI Assistant", id="chatbot_tab"):
714
- gr.Markdown("### 💬 Intelligent Repository Discovery")
 
715
 
716
  chatbot = gr.Chatbot(
717
  label="🤖 AI Assistant",
718
- height=450,
719
  type="messages",
720
  avatar_images=(
721
  "https://cdn-icons-png.flaticon.com/512/149/149071.png",
@@ -727,28 +697,28 @@ def create_ui() -> gr.Blocks:
727
  with gr.Row():
728
  msg_input = gr.Textbox(
729
  label="💭 Your Message",
730
- placeholder="Tell me about your ideal repository...",
731
  lines=1,
732
- scale=4,
733
- info="Describe what you're looking for"
734
  )
735
- send_btn = gr.Button("📤 Send", variant="primary", scale=1)
736
- end_chat_btn = gr.Button("🎯 Extract Keywords", scale=1)
737
- use_keywords_btn = gr.Button("🔎 Search Now", variant="primary", scale=1)
738
 
 
739
  with gr.Row():
 
 
 
 
 
 
 
740
  with gr.Column():
741
  extracted_keywords_output = gr.Textbox(
742
- label="🏷️ Extracted Keywords",
743
  interactive=False,
744
  show_copy_button=True,
745
- info="AI-generated search terms from our conversation"
746
- )
747
- with gr.Column():
748
- status_box_chatbot = gr.Textbox(
749
- label="📊 Chat Status",
750
- interactive=False,
751
- info="Current conversation status"
752
  )
753
 
754
  # --- Repo Explorer Tab ---
@@ -770,49 +740,47 @@ def create_ui() -> gr.Blocks:
770
 
771
  # --- Event Handler Functions ---
772
 
773
- def handle_repo_id_submission(text: str) -> Tuple[List[str], int, pd.DataFrame, str, Any]:
774
- """Processes submitted repo IDs, updates state, and prepares for analysis."""
775
- if not text:
776
- return [], 0, pd.DataFrame(), "Status: Please enter repository IDs.", gr.update(selected="input_tab")
777
 
778
- repo_ids = list(dict.fromkeys([repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]))
779
- write_repos_to_csv(repo_ids)
780
- df = format_dataframe_for_display(read_csv_to_dataframe())
781
- status = f"Status: {len(repo_ids)} repositories submitted. Ready for analysis."
782
- return repo_ids, 0, df, status, gr.update(selected="analysis_tab")
783
-
784
- def handle_keyword_search(keywords: str) -> Tuple[List[str], int, pd.DataFrame, str, Any]:
785
- """Processes submitted keywords, finds repos, updates state, and prepares for analysis."""
786
- if not keywords:
787
- return [], 0, pd.DataFrame(), "Status: Please enter keywords.", gr.update(selected="input_tab")
788
-
789
- keyword_list = [k.strip() for k in re.split(r'[\n,]+', keywords) if k.strip()]
790
- repo_ids = []
791
- for kw in keyword_list:
792
- repo_ids.extend(search_top_spaces(kw, limit=5))
793
-
794
- unique_repo_ids = list(dict.fromkeys(repo_ids))
795
- write_repos_to_csv(unique_repo_ids)
796
- df = format_dataframe_for_display(read_csv_to_dataframe())
797
- status = f"Status: Found {len(unique_repo_ids)} repositories. Ready for analysis."
798
- return unique_repo_ids, 0, df, status, gr.update(selected="analysis_tab")
 
 
 
 
 
 
 
 
 
 
 
799
 
800
- def extract_user_requirements_from_chat(history: List[Dict[str, str]]) -> str:
801
- """Extract user requirements from chatbot conversation."""
802
- if not history:
803
- return ""
804
-
805
- user_messages = []
806
- for msg in history:
807
- if msg.get('role') == 'user':
808
- user_messages.append(msg.get('content', ''))
809
-
810
- if not user_messages:
811
- return ""
812
-
813
- # Combine all user messages as requirements
814
- requirements = "\n".join([f"- {msg}" for msg in user_messages if msg.strip()])
815
- return requirements
816
 
817
  def handle_user_message(user_message: str, history: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], str]:
818
  """Appends the user's message to the history, preparing for the bot's response."""
@@ -824,10 +792,10 @@ def create_ui() -> gr.Blocks:
824
  history.append({"role": "user", "content": user_message})
825
  return history, ""
826
 
827
- def handle_bot_response(history: List[Dict[str, str]]) -> List[Dict[str, str]]:
828
- """Generates and appends the bot's response using the compatible history format."""
829
  if not history or history[-1]["role"] != "user":
830
- return history
831
 
832
  user_message = history[-1]["content"]
833
  # Convert all messages *before* the last user message into tuples for the API
@@ -835,79 +803,76 @@ def create_ui() -> gr.Blocks:
835
 
836
  response = chat_with_user(user_message, tuple_history_for_api)
837
  history.append({"role": "assistant", "content": response})
838
- return history
839
-
840
- def handle_end_chat(history: List[Dict[str, str]]) -> Tuple[str, str, str]:
841
- """Ends the chat, extracts and sanitizes keywords from the conversation, and extracts user requirements."""
842
- if not history:
843
- return "", "Status: Chat is empty, nothing to analyze.", ""
844
 
845
- # Convert the full, valid history for the extraction logic
846
- tuple_history = convert_messages_to_tuples(history)
847
- if not tuple_history:
848
- return "", "Status: No completed conversations to analyze.", ""
 
849
 
850
- # Get raw keywords string from the LLM
851
- raw_keywords_str = extract_keywords_from_conversation(tuple_history)
852
-
853
- # Sanitize the LLM output to extract only keyword-like parts.
854
- # A keyword can contain letters, numbers, underscores, spaces, and hyphens.
855
- cleaned_keywords = re.findall(r'[\w\s-]+', raw_keywords_str)
856
-
857
- # Trim whitespace from each found keyword and filter out any empty strings
858
- cleaned_keywords = [kw.strip() for kw in cleaned_keywords if kw.strip()]
859
-
860
- if not cleaned_keywords:
861
- return "", f"Status: Could not extract valid keywords. Raw LLM output: '{raw_keywords_str}'", ""
862
-
863
- # Join them into a clean, comma-separated string for the search tool
864
- final_keywords_str = ", ".join(cleaned_keywords)
865
-
866
- # Extract user requirements for analysis
867
- user_requirements = extract_user_requirements_from_chat(history)
 
 
 
 
868
 
869
- status = "Status: Keywords extracted. User requirements saved for analysis."
870
- return final_keywords_str, status, user_requirements
871
 
872
- def handle_dataframe_select(evt: gr.SelectData, df_data) -> Tuple[str, Any, Any, str, str, Any, str]:
873
- """Handle dataframe row selection - only repo ID (column 0) shows modal since full text is now displayed directly."""
874
- print(f"DEBUG: Selection event triggered!")
875
- print(f"DEBUG: evt = {evt}")
876
- print(f"DEBUG: df_data type = {type(df_data)}")
877
-
878
  if evt is None:
879
- return "", gr.update(visible=False), gr.update(), "", "", gr.update(visible=False), ""
880
 
881
  try:
882
- # Get the selected row and column from the event
883
  row_idx = evt.index[0]
884
  col_idx = evt.index[1]
885
- print(f"DEBUG: Selected row {row_idx}, column {col_idx}")
886
 
887
- # Handle pandas DataFrame
888
- if isinstance(df_data, pd.DataFrame) and not df_data.empty and row_idx < len(df_data):
 
889
 
890
- if col_idx == 0: # Repository name column - show action modal
891
- repo_id = df_data.iloc[row_idx, 0]
892
- print(f"DEBUG: Extracted repo_id = '{repo_id}'")
893
-
894
- if repo_id and str(repo_id).strip() and str(repo_id).strip() != 'nan':
895
- clean_repo_id = str(repo_id).strip()
896
- logger.info(f"Showing modal for repository: {clean_repo_id}")
897
- return clean_repo_id, gr.update(visible=True), gr.update(), "", "", gr.update(visible=False), clean_repo_id
898
-
899
- # For content columns (1,2,3) and relevance (4), do nothing since full text is shown directly
900
- else:
901
- print(f"DEBUG: Clicked on column {col_idx}, full text already shown in table")
902
- return "", gr.update(visible=False), gr.update(), "", "", gr.update(visible=False), ""
903
- else:
904
- print(f"DEBUG: df_data is not a DataFrame or row_idx {row_idx} out of range")
905
-
906
  except Exception as e:
907
- print(f"DEBUG: Exception occurred: {e}")
908
- logger.error(f"Error handling dataframe selection: {e}")
909
 
910
- return "", gr.update(visible=False), gr.update(), "", "", gr.update(visible=False), ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
911
 
912
  def handle_analyze_all_repos(repo_ids: List[str], user_requirements: str, progress=gr.Progress()) -> Tuple[pd.DataFrame, str, pd.DataFrame, Any]:
913
  """Analyzes all repositories in the CSV file with progress tracking."""
@@ -1029,43 +994,7 @@ def create_ui() -> gr.Blocks:
1029
  error_status = f"❌ Batch analysis failed: {e}"
1030
  return format_dataframe_for_display(read_csv_to_dataframe()), error_status, pd.DataFrame(), gr.update(visible=False)
1031
 
1032
- def handle_visit_repo(repo_id: str) -> Tuple[Any, str]:
1033
- """Handle visiting the Hugging Face Space for the repository."""
1034
- if repo_id and repo_id.strip():
1035
- hf_url = f"https://huggingface.co/spaces/{repo_id.strip()}"
1036
- logger.info(f"User chose to visit: {hf_url}")
1037
- return gr.update(visible=False), hf_url
1038
- return gr.update(visible=False), ""
1039
-
1040
- def handle_explore_repo(selected_repo_id: str) -> Tuple[Any, Any, Any]:
1041
- """Handle navigating to the repo explorer and populate the repo ID."""
1042
- logger.info(f"DEBUG: handle_explore_repo called with selected_repo_id: '{selected_repo_id}'")
1043
- logger.info(f"DEBUG: selected_repo_id type: {type(selected_repo_id)}")
1044
- logger.info(f"DEBUG: selected_repo_id length: {len(selected_repo_id) if selected_repo_id else 'None'}")
1045
-
1046
- if selected_repo_id and selected_repo_id.strip() and selected_repo_id.strip() != 'nan':
1047
- clean_repo_id = selected_repo_id.strip()
1048
- return (
1049
- gr.update(visible=False), # close modal
1050
- gr.update(selected="repo_explorer_tab"), # switch tab
1051
- gr.update(value=clean_repo_id) # populate repo explorer input
1052
- )
1053
- else:
1054
- return (
1055
- gr.update(visible=False), # close modal
1056
- gr.update(selected="repo_explorer_tab"), # switch tab
1057
- gr.update() # don't change repo explorer input
1058
- )
1059
-
1060
- def handle_cancel_modal() -> Any:
1061
- """Handle closing the modal."""
1062
- return gr.update(visible=False)
1063
-
1064
- def handle_close_text_modal() -> Any:
1065
- """Handle closing the text expansion modal."""
1066
- return gr.update(visible=False)
1067
-
1068
- def handle_reset_everything() -> Tuple[List[str], int, str, pd.DataFrame, pd.DataFrame, Any, Any, Any, List[Dict[str, str]], str, str, str]:
1069
  """Reset everything to initial state - clear all data, CSV, and UI components."""
1070
  try:
1071
  # Clear the CSV file
@@ -1100,10 +1029,8 @@ def create_ui() -> gr.Blocks:
1100
  empty_df, # df_output
1101
  empty_df, # top_repos_df
1102
  gr.update(visible=False), # top_repos_section
1103
- gr.update(visible=False), # repo_action_modal
1104
- gr.update(visible=False), # text_expansion_modal
1105
  chatbot_reset, # chatbot
1106
- status_reset, # status_box_analysis
1107
  current_requirements_reset, # current_requirements_display
1108
  extracted_keywords_reset # extracted_keywords_output
1109
  )
@@ -1118,10 +1045,8 @@ def create_ui() -> gr.Blocks:
1118
  pd.DataFrame(), # df_output
1119
  pd.DataFrame(), # top_repos_df
1120
  gr.update(visible=False), # top_repos_section
1121
- gr.update(visible=False), # repo_action_modal
1122
- gr.update(visible=False), # text_expansion_modal
1123
  [{"role": "assistant", "content": CHATBOT_INITIAL_MESSAGE}], # chatbot
1124
- error_status, # status_box_analysis
1125
  "No requirements extracted yet.", # current_requirements_display
1126
  "" # extracted_keywords_output
1127
  )
@@ -1134,29 +1059,33 @@ def create_ui() -> gr.Blocks:
1134
  outputs=[chatbot]
1135
  )
1136
 
1137
- # Input Tab
1138
- submit_repo_btn.click(
1139
- fn=handle_repo_id_submission,
1140
- inputs=[repo_id_input],
1141
- outputs=[repo_ids_state, current_repo_idx_state, df_output, status_box_analysis, tabs]
 
 
 
 
 
1142
  )
1143
- search_btn.click(
1144
- fn=handle_keyword_search,
1145
- inputs=[keyword_input],
1146
- outputs=[repo_ids_state, current_repo_idx_state, df_output, status_box_analysis, tabs]
 
 
1147
  )
1148
 
1149
- # Analysis Tab
1150
  analyze_all_btn.click(
1151
- fn=lambda: None, # No need to show progress display since it's commented out
1152
- outputs=[]
1153
- ).then(
1154
  fn=handle_analyze_all_repos,
1155
  inputs=[repo_ids_state, user_requirements_state],
1156
  outputs=[df_output, status_box_analysis, top_repos_df, top_repos_section]
1157
  )
1158
 
1159
- # Chatbot Tab
1160
  msg_input.submit(
1161
  fn=handle_user_message,
1162
  inputs=[msg_input, chatbot],
@@ -1164,8 +1093,19 @@ def create_ui() -> gr.Blocks:
1164
  ).then(
1165
  fn=handle_bot_response,
1166
  inputs=[chatbot],
1167
- outputs=[chatbot]
 
 
 
 
 
 
 
 
 
 
1168
  )
 
1169
  send_btn.click(
1170
  fn=handle_user_message,
1171
  inputs=[msg_input, chatbot],
@@ -1173,77 +1113,41 @@ def create_ui() -> gr.Blocks:
1173
  ).then(
1174
  fn=handle_bot_response,
1175
  inputs=[chatbot],
1176
- outputs=[chatbot]
1177
- )
1178
- end_chat_btn.click(
1179
- fn=handle_end_chat,
1180
- inputs=[chatbot],
1181
- outputs=[extracted_keywords_output, status_box_chatbot, user_requirements_state]
1182
  ).then(
 
1183
  fn=lambda req: req if req.strip() else "No specific requirements extracted from conversation.",
1184
  inputs=[user_requirements_state],
1185
  outputs=[current_requirements_display]
1186
- )
1187
- use_keywords_btn.click(
1188
- fn=handle_keyword_search,
1189
- inputs=[extracted_keywords_output],
1190
- outputs=[repo_ids_state, current_repo_idx_state, df_output, status_box_analysis, tabs]
1191
  )
1192
 
1193
  # Repo Explorer Tab
1194
  setup_repo_explorer_events(repo_components, repo_states)
1195
 
1196
- # Modal button events
1197
- visit_repo_btn.click(
1198
- fn=handle_visit_repo,
1199
- inputs=[selected_repo_display],
1200
- outputs=[repo_action_modal, selected_repo_display],
1201
- js="(repo_id) => { if(repo_id && repo_id.trim()) { window.open('https://huggingface.co/spaces/' + repo_id.trim(), '_blank'); } }"
1202
- )
1203
- explore_repo_btn.click(
1204
- fn=handle_explore_repo,
1205
- inputs=[selected_repo_id_state],
1206
- outputs=[
1207
- repo_action_modal,
1208
- tabs,
1209
- repo_components["repo_explorer_input"]
1210
- ],
1211
- js="""(repo_id) => {
1212
- console.log('DEBUG: Navigate to repo explorer for:', repo_id);
1213
- setTimeout(() => {
1214
- window.scrollTo({top: 0, behavior: 'smooth'});
1215
- }, 200);
1216
- }"""
1217
- )
1218
- cancel_modal_btn.click(
1219
- fn=handle_cancel_modal,
1220
- outputs=[repo_action_modal]
1221
- )
1222
-
1223
- # Text expansion modal events
1224
- close_text_modal_btn.click(
1225
- fn=handle_close_text_modal,
1226
- outputs=[text_expansion_modal]
1227
- )
1228
-
1229
- # Add dataframe selection event
1230
  df_output.select(
1231
- fn=handle_dataframe_select,
1232
  inputs=[df_output],
1233
- outputs=[selected_repo_display, repo_action_modal, tabs, expanded_content_title, expanded_content_text, text_expansion_modal, selected_repo_id_state]
 
1234
  )
1235
 
1236
- # Add selection event for top repositories dataframe too
1237
  top_repos_df.select(
1238
- fn=handle_dataframe_select,
1239
  inputs=[top_repos_df],
1240
- outputs=[selected_repo_display, repo_action_modal, tabs, expanded_content_title, expanded_content_text, text_expansion_modal, selected_repo_id_state]
 
1241
  )
1242
 
1243
  # Reset button event
1244
  reset_all_btn.click(
1245
  fn=handle_reset_everything,
1246
- outputs=[repo_ids_state, current_repo_idx_state, user_requirements_state, df_output, top_repos_df, top_repos_section, repo_action_modal, text_expansion_modal, chatbot, status_box_analysis, current_requirements_display, extracted_keywords_output]
1247
  )
1248
 
1249
  return app
 
29
  "Your role is to ask clarifying questions to understand exactly what the user is looking for. "
30
  "Ask about their use case, preferred programming language, specific features needed, project type, etc. "
31
  "When you feel you have gathered enough detailed information about their requirements, "
32
+ "tell the user: 'I think I have enough information about your requirements. I'll now search for relevant repositories automatically.' "
33
  "Focus on understanding their needs, not providing solutions."
34
  )
35
+ CHATBOT_INITIAL_MESSAGE = "Hello! I'm here to help you find the perfect Hugging Face repository. Tell me about your project - what are you trying to build? I'll ask some questions to understand your needs and then automatically find relevant repositories for you."
36
 
37
  # --- Helper Functions (Logic) ---
38
 
39
def is_repo_id_format(text: str) -> bool:
    """Return True when *text* looks like a list of repository IDs.

    The input is split on commas and newlines and blank entries are
    discarded. The text is classified as repository IDs when at least half
    of the remaining entries contain a forward slash (as in ``owner/name``).

    Args:
        text: Raw user input. ``None`` or blank input is classified as
            "not repository IDs".

    Returns:
        True if at least 50% of the non-empty entries contain ``/``,
        otherwise False.
    """
    if not text:
        # Guard: re.split() raises TypeError when handed None.
        return False

    entries = [part.strip() for part in re.split(r'[\n,]+', text)]
    entries = [entry for entry in entries if entry]
    if not entries:
        return False

    with_slash = sum('/' in entry for entry in entries)
    # Integer form of "with_slash >= 50% of entries"; an even split still
    # counts as repository IDs.
    return with_slash * 2 >= len(entries)
48
+
49
def should_auto_extract_keywords(history: List[Dict[str, str]]) -> bool:
    """Decide whether keywords should be extracted from the chat automatically.

    The most recent assistant message in *history* is scanned
    (case-insensitively) for any of a fixed set of "ready" phrases.

    Args:
        history: Chat messages as dicts with ``role`` and ``content`` keys,
            oldest first.

    Returns:
        True when the history holds at least four messages and the latest
        assistant message contains one of the ready phrases. False
        otherwise, including when there is no assistant message or its
        content is not a string.
    """
    # Fewer than four messages is treated as too short a conversation.
    if not history or len(history) < 4:
        return False

    # NOTE(review): "automatically" is a broad trigger; any assistant reply
    # using that word will match. Kept for backward compatibility.
    ready_phrases = (
        "enough information",
        "search for repositories",
        "find repositories",
        "look for repositories",
        "automatically",
        "ready to search",
    )

    for msg in reversed(history):
        if msg.get('role') != 'assistant':
            continue
        content = msg.get('content')
        if not isinstance(content, str):
            # None or non-text content has no phrases to match; previously
            # this raised AttributeError on .lower().
            return False
        lowered = content.lower()
        return any(phrase in lowered for phrase in ready_phrases)

    # No assistant message in the history at all.
    return False
72
+
73
  def get_top_relevant_repos(df: pd.DataFrame, user_requirements: str, top_n: int = 3) -> pd.DataFrame:
74
  """
75
  Uses LLM to select the top N most relevant repositories based on user requirements and analysis data.
 
614
 
615
  with gr.Tabs() as tabs:
616
  # --- Input Tab ---
617
+ with gr.TabItem("📝 Smart Search", id="input_tab"):
618
+ gr.Markdown("### 🔍 Intelligent Repository Discovery")
619
+ gr.Markdown("💡 **Enter repository IDs (owner/repo) or keywords - I'll automatically detect which type and process accordingly!**")
620
+
621
+ with gr.Row():
622
+ smart_input = gr.Textbox(
623
+ label="Repository IDs or Keywords",
624
+ lines=6,
625
+ placeholder="Examples:\n• Repository IDs: microsoft/DialoGPT-medium, openai/whisper\n• Keywords: text generation, image classification, sentiment analysis",
626
+ info="Smart detection: Use / for repo IDs, or enter keywords for search"
627
+ )
628
+
629
+ with gr.Row():
630
+ auto_analyze_checkbox = gr.Checkbox(
631
+ label="🚀 Auto-analyze repositories",
632
+ value=True,
633
+ info="Automatically start analysis when repositories are found"
634
+ )
 
 
 
635
 
636
  status_box_input = gr.Textbox(label="📊 Status", interactive=False, lines=2)
637
 
638
  # --- Analysis Tab ---
639
+ with gr.TabItem("🔬 Analysis & Results", id="analysis_tab"):
640
+ gr.Markdown("### 🧪 Repository Analysis Results")
641
 
642
  # Display current user requirements
643
  with gr.Row():
644
  current_requirements_display = gr.Textbox(
645
+ label="📋 Active Requirements Context",
646
  interactive=False,
647
+ lines=2,
648
+ info="Requirements from AI chat for better relevance scoring"
649
  )
650
 
651
+ # Manual analysis trigger (hidden by default, shown only when auto-analyze is off)
652
+ with gr.Row(visible=False) as manual_analysis_row:
653
+ analyze_all_btn = gr.Button("🚀 Analyze All Repositories", variant="primary", size="lg")
654
+ status_box_analysis = gr.Textbox(label="📈 Analysis Status", interactive=False, lines=2)
655
 
656
  # Progress bar for batch analysis
657
+ analysis_progress = gr.Progress()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
658
 
659
  gr.Markdown("### 📊 Results Dashboard")
660
 
661
  # Top 3 Most Relevant Repositories (initially hidden)
662
  with gr.Column(visible=False) as top_repos_section:
663
  gr.Markdown("### 🏆 Top 3 Most Relevant Repositories")
664
+ gr.Markdown("🎯 **Click repository names to visit them directly on Hugging Face:**")
665
  top_repos_df = gr.Dataframe(
666
  headers=["Repository", "Strengths", "Weaknesses", "Speciality", "Relevance"],
667
  column_widths=["16.67%", "25%", "25%", "20.83%", "12.5%"],
 
669
  interactive=False
670
  )
671
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
672
  gr.Markdown("### 📋 All Analysis Results")
673
+ gr.Markdown("💡 **Click repository names to visit them on Hugging Face**")
674
  df_output = gr.Dataframe(
675
  headers=["Repository", "Strengths", "Weaknesses", "Speciality", "Relevance"],
676
  column_widths=["16.67%", "25%", "25%", "20.83%", "12.5%"],
 
680
 
681
  # --- Chatbot Tab ---
682
  with gr.TabItem("🤖 AI Assistant", id="chatbot_tab"):
683
+ gr.Markdown("### 💬 Intelligent Repository Discovery Assistant")
684
+ gr.Markdown("🎯 **Tell me what you're building, and I'll automatically find the best repositories for you!**")
685
 
686
  chatbot = gr.Chatbot(
687
  label="🤖 AI Assistant",
688
+ height=500,
689
  type="messages",
690
  avatar_images=(
691
  "https://cdn-icons-png.flaticon.com/512/149/149071.png",
 
697
  with gr.Row():
698
  msg_input = gr.Textbox(
699
  label="💭 Your Message",
700
+ placeholder="Tell me about your project...",
701
  lines=1,
702
+ scale=5,
703
+ info="Describe what you're building and I'll find the perfect repositories"
704
  )
705
+ send_btn = gr.Button("📤", variant="primary", scale=1)
 
 
706
 
707
+ # Status and extracted info (auto-updated, no manual buttons needed)
708
  with gr.Row():
709
+ with gr.Column():
710
+ chat_status = gr.Textbox(
711
+ label="🎯 Chat Status",
712
+ interactive=False,
713
+ lines=2,
714
+ info="Conversation progress and auto-actions"
715
+ )
716
  with gr.Column():
717
  extracted_keywords_output = gr.Textbox(
718
+ label="🏷️ Auto-Extracted Keywords",
719
  interactive=False,
720
  show_copy_button=True,
721
+ info="Keywords automatically extracted and used for search"
 
 
 
 
 
 
722
  )
723
 
724
  # --- Repo Explorer Tab ---
 
740
 
741
  # --- Event Handler Functions ---
742
 
743
def handle_smart_input(text: str, auto_analyze: bool) -> Tuple[List[str], int, pd.DataFrame, str, Any, str]:
    """Route free-form input to repo-ID handling or keyword search.

    The text is split on commas/newlines. When ``is_repo_id_format(text)``
    is true the entries are used directly as repository IDs; otherwise each
    entry is passed to ``search_top_spaces(..., limit=5)`` and the results
    are pooled. In both cases duplicates are dropped (first occurrence
    wins), the list is written with ``write_repos_to_csv`` and the CSV is
    read back for display.

    Args:
        text: Raw contents of the input box.
        auto_analyze: Whether the caller asked for automatic analysis.

    Returns:
        A 6-tuple: repository IDs, the integer 0, the display dataframe,
        a status message, a ``gr.update`` selecting a tab, and the marker
        string ``"auto_analyze"`` (or ``""`` when auto-analysis is off or
        the input was blank).
    """
    if not text.strip():
        return [], 0, pd.DataFrame(), "Status: Please enter repository IDs or keywords.", gr.update(selected="input_tab"), ""

    # Both input kinds share the same comma/newline tokenisation.
    entries = [piece.strip() for piece in re.split(r'[\n,]+', text) if piece.strip()]
    treat_as_ids = is_repo_id_format(text)

    if treat_as_ids:
        candidates = entries
    else:
        candidates = []
        for keyword in entries:
            candidates.extend(search_top_spaces(keyword, limit=5))

    # dict.fromkeys de-duplicates while keeping first-seen order.
    resolved = list(dict.fromkeys(candidates))
    write_repos_to_csv(resolved)
    table = format_dataframe_for_display(read_csv_to_dataframe())

    if treat_as_ids:
        status = f"✅ Found {len(resolved)} repository IDs. "
    else:
        status = f"🔍 Found {len(resolved)} repositories from keywords. "

    if auto_analyze:
        status += "Starting automatic analysis..."
        marker = "auto_analyze"
    else:
        status += "Ready for manual analysis."
        marker = ""

    return resolved, 0, table, status, gr.update(selected="analysis_tab"), marker
780
 
781
def handle_auto_analyze_toggle(auto_analyze: bool) -> Any:
    """Build the visibility update for the manual analysis controls.

    Args:
        auto_analyze: Current value of the auto-analyze setting.

    Returns:
        A ``gr.update`` whose ``visible`` flag is the inverse of
        *auto_analyze*: manual controls show only when auto-analysis is off.
    """
    show_manual_controls = not auto_analyze
    return gr.update(visible=show_manual_controls)
 
 
 
 
 
 
 
 
 
 
 
 
 
784
 
785
  def handle_user_message(user_message: str, history: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], str]:
786
  """Appends the user's message to the history, preparing for the bot's response."""
 
792
  history.append({"role": "user", "content": user_message})
793
  return history, ""
794
 
795
+ def handle_bot_response(history: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], str, str, str, List[str], int, pd.DataFrame, Any]:
796
+ """Generates bot response and automatically extracts keywords if conversation is ready."""
797
  if not history or history[-1]["role"] != "user":
798
+ return history, "", "", "", [], 0, pd.DataFrame(), gr.update()
799
 
800
  user_message = history[-1]["content"]
801
  # Convert all messages *before* the last user message into tuples for the API
 
803
 
804
  response = chat_with_user(user_message, tuple_history_for_api)
805
  history.append({"role": "assistant", "content": response})
 
 
 
 
 
 
806
 
807
+ # Check if we should auto-extract keywords and search
808
+ if should_auto_extract_keywords(history):
809
+ # Auto-extract keywords
810
+ tuple_history = convert_messages_to_tuples(history)
811
+ raw_keywords_str = extract_keywords_from_conversation(tuple_history)
812
 
813
+ # Sanitize keywords
814
+ cleaned_keywords = re.findall(r'[\w\s-]+', raw_keywords_str)
815
+ cleaned_keywords = [kw.strip() for kw in cleaned_keywords if kw.strip()]
816
+
817
+ if cleaned_keywords:
818
+ final_keywords_str = ", ".join(cleaned_keywords)
819
+
820
+ # Extract user requirements
821
+ user_requirements = extract_user_requirements_from_chat(history)
822
+
823
+ # Auto-search repositories
824
+ repo_ids = []
825
+ for kw in cleaned_keywords[:3]: # Use top 3 keywords to avoid too many results
826
+ repo_ids.extend(search_top_spaces(kw, limit=5))
827
+
828
+ unique_repo_ids = list(dict.fromkeys(repo_ids))
829
+ write_repos_to_csv(unique_repo_ids)
830
+ df = format_dataframe_for_display(read_csv_to_dataframe())
831
+
832
+ chat_status = f"🎯 Auto-extracted keywords and found {len(unique_repo_ids)} repositories. Analysis starting automatically..."
833
+
834
+ return history, chat_status, final_keywords_str, user_requirements, unique_repo_ids, 0, df, gr.update(selected="analysis_tab")
835
 
836
+ return history, "💬 Conversation continuing...", "", "", [], 0, pd.DataFrame(), gr.update()
 
837
 
838
def handle_repo_click(evt: gr.SelectData, df_data) -> str:
    """Turn a click on a repository-name cell into that repo's HF Space URL.

    Returns the Space URL when the click landed on column 0 of a non-empty
    DataFrame and the cell holds a usable repo id. In every other case
    (no event, other column, unusable table, blank/'nan' cell, or any error
    while reading the event) an empty string is returned.
    """
    no_url = ""
    if evt is None:
        return no_url

    try:
        row_idx, col_idx = evt.index[0], evt.index[1]

        # Only the first column carries the repository name.
        clicked_repo_cell = (
            col_idx == 0
            and isinstance(df_data, pd.DataFrame)
            and not df_data.empty
            and row_idx < len(df_data)
        )
        if clicked_repo_cell:
            repo_id = df_data.iloc[row_idx, 0]
            if repo_id:
                cleaned_id = str(repo_id).strip()
                # 'nan' is what a missing cell looks like once stringified.
                if cleaned_id and cleaned_id != 'nan':
                    hf_url = f"https://huggingface.co/spaces/{cleaned_id}"
                    logger.info(f"Opening repository: {hf_url}")
                    return hf_url
    except Exception as e:
        logger.error(f"Error handling repository click: {e}")

    return no_url
859
+
860
def extract_user_requirements_from_chat(history: List[Dict[str, str]]) -> str:
    """Build a bulleted requirements summary from the user's chat messages.

    Args:
        history: Chat history as a list of dicts with ``role`` and
            ``content`` keys. May be empty or ``None``.

    Returns:
        One ``- <message>`` line per non-blank user message, joined with
        newlines, or an empty string when there is nothing to report.
    """
    if not history:
        return ""

    bullet_lines = []
    for msg in history:
        if msg.get('role') != 'user':
            continue
        content = msg.get('content', '')
        # ``content`` may be None or a non-text payload; calling .strip() on
        # it would raise, so only plain, non-blank text becomes a requirement.
        if isinstance(content, str) and content.strip():
            bullet_lines.append(f"- {content}")

    return "\n".join(bullet_lines)
876
 
877
  def handle_analyze_all_repos(repo_ids: List[str], user_requirements: str, progress=gr.Progress()) -> Tuple[pd.DataFrame, str, pd.DataFrame, Any]:
878
  """Analyzes all repositories in the CSV file with progress tracking."""
 
994
  error_status = f"❌ Batch analysis failed: {e}"
995
  return format_dataframe_for_display(read_csv_to_dataframe()), error_status, pd.DataFrame(), gr.update(visible=False)
996
 
997
+ def handle_reset_everything() -> Tuple[List[str], int, str, pd.DataFrame, pd.DataFrame, Any, List[Dict[str, str]], str, str, str]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
998
  """Reset everything to initial state - clear all data, CSV, and UI components."""
999
  try:
1000
  # Clear the CSV file
 
1029
  empty_df, # df_output
1030
  empty_df, # top_repos_df
1031
  gr.update(visible=False), # top_repos_section
 
 
1032
  chatbot_reset, # chatbot
1033
+ status_reset, # status_box_input
1034
  current_requirements_reset, # current_requirements_display
1035
  extracted_keywords_reset # extracted_keywords_output
1036
  )
 
1045
  pd.DataFrame(), # df_output
1046
  pd.DataFrame(), # top_repos_df
1047
  gr.update(visible=False), # top_repos_section
 
 
1048
  [{"role": "assistant", "content": CHATBOT_INITIAL_MESSAGE}], # chatbot
1049
+ error_status, # status_box_input
1050
  "No requirements extracted yet.", # current_requirements_display
1051
  "" # extracted_keywords_output
1052
  )
 
1059
  outputs=[chatbot]
1060
  )
1061
 
1062
+ # Smart Input with Auto-processing
1063
+ smart_input.submit(
1064
+ fn=handle_smart_input,
1065
+ inputs=[smart_input, auto_analyze_checkbox],
1066
+ outputs=[repo_ids_state, current_repo_idx_state, df_output, status_box_input, tabs, status_box_input]
1067
+ ).then(
1068
+ # If auto_analyze is enabled and we got repos, start analysis automatically
1069
+ fn=lambda repo_ids, user_reqs, trigger: handle_analyze_all_repos(repo_ids, user_reqs) if trigger == "auto_analyze" and repo_ids else (pd.DataFrame(), "Ready for analysis.", pd.DataFrame(), gr.update(visible=False)),
1070
+ inputs=[repo_ids_state, user_requirements_state, status_box_input],
1071
+ outputs=[df_output, status_box_input, top_repos_df, top_repos_section]
1072
  )
1073
+
1074
+ # Auto-analyze checkbox toggle
1075
+ auto_analyze_checkbox.change(
1076
+ fn=handle_auto_analyze_toggle,
1077
+ inputs=[auto_analyze_checkbox],
1078
+ outputs=[manual_analysis_row]
1079
  )
1080
 
1081
+ # Manual analysis button (when auto-analyze is disabled)
1082
  analyze_all_btn.click(
 
 
 
1083
  fn=handle_analyze_all_repos,
1084
  inputs=[repo_ids_state, user_requirements_state],
1085
  outputs=[df_output, status_box_analysis, top_repos_df, top_repos_section]
1086
  )
1087
 
1088
+ # Chatbot with Auto-extraction and Auto-search
1089
  msg_input.submit(
1090
  fn=handle_user_message,
1091
  inputs=[msg_input, chatbot],
 
1093
  ).then(
1094
  fn=handle_bot_response,
1095
  inputs=[chatbot],
1096
+ outputs=[chatbot, chat_status, extracted_keywords_output, user_requirements_state, repo_ids_state, current_repo_idx_state, df_output, tabs]
1097
+ ).then(
1098
+ # Update requirements display when they change
1099
+ fn=lambda req: req if req.strip() else "No specific requirements extracted from conversation.",
1100
+ inputs=[user_requirements_state],
1101
+ outputs=[current_requirements_display]
1102
+ ).then(
1103
+ # If we got repos from chatbot, auto-analyze them
1104
+ fn=lambda repo_ids, user_reqs: handle_analyze_all_repos(repo_ids, user_reqs) if repo_ids else (pd.DataFrame(), "", pd.DataFrame(), gr.update(visible=False)),
1105
+ inputs=[repo_ids_state, user_requirements_state],
1106
+ outputs=[df_output, chat_status, top_repos_df, top_repos_section]
1107
  )
1108
+
1109
  send_btn.click(
1110
  fn=handle_user_message,
1111
  inputs=[msg_input, chatbot],
 
1113
  ).then(
1114
  fn=handle_bot_response,
1115
  inputs=[chatbot],
1116
+ outputs=[chatbot, chat_status, extracted_keywords_output, user_requirements_state, repo_ids_state, current_repo_idx_state, df_output, tabs]
 
 
 
 
 
1117
  ).then(
1118
+ # Update requirements display when they change
1119
  fn=lambda req: req if req.strip() else "No specific requirements extracted from conversation.",
1120
  inputs=[user_requirements_state],
1121
  outputs=[current_requirements_display]
1122
+ ).then(
1123
+ # If we got repos from chatbot, auto-analyze them
1124
+ fn=lambda repo_ids, user_reqs: handle_analyze_all_repos(repo_ids, user_reqs) if repo_ids else (pd.DataFrame(), "", pd.DataFrame(), gr.update(visible=False)),
1125
+ inputs=[repo_ids_state, user_requirements_state],
1126
+ outputs=[df_output, chat_status, top_repos_df, top_repos_section]
1127
  )
1128
 
1129
  # Repo Explorer Tab
1130
  setup_repo_explorer_events(repo_components, repo_states)
1131
 
1132
+ # Direct Repository Clicks - Open HF Space
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1133
  df_output.select(
1134
+ fn=handle_repo_click,
1135
  inputs=[df_output],
1136
+ outputs=[status_box_input],
1137
+ js="(url) => { if(url && url.trim()) { window.open(url, '_blank'); } }"
1138
  )
1139
 
 
1140
  top_repos_df.select(
1141
+ fn=handle_repo_click,
1142
  inputs=[top_repos_df],
1143
+ outputs=[status_box_input],
1144
+ js="(url) => { if(url && url.trim()) { window.open(url, '_blank'); } }"
1145
  )
1146
 
1147
  # Reset button event
1148
  reset_all_btn.click(
1149
  fn=handle_reset_everything,
1150
+ outputs=[repo_ids_state, current_repo_idx_state, user_requirements_state, df_output, top_repos_df, top_repos_section, chatbot, status_box_input, current_requirements_display, extracted_keywords_output]
1151
  )
1152
 
1153
  return app
old_app2.py ADDED
@@ -0,0 +1,1253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import regex as re
3
+ import csv
4
+ import pandas as pd
5
+ from typing import List, Dict, Tuple, Any
6
+ import logging
7
+ import os
8
+ import time
9
+
10
+ # Import core logic from other modules, as in app_old.py
11
+ from analyzer import (
12
+ combine_repo_files_for_llm,
13
+ parse_llm_json_response,
14
+ analyze_combined_file,
15
+ handle_load_repository
16
+ )
17
+ from hf_utils import download_filtered_space_files, search_top_spaces
18
+ from chatbot_page import chat_with_user, extract_keywords_from_conversation
19
+ from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events
20
+
21
+ # --- Configuration ---
22
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
23
+ logger = logging.getLogger(__name__)
24
+
25
+ CSV_FILE = "repo_ids.csv"
26
+ CHATBOT_SYSTEM_PROMPT = (
27
+ "You are a helpful assistant whose ONLY job is to gather information about the user's ideal repository requirements. "
28
+ "DO NOT suggest any specific repositories or give repository recommendations. "
29
+ "Your role is to ask clarifying questions to understand exactly what the user is looking for. "
30
+ "Ask about their use case, preferred programming language, specific features needed, project type, etc. "
31
+ "When you feel you have gathered enough detailed information about their requirements, "
32
+ "tell the user: 'I think I have enough information about your requirements. Please click the Extract Keywords button to search for repositories.' "
33
+ "Focus on understanding their needs, not providing solutions."
34
+ )
35
+ CHATBOT_INITIAL_MESSAGE = "Hello! I'm here to help you define your ideal Hugging Face repository requirements. I won't suggest specific repos - my job is to understand exactly what you're looking for. Tell me about your project: What type of application are you building? What's your use case?"
36
+
37
+ # --- Helper Functions (Logic) ---
38
+
39
def get_top_relevant_repos(df: pd.DataFrame, user_requirements: str, top_n: int = 3) -> pd.DataFrame:
    """
    Uses LLM to select the top N most relevant repositories based on user requirements and analysis data.

    Args:
        df: Analysis table with the string columns "repo id", "strength",
            "weaknesses", "speciality" and "relevance rating".
        user_requirements: Free-text description of what the user wants.
            Blank or ``None`` falls back to a generic recommendation prompt.
        top_n: Maximum number of repositories to return.

    Returns:
        Up to ``top_n`` rows of ``df`` in the order chosen by the LLM. If the
        LLM call or the parsing of its answer fails, the first ``top_n``
        analyzed rows are returned instead. An empty frame with the expected
        columns is returned when there is nothing to rank or on an unexpected
        error.
    """
    import json
    import re  # standard-library regex, imported locally as the original block did

    columns = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]

    try:
        if df.empty:
            return pd.DataFrame(columns=columns)

        # Filter out rows with no analysis data
        has_analysis = (
            (df['strength'].str.strip() != '') |
            (df['weaknesses'].str.strip() != '') |
            (df['speciality'].str.strip() != '') |
            (df['relevance rating'].str.strip() != '')
        )
        analyzed_df = df[has_analysis].copy()

        if analyzed_df.empty:
            logger.warning("No analyzed repositories found for LLM selection")
            return pd.DataFrame(columns=columns)

        # Create a prompt for the LLM (joined once instead of repeated +=)
        csv_data = "".join(
            f"Repository: {row['repo id']}\n"
            f"Strengths: {row['strength']}\n"
            f"Weaknesses: {row['weaknesses']}\n"
            f"Speciality: {row['speciality']}\n"
            f"Relevance: {row['relevance rating']}\n\n"
            for _, row in analyzed_df.iterrows()
        )

        requirements_text = (user_requirements or "").strip()
        user_context = user_requirements if requirements_text else "General repository recommendation"

        prompt = f"""Based on the user's requirements and the analysis of repositories below, select the top {top_n} most relevant repositories.

User Requirements:
{user_context}

Repository Analysis Data:
{csv_data}

Please analyze all repositories and select the {top_n} most relevant ones based on:
1. How well they match the user's specific requirements
2. Their strengths and capabilities
3. Their relevance rating
4. Their speciality alignment with user needs

Return ONLY a JSON list of the repository IDs in order of relevance (most relevant first). Example format:
["repo1", "repo2", "repo3"]

Selected repositories:"""

        try:
            from openai import OpenAI
            client = OpenAI(api_key=os.getenv("modal_api"))
            client.base_url = os.getenv("base_url")

            response = client.chat.completions.create(
                model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
                messages=[
                    {"role": "system", "content": "You are an expert at analyzing and ranking repositories based on user requirements. Always return valid JSON."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=200,
                temperature=0.3
            )

            llm_response = response.choices[0].message.content.strip()
            logger.info(f"LLM response for top repos: {llm_response}")

            # Try to find JSON array in the response. DOTALL lets the match
            # span newlines, so a pretty-printed multi-line array is found too.
            json_match = re.search(r'\[.*\]', llm_response, re.DOTALL)
            if json_match:
                selected_repos = json.loads(json_match.group())
                logger.info(f"LLM selected repositories: {selected_repos}")

                # Keep the LLM's order, skipping IDs that are not strings,
                # repeated, or absent from the table, until top_n rows are found.
                top_repos_list = []
                seen_ids = set()
                for repo_id in selected_repos:
                    if len(top_repos_list) >= top_n:
                        break
                    if not isinstance(repo_id, str) or repo_id in seen_ids:
                        continue
                    seen_ids.add(repo_id)
                    matching_rows = analyzed_df[analyzed_df['repo id'] == repo_id]
                    if not matching_rows.empty:
                        top_repos_list.append(matching_rows.iloc[0])

                if top_repos_list:
                    top_repos = pd.DataFrame(top_repos_list)
                    logger.info(f"Successfully selected {len(top_repos)} repositories using LLM")
                    return top_repos

            # Fallback: if LLM response parsing fails, use first N analyzed repos
            logger.warning("Failed to parse LLM response, using fallback selection")
            return analyzed_df.head(top_n)

        except Exception as llm_error:
            logger.error(f"LLM selection failed: {llm_error}")
            # Fallback: return first N repositories with analysis data
            return analyzed_df.head(top_n)

    except Exception as e:
        logger.error(f"Error in LLM-based repo selection: {e}")
        return pd.DataFrame(columns=columns)
142
+
143
def write_repos_to_csv(repo_ids: List[str]) -> None:
    """Overwrite the CSV file with a header row plus one row per repo id.

    Each repo row has its four analysis columns left empty. Any failure is
    logged instead of being raised.
    """
    header = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
    try:
        with open(CSV_FILE, mode="w", newline='', encoding="utf-8") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(header)
            writer.writerows([repo_id, "", "", "", ""] for repo_id in repo_ids)
        logger.info(f"Wrote {len(repo_ids)} repo IDs to {CSV_FILE}")
    except Exception as e:
        logger.error(f"Error writing to CSV: {e}")
154
+
155
def format_text_for_dataframe(text: str, max_length: int = 200) -> str:
    """Format text for better display in dataframe by truncating and cleaning.

    Args:
        text: Value to clean. Falsy values and pandas-missing values
            (``None``, ``NaN``) yield an empty string.
        max_length: Maximum length of the returned string.

    Returns:
        The text with surrounding whitespace removed and every internal
        whitespace run collapsed to one space. Text longer than
        ``max_length`` is cut and ends in ``"..."``; when ``max_length`` is
        too small to hold an ellipsis the text is simply hard-cut.
    """
    if not text or pd.isna(text):
        return ""

    # Remove excessive whitespace and newlines
    cleaned = re.sub(r'\s+', ' ', str(text).strip())

    if len(cleaned) <= max_length:
        return cleaned

    ellipsis = "..."
    if max_length < len(ellipsis):
        # No room for an ellipsis: a negative slice bound here would return
        # a string longer than max_length, so hard-cut instead.
        return cleaned[:max(max_length, 0)]
    return cleaned[:max_length - len(ellipsis)] + ellipsis
171
+
172
def read_csv_to_dataframe() -> pd.DataFrame:
    """Reads the CSV file into a pandas DataFrame with full text preserved.

    Every column is read as a string and missing cells become ``''``. No
    truncation happens here; text is kept intact for the display layer.

    Returns:
        The CSV contents, or an empty frame carrying the expected columns
        when the file is missing or cannot be read. Unreadable files are
        logged; a missing file is not.
    """
    expected_columns = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
    try:
        return pd.read_csv(CSV_FILE, dtype=str).fillna('')
    except FileNotFoundError:
        return pd.DataFrame(columns=expected_columns)
    except Exception as e:
        logger.error(f"Error reading CSV: {e}")
        # Return the same schema as the missing-file case. A column-less
        # frame written back with to_csv produces a header-less file that
        # fails to parse on every later read.
        return pd.DataFrame(columns=expected_columns)
186
+
187
def format_dataframe_for_display(df: pd.DataFrame) -> pd.DataFrame:
    """Return the frame to show in the UI with all text left untruncated.

    An empty frame is handed back as the same object; a non-empty frame is
    returned as a copy.
    """
    return df if df.empty else df.copy()
195
+
196
def analyze_and_update_single_repo(repo_id: str, user_requirements: str = "") -> Tuple[str, str, pd.DataFrame]:
    """
    Download one repo, run the LLM analysis on it, and record the result in the CSV.

    The user requirements are forwarded to the analysis call.

    Returns:
        ``(combined_content, summary, df)`` on success: the combined repo
        text, a human-readable summary of the JSON extraction, and the
        updated table. On any error the tuple is
        ``("", "Error analyzing repo: ...", <current CSV table>)``.
    """
    try:
        logger.info(f"Starting analysis for repo: {repo_id}")
        download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
        txt_path = combine_repo_files_for_llm()

        with open(txt_path, "r", encoding="utf-8") as f:
            combined_content = f.read()

        llm_output = analyze_combined_file(txt_path, user_requirements)

        # Take the text between the last '{' and the last '}' as the JSON payload.
        json_start = llm_output.rfind('{')
        json_end = llm_output.rfind('}')
        if json_start != -1 and json_end != -1:
            final_json_str = llm_output[json_start:json_end + 1]
        else:
            final_json_str = "{}"

        llm_json = parse_llm_json_response(final_json_str)
        parsed_is_dict = isinstance(llm_json, dict)

        if parsed_is_dict and "error" not in llm_json:
            strengths = llm_json.get("strength", "N/A")
            weaknesses = llm_json.get("weaknesses", "N/A")
            relevance = llm_json.get("relevance rating", "N/A")
            summary = f"JSON extraction: SUCCESS\n\nStrengths:\n{strengths}\n\nWeaknesses:\n{weaknesses}\n\nRelevance: {relevance}"
        else:
            summary = "JSON extraction: FAILED\nRaw response might not be valid JSON."

        # Update CSV: only the first row whose repo id matches is touched.
        df = read_csv_to_dataframe()
        not_found = object()
        target_idx = next(
            (idx for idx, row in df.iterrows() if row["repo id"] == repo_id),
            not_found,
        )
        if target_idx is not_found:
            logger.warning(f"Repo ID {repo_id} not found in CSV for updating.")
        elif parsed_is_dict:
            for column in ("strength", "weaknesses", "speciality", "relevance rating"):
                df.at[target_idx, column] = llm_json.get(column, "")

        # Write CSV with better error handling and flushing
        try:
            df.to_csv(CSV_FILE, index=False)
            # Force file system flush
            if hasattr(os, 'sync'):
                os.sync()
            logger.info(f"Successfully updated CSV for {repo_id}")
        except Exception as csv_error:
            logger.error(f"Failed to write CSV for {repo_id}: {csv_error}")
            # Try once more with a small delay
            time.sleep(0.2)
            try:
                df.to_csv(CSV_FILE, index=False)
                logger.info(f"Successfully updated CSV for {repo_id} on retry")
            except Exception as retry_error:
                logger.error(f"Failed to write CSV for {repo_id} on retry: {retry_error}")

        logger.info(f"Successfully analyzed and updated CSV for {repo_id}")
        return combined_content, summary, df

    except Exception as e:
        logger.error(f"An error occurred during analysis of {repo_id}: {e}")
        error_summary = f"Error analyzing repo: {e}"
        return "", error_summary, format_dataframe_for_display(read_csv_to_dataframe())
266
+
267
+ # --- NEW: Helper for Chat History Conversion ---
268
def convert_messages_to_tuples(history: List[Dict[str, str]]) -> List[Tuple[str, str]]:
    """
    Convert a 'messages'-style history into ``(user, assistant)`` tuples.

    Only a user message immediately followed by an assistant message forms a
    pair, so a leading assistant greeting or a trailing unanswered user
    message is left out.
    """
    pairs = []
    last_position = len(history) - 1
    for position, message in enumerate(history):
        # A pair needs a user message that is not the final entry.
        if message['role'] != 'user' or position == last_position:
            continue
        reply = history[position + 1]
        if reply['role'] == 'assistant':
            pairs.append((message['content'], reply['content']))
    return pairs
283
+
284
+ # --- Gradio UI ---
285
+
286
+ def create_ui() -> gr.Blocks:
287
+ """Creates and configures the entire Gradio interface."""
288
+
289
+ css = """
290
+ /* Modern sleek design */
291
+ .gradio-container {
292
+ font-family: 'Inter', 'system-ui', sans-serif;
293
+ background: linear-gradient(135deg, #0a0a0a 0%, #1a1a1a 100%);
294
+ min-height: 100vh;
295
+ }
296
+
297
+ .gr-form {
298
+ background: rgba(255, 255, 255, 0.95);
299
+ backdrop-filter: blur(10px);
300
+ border-radius: 16px;
301
+ box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
302
+ padding: 24px;
303
+ margin: 16px;
304
+ border: 1px solid rgba(255, 255, 255, 0.2);
305
+ }
306
+
307
+ .gr-button {
308
+ background: linear-gradient(45deg, #667eea, #764ba2);
309
+ border: none;
310
+ border-radius: 12px;
311
+ color: white;
312
+ font-weight: 600;
313
+ padding: 12px 24px;
314
+ transition: all 0.3s ease;
315
+ box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4);
316
+ }
317
+
318
+ .gr-button:hover {
319
+ transform: translateY(-2px);
320
+ box-shadow: 0 6px 20px rgba(102, 126, 234, 0.6);
321
+ }
322
+
323
+ .gr-textbox {
324
+ border: 2px solid rgba(102, 126, 234, 0.2);
325
+ border-radius: 12px;
326
+ background: rgba(255, 255, 255, 0.9);
327
+ transition: all 0.3s ease;
328
+ }
329
+
330
+ .gr-textbox:focus {
331
+ border-color: #667eea;
332
+ box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
333
+ }
334
+
335
+ .gr-panel {
336
+ background: rgba(255, 255, 255, 0.95);
337
+ border-radius: 16px;
338
+ box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
339
+ border: 1px solid rgba(255, 255, 255, 0.2);
340
+ }
341
+
342
+ .gr-tab-nav {
343
+ background: rgba(255, 255, 255, 0.95);
344
+ border-radius: 12px 12px 0 0;
345
+ backdrop-filter: blur(10px);
346
+ }
347
+
348
+ .gr-tab-nav button {
349
+ background: transparent;
350
+ border: none;
351
+ padding: 16px 24px;
352
+ font-weight: 600;
353
+ color: #666;
354
+ transition: all 0.3s ease;
355
+ }
356
+
357
+ .gr-tab-nav button.selected {
358
+ background: linear-gradient(45deg, #667eea, #764ba2);
359
+ color: white;
360
+ border-radius: 8px;
361
+ }
362
+
363
+ .chatbot {
364
+ border-radius: 16px;
365
+ box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1);
366
+ }
367
+
368
+ /* Hide Gradio footer */
369
+ footer {
370
+ display: none !important;
371
+ }
372
+
373
+ /* Custom scrollbar */
374
+ ::-webkit-scrollbar {
375
+ width: 8px;
376
+ }
377
+
378
+ ::-webkit-scrollbar-track {
379
+ background: rgba(255, 255, 255, 0.1);
380
+ border-radius: 4px;
381
+ }
382
+
383
+ ::-webkit-scrollbar-thumb {
384
+ background: linear-gradient(45deg, #667eea, #764ba2);
385
+ border-radius: 4px;
386
+ }
387
+
388
+ /* Improved dataframe styling for full text display */
389
+ .gr-dataframe {
390
+ border-radius: 12px;
391
+ overflow: hidden;
392
+ box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1);
393
+ background: rgba(255, 255, 255, 0.98);
394
+ }
395
+
396
+ .gr-dataframe table {
397
+ width: 100%;
398
+ table-layout: fixed;
399
+ border-collapse: collapse;
400
+ }
401
+
402
+ /* Column width specifications for both dataframes */
403
+ .gr-dataframe th,
404
+ .gr-dataframe td {
405
+ padding: 12px 16px;
406
+ text-align: left;
407
+ border-bottom: 1px solid rgba(0, 0, 0, 0.1);
408
+ font-size: 0.95rem;
409
+ line-height: 1.4;
410
+ }
411
+
412
+ /* Specific column widths - applying to both dataframes */
413
+ .gr-dataframe th:nth-child(1),
414
+ .gr-dataframe td:nth-child(1) { width: 16.67% !important; min-width: 16.67% !important; max-width: 16.67% !important; }
415
+ .gr-dataframe th:nth-child(2),
416
+ .gr-dataframe td:nth-child(2) { width: 25% !important; min-width: 25% !important; max-width: 25% !important; }
417
+ .gr-dataframe th:nth-child(3),
418
+ .gr-dataframe td:nth-child(3) { width: 25% !important; min-width: 25% !important; max-width: 25% !important; }
419
+ .gr-dataframe th:nth-child(4),
420
+ .gr-dataframe td:nth-child(4) { width: 20.83% !important; min-width: 20.83% !important; max-width: 20.83% !important; }
421
+ .gr-dataframe th:nth-child(5),
422
+ .gr-dataframe td:nth-child(5) { width: 12.5% !important; min-width: 12.5% !important; max-width: 12.5% !important; }
423
+
424
+ /* Additional specific targeting for both dataframes */
425
+ div[data-testid="dataframe"] table th:nth-child(1),
426
+ div[data-testid="dataframe"] table td:nth-child(1) { width: 16.67% !important; }
427
+ div[data-testid="dataframe"] table th:nth-child(2),
428
+ div[data-testid="dataframe"] table td:nth-child(2) { width: 25% !important; }
429
+ div[data-testid="dataframe"] table th:nth-child(3),
430
+ div[data-testid="dataframe"] table td:nth-child(3) { width: 25% !important; }
431
+ div[data-testid="dataframe"] table th:nth-child(4),
432
+ div[data-testid="dataframe"] table td:nth-child(4) { width: 20.83% !important; }
433
+ div[data-testid="dataframe"] table th:nth-child(5),
434
+ div[data-testid="dataframe"] table td:nth-child(5) { width: 12.5% !important; }
435
+
436
+ /* Make repository names clickable */
437
+ .gr-dataframe td:nth-child(1) {
438
+ cursor: pointer;
439
+ color: #667eea;
440
+ font-weight: 600;
441
+ transition: all 0.3s ease;
442
+ }
443
+
444
+ .gr-dataframe td:nth-child(1):hover {
445
+ background-color: rgba(102, 126, 234, 0.1);
446
+ color: #764ba2;
447
+ transform: scale(1.02);
448
+ }
449
+
450
+ /* Content columns - readable styling with scroll for long text */
451
+ .gr-dataframe td:nth-child(2),
452
+ .gr-dataframe td:nth-child(3),
453
+ .gr-dataframe td:nth-child(4),
454
+ .gr-dataframe td:nth-child(5) {
455
+ cursor: default;
456
+ font-size: 0.9rem;
457
+ }
458
+
459
+ .gr-dataframe tbody tr:hover {
460
+ background-color: rgba(102, 126, 234, 0.05);
461
+ }
462
+
463
+ /* JavaScript for auto-scroll to top on tab change */
464
+ <script>
465
+ document.addEventListener('DOMContentLoaded', function() {
466
+ // Function to scroll to top
467
+ function scrollToTop() {
468
+ window.scrollTo({
469
+ top: 0,
470
+ behavior: 'smooth'
471
+ });
472
+ }
473
+
474
+ // Observer for tab changes
475
+ const observer = new MutationObserver(function(mutations) {
476
+ mutations.forEach(function(mutation) {
477
+ if (mutation.type === 'attributes' && mutation.attributeName === 'class') {
478
+ const target = mutation.target;
479
+ if (target.classList && target.classList.contains('selected')) {
480
+ // Tab was selected, scroll to top
481
+ setTimeout(scrollToTop, 100);
482
+ }
483
+ }
484
+ });
485
+ });
486
+
487
+ // Observe tab navigation buttons
488
+ const tabButtons = document.querySelectorAll('.gr-tab-nav button');
489
+ tabButtons.forEach(button => {
490
+ observer.observe(button, { attributes: true });
491
+
492
+ // Also add click listener for immediate scroll
493
+ button.addEventListener('click', function() {
494
+ setTimeout(scrollToTop, 150);
495
+ });
496
+ });
497
+
498
+ // Enhanced listener for programmatic tab changes (button-triggered navigation)
499
+ let lastSelectedTab = null;
500
+ const checkInterval = setInterval(function() {
501
+ const currentSelectedTab = document.querySelector('.gr-tab-nav button.selected');
502
+ if (currentSelectedTab && currentSelectedTab !== lastSelectedTab) {
503
+ lastSelectedTab = currentSelectedTab;
504
+ setTimeout(scrollToTop, 100);
505
+ }
506
+ }, 100);
507
+
508
+ // Additional scroll trigger for repo explorer navigation
509
+ window.addEventListener('repoExplorerNavigation', function() {
510
+ setTimeout(scrollToTop, 200);
511
+ });
512
+
513
+ // Watch for specific tab transitions to repo explorer
514
+ const repoExplorerObserver = new MutationObserver(function(mutations) {
515
+ mutations.forEach(function(mutation) {
516
+ if (mutation.type === 'attributes' && mutation.attributeName === 'class') {
517
+ const target = mutation.target;
518
+ if (target.textContent && target.textContent.includes('🔍 Repo Explorer') && target.classList.contains('selected')) {
519
+ setTimeout(scrollToTop, 150);
520
+ }
521
+ }
522
+ });
523
+ });
524
+
525
+ // Start observing for repo explorer specific changes
526
+ setTimeout(function() {
527
+ const repoExplorerTab = Array.from(document.querySelectorAll('.gr-tab-nav button')).find(btn =>
528
+ btn.textContent && btn.textContent.includes('🔍 Repo Explorer')
529
+ );
530
+ if (repoExplorerTab) {
531
+ repoExplorerObserver.observe(repoExplorerTab, { attributes: true });
532
+ }
533
+ }, 1000);
534
+ });
535
+ </script>
536
+ """
537
+
538
+ with gr.Blocks(
539
+ theme=gr.themes.Soft(
540
+ primary_hue="blue",
541
+ secondary_hue="purple",
542
+ neutral_hue="gray",
543
+ font=["Inter", "system-ui", "sans-serif"]
544
+ ),
545
+ css=css,
546
+ title="🚀 HF Repo Analyzer"
547
+ ) as app:
548
+
549
+ # --- State Management ---
550
+ # Using simple, separate state objects for robustness.
551
+ repo_ids_state = gr.State([])
552
+ current_repo_idx_state = gr.State(0)
553
+ user_requirements_state = gr.State("") # Store user requirements from chatbot
554
+ loaded_repo_content_state = gr.State("") # Store loaded repository content
555
+ current_repo_id_state = gr.State("") # Store current repository ID
556
+ selected_repo_id_state = gr.State("") # Store selected repository ID for modal actions
557
+
558
+ gr.Markdown(
559
+ """
560
+ <div style="text-align: center; padding: 40px 20px; background: rgba(255, 255, 255, 0.1); border-radius: 20px; margin: 20px auto; max-width: 900px; backdrop-filter: blur(10px);">
561
+ <h1 style="font-size: 3.5rem; font-weight: 800; margin: 0; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text;">
562
+ 🚀 HF Repo Analyzer
563
+ </h1>
564
+ <p style="font-size: 1.3rem; color: rgba(255, 255, 255, 0.9); margin: 16px 0 0 0; font-weight: 400; line-height: 1.6;">
565
+ Discover, analyze, and evaluate Hugging Face repositories with AI-powered insights
566
+ </p>
567
+ <div style="height: 4px; width: 80px; background: linear-gradient(45deg, #667eea, #764ba2); margin: 24px auto; border-radius: 2px;"></div>
568
+ </div>
569
+ """
570
+ )
571
+
572
+ # Global Reset Button - visible on all tabs
573
+ with gr.Row():
574
+ with gr.Column(scale=4):
575
+ pass
576
+ with gr.Column(scale=1):
577
+ reset_all_btn = gr.Button("🔄 Reset Everything", variant="stop", size="lg")
578
+ with gr.Column(scale=1):
579
+ pass
580
+
581
+ with gr.Tabs() as tabs:
582
+ # --- Input Tab ---
583
+ with gr.TabItem("📝 Input & Search", id="input_tab"):
584
+ with gr.Row(equal_height=True):
585
+ with gr.Column(scale=1):
586
+ gr.Markdown("### 📁 Repository IDs")
587
+ repo_id_input = gr.Textbox(
588
+ label="Repository IDs",
589
+ lines=8,
590
+ placeholder="microsoft/DialoGPT-medium\nopenai/whisper\nhuggingface/transformers",
591
+ info="Enter repo IDs separated by commas or new lines"
592
+ )
593
+ submit_repo_btn = gr.Button("🚀 Submit Repositories", variant="primary", size="lg")
594
+
595
+ with gr.Column(scale=1):
596
+ gr.Markdown("### 🔍 Keyword Search")
597
+ keyword_input = gr.Textbox(
598
+ label="Search Keywords",
599
+ lines=8,
600
+ placeholder="text generation\nimage classification\nsentiment analysis",
601
+ info="Enter keywords to find relevant repositories"
602
+ )
603
+ search_btn = gr.Button("🔎 Search Repositories", variant="primary", size="lg")
604
+
605
+ status_box_input = gr.Textbox(label="📊 Status", interactive=False, lines=2)
606
+
607
+ # --- Analysis Tab ---
608
+ with gr.TabItem("🔬 Analysis", id="analysis_tab"):
609
+ gr.Markdown("### 🧪 Repository Analysis Engine")
610
+
611
+ # Display current user requirements
612
+ with gr.Row():
613
+ current_requirements_display = gr.Textbox(
614
+ label="📋 Current User Requirements",
615
+ interactive=False,
616
+ lines=3,
617
+ info="Requirements extracted from AI chat conversation for relevance rating"
618
+ )
619
+
620
+ with gr.Row():
621
+ analyze_all_btn = gr.Button("🚀 Analyze All Repositories", variant="primary", size="lg", scale=1)
622
+ with gr.Column(scale=2):
623
+ status_box_analysis = gr.Textbox(label="📈 Analysis Status", interactive=False, lines=2)
624
+
625
+ # Progress bar for batch analysis
626
+ with gr.Row():
627
+ analysis_progress = gr.Progress()
628
+ # progress_display = gr.Textbox(
629
+ # label="📊 Batch Analysis Progress",
630
+ # interactive=False,
631
+ # lines=2,
632
+ # visible=False,
633
+ # info="Shows progress when analyzing all repositories"
634
+ # )
635
+
636
+ with gr.Row(equal_height=True):
637
+ # with gr.Column():
638
+ # content_output = gr.Textbox(
639
+ # label="📄 Repository Content",
640
+ # lines=20,
641
+ # show_copy_button=True,
642
+ # info="Raw content extracted from the repository"
643
+ # )
644
+ # with gr.Column():
645
+ # summary_output = gr.Textbox(
646
+ # label="🎯 AI Analysis Summary",
647
+ # lines=20,
648
+ # show_copy_button=True,
649
+ # info="Detailed analysis and insights from AI"
650
+ # )
651
+ pass
652
+
653
+ gr.Markdown("### 📊 Results Dashboard")
654
+
655
+ # Top 3 Most Relevant Repositories (initially hidden)
656
+ with gr.Column(visible=False) as top_repos_section:
657
+ gr.Markdown("### 🏆 Top 3 Most Relevant Repositories")
658
+ gr.Markdown("🎯 **These are the highest-rated repositories based on your requirements:**")
659
+ top_repos_df = gr.Dataframe(
660
+ headers=["Repository", "Strengths", "Weaknesses", "Speciality", "Relevance"],
661
+ column_widths=["16.67%", "25%", "25%", "20.83%", "12.5%"],
662
+ wrap=True,
663
+ interactive=False
664
+ )
665
+
666
+ gr.Markdown("💡 **Tip:** Full text is displayed directly in the table. Click on repository names to explore or visit them!")
667
+
668
+ # Text expansion modal for showing full content (kept for backwards compatibility)
669
+ with gr.Row():
670
+ with gr.Column():
671
+ text_expansion_modal = gr.Column(visible=False)
672
+ with text_expansion_modal:
673
+ gr.Markdown("### 📄 Full Content View")
674
+ expanded_content_title = gr.Textbox(
675
+ label="Content Type",
676
+ interactive=False,
677
+ info="Full text content for the selected field"
678
+ )
679
+ expanded_content_text = gr.Textbox(
680
+ label="Full Text",
681
+ lines=10,
682
+ interactive=False,
683
+ show_copy_button=True,
684
+ info="Complete untruncated content"
685
+ )
686
+ close_text_modal_btn = gr.Button("❌ Close", size="lg")
687
+
688
+ # Modal popup for repository action selection
689
+ with gr.Row():
690
+ with gr.Column():
691
+ repo_action_modal = gr.Column(visible=False)
692
+ with repo_action_modal:
693
+ gr.Markdown("### 🔗 Repository Actions")
694
+ selected_repo_display = gr.Textbox(
695
+ label="Selected Repository",
696
+ interactive=False,
697
+ info="Choose what you'd like to do with this repository"
698
+ )
699
+ with gr.Row():
700
+ visit_repo_btn = gr.Button("🌐 Visit Hugging Face Space", variant="primary", size="lg")
701
+ explore_repo_btn = gr.Button("🔍 Open in Repo Explorer", variant="secondary", size="lg")
702
+ cancel_modal_btn = gr.Button("❌ Cancel", size="lg")
703
+
704
+ gr.Markdown("### 📋 All Analysis Results")
705
+ df_output = gr.Dataframe(
706
+ headers=["Repository", "Strengths", "Weaknesses", "Speciality", "Relevance"],
707
+ column_widths=["16.67%", "25%", "25%", "20.83%", "12.5%"],
708
+ wrap=True,
709
+ interactive=False
710
+ )
711
+
712
+ # --- Chatbot Tab ---
713
+ with gr.TabItem("🤖 AI Assistant", id="chatbot_tab"):
714
+ gr.Markdown("### 💬 Intelligent Repository Discovery")
715
+
716
+ chatbot = gr.Chatbot(
717
+ label="🤖 AI Assistant",
718
+ height=450,
719
+ type="messages",
720
+ avatar_images=(
721
+ "https://cdn-icons-png.flaticon.com/512/149/149071.png",
722
+ "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"
723
+ ),
724
+ show_copy_button=True
725
+ )
726
+
727
+ with gr.Row():
728
+ msg_input = gr.Textbox(
729
+ label="💭 Your Message",
730
+ placeholder="Tell me about your ideal repository...",
731
+ lines=1,
732
+ scale=4,
733
+ info="Describe what you're looking for"
734
+ )
735
+ send_btn = gr.Button("📤 Send", variant="primary", scale=1)
736
+ end_chat_btn = gr.Button("🎯 Extract Keywords", scale=1)
737
+ use_keywords_btn = gr.Button("🔎 Search Now", variant="primary", scale=1)
738
+
739
+ with gr.Row():
740
+ with gr.Column():
741
+ extracted_keywords_output = gr.Textbox(
742
+ label="🏷️ Extracted Keywords",
743
+ interactive=False,
744
+ show_copy_button=True,
745
+ info="AI-generated search terms from our conversation"
746
+ )
747
+ with gr.Column():
748
+ status_box_chatbot = gr.Textbox(
749
+ label="📊 Chat Status",
750
+ interactive=False,
751
+ info="Current conversation status"
752
+ )
753
+
754
+ # --- Repo Explorer Tab ---
755
+ with gr.TabItem("🔍 Repo Explorer", id="repo_explorer_tab"):
756
+ repo_components, repo_states = create_repo_explorer_tab()
757
+
758
+ # --- Footer ---
759
+ gr.Markdown(
760
+ """
761
+ <div style="text-align: center; padding: 30px 20px; margin-top: 40px; background: rgba(255, 255, 255, 0.1); border-radius: 16px; backdrop-filter: blur(10px);">
762
+ <p style="margin: 0; color: rgba(255, 255, 255, 0.8); font-size: 0.95rem; font-weight: 500;">
763
+ 🚀 Powered by <span style="background: linear-gradient(45deg, #667eea, #764ba2); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 700;">Gradio</span>
764
+ & <span style="background: linear-gradient(45deg, #667eea, #764ba2); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 700;">Hugging Face</span>
765
+ </p>
766
+ <div style="height: 2px; width: 60px; background: linear-gradient(45deg, #667eea, #764ba2); margin: 16px auto; border-radius: 1px;"></div>
767
+ </div>
768
+ """
769
+ )
770
+
771
+ # --- Event Handler Functions ---
772
+
773
def handle_repo_id_submission(text: str) -> Tuple[List[str], int, pd.DataFrame, str, Any]:
    """Turn the raw repo-ID textbox content into state for the analysis tab.

    Returns a 5-tuple: the de-duplicated repo IDs, the current index (always 0),
    the table to display, a status message, and a tab-selection update.
    Empty input keeps the user on the input tab with a prompt to enter IDs.
    """
    if not text:
        return [], 0, pd.DataFrame(), "Status: Please enter repository IDs.", gr.update(selected="input_tab")

    # Split on commas/newlines, drop blanks, keep the first occurrence of each ID.
    ordered_unique = {}
    for piece in re.split(r'[\n,]+', text):
        candidate = piece.strip()
        if candidate:
            ordered_unique.setdefault(candidate, None)
    repo_ids = list(ordered_unique)

    write_repos_to_csv(repo_ids)
    table = format_dataframe_for_display(read_csv_to_dataframe())
    status = f"Status: {len(repo_ids)} repositories submitted. Ready for analysis."
    return repo_ids, 0, table, status, gr.update(selected="analysis_tab")
783
+
784
def handle_keyword_search(keywords: str) -> Tuple[List[str], int, pd.DataFrame, str, Any]:
    """Search for repositories matching each keyword and stage them for analysis.

    Returns a 5-tuple: the de-duplicated repo IDs found, the current index
    (always 0), the table to display, a status message, and a tab-selection
    update. Empty input keeps the user on the input tab.
    """
    if not keywords:
        return [], 0, pd.DataFrame(), "Status: Please enter keywords.", gr.update(selected="input_tab")

    # One search (up to 5 hits) per non-blank keyword, results kept in keyword order.
    stripped_terms = (piece.strip() for piece in re.split(r'[\n,]+', keywords))
    hits = [
        repo
        for term in stripped_terms
        if term
        for repo in search_top_spaces(term, limit=5)
    ]

    # dict.fromkeys removes repeats while preserving first-seen order.
    unique_repo_ids = list(dict.fromkeys(hits))
    write_repos_to_csv(unique_repo_ids)
    table = format_dataframe_for_display(read_csv_to_dataframe())
    status = f"Status: Found {len(unique_repo_ids)} repositories. Ready for analysis."
    return unique_repo_ids, 0, table, status, gr.update(selected="analysis_tab")
799
+
800
def extract_user_requirements_from_chat(history: List[Dict[str, str]]) -> str:
    """Collect the user's own chat messages into a bulleted requirements string.

    Args:
        history: Chat messages as dicts with 'role' and 'content' keys.

    Returns:
        One "- <message>" line per non-blank user message, joined with
        newlines, or "" when there is no history or no usable user message.
    """
    if not history:
        return ""

    bullets = []
    for msg in history:
        if msg.get('role') != 'user':
            continue
        content = msg.get('content', '')
        # Only plain text can become a requirement line. Skipping non-string
        # content (e.g. None) avoids an AttributeError from calling .strip() on it.
        if isinstance(content, str) and content.strip():
            bullets.append(f"- {content}")

    return "\n".join(bullets)
816
+
817
def handle_user_message(user_message: str, history: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], str]:
    """Record the user's message in the chat history and clear the input box.

    An empty history is first seeded with the assistant's welcome message.
    Returns the (possibly extended) history and "" for the message textbox.
    """
    # Reuse the caller's list when it has content; otherwise start from the greeting.
    conversation = history or [{"role": "assistant", "content": CHATBOT_INITIAL_MESSAGE}]

    if not user_message:
        return conversation, ""

    conversation.append({"role": "user", "content": user_message})
    return conversation, ""
826
+
827
def handle_bot_response(history: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Append the assistant's reply to a history that ends with a user message.

    The history is returned untouched when it is empty or when its last
    message was not written by the user.
    """
    awaiting_reply = bool(history) and history[-1]["role"] == "user"
    if not awaiting_reply:
        return history

    # The chat API takes the newest user message separately from the earlier
    # turns, which it expects in tuple form.
    *earlier_turns, latest = history
    user_message = latest["content"]
    tuple_history_for_api = convert_messages_to_tuples(earlier_turns)

    reply = chat_with_user(user_message, tuple_history_for_api)
    history.append({"role": "assistant", "content": reply})
    return history
839
+
840
def handle_end_chat(history: List[Dict[str, str]]) -> Tuple[str, str, str]:
    """Extract search keywords and user requirements from the finished chat.

    Returns a 3-tuple: a comma-separated keyword string, a status message,
    and the user requirements text. The first and last are "" whenever
    extraction is not possible.
    """
    if not history:
        return "", "Status: Chat is empty, nothing to analyze.", ""

    # The keyword extractor works on the tuple form of the conversation.
    tuple_history = convert_messages_to_tuples(history)
    if not tuple_history:
        return "", "Status: No completed conversations to analyze.", ""

    raw_keywords_str = extract_keywords_from_conversation(tuple_history)

    # Keep only runs of word characters, whitespace and hyphens from the LLM
    # output, then discard fragments that are blank once trimmed.
    trimmed = (fragment.strip() for fragment in re.findall(r'[\w\s-]+', raw_keywords_str))
    keywords = [fragment for fragment in trimmed if fragment]

    if not keywords:
        return "", f"Status: Could not extract valid keywords. Raw LLM output: '{raw_keywords_str}'", ""

    return (
        ", ".join(keywords),
        "Status: Keywords extracted. User requirements saved for analysis.",
        extract_user_requirements_from_chat(history),
    )
871
+
872
def handle_dataframe_select(evt: gr.SelectData, df_data) -> Tuple[str, Any, Any, str, str, Any, str]:
    """Open the repository action modal when a repo ID cell (column 0) is clicked.

    Clicks on any other column are ignored because the full text is already
    shown in the table.

    Args:
        evt: Selection event; ``evt.index`` holds the (row, column) of the click.
        df_data: Current value of the dataframe that was clicked.

    Returns:
        A 7-tuple: (repo id for display, action-modal visibility update,
        no-op update, "", "", hidden text-modal update, repo id for state).
        Both repo-id slots are "" and the action modal is hidden whenever
        nothing actionable was selected or an error occurred.
    """

    def _outputs(repo_id: str = "", show_modal: bool = False):
        # Single place that builds the 7-tuple so every exit path stays in sync.
        return (
            repo_id,
            gr.update(visible=show_modal),
            gr.update(),
            "",
            "",
            gr.update(visible=False),
            repo_id,
        )

    logger.debug("Dataframe selection event: evt=%s, df_data type=%s", evt, type(df_data))

    if evt is None:
        return _outputs()

    try:
        row_idx = evt.index[0]
        col_idx = evt.index[1]
        logger.debug("Selected row %s, column %s", row_idx, col_idx)

        usable_frame = (
            isinstance(df_data, pd.DataFrame)
            and not df_data.empty
            and row_idx < len(df_data)
        )
        if not usable_frame:
            logger.debug("df_data is not a DataFrame or row_idx %s out of range", row_idx)
            return _outputs()

        if col_idx != 0:
            # Content and relevance columns need no action.
            logger.debug("Clicked on column %s, full text already shown in table", col_idx)
            return _outputs()

        repo_id = df_data.iloc[row_idx, 0]
        clean_repo_id = str(repo_id).strip()
        logger.debug("Extracted repo_id = %r", repo_id)

        # 'nan' is what a missing cell looks like once stringified.
        if repo_id and clean_repo_id and clean_repo_id != 'nan':
            logger.info(f"Showing modal for repository: {clean_repo_id}")
            return _outputs(clean_repo_id, show_modal=True)

    except Exception as e:
        # UI callback boundary: log with traceback and fall back to "nothing selected".
        logger.exception(f"Error handling dataframe selection: {e}")

    return _outputs()
911
+
912
def handle_analyze_all_repos(repo_ids: List[str], user_requirements: str, progress=gr.Progress()) -> Tuple[pd.DataFrame, str, pd.DataFrame, Any]:
    """Analyze every submitted repository, tracking progress, and build the result tables.

    Each repository is analyzed in turn. After each analysis the CSV is
    re-read to confirm that at least one analysis field was written for that
    repository; if not, the analysis is retried once when its summary reports
    a successful JSON extraction.

    Args:
        repo_ids: Repository IDs to analyze.
        user_requirements: Requirements text passed to the analysis and to the
            top-repository selection.
        progress: Gradio progress tracker.

    Returns:
        A 4-tuple: (display table of rows that have analysis data, status
        message, display table of the top 3 repositories, visibility update
        for the top-repositories section).
    """
    analysis_columns = ("strength", "weaknesses", "speciality", "relevance rating")

    def _is_filled(value: Any) -> bool:
        # A cell counts as populated when it is non-missing and not blank text.
        if value is None:
            return False
        if isinstance(value, str):
            return bool(value.strip())
        # Non-string cells (e.g. NaN from empty CSV fields) must not crash the check.
        return bool(pd.notna(value))

    def _repo_has_analysis(repo_id: str) -> bool:
        # Re-read the CSV and report whether this repo's row has any analysis field.
        # NOTE(review): repo IDs are de-duplicated on submission, so presumably at
        # most one row matches - confirm against write_repos_to_csv.
        for _, row in read_csv_to_dataframe().iterrows():
            if row["repo id"] != repo_id:
                continue
            if any(_is_filled(row.get(column, "")) for column in analysis_columns):
                return True
        return False

    if not repo_ids:
        return pd.DataFrame(), "Status: No repositories to analyze. Please submit repo IDs first.", pd.DataFrame(), gr.update(visible=False)

    total_repos = len(repo_ids)

    try:
        progress(0, desc="Initializing batch analysis...")

        successful_analyses = 0
        failed_analyses = 0
        csv_update_failures = 0

        for i, repo_id in enumerate(repo_ids):
            progress(i / total_repos, desc=f"Analyzing {repo_id} ({i+1}/{total_repos})")

            try:
                logger.info(f"Batch analysis: Processing {repo_id} ({i+1}/{total_repos})")

                _, summary, _ = analyze_and_update_single_repo(repo_id, user_requirements)

                if _repo_has_analysis(repo_id):
                    successful_analyses += 1
                else:
                    logger.warning(f"CSV update failed for {repo_id}, attempting retry...")
                    time.sleep(0.5)  # give any pending file write time to settle

                    retry_success = False
                    # Retry only when the analysis itself succeeded, i.e. the
                    # failure was in persisting the result.
                    if summary and "JSON extraction: SUCCESS" in summary:
                        logger.info(f"Attempting to re-update CSV for {repo_id}")
                        analyze_and_update_single_repo(repo_id, user_requirements)
                        retry_success = _repo_has_analysis(repo_id)

                    if retry_success:
                        successful_analyses += 1
                    else:
                        csv_update_failures += 1

                # Pause between repositories to prevent file conflicts.
                time.sleep(0.3)

            except Exception as e:
                # One bad repository must not abort the whole batch.
                logger.error(f"Error analyzing {repo_id}: {e}")
                failed_analyses += 1
                # Still wait to prevent rapid failures.
                time.sleep(0.2)

        progress(1.0, desc="Batch analysis completed!")

        updated_df = read_csv_to_dataframe()

        # Keep only rows that carry analysis data. Missing cells are treated as
        # blank so this filter agrees with the per-repository check above.
        has_analysis = pd.Series(False, index=updated_df.index)
        for column in analysis_columns:
            has_analysis |= updated_df[column].fillna("").astype(str).str.strip() != ""
        analyzed_df = updated_df[has_analysis]

        # Top-3 selection works on the full, unfiltered data.
        top_repos = get_top_relevant_repos(updated_df, user_requirements, top_n=3)

        final_status = f"🎉 Batch Analysis Complete!\n✅ Successful: {successful_analyses}/{total_repos}\n❌ Failed: {failed_analyses}/{total_repos}"
        if csv_update_failures > 0:
            final_status += f"\n⚠️ CSV Update Issues: {csv_update_failures}/{total_repos}"

        if not top_repos.empty:
            final_status += f"\n\n🏆 Top {len(top_repos)} most relevant repositories selected!"

        # The top-repositories section is shown only when there is something in it.
        show_top_section = gr.update(visible=not top_repos.empty)

        logger.info(f"Batch analysis completed: {successful_analyses} successful, {failed_analyses} failed, {csv_update_failures} CSV update issues")
        return format_dataframe_for_display(analyzed_df), final_status, format_dataframe_for_display(top_repos), show_top_section

    except Exception as e:
        logger.error(f"Error in batch analysis: {e}")
        error_status = f"❌ Batch analysis failed: {e}"
        return format_dataframe_for_display(read_csv_to_dataframe()), error_status, pd.DataFrame(), gr.update(visible=False)
1031
+
1032
def handle_visit_repo(repo_id: str) -> Tuple[Any, str]:
    """Close the action modal and return the Hugging Face Space URL for the repo.

    Returns a hidden-modal update together with the URL, or "" when no usable
    repo ID was supplied.
    """
    target = repo_id.strip() if repo_id else ""
    if not target:
        return gr.update(visible=False), ""

    hf_url = f"https://huggingface.co/spaces/{target}"
    logger.info(f"User chose to visit: {hf_url}")
    return gr.update(visible=False), hf_url
1039
+
1040
def handle_explore_repo(selected_repo_id: str) -> Tuple[Any, Any, Any]:
    """Close the modal, switch to the Repo Explorer tab, and pre-fill its repo ID.

    Returns three updates: hide the modal, select the repo explorer tab, and
    set the explorer input. The input is left unchanged when the selected ID
    is empty or the literal 'nan'.
    """
    logger.info(f"DEBUG: handle_explore_repo called with selected_repo_id: '{selected_repo_id}'")
    logger.info(f"DEBUG: selected_repo_id type: {type(selected_repo_id)}")
    logger.info(f"DEBUG: selected_repo_id length: {len(selected_repo_id) if selected_repo_id else 'None'}")

    clean_repo_id = selected_repo_id.strip() if selected_repo_id else ""
    if clean_repo_id and clean_repo_id != 'nan':
        explorer_input = gr.update(value=clean_repo_id)  # populate repo explorer input
    else:
        explorer_input = gr.update()  # don't change repo explorer input

    close_modal = gr.update(visible=False)
    switch_tab = gr.update(selected="repo_explorer_tab")
    return close_modal, switch_tab, explorer_input
1059
+
1060
def handle_cancel_modal() -> Any:
    """Return the update that hides the modal."""
    hide_modal = gr.update(visible=False)
    return hide_modal
1063
+
1064
def handle_close_text_modal() -> Any:
    """Return the update that hides the text expansion modal."""
    hide_text_modal = gr.update(visible=False)
    return hide_text_modal
1067
+
1068
def handle_reset_everything() -> Tuple[List[str], int, str, pd.DataFrame, pd.DataFrame, Any, Any, Any, List[Dict[str, str]], str, str, str]:
    """Reset everything to the initial state: delete the CSV and clear state and UI values.

    Returns:
        A 12-tuple, in order: repo IDs ([]), current repo index (0), user
        requirements (""), results table, top-repos table, three hidden
        updates (top-repos section, repo action modal, text expansion modal),
        chatbot history reset to the welcome message, status message, the
        requirements placeholder text, and extracted keywords ("").
        If deleting the CSV fails, the same shape is returned with a
        "Reset failed" status.
    """

    def _reset_outputs(status: str, table: pd.DataFrame):
        # Single definition of the output tuple so both exit paths stay in sync.
        return (
            [],                                                            # repo_ids_state
            0,                                                             # current_repo_idx_state
            "",                                                            # user_requirements_state
            table,                                                         # df_output
            table,                                                         # top_repos_df
            gr.update(visible=False),                                      # top_repos_section
            gr.update(visible=False),                                      # repo_action_modal
            gr.update(visible=False),                                      # text_expansion_modal
            [{"role": "assistant", "content": CHATBOT_INITIAL_MESSAGE}],   # chatbot
            status,                                                        # status_box_analysis
            "No requirements extracted yet.",                              # current_requirements_display
            "",                                                            # extracted_keywords_output
        )

    try:
        # Delete directly rather than exists()-then-remove(), which can race
        # with another deletion between the check and the call.
        os.remove(CSV_FILE)
        logger.info("CSV file deleted for reset")
    except FileNotFoundError:
        # No CSV on disk: there is nothing to clear and the reset still succeeds.
        pass
    except Exception as e:
        # UI callback boundary: report the failure in the status box instead of raising.
        logger.error(f"Error during reset: {e}")
        return _reset_outputs(f"Reset failed: {e}", pd.DataFrame())

    logger.info("Complete system reset performed")
    empty_df = pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
    return _reset_outputs("Status: Everything has been reset. Ready to start fresh!", empty_df)
1128
+
1129
+ # --- Component Event Wiring ---
1130
+
1131
+ # Initialize chatbot with welcome message on app load
1132
+ app.load(
1133
+ fn=lambda: [{"role": "assistant", "content": CHATBOT_INITIAL_MESSAGE}],
1134
+ outputs=[chatbot]
1135
+ )
1136
+
1137
+ # Input Tab
1138
+ submit_repo_btn.click(
1139
+ fn=handle_repo_id_submission,
1140
+ inputs=[repo_id_input],
1141
+ outputs=[repo_ids_state, current_repo_idx_state, df_output, status_box_analysis, tabs]
1142
+ )
1143
+ search_btn.click(
1144
+ fn=handle_keyword_search,
1145
+ inputs=[keyword_input],
1146
+ outputs=[repo_ids_state, current_repo_idx_state, df_output, status_box_analysis, tabs]
1147
+ )
1148
+
1149
+ # Analysis Tab
1150
+ analyze_all_btn.click(
1151
+ fn=lambda: None, # No need to show progress display since it's commented out
1152
+ outputs=[]
1153
+ ).then(
1154
+ fn=handle_analyze_all_repos,
1155
+ inputs=[repo_ids_state, user_requirements_state],
1156
+ outputs=[df_output, status_box_analysis, top_repos_df, top_repos_section]
1157
+ )
1158
+
1159
+ # Chatbot Tab
1160
+ msg_input.submit(
1161
+ fn=handle_user_message,
1162
+ inputs=[msg_input, chatbot],
1163
+ outputs=[chatbot, msg_input]
1164
+ ).then(
1165
+ fn=handle_bot_response,
1166
+ inputs=[chatbot],
1167
+ outputs=[chatbot]
1168
+ )
1169
+ send_btn.click(
1170
+ fn=handle_user_message,
1171
+ inputs=[msg_input, chatbot],
1172
+ outputs=[chatbot, msg_input]
1173
+ ).then(
1174
+ fn=handle_bot_response,
1175
+ inputs=[chatbot],
1176
+ outputs=[chatbot]
1177
+ )
1178
+ end_chat_btn.click(
1179
+ fn=handle_end_chat,
1180
+ inputs=[chatbot],
1181
+ outputs=[extracted_keywords_output, status_box_chatbot, user_requirements_state]
1182
+ ).then(
1183
+ fn=lambda req: req if req.strip() else "No specific requirements extracted from conversation.",
1184
+ inputs=[user_requirements_state],
1185
+ outputs=[current_requirements_display]
1186
+ )
1187
+ use_keywords_btn.click(
1188
+ fn=handle_keyword_search,
1189
+ inputs=[extracted_keywords_output],
1190
+ outputs=[repo_ids_state, current_repo_idx_state, df_output, status_box_analysis, tabs]
1191
+ )
1192
+
1193
+ # Repo Explorer Tab
1194
+ setup_repo_explorer_events(repo_components, repo_states)
1195
+
1196
+ # Modal button events
1197
+ visit_repo_btn.click(
1198
+ fn=handle_visit_repo,
1199
+ inputs=[selected_repo_display],
1200
+ outputs=[repo_action_modal, selected_repo_display],
1201
+ js="(repo_id) => { if(repo_id && repo_id.trim()) { window.open('https://huggingface.co/spaces/' + repo_id.trim(), '_blank'); } }"
1202
+ )
1203
+ explore_repo_btn.click(
1204
+ fn=handle_explore_repo,
1205
+ inputs=[selected_repo_id_state],
1206
+ outputs=[
1207
+ repo_action_modal,
1208
+ tabs,
1209
+ repo_components["repo_explorer_input"]
1210
+ ],
1211
+ js="""(repo_id) => {
1212
+ console.log('DEBUG: Navigate to repo explorer for:', repo_id);
1213
+ setTimeout(() => {
1214
+ window.scrollTo({top: 0, behavior: 'smooth'});
1215
+ }, 200);
1216
+ }"""
1217
+ )
1218
+ cancel_modal_btn.click(
1219
+ fn=handle_cancel_modal,
1220
+ outputs=[repo_action_modal]
1221
+ )
1222
+
1223
+ # Text expansion modal events
1224
+ close_text_modal_btn.click(
1225
+ fn=handle_close_text_modal,
1226
+ outputs=[text_expansion_modal]
1227
+ )
1228
+
1229
+ # Add dataframe selection event
1230
+ df_output.select(
1231
+ fn=handle_dataframe_select,
1232
+ inputs=[df_output],
1233
+ outputs=[selected_repo_display, repo_action_modal, tabs, expanded_content_title, expanded_content_text, text_expansion_modal, selected_repo_id_state]
1234
+ )
1235
+
1236
+ # Add selection event for top repositories dataframe too
1237
+ top_repos_df.select(
1238
+ fn=handle_dataframe_select,
1239
+ inputs=[top_repos_df],
1240
+ outputs=[selected_repo_display, repo_action_modal, tabs, expanded_content_title, expanded_content_text, text_expansion_modal, selected_repo_id_state]
1241
+ )
1242
+
1243
+ # Reset button event
1244
+ reset_all_btn.click(
1245
+ fn=handle_reset_everything,
1246
+ outputs=[repo_ids_state, current_repo_idx_state, user_requirements_state, df_output, top_repos_df, top_repos_section, repo_action_modal, text_expansion_modal, chatbot, status_box_analysis, current_requirements_display, extracted_keywords_output]
1247
+ )
1248
+
1249
+ return app
1250
+
1251
# Script entry point: build the Gradio UI and launch it with debug enabled
# when this file is run directly.
if __name__ == "__main__":
    app = create_ui()
    app.launch(debug=True)