naman1102 committed on
Commit
c9bd851
·
1 Parent(s): db1867d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -34
app.py CHANGED
@@ -165,17 +165,12 @@ def format_text_for_dataframe(text: str, max_length: int = 200) -> str:
165
  return text
166
 
167
  def read_csv_to_dataframe() -> pd.DataFrame:
168
- """Reads the CSV file into a pandas DataFrame with formatted text for display."""
169
  try:
170
  df = pd.read_csv(CSV_FILE, dtype=str).fillna('')
171
 
172
- # Format text columns for better display
173
- if not df.empty:
174
- df['repo id'] = df['repo id'].apply(lambda x: format_text_for_dataframe(x, 50))
175
- df['strength'] = df['strength'].apply(lambda x: format_text_for_dataframe(x, 180))
176
- df['weaknesses'] = df['weaknesses'].apply(lambda x: format_text_for_dataframe(x, 180))
177
- df['speciality'] = df['speciality'].apply(lambda x: format_text_for_dataframe(x, 150))
178
- # Keep relevance rating as is since it should be short
179
 
180
  return df
181
  except FileNotFoundError:
@@ -184,6 +179,24 @@ def read_csv_to_dataframe() -> pd.DataFrame:
184
  logger.error(f"Error reading CSV: {e}")
185
  return pd.DataFrame()
186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  def analyze_and_update_single_repo(repo_id: str, user_requirements: str = "") -> Tuple[str, str, pd.DataFrame]:
188
  """
189
  Downloads, analyzes a single repo, updates the CSV, and returns results.
@@ -248,12 +261,12 @@ def analyze_and_update_single_repo(repo_id: str, user_requirements: str = "") ->
248
  logger.error(f"Failed to write CSV for {repo_id} on retry: {retry_error}")
249
 
250
  logger.info(f"Successfully analyzed and updated CSV for {repo_id}")
251
- return combined_content, summary, df
252
 
253
  except Exception as e:
254
  logger.error(f"An error occurred during analysis of {repo_id}: {e}")
255
  error_summary = f"Error analyzing repo: {e}"
256
- return "", error_summary, read_csv_to_dataframe()
257
 
258
  # --- NEW: Helper for Chat History Conversion ---
259
  def convert_messages_to_tuples(history: List[Dict[str, str]]) -> List[Tuple[str, str]]:
@@ -526,6 +539,15 @@ def create_ui() -> gr.Blocks:
526
  </div>
527
  """
528
  )
 
 
 
 
 
 
 
 
 
529
 
530
  with gr.Tabs() as tabs:
531
  # --- Input Tab ---
@@ -569,7 +591,6 @@ def create_ui() -> gr.Blocks:
569
  with gr.Row():
570
  analyze_next_btn = gr.Button("⚡ Analyze Next Repository", variant="primary", size="lg", scale=1)
571
  analyze_all_btn = gr.Button("🚀 Analyze All Repositories", variant="secondary", size="lg", scale=1)
572
- reset_all_btn = gr.Button("🔄 Reset Everything", variant="stop", size="lg", scale=1)
573
  with gr.Column(scale=2):
574
  status_box_analysis = gr.Textbox(label="📈 Analysis Status", interactive=False, lines=2)
575
 
@@ -726,7 +747,7 @@ def create_ui() -> gr.Blocks:
726
 
727
  repo_ids = list(dict.fromkeys([repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]))
728
  write_repos_to_csv(repo_ids)
729
- df = read_csv_to_dataframe()
730
  status = f"Status: {len(repo_ids)} repositories submitted. Ready for analysis."
731
  return repo_ids, 0, df, status, gr.update(selected="analysis_tab")
732
 
@@ -742,7 +763,7 @@ def create_ui() -> gr.Blocks:
742
 
743
  unique_repo_ids = list(dict.fromkeys(repo_ids))
744
  write_repos_to_csv(unique_repo_ids)
745
- df = read_csv_to_dataframe()
746
  status = f"Status: Found {len(unique_repo_ids)} repositories. Ready for analysis."
747
  return unique_repo_ids, 0, df, status, gr.update(selected="analysis_tab")
748
 
@@ -768,7 +789,7 @@ def create_ui() -> gr.Blocks:
768
  if not repo_ids:
769
  return pd.DataFrame(), 0, "Status: No repositories to analyze. Please submit repo IDs first."
770
  if current_idx >= len(repo_ids):
771
- return read_csv_to_dataframe(), current_idx, "Status: All repositories have been analyzed."
772
 
773
  repo_id_to_analyze = repo_ids[current_idx]
774
  status = f"Status: Analyzing repository {current_idx + 1}/{len(repo_ids)}: {repo_id_to_analyze}"
@@ -856,24 +877,29 @@ def create_ui() -> gr.Blocks:
856
  # Handle pandas DataFrame
857
  if isinstance(df_data, pd.DataFrame) and not df_data.empty and row_idx < len(df_data):
858
 
859
- # Column mapping: 0=repo, 1=strength, 2=weakness, 3=speciality, 4=relevance
860
- if col_idx == 1: # Strengths column
861
- full_text = str(df_data.iloc[row_idx, 1])
862
- repo_name = str(df_data.iloc[row_idx, 0])
863
- title = f"Strengths - {repo_name}"
864
- return "", gr.update(visible=False), gr.update(), title, full_text, gr.update(visible=True)
865
-
866
- elif col_idx == 2: # Weaknesses column
867
- full_text = str(df_data.iloc[row_idx, 2])
868
- repo_name = str(df_data.iloc[row_idx, 0])
869
- title = f"Weaknesses - {repo_name}"
870
- return "", gr.update(visible=False), gr.update(), title, full_text, gr.update(visible=True)
871
-
872
- elif col_idx == 3: # Speciality column
873
- full_text = str(df_data.iloc[row_idx, 3])
874
  repo_name = str(df_data.iloc[row_idx, 0])
875
- title = f"Speciality - {repo_name}"
876
- return "", gr.update(visible=False), gr.update(), title, full_text, gr.update(visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
877
 
878
  elif col_idx == 0: # Repository name column - show action modal
879
  repo_id = df_data.iloc[row_idx, 0]
@@ -985,7 +1011,7 @@ def create_ui() -> gr.Blocks:
985
  # Get final updated dataframe
986
  updated_df = read_csv_to_dataframe()
987
 
988
- # Get top 3 most relevant repositories
989
  top_repos = get_top_relevant_repos(updated_df, user_requirements, top_n=3)
990
 
991
  # Final status with detailed breakdown
@@ -1001,12 +1027,12 @@ def create_ui() -> gr.Blocks:
1001
  show_top_section = gr.update(visible=not top_repos.empty)
1002
 
1003
  logger.info(f"Batch analysis completed: {successful_analyses} successful, {failed_analyses} failed, {csv_update_failures} CSV update issues")
1004
- return updated_df, final_status, top_repos, show_top_section
1005
 
1006
  except Exception as e:
1007
  logger.error(f"Error in batch analysis: {e}")
1008
  error_status = f"❌ Batch analysis failed: {e}"
1009
- return read_csv_to_dataframe(), error_status, pd.DataFrame(), gr.update(visible=False)
1010
 
1011
  def handle_visit_repo(repo_id: str) -> Tuple[Any, str]:
1012
  """Handle visiting the Hugging Face Space for the repository."""
 
165
  return text
166
 
167
  def read_csv_to_dataframe() -> pd.DataFrame:
168
+ """Reads the CSV file into a pandas DataFrame with full text preserved."""
169
  try:
170
  df = pd.read_csv(CSV_FILE, dtype=str).fillna('')
171
 
172
+ # Keep the full text intact - don't truncate here
173
+ # The truncation will be handled in the UI display layer
 
 
 
 
 
174
 
175
  return df
176
  except FileNotFoundError:
 
179
  logger.error(f"Error reading CSV: {e}")
180
  return pd.DataFrame()
181
 
182
+ def format_dataframe_for_display(df: pd.DataFrame) -> pd.DataFrame:
183
+ """Formats dataframe for display with truncated text while preserving original data."""
184
+ if df.empty:
185
+ return df
186
+
187
+ # Create a copy for display purposes
188
+ display_df = df.copy()
189
+
190
+ # Apply formatting only for display
191
+ if not display_df.empty:
192
+ display_df['repo id'] = display_df['repo id'].apply(lambda x: format_text_for_dataframe(x, 50))
193
+ display_df['strength'] = display_df['strength'].apply(lambda x: format_text_for_dataframe(x, 180))
194
+ display_df['weaknesses'] = display_df['weaknesses'].apply(lambda x: format_text_for_dataframe(x, 180))
195
+ display_df['speciality'] = display_df['speciality'].apply(lambda x: format_text_for_dataframe(x, 150))
196
+ # Keep relevance rating as is since it should be short
197
+
198
+ return display_df
199
+
200
  def analyze_and_update_single_repo(repo_id: str, user_requirements: str = "") -> Tuple[str, str, pd.DataFrame]:
201
  """
202
  Downloads, analyzes a single repo, updates the CSV, and returns results.
 
261
  logger.error(f"Failed to write CSV for {repo_id} on retry: {retry_error}")
262
 
263
  logger.info(f"Successfully analyzed and updated CSV for {repo_id}")
264
+ return combined_content, summary, format_dataframe_for_display(df)
265
 
266
  except Exception as e:
267
  logger.error(f"An error occurred during analysis of {repo_id}: {e}")
268
  error_summary = f"Error analyzing repo: {e}"
269
+ return "", error_summary, format_dataframe_for_display(read_csv_to_dataframe())
270
 
271
  # --- NEW: Helper for Chat History Conversion ---
272
  def convert_messages_to_tuples(history: List[Dict[str, str]]) -> List[Tuple[str, str]]:
 
539
  </div>
540
  """
541
  )
542
+
543
+ # Global Reset Button - visible on all tabs
544
+ with gr.Row():
545
+ with gr.Column(scale=4):
546
+ pass
547
+ with gr.Column(scale=1):
548
+ reset_all_btn = gr.Button("🔄 Reset Everything", variant="stop", size="lg")
549
+ with gr.Column(scale=1):
550
+ pass
551
 
552
  with gr.Tabs() as tabs:
553
  # --- Input Tab ---
 
591
  with gr.Row():
592
  analyze_next_btn = gr.Button("⚡ Analyze Next Repository", variant="primary", size="lg", scale=1)
593
  analyze_all_btn = gr.Button("🚀 Analyze All Repositories", variant="secondary", size="lg", scale=1)
 
594
  with gr.Column(scale=2):
595
  status_box_analysis = gr.Textbox(label="📈 Analysis Status", interactive=False, lines=2)
596
 
 
747
 
748
  repo_ids = list(dict.fromkeys([repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]))
749
  write_repos_to_csv(repo_ids)
750
+ df = format_dataframe_for_display(read_csv_to_dataframe())
751
  status = f"Status: {len(repo_ids)} repositories submitted. Ready for analysis."
752
  return repo_ids, 0, df, status, gr.update(selected="analysis_tab")
753
 
 
763
 
764
  unique_repo_ids = list(dict.fromkeys(repo_ids))
765
  write_repos_to_csv(unique_repo_ids)
766
+ df = format_dataframe_for_display(read_csv_to_dataframe())
767
  status = f"Status: Found {len(unique_repo_ids)} repositories. Ready for analysis."
768
  return unique_repo_ids, 0, df, status, gr.update(selected="analysis_tab")
769
 
 
789
  if not repo_ids:
790
  return pd.DataFrame(), 0, "Status: No repositories to analyze. Please submit repo IDs first."
791
  if current_idx >= len(repo_ids):
792
+ return format_dataframe_for_display(read_csv_to_dataframe()), current_idx, "Status: All repositories have been analyzed."
793
 
794
  repo_id_to_analyze = repo_ids[current_idx]
795
  status = f"Status: Analyzing repository {current_idx + 1}/{len(repo_ids)}: {repo_id_to_analyze}"
 
877
  # Handle pandas DataFrame
878
  if isinstance(df_data, pd.DataFrame) and not df_data.empty and row_idx < len(df_data):
879
 
880
+ # For text expansion, read full data from CSV to get untruncated text
881
+ if col_idx in [1, 2, 3]: # Strength, Weakness, or Speciality columns
 
 
 
 
 
 
 
 
 
 
 
 
 
882
  repo_name = str(df_data.iloc[row_idx, 0])
883
+
884
+ # Read full data from CSV file to get untruncated text
885
+ full_df = read_csv_to_dataframe()
886
+
887
+ # Find the matching row in the full data
888
+ matching_rows = full_df[full_df['repo id'] == repo_name]
889
+ if not matching_rows.empty:
890
+ full_row = matching_rows.iloc[0]
891
+
892
+ if col_idx == 1: # Strengths column
893
+ full_text = str(full_row['strength'])
894
+ title = f"Strengths - {repo_name}"
895
+ elif col_idx == 2: # Weaknesses column
896
+ full_text = str(full_row['weaknesses'])
897
+ title = f"Weaknesses - {repo_name}"
898
+ elif col_idx == 3: # Speciality column
899
+ full_text = str(full_row['speciality'])
900
+ title = f"Speciality - {repo_name}"
901
+
902
+ return "", gr.update(visible=False), gr.update(), title, full_text, gr.update(visible=True)
903
 
904
  elif col_idx == 0: # Repository name column - show action modal
905
  repo_id = df_data.iloc[row_idx, 0]
 
1011
  # Get final updated dataframe
1012
  updated_df = read_csv_to_dataframe()
1013
 
1014
+ # Get top 3 most relevant repositories using full data
1015
  top_repos = get_top_relevant_repos(updated_df, user_requirements, top_n=3)
1016
 
1017
  # Final status with detailed breakdown
 
1027
  show_top_section = gr.update(visible=not top_repos.empty)
1028
 
1029
  logger.info(f"Batch analysis completed: {successful_analyses} successful, {failed_analyses} failed, {csv_update_failures} CSV update issues")
1030
+ return format_dataframe_for_display(updated_df), final_status, format_dataframe_for_display(top_repos), show_top_section
1031
 
1032
  except Exception as e:
1033
  logger.error(f"Error in batch analysis: {e}")
1034
  error_status = f"❌ Batch analysis failed: {e}"
1035
+ return format_dataframe_for_display(read_csv_to_dataframe()), error_status, pd.DataFrame(), gr.update(visible=False)
1036
 
1037
  def handle_visit_repo(repo_id: str) -> Tuple[Any, str]:
1038
  """Handle visiting the Hugging Face Space for the repository."""