gloignon committed on
Commit
f926cf7
·
verified ·
1 Parent(s): bddd5ad

fixed keyword removal bug

Browse files
Files changed (1) hide show
  1. app.py +17 -12
app.py CHANGED
@@ -68,38 +68,43 @@ def process_zip_and_search(keywords_text, search_mode):
68
  global raw_corpus, lemmatized_corpus, initial_df # Use the texts stored at corpus upload and initial DataFrame
69
 
70
  # Read the keywords (no lemmatization of keywords)
71
- keywords = [(keyword.strip().lower()) for keyword in keywords_text.strip().split("\n") if keyword.strip()]
72
-
 
 
 
 
73
  # Select the appropriate corpus based on the search mode
74
  corpus = lemmatized_corpus if search_mode == "Lemmes" else raw_corpus
75
-
76
  # Prepare a dictionary to store the results (initialize with Document Name and empty results)
77
  results = {doc_name: {keyword: "" for keyword in keywords} for doc_name in corpus.keys()}
78
-
79
  # Search for keyword frequencies in each text file
80
  for doc_name, text in corpus.items():
81
  for keyword in keywords:
82
  keyword_count = text.count(keyword) # Count occurrences of each keyword
83
  if keyword_count > 0:
84
  results[doc_name][keyword] = keyword_count
85
-
86
  # Convert the results dictionary to a DataFrame
87
  df_keywords = pd.DataFrame(results).T # Transpose to have files as rows and keywords as columns
88
-
89
  # Reset index to make the document names a column
90
  df_keywords.reset_index(inplace=True)
91
-
92
  # Rename the first column to 'Nom du document'
93
  df_keywords.rename(columns={"index": "Nom du document"}, inplace=True)
94
-
95
  # Replace 0 frequencies with empty strings
96
  df_keywords.replace(0, "", inplace=True)
97
-
98
  # Merge the initial DataFrame with the keyword search results
99
  final_df = pd.merge(initial_df, df_keywords, on="Nom du document", how="left")
100
-
101
  return final_df
102
 
 
103
  # Function to export the DataFrame to Excel
104
  def export_to_excel(df):
105
  # Create a temporary directory for storing the Excel file
@@ -107,7 +112,7 @@ def export_to_excel(df):
107
  excel_path = tmp.name
108
  # Save the DataFrame to Excel
109
  df.to_excel(excel_path, index=False)
110
- return excel_path
111
 
112
  # Create Gradio interface with one results table and export functionality
113
  with gr.Blocks() as demo:
@@ -148,4 +153,4 @@ with gr.Blocks() as demo:
148
  export_button.click(fn=export_to_excel, inputs=result_table, outputs=download_link)
149
 
150
  # Launch the app
151
- demo.launch()
 
68
  global raw_corpus, lemmatized_corpus, initial_df # Use the texts stored at corpus upload and initial DataFrame
69
 
70
  # Read the keywords (no lemmatization of keywords)
71
+ keywords = [keyword.strip().lower() for keyword in keywords_text.strip().split("\n") if keyword.strip()]
72
+
73
+ if not keywords:
74
+ # If no keywords are provided, return the initial DataFrame (without the keyword columns)
75
+ return initial_df
76
+
77
  # Select the appropriate corpus based on the search mode
78
  corpus = lemmatized_corpus if search_mode == "Lemmes" else raw_corpus
79
+
80
  # Prepare a dictionary to store the results (initialize with Document Name and empty results)
81
  results = {doc_name: {keyword: "" for keyword in keywords} for doc_name in corpus.keys()}
82
+
83
  # Search for keyword frequencies in each text file
84
  for doc_name, text in corpus.items():
85
  for keyword in keywords:
86
  keyword_count = text.count(keyword) # Count occurrences of each keyword
87
  if keyword_count > 0:
88
  results[doc_name][keyword] = keyword_count
89
+
90
  # Convert the results dictionary to a DataFrame
91
  df_keywords = pd.DataFrame(results).T # Transpose to have files as rows and keywords as columns
92
+
93
  # Reset index to make the document names a column
94
  df_keywords.reset_index(inplace=True)
95
+
96
  # Rename the first column to 'Nom du document'
97
  df_keywords.rename(columns={"index": "Nom du document"}, inplace=True)
98
+
99
  # Replace 0 frequencies with empty strings
100
  df_keywords.replace(0, "", inplace=True)
101
+
102
  # Merge the initial DataFrame with the keyword search results
103
  final_df = pd.merge(initial_df, df_keywords, on="Nom du document", how="left")
104
+
105
  return final_df
106
 
107
+
108
  # Function to export the DataFrame to Excel
109
  def export_to_excel(df):
110
  # Create a temporary directory for storing the Excel file
 
112
  excel_path = tmp.name
113
  # Save the DataFrame to Excel
114
  df.to_excel(excel_path, index=False)
115
+ return excel_path
116
 
117
  # Create Gradio interface with one results table and export functionality
118
  with gr.Blocks() as demo:
 
153
  export_button.click(fn=export_to_excel, inputs=result_table, outputs=download_link)
154
 
155
  # Launch the app
156
+ demo.launch()