Spaces:

gloignon
/

taupinet

Runtime error

App Files Files Community

gloignon committed on Oct 11, 2024

Commit

f926cf7

verified ·

1 Parent(s): bddd5ad

fixed keyword removal bug

Browse files

Files changed (1) hide show

app.py +17 -12

app.py CHANGED Viewed

@@ -68,38 +68,43 @@ def process_zip_and_search(keywords_text, search_mode):
     global raw_corpus, lemmatized_corpus, initial_df  # Use the texts stored at corpus upload and initial DataFrame
     # Read the keywords (no lemmatization of keywords)
-    keywords = [(keyword.strip().lower()) for keyword in keywords_text.strip().split("\n") if keyword.strip()]
     # Select the appropriate corpus based on the search mode
     corpus = lemmatized_corpus if search_mode == "Lemmes" else raw_corpus
     # Prepare a dictionary to store the results (initialize with Document Name and empty results)
     results = {doc_name: {keyword: "" for keyword in keywords} for doc_name in corpus.keys()}
     # Search for keyword frequencies in each text file
     for doc_name, text in corpus.items():
         for keyword in keywords:
             keyword_count = text.count(keyword)  # Count occurrences of each keyword
             if keyword_count > 0:
                 results[doc_name][keyword] = keyword_count
     # Convert the results dictionary to a DataFrame
     df_keywords = pd.DataFrame(results).T  # Transpose to have files as rows and keywords as columns
     # Reset index to make the document names a column
     df_keywords.reset_index(inplace=True)
     # Rename the first column to 'Nom du document'
     df_keywords.rename(columns={"index": "Nom du document"}, inplace=True)
     # Replace 0 frequencies with empty strings
     df_keywords.replace(0, "", inplace=True)
     # Merge the initial DataFrame with the keyword search results
     final_df = pd.merge(initial_df, df_keywords, on="Nom du document", how="left")
     return final_df
 # Function to export the DataFrame to Excel
 def export_to_excel(df):
     # Create a temporary directory for storing the Excel file
@@ -107,7 +112,7 @@ def export_to_excel(df):
         excel_path = tmp.name
         # Save the DataFrame to Excel
         df.to_excel(excel_path, index=False)
-    return excel_path
 # Create Gradio interface with one results table and export functionality
 with gr.Blocks() as demo:
@@ -148,4 +153,4 @@ with gr.Blocks() as demo:
     export_button.click(fn=export_to_excel, inputs=result_table, outputs=download_link)
 # Launch the app
-demo.launch()

     global raw_corpus, lemmatized_corpus, initial_df  # Use the texts stored at corpus upload and initial DataFrame
     # Read the keywords (no lemmatization of keywords)
+    keywords = [keyword.strip().lower() for keyword in keywords_text.strip().split("\n") if keyword.strip()]
+    if not keywords:
+        # If no keywords are provided, return the initial DataFrame (without the keyword columns)
+        return initial_df
     # Select the appropriate corpus based on the search mode
     corpus = lemmatized_corpus if search_mode == "Lemmes" else raw_corpus
     # Prepare a dictionary to store the results (initialize with Document Name and empty results)
     results = {doc_name: {keyword: "" for keyword in keywords} for doc_name in corpus.keys()}
     # Search for keyword frequencies in each text file
     for doc_name, text in corpus.items():
         for keyword in keywords:
             keyword_count = text.count(keyword)  # Count occurrences of each keyword
             if keyword_count > 0:
                 results[doc_name][keyword] = keyword_count
     # Convert the results dictionary to a DataFrame
     df_keywords = pd.DataFrame(results).T  # Transpose to have files as rows and keywords as columns
     # Reset index to make the document names a column
     df_keywords.reset_index(inplace=True)
     # Rename the first column to 'Nom du document'
     df_keywords.rename(columns={"index": "Nom du document"}, inplace=True)
     # Replace 0 frequencies with empty strings
     df_keywords.replace(0, "", inplace=True)
     # Merge the initial DataFrame with the keyword search results
     final_df = pd.merge(initial_df, df_keywords, on="Nom du document", how="left")
     return final_df
 # Function to export the DataFrame to Excel
 def export_to_excel(df):
     # Create a temporary directory for storing the Excel file
         excel_path = tmp.name
         # Save the DataFrame to Excel
         df.to_excel(excel_path, index=False)
+    return excel_path
 # Create Gradio interface with one results table and export functionality
 with gr.Blocks() as demo:
     export_button.click(fn=export_to_excel, inputs=result_table, outputs=download_link)
 # Launch the app
+demo.launch()