naman1102 committed on
Commit 9d332ff · 1 Parent(s): adcb6a8
Files changed (3)
  1. app.py +10 -1
  2. app_old.py +0 -481
  3. hf_utils.py +4 -0
app.py CHANGED
@@ -977,6 +977,15 @@ def create_ui() -> gr.Blocks:
             # Get final updated dataframe
             updated_df = read_csv_to_dataframe()
 
+            # Filter out rows with no analysis data for consistent display with top 3
+            analyzed_df = updated_df.copy()
+            analyzed_df = analyzed_df[
+                (analyzed_df['strength'].str.strip() != '') |
+                (analyzed_df['weaknesses'].str.strip() != '') |
+                (analyzed_df['speciality'].str.strip() != '') |
+                (analyzed_df['relevance rating'].str.strip() != '')
+            ]
+
             # Get top 3 most relevant repositories using full data
             top_repos = get_top_relevant_repos(updated_df, user_requirements, top_n=3)
 
@@ -993,7 +1002,7 @@ def create_ui() -> gr.Blocks:
             show_top_section = gr.update(visible=not top_repos.empty)
 
             logger.info(f"Batch analysis completed: {successful_analyses} successful, {failed_analyses} failed, {csv_update_failures} CSV update issues")
-            return format_dataframe_for_display(updated_df), final_status, format_dataframe_for_display(top_repos), show_top_section
+            return format_dataframe_for_display(analyzed_df), final_status, format_dataframe_for_display(top_repos), show_top_section
 
         except Exception as e:
             logger.error(f"Error in batch analysis: {e}")
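Note: the new filter hides rows whose four analysis columns are all blank from the results table, while the unfiltered updated_df still feeds get_top_relevant_repos. Below is a minimal, self-contained sketch of that behavior — the sample frame is invented, and the fillna("") guard is an extra precaution added here (a bare .str.strip() comparison would propagate NaN if the CSV reader ever returned missing values instead of ""):

import pandas as pd

# Illustrative data, not the Space's CSV
df = pd.DataFrame({
    "repo id": ["a/space-1", "b/space-2", "c/space-3"],
    "strength": ["fast startup", "", ""],
    "weaknesses": ["", "", ""],
    "speciality": ["", "audio demos", ""],
    "relevance rating": ["", "", ""],
})

# Keep a row if at least one analysis column is non-blank after stripping whitespace
analysis_cols = ["strength", "weaknesses", "speciality", "relevance rating"]
mask = pd.Series(False, index=df.index)
for col in analysis_cols:
    mask |= df[col].fillna("").str.strip() != ""

analyzed_df = df[mask]
print(analyzed_df["repo id"].tolist())  # ['a/space-1', 'b/space-2']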
app_old.py DELETED
@@ -1,481 +0,0 @@
- import gradio as gr
- import regex as re
- import csv
- import pandas as pd
- from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
- from hf_utils import download_filtered_space_files, search_top_spaces
- from chatbot_page import chat_with_user, extract_keywords_from_conversation
- # Import chatbot logic
- from analyzer import analyze_code
-
- # Chatbot system prompt
- CHATBOT_SYSTEM_PROMPT = (
-     "You are a helpful assistant. Your goal is to help the user describe their ideal open-source repo. "
-     "Ask questions to clarify what they want, their use case, preferred language, features, etc. "
-     "When the user clicks 'End Chat', analyze the conversation and return about 5 keywords for repo search. "
-     "Return only the keywords as a comma-separated list."
- )
-
- # Initial assistant message for chatbot
- CHATBOT_INITIAL_MESSAGE = "Hello! Please tell me about your ideal Hugging Face repo. What use case, preferred language, or features are you looking for?"
-
- def read_csv_as_text(csv_filename):
-     return pd.read_csv(csv_filename, dtype=str)
-
- def process_repo_input(text):
-     if not text:
-         return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-     # Split by newlines and commas, strip whitespace
-     repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
-     # Write to CSV
-     csv_filename = "repo_ids.csv"
-     with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
-         writer = csv.writer(csvfile)
-         writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-         for repo_id in repo_ids:
-             writer.writerow([repo_id, "", "", "", ""])
-     # Read the CSV into a DataFrame to display
-     df = read_csv_as_text(csv_filename)
-     return df
-
- # Store the last entered repo ids and the current index in global variables for button access
- last_repo_ids = []
- current_repo_idx = 0
-
- # Store extracted keywords for the chatbot flow
- generated_keywords = []
-
- def process_repo_input_and_store(text):
-     global last_repo_ids, current_repo_idx
-     if not text:
-         last_repo_ids = []
-         current_repo_idx = 0
-         return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-     repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
-     last_repo_ids = repo_ids
-     current_repo_idx = 0
-     csv_filename = "repo_ids.csv"
-     with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
-         writer = csv.writer(csvfile)
-         writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-         for repo_id in repo_ids:
-             writer.writerow([repo_id, "", "", "", ""])
-     df = read_csv_as_text(csv_filename)
-     return df
-
- def keyword_search_and_update(keyword):
-     global last_repo_ids, current_repo_idx
-     if not keyword:
-         return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-     # Accept multiple keywords, comma or newline separated
-     keyword_list = [k.strip() for k in re.split(r'[\n,]+', keyword) if k.strip()]
-     repo_ids = []
-     for kw in keyword_list:
-         repo_ids.extend(search_top_spaces(kw, limit=5))
-     # Remove duplicates while preserving order
-     seen = set()
-     unique_repo_ids = []
-     for rid in repo_ids:
-         if rid not in seen:
-             unique_repo_ids.append(rid)
-             seen.add(rid)
-     last_repo_ids = unique_repo_ids
-     current_repo_idx = 0
-     csv_filename = "repo_ids.csv"
-     with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
-         writer = csv.writer(csvfile)
-         writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-         for repo_id in unique_repo_ids:
-             writer.writerow([repo_id, "", "", "", ""])
-     df = read_csv_as_text(csv_filename)
-     return df
-
- def show_combined_repo_and_llm():
-     global current_repo_idx
-     if not last_repo_ids:
-         return "No repo ID available. Please submit repo IDs first.", "", pd.DataFrame()
-     if current_repo_idx >= len(last_repo_ids):
-         return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")
-     repo_id = last_repo_ids[current_repo_idx]
-     try:
-         download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
-     except Exception as e:
-         return f"Error downloading repo: {e}", "", read_csv_as_text("repo_ids.csv")
-     txt_path = combine_repo_files_for_llm()
-     try:
-         with open(txt_path, "r", encoding="utf-8") as f:
-             combined_content = f.read()
-     except Exception as e:
-         return f"Error reading {txt_path}: {e}", "", read_csv_as_text("repo_ids.csv")
-     llm_output = analyze_combined_file(txt_path)
-     # Extract only the last JSON object (final summary) for CSV writing
-     last_start = llm_output.rfind('{')
-     last_end = llm_output.rfind('}')
-     if last_start != -1 and last_end != -1 and last_end > last_start:
-         final_json_str = llm_output[last_start:last_end+1]
-     else:
-         final_json_str = llm_output
-     llm_json = parse_llm_json_response(final_json_str)
-     # Update CSV for the current repo id
-     csv_filename = "repo_ids.csv"
-     extraction_status = ""
-     strengths = ""
-     weaknesses = ""
-     try:
-         df = read_csv_as_text(csv_filename)
-         for col in ["strength", "weaknesses", "speciality", "relevance rating"]:
-             df[col] = df[col].astype(str)
-         updated = False
-         for idx, row in df.iterrows():
-             if row["repo id"] == repo_id:
-                 if isinstance(llm_json, dict) and "error" not in llm_json:
-                     extraction_status = "JSON extraction: SUCCESS"
-                     strengths = llm_json.get("strength", "")
-                     weaknesses = llm_json.get("weaknesses", "")
-                     df.at[idx, "strength"] = strengths
-                     df.at[idx, "weaknesses"] = weaknesses
-                     df.at[idx, "speciality"] = llm_json.get("speciality", "")
-                     df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
-                     updated = True
-                 else:
-                     extraction_status = f"JSON extraction: FAILED\nRaw: {llm_json.get('raw', '') if isinstance(llm_json, dict) else llm_json}"
-                 break
-         # If not updated (repo_id not found), append a new row
-         if not updated and isinstance(llm_json, dict) and "error" not in llm_json:
-             extraction_status = "JSON extraction: SUCCESS (new row)"
-             strengths = llm_json.get("strength", "")
-             weaknesses = llm_json.get("weaknesses", "")
-             new_row = {
-                 "repo id": repo_id,
-                 "strength": strengths,
-                 "weaknesses": weaknesses,
-                 "speciality": llm_json.get("speciality", ""),
-                 "relevance rating": llm_json.get("relevance rating", "")
-             }
-             df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
-         df.to_csv(csv_filename, index=False)
-     except Exception as e:
-         df = read_csv_as_text(csv_filename)
-         extraction_status = f"CSV update error: {e}"
-     # Move to next repo for next click
-     current_repo_idx += 1
-     summary = f"{extraction_status}\n\nStrengths:\n{strengths}\n\nWeaknesses:\n{weaknesses}"
-     return combined_content, summary, df
-
- def go_to_analysis():
-     return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
-
- def go_to_input():
-     return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
-
- def go_to_chatbot():
-     return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
-
- def go_to_start():
-     return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
-
- def go_to_results():
-     return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
-
- repo_id_input = gr.Textbox(label="Enter repo IDs (comma or newline separated)", lines=5, placeholder="repo1, repo2\nrepo3")
- df_output = gr.Dataframe(headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating", "Usecase"],
-                          datatype=["str", "str", "str", "str", "str", "str"]
- )
-
-
-
-
- def use_keywords_to_search_and_update_csv(keywords):
-     global last_repo_ids, current_repo_idx
-     if not keywords:
-         return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-     # Split keywords and search for each
-     keyword_list = [k.strip() for k in keywords.split(",") if k.strip()]
-     repo_ids = []
-     for kw in keyword_list:
-         repo_ids.extend(search_top_spaces(kw, limit=3))  # limit=3 per keyword
-     # Remove duplicates while preserving order
-     seen = set()
-     unique_repo_ids = []
-     for rid in repo_ids:
-         if rid not in seen:
-             unique_repo_ids.append(rid)
-             seen.add(rid)
-     last_repo_ids = unique_repo_ids
-     current_repo_idx = 0
-     csv_filename = "repo_ids.csv"
-     with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
-         writer = csv.writer(csvfile)
-         writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
-         for repo_id in unique_repo_ids:
-             writer.writerow([repo_id, "", "", "", ""])
-     df = read_csv_as_text(csv_filename)
-     return df
-
- def batch_analyze_and_select_top():
-     csv_filename = "repo_ids.csv"
-     try:
-         df = read_csv_as_text(csv_filename)
-         all_infos = []
-         # Analyze each repo and update CSV
-         for idx, row in df.iterrows():
-             repo_id = row["repo id"]
-             try:
-                 download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
-                 txt_path = combine_repo_files_for_llm()
-                 llm_output = analyze_combined_file(txt_path)
-                 last_start = llm_output.rfind('{')
-                 last_end = llm_output.rfind('}')
-                 if last_start != -1 and last_end != -1 and last_end > last_start:
-                     final_json_str = llm_output[last_start:last_end+1]
-                 else:
-                     final_json_str = llm_output
-                 llm_json = parse_llm_json_response(final_json_str)
-                 if isinstance(llm_json, dict) and "error" not in llm_json:
-                     df.at[idx, "strength"] = llm_json.get("strength", "")
-                     df.at[idx, "weaknesses"] = llm_json.get("weaknesses", "")
-                     df.at[idx, "speciality"] = llm_json.get("speciality", "")
-                     df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
-                     all_infos.append({"repo id": repo_id, **llm_json})
-             except Exception as e:
-                 all_infos.append({"repo id": repo_id, "error": str(e)})
-         df.to_csv(csv_filename, index=False)
-         # Display all info
-         all_info_str = "\n\n".join([str(info) for info in all_infos])
-         # Let LLM choose the best 3
-         from openai import OpenAI
-         import os
-         client = OpenAI(api_key=os.getenv("modal_api"))
-         client.base_url = os.getenv("base_url")
-         selection_prompt = (
-             "You are a helpful assistant. You are given a list of repo analyses in JSON format. "
-             "Choose the 3 repos that are the most impressive, relevant, or useful. "
-             "Return ONLY a JSON array of the 3 best repo ids, in order of preference, under the key 'top_repos'. "
-             "Example: {\"top_repos\": [\"repo1\", \"repo2\", \"repo3\"]}"
-         )
-         user_content = "Here are the repo analyses:\n" + all_info_str
-         response = client.chat.completions.create(
-             model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
-             messages=[
-                 {"role": "system", "content": selection_prompt},
-                 {"role": "user", "content": user_content}
-             ],
-             max_tokens=256,
-             temperature=0.3
-         )
-         selection_json = parse_llm_json_response(response.choices[0].message.content)
-         top_repos = selection_json.get("top_repos", [])
-         return all_info_str, str(top_repos), df
-     except Exception as e:
-         return f"Error in batch analysis: {e}", "", pd.DataFrame()
-
- def batch_analyze_and_select_top_for_chat(state):
-     csv_filename = "repo_ids.csv"
-     try:
-         df = read_csv_as_text(csv_filename)
-         all_infos = []
-         for idx, row in df.iterrows():
-             repo_id = row["repo id"]
-             try:
-                 download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
-                 txt_path = combine_repo_files_for_llm()
-                 llm_output = analyze_combined_file(txt_path)
-                 last_start = llm_output.rfind('{')
-                 last_end = llm_output.rfind('}')
-                 if last_start != -1 and last_end != -1 and last_end > last_start:
-                     final_json_str = llm_output[last_start:last_end+1]
-                 else:
-                     final_json_str = llm_output
-                 llm_json = parse_llm_json_response(final_json_str)
-                 if isinstance(llm_json, dict) and "error" not in llm_json:
-                     df.at[idx, "strength"] = llm_json.get("strength", "")
-                     df.at[idx, "weaknesses"] = llm_json.get("weaknesses", "")
-                     df.at[idx, "speciality"] = llm_json.get("speciality", "")
-                     df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
-                     all_infos.append({"repo id": repo_id, **llm_json})
-             except Exception as e:
-                 all_infos.append({"repo id": repo_id, "error": str(e)})
-         df.to_csv(csv_filename, index=False)
-         all_info_str = "\n\n".join([str(info) for info in all_infos])
-         from openai import OpenAI
-         import os
-         client = OpenAI(api_key=os.getenv("modal_api"))
-         client.base_url = os.getenv("base_url")
-         selection_prompt = (
-             "You are a helpful assistant. You are given a list of repo analyses in JSON format. "
-             "Choose the 3 repos that are the most impressive, relevant, or useful. "
-             "Return ONLY a JSON array of the 3 best repo ids, in order of preference, under the key 'top_repos'. "
-             "Example: {\"top_repos\": [\"repo1\", \"repo2\", \"repo3\"]}"
-         )
-         user_content = "Here are the repo analyses:\n" + all_info_str
-         response = client.chat.completions.create(
-             model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
-             messages=[
-                 {"role": "system", "content": selection_prompt},
-                 {"role": "user", "content": user_content}
-             ],
-             max_tokens=256,
-             temperature=0.3
-         )
-         selection_json = parse_llm_json_response(response.choices[0].message.content)
-         top_repos = selection_json.get("top_repos", [])
-         # Add a new assistant message to the chat state
-         new_message = ("", f"The top 3 repo IDs are: {', '.join(top_repos)}")
-         if state is None:
-             state = []
-         state = state + [list(new_message)]
-         return state
-     except Exception as e:
-         new_message = ("", f"Error in batch analysis: {e}")
-         if state is None:
-             state = []
-         state = state + [list(new_message)]
-         return state
-
- with gr.Blocks() as demo:
-     page_state = gr.State(0)
-
-     # --- Start Page: Option Selection ---
-     with gr.Column(visible=True) as start_page:
-         gr.Markdown("## Welcome! How would you like to proceed?")
-         option_a_btn = gr.Button("A) I know which repos I want to search and research about")
-         option_b_btn = gr.Button("B) I don't know exactly what I want (Chatbot)")
-
-     # --- Page 1: Input ---
-     with gr.Column(visible=False) as input_page:
-         gr.Markdown("## Enter Keyword or Repo IDs")
-         keyword_input = gr.Textbox(label="Enter keywords to search repos (comma or newline separated)", lines=2, placeholder="e.g. audio, vision\ntext")
-         keyword_btn = gr.Button("Search and Update Repo List")
-         repo_id_box = repo_id_input.render()
-         df_box = df_output.render()
-         submit_btn = gr.Button("Submit Repo IDs")
-         next_btn = gr.Button("Next: Go to Analysis")
-         back_to_start_btn = gr.Button("Back to Start")
-
-     # --- Page 2: Analysis ---
-     with gr.Column(visible=False) as analysis_page:
-         gr.Markdown("## Combine and Display Repo Files")
-         combine_btn = gr.Button("Download, Combine & Show .py/.md Files from Next Repo and Analyze")
-         combined_txt = gr.Textbox(label="Combined Repo Files", lines=20)
-         llm_output_txt = gr.Textbox(label="LLM Analysis Output", lines=10)
-         df_display = gr.Dataframe(
-             headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating", "Usecase"],
-             datatype=["str", "str", "str", "str", "str", "str"]
-         )
-         back_btn = gr.Button("Back to Input")
-         back_to_start_btn2 = gr.Button("Back to Start")
-
-     # --- Page 3: Chatbot ---
-     with gr.Column(visible=False) as chatbot_page:
-         gr.Markdown("## Repo Recommendation Chatbot")
-         chatbot = gr.Chatbot()
-         state = gr.State([])
-         user_input = gr.Textbox(label="Your message", placeholder="Describe your ideal repo or answer the assistant's questions...")
-         send_btn = gr.Button("Send")
-         end_btn = gr.Button("End Chat and Extract Keywords")
-         keywords_output = gr.Textbox(label="Extracted Keywords for Repo Search", interactive=False)
-         go_to_results_btn = gr.Button("Find Repos with These Keywords")
-         back_to_start_btn3 = gr.Button("Back to Start")
-
-     # --- Page 4: Results after Chatbot ---
-     with gr.Column(visible=False) as results_page:
-         gr.Markdown("## Repo Results Based on Your Conversation")
-         results_df = gr.Dataframe(
-             headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating", "Usecase"],
-             datatype=["str", "str", "str", "str", "str", "str"]
-         )
-         analyze_next_btn = gr.Button("Download, Combine & Analyze Next Repo")
-         combined_txt_results = gr.Textbox(label="Combined Repo Files", lines=20)
-         llm_output_txt_results = gr.Textbox(label="LLM Analysis Output", lines=10)
-         back_to_start_btn4 = gr.Button("Back to Start")
-         go_to_batch_btn = gr.Button("Go to Batch Analysis Page", visible=True)
-
-     # --- Page 5: Batch Analysis Page ---
-     with gr.Column(visible=False) as batch_page:
-         gr.Markdown("## Batch Analysis & Top 3 Selection")
-         batch_btn = gr.Button("Batch Analyze All & Select Top 3", visible=True)
-         batch_info_txt = gr.Textbox(label="All Repo Analyses", lines=10)
-         top3_txt = gr.Textbox(label="Top 3 Repo IDs", lines=1)
-         show_top3_chat_btn = gr.Button("Show Top 3 Repo IDs in Chat", visible=True)
-         show_top3_page_btn = gr.Button("Show Top 3 Repos on New Page", visible=True)
-         back_to_results_from_batch_btn = gr.Button("Back to Results")
-
-     # --- Page 6: Top 3 Repos Page ---
-     with gr.Column(visible=False) as top3_page:
-         gr.Markdown("## Top 3 Recommended Repos")
-         top3_df = gr.Dataframe(headers=["repo id"], datatype=["str"])
-         back_to_results_btn = gr.Button("Back to Results")
-
-     # Navigation logic
-     option_a_btn.click(go_to_input, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-     option_b_btn.click(
-         lambda: (gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), [["", CHATBOT_INITIAL_MESSAGE]]),
-         inputs=None,
-         outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page, state]
-     )
-     next_btn.click(go_to_analysis, inputs=None, outputs=[input_page, analysis_page, chatbot_page, results_page, batch_page, top3_page])
-     back_btn.click(go_to_input, inputs=None, outputs=[input_page, analysis_page, chatbot_page, results_page, batch_page, top3_page])
-     back_to_start_btn.click(go_to_start, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-     back_to_start_btn2.click(go_to_start, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-     back_to_start_btn3.click(go_to_start, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-     back_to_start_btn4.click(go_to_start, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-     go_to_batch_btn.click(lambda: (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)), inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-     back_to_results_from_batch_btn.click(lambda: (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)), inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-     back_to_results_btn.click(lambda: (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)), inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page])
-
-     # Keyword and repo input logic
-     keyword_btn.click(keyword_search_and_update, inputs=keyword_input, outputs=df_box)
-     submit_btn.click(process_repo_input_and_store, inputs=repo_id_box, outputs=df_box)
-
-     # Analysis logic
-     combine_btn.click(show_combined_repo_and_llm, inputs=None, outputs=[combined_txt, llm_output_txt, df_display])
-
-     # Chatbot logic
-     def user_send(user_message, history):
-         assistant_reply = chat_with_user(user_message, history)
-         history = history + [[user_message, assistant_reply]]
-         return history, history, ""
-
-     def end_chat(history):
-         keywords = extract_keywords_from_conversation(history)
-         global generated_keywords
-         generated_keywords.clear()
-         generated_keywords.extend([k.strip() for k in keywords.split(",") if k.strip()])
-         return keywords
-
-     def go_to_results_from_chatbot(keywords):
-         # Use the keywords to search and update the CSV, then display the DataFrame
-         df = use_keywords_to_search_and_update_csv(keywords)
-         return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), df
-
-     send_btn.click(user_send, inputs=[user_input, state], outputs=[chatbot, state, user_input])
-     end_btn.click(end_chat, inputs=state, outputs=keywords_output)
-     go_to_results_btn.click(
-         go_to_results_from_chatbot,
-         inputs=keywords_output,
-         outputs=[chatbot_page, input_page, analysis_page, results_page, batch_page, top3_page, results_df]
-     )
-
-     # Add logic for the new button on results_page
-     analyze_next_btn.click(show_combined_repo_and_llm, inputs=None, outputs=[combined_txt_results, llm_output_txt_results, results_df])
-     batch_btn.click(batch_analyze_and_select_top, inputs=None, outputs=[batch_info_txt, top3_txt, df_output])
-     show_top3_chat_btn.click(batch_analyze_and_select_top_for_chat, inputs=[state], outputs=[state])
-
-     def show_top3_page():
-         # Run batch analysis, get top 3, save to CSV, and return DataFrame
-         all_info_str, top3_str, df = batch_analyze_and_select_top()
-         import pandas as pd
-         import ast
-         try:
-             top3_ids = ast.literal_eval(top3_str)
-             if isinstance(top3_ids, str):
-                 top3_ids = [top3_ids]
-         except Exception:
-             top3_ids = []
-         top3_df_data = pd.DataFrame({"repo id": top3_ids})
-         top3_df_data.to_csv("top3_repos.csv", index=False)
-         return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), top3_df_data
-
-     show_top3_page_btn.click(show_top3_page, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page, batch_page, top3_page, top3_df])
-
- demo.launch()
hf_utils.py CHANGED
@@ -35,6 +35,10 @@ def download_filtered_space_files(space_id: str, local_dir: str = "repo_files",
         rel_path = os.path.relpath(src_file, repo_path)
         dest_file = os.path.join(local_dir, rel_path)
         os.makedirs(os.path.dirname(dest_file), exist_ok=True)
+
+        # Debug: Show exactly which file is being downloaded
+        print(f"DEBUG: Downloading file: {rel_path}")
+
         shutil.copy2(src_file, dest_file)
         copied_files += 1
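Note: a hedged sketch of the copy loop this hunk instruments. The names src_file, rel_path, dest_file, repo_path, local_dir, and copied_files come from the diff context; the wrapper function, its signature, and the dest_dir guard are assumptions made here for a runnable illustration, not the Space's actual implementation:

import os
import shutil

def copy_matched_files(matched_files: list[str], repo_path: str, local_dir: str) -> int:
    """Copy already-filtered files into local_dir, preserving relative paths."""
    copied_files = 0
    for src_file in matched_files:
        rel_path = os.path.relpath(src_file, repo_path)
        dest_file = os.path.join(local_dir, rel_path)
        dest_dir = os.path.dirname(dest_file)
        if dest_dir:  # guard added here: os.makedirs("") raises for top-level files
            os.makedirs(dest_dir, exist_ok=True)
        # Debug: show exactly which file is being downloaded
        print(f"DEBUG: Downloading file: {rel_path}")
        shutil.copy2(src_file, dest_file)
        copied_files += 1
    return copied_files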