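"""Gradio app for reviewing Hugging Face Space repos.

Takes a list of repo IDs, downloads each Space, combines its .py/.md files
into a single text file, runs an LLM analysis over it, and records the
extracted strength/weaknesses/speciality/relevance rating in repo_ids.csv.
"""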
import gradio as gr
import regex as re  # third-party "regex" package, used as a drop-in for the stdlib re module
import csv
import pandas as pd
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
from hf_utils import download_space_repo

def read_csv_as_text(csv_filename):
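    """Read a CSV file with every column as a string (no pandas type inference)."""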
    return pd.read_csv(csv_filename, dtype=str)

def process_repo_input(text):
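    """Parse comma/newline-separated repo IDs into repo_ids.csv and return it as a
    DataFrame. Unused in the UI below; superseded by process_repo_input_and_store."""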
    if not text:
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
    # Split by newlines and commas, strip whitespace
    repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
    # Write to CSV
    csv_filename = "repo_ids.csv"
    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
        for repo_id in repo_ids:
            writer.writerow([repo_id, "", "", "", ""])
    # Read the CSV into a DataFrame to display
    df = read_csv_as_text(csv_filename)
    return df

# Store the last entered repo ids and the current index in global variables for button access
last_repo_ids = []
current_repo_idx = 0
def process_repo_input_and_store(text):
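    """Like process_repo_input, but also stash the parsed repo IDs and reset the
    processing index in module-level globals so the analyze button can step
    through the list one repo per click."""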
    global last_repo_ids, current_repo_idx
    if not text:
        last_repo_ids = []
        current_repo_idx = 0
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
    repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
    last_repo_ids = repo_ids
    current_repo_idx = 0
    csv_filename = "repo_ids.csv"
    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
        for repo_id in repo_ids:
            writer.writerow([repo_id, "", "", "", ""])
    df = read_csv_as_text(csv_filename)
    return df

def show_combined_repo_and_llm():
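    """Process the next repo in the stored list: download it, combine its .py/.md
    files into one text file, run the LLM analysis, and write the parsed fields
    back into repo_ids.csv. Returns (combined text, status summary, DataFrame)."""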
    global current_repo_idx
    if not last_repo_ids:
        return "No repo ID available. Please submit repo IDs first.", "", pd.DataFrame()
    if current_repo_idx >= len(last_repo_ids):
        return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")
    repo_id = last_repo_ids[current_repo_idx]
    try:
        download_space_repo(repo_id, local_dir="repo_files")
    except Exception as e:
        return f"Error downloading repo: {e}", "", read_csv_as_text("repo_ids.csv")
    txt_path = combine_repo_files_for_llm()
    try:
        with open(txt_path, "r", encoding="utf-8") as f:
            combined_content = f.read()
    except Exception as e:
        return f"Error reading {txt_path}: {e}", "", read_csv_as_text("repo_ids.csv")
    llm_output = analyze_combined_file(txt_path)
    llm_json = parse_llm_json_response(llm_output)
    # Update CSV for the current repo id
    csv_filename = "repo_ids.csv"
    extraction_status = ""
    strengths = ""
    weaknesses = ""
    try:
        df = read_csv_as_text(csv_filename)
        for col in ["strength", "weaknesses", "speciality", "relevance rating"]:
            df[col] = df[col].astype(str)
        for idx, row in df.iterrows():
            if row["repo id"] == repo_id:
                if isinstance(llm_json, dict) and "error" not in llm_json:
                    extraction_status = "JSON extraction: SUCCESS"
                    strengths = llm_json.get("strength", "")
                    weaknesses = llm_json.get("weaknesses", "")
                    df.at[idx, "strength"] = strengths
                    df.at[idx, "weaknesses"] = weaknesses
                    df.at[idx, "speciality"] = llm_json.get("speciality", "")
                    df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
                else:
                    extraction_status = f"JSON extraction: FAILED\nRaw: {llm_json.get('raw', '') if isinstance(llm_json, dict) else llm_json}"
                break
        df.to_csv(csv_filename, index=False)
    except Exception as e:
        df = read_csv_as_text(csv_filename)
        extraction_status = f"CSV update error: {e}"
    # Move to next repo for next click
    current_repo_idx += 1
    summary = f"{extraction_status}\n\nStrengths:\n{strengths}\n\nWeaknesses:\n{weaknesses}"
    return combined_content, summary, df
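
# --- Gradio UI: components are created here and rendered inside the Blocks context below ---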
repo_id_input = gr.Textbox(label="Enter repo IDs (comma or newline separated)", lines=5, placeholder="repo1, repo2\nrepo3")
# Headers match the columns actually written to repo_ids.csv
df_output = gr.Dataframe(headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])

with gr.Blocks() as demo:
    gr.Markdown("## Repo ID Input")
    repo_id_box = repo_id_input.render()
    df_box = df_output.render()
    submit_btn = gr.Button("Submit Repo IDs")
    submit_btn.click(process_repo_input_and_store, inputs=repo_id_box, outputs=df_box)

    gr.Markdown("---")
    gr.Markdown("## Combine and Display Repo Files")
    combine_btn = gr.Button("Download, Combine & Show .py/.md Files from Next Repo and Analyze")
    combined_txt = gr.Textbox(label="Combined Repo Files", lines=20)
    llm_output_txt = gr.Textbox(label="LLM Analysis Output", lines=10)
    df_display = gr.Dataframe(
        headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
    )
    combine_btn.click(show_combined_repo_and_llm, inputs=None, outputs=[combined_txt, llm_output_txt, df_display])

demo.launch()