import gradio as gr
import re
import csv
import pandas as pd
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
from hf_utils import download_space_repo
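
# Gradio app: accepts a list of Hugging Face Space repo IDs, downloads each
# repo, combines its .py/.md files into a single text file, runs an LLM
# analysis over it, and records the parsed results in repo_ids.csv.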

def process_repo_input(text):
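    """Parse repo IDs from the input text, seed repo_ids.csv with empty
    analysis columns, and return the CSV as a DataFrame.

    Standalone variant of process_repo_input_and_store; not wired to the UI.
    """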
    if not text:
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
    # Split by newlines and commas, strip whitespace
    repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
    # Write to CSV
    csv_filename = "repo_ids.csv"
    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
        for repo_id in repo_ids:
            writer.writerow([repo_id, "", "", "", ""])
    # Read the CSV into a DataFrame to display
    df = pd.read_csv(csv_filename)
    return df

# Store the last entered repo ids and the current index in global variables for button access
last_repo_ids = []
current_repo_idx = 0

def process_repo_input_and_store(text):
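    """Parse repo IDs, cache them for the analyze button, reset the cursor,
    and (re)initialize repo_ids.csv."""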
    global last_repo_ids, current_repo_idx
    if not text:
        last_repo_ids = []
        current_repo_idx = 0
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
    repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
    last_repo_ids = repo_ids
    current_repo_idx = 0
    csv_filename = "repo_ids.csv"
    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
        for repo_id in repo_ids:
            writer.writerow([repo_id, "", "", "", ""])
    df = pd.read_csv(csv_filename)
    return df

def show_combined_repo_and_llm():
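    """Process the next queued repo: download it, combine its files, run the
    LLM analysis, update repo_ids.csv, and return
    (combined file text, raw LLM output, updated table)."""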
    global current_repo_idx
    if not last_repo_ids:
        return "No repo ID available. Please submit repo IDs first.", "", pd.DataFrame(), None
    if current_repo_idx >= len(last_repo_ids):
        return "All repo IDs have been processed.", "", pd.read_csv("repo_ids.csv"), None
    repo_id = last_repo_ids[current_repo_idx]
    try:
        download_space_repo(repo_id, local_dir="repo_files")
    except Exception as e:
        return f"Error downloading repo: {e}", "", pd.read_csv("repo_ids.csv"), None
    txt_path = combine_repo_files_for_llm()
    try:
        with open(txt_path, "r", encoding="utf-8") as f:
            combined_content = f.read()
    except Exception as e:
        return f"Error reading {txt_path}: {e}", "", pd.read_csv("repo_ids.csv"), None
    llm_output = analyze_combined_file(txt_path)
    llm_json = parse_llm_json_response(llm_output)
    # Update CSV for the current repo id
    csv_filename = "repo_ids.csv"
    try:
        df = pd.read_csv(csv_filename)
        highlight_idx = None
        # Cast columns to string to avoid dtype issues
        for col in ["strength", "weaknesses", "speciality", "relevance rating"]:
            df[col] = df[col].astype(str)
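        # Locate the row for the current repo id and fill in the LLM fields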
        for idx, row in df.iterrows():
            if row["repo id"] == repo_id:
                highlight_idx = idx
                if isinstance(llm_json, dict):
                    df.at[idx, "strength"] = llm_json.get("strength", "")
                    df.at[idx, "weaknesses"] = llm_json.get("weaknesses", "")
                    df.at[idx, "speciality"] = llm_json.get("speciality", "")
                    df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
                break
        df.to_csv(csv_filename, index=False)
    except Exception:
        # If updating the CSV fails, fall back to showing the unmodified table
        df = pd.read_csv(csv_filename)
        highlight_idx = None
    # Advance to the next repo for the subsequent click
    current_repo_idx += 1
    # gr.Dataframe does not accept a (df, row_indices) tuple, so highlight the
    # updated row with a pandas Styler (accepted by recent Gradio versions)
    if highlight_idx is not None:
        styled = df.style.apply(
            lambda row: ["background-color: #fff3b0" if row.name == highlight_idx else "" for _ in row],
            axis=1,
        )
        return combined_content, llm_output, styled
    return combined_content, llm_output, df
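
# UI components are created up front and rendered inside the Blocks layout below.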

repo_id_input = gr.Textbox(label="Enter repo IDs (comma or newline separated)", lines=5, placeholder="repo1, repo2\nrepo3")
df_output = gr.Dataframe(headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])

with gr.Blocks() as demo:
    gr.Markdown("## Repo ID Input")
    repo_id_box = repo_id_input.render()
    df_box = df_output.render()
    submit_btn = gr.Button("Submit Repo IDs")
    submit_btn.click(process_repo_input_and_store, inputs=repo_id_box, outputs=df_box)

    gr.Markdown("---")
    gr.Markdown("## Combine and Display Repo Files")
    combine_btn = gr.Button("Analyze Next Repo (download, combine .py/.md files, run LLM)")
    combined_txt = gr.Textbox(label="Combined Repo Files", lines=20)
    llm_output_txt = gr.Textbox(label="LLM Analysis Output", lines=10)
    df_display = gr.Dataframe(
        headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
    )
    combine_btn.click(show_combined_repo_and_llm, inputs=None, outputs=[combined_txt, llm_output_txt, df_display])
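    # (outputs map 1:1 to the tuple returned by show_combined_repo_and_llm)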

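# Run directly (python app.py); Gradio serves the UI at http://127.0.0.1:7860 by default.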
if __name__ == "__main__":
    demo.launch()