Spaces:
Sleeping
Sleeping
File size: 5,092 Bytes
1efd29f 6245b3b b138e3b 1a943f1 5b7f342 e0b6f12 1efd29f 63c0f13 b138e3b 63c0f13 b138e3b 1efd29f 3414412 5b7f342 3414412 5b7f342 3414412 5b7f342 3414412 5b7f342 3414412 5b7f342 791be58 3414412 5b7f342 3414412 5b7f342 3414412 5b7f342 3414412 5b7f342 791be58 5b7f342 3414412 791be58 1a943f1 3414412 1a943f1 3414412 aeebb75 1a943f1 3414412 1a943f1 3414412 aeebb75 5b7f342 3414412 5b7f342 791be58 aeebb75 5b7f342 1efd29f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import gradio as gr
import regex as re
import csv
import pandas as pd
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
from hf_utils import download_space_repo
# from hf_utils import download_space_repo
def process_repo_input(text):
    """Parse a comma/newline separated string of repo IDs, write them to
    repo_ids.csv with empty analysis columns, and return the CSV as a DataFrame.

    Args:
        text: Raw user input; repo IDs separated by commas and/or newlines.

    Returns:
        pandas.DataFrame with columns
        ["repo id", "strength", "weaknesses", "speciality", "relevance rating"];
        empty (no rows) when the input is blank.
    """
    columns = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
    if not text:
        return pd.DataFrame(columns=columns)
    # Split by newlines and commas, strip whitespace, drop blank entries.
    repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
    # Persist to CSV so later analysis steps can update it in place.
    csv_filename = "repo_ids.csv"
    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(columns)
        # One row per repo; analysis columns start empty and are filled later.
        writer.writerows([repo_id, "", "", "", ""] for repo_id in repo_ids)
    # Read the CSV back so the displayed DataFrame matches what was written.
    return pd.read_csv(csv_filename)
# Store the last entered repo ids and the current index in global variables for button access
# last_repo_ids: repo IDs from the most recent submission, processed one per button click.
last_repo_ids = []
# current_repo_idx: index into last_repo_ids of the next repo to analyze.
current_repo_idx = 0
def process_repo_input_and_store(text):
    """Parse repo IDs from *text*, remember them for the step-through analysis
    button, write them to repo_ids.csv, and return the CSV as a DataFrame.

    Side effects:
        Sets module-global ``last_repo_ids`` to the parsed IDs and resets
        ``current_repo_idx`` to 0; overwrites repo_ids.csv.

    Args:
        text: Raw user input; repo IDs separated by commas and/or newlines.

    Returns:
        pandas.DataFrame mirroring the freshly written CSV; empty when the
        input is blank.
    """
    global last_repo_ids, current_repo_idx
    columns = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
    if not text:
        # Blank input clears any previously stored queue state.
        last_repo_ids = []
        current_repo_idx = 0
        return pd.DataFrame(columns=columns)
    repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
    last_repo_ids = repo_ids
    current_repo_idx = 0
    csv_filename = "repo_ids.csv"
    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(columns)
        writer.writerows([repo_id, "", "", "", ""] for repo_id in repo_ids)
    return pd.read_csv(csv_filename)
def show_combined_repo_and_llm():
    """Process the next queued repo ID: download it, combine its files, run the
    LLM analysis, write the results into repo_ids.csv, and advance the queue.

    Returns:
        A 3-tuple ``(combined_text_or_message, llm_output, dataframe)``
        matching the three Gradio outputs wired to this handler. The original
        error paths returned a fourth trailing value, which does not match the
        3-component ``outputs`` list of the click handler; all paths now
        return exactly three values, and the Dataframe slot is always a plain
        DataFrame.
    """
    global current_repo_idx
    csv_filename = "repo_ids.csv"
    if not last_repo_ids:
        return "No repo ID available. Please submit repo IDs first.", "", pd.DataFrame()
    if current_repo_idx >= len(last_repo_ids):
        return "All repo IDs have been processed.", "", pd.read_csv(csv_filename)
    repo_id = last_repo_ids[current_repo_idx]
    try:
        download_space_repo(repo_id, local_dir="repo_files")
    except Exception as e:
        return f"Error downloading repo: {e}", "", pd.read_csv(csv_filename)
    txt_path = combine_repo_files_for_llm()
    try:
        with open(txt_path, "r", encoding="utf-8") as f:
            combined_content = f.read()
    except Exception as e:
        return f"Error reading {txt_path}: {e}", "", pd.read_csv(csv_filename)
    llm_output = analyze_combined_file(txt_path)
    llm_json = parse_llm_json_response(llm_output)
    # Best-effort CSV update: fill in the analysis columns for the current repo.
    try:
        df = pd.read_csv(csv_filename)
        # Cast analysis columns to str so assigning text does not fight the
        # float dtype pandas infers for all-empty columns.
        for col in ["strength", "weaknesses", "speciality", "relevance rating"]:
            df[col] = df[col].astype(str)
        for idx, row in df.iterrows():
            if row["repo id"] == repo_id:
                if isinstance(llm_json, dict):
                    df.at[idx, "strength"] = llm_json.get("strength", "")
                    df.at[idx, "weaknesses"] = llm_json.get("weaknesses", "")
                    df.at[idx, "speciality"] = llm_json.get("speciality", "")
                    df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
                break
    except Exception:
        # If the update fails, still show whatever is currently on disk.
        df = pd.read_csv(csv_filename)
    else:
        df.to_csv(csv_filename, index=False)
    # Advance so the next click processes the following repo.
    current_repo_idx += 1
    return combined_content, llm_output, df
# --- Gradio UI ---------------------------------------------------------------
# Components are instantiated up front and rendered inside the Blocks layout.
repo_id_input = gr.Textbox(label="Enter repo IDs (comma or newline separated)", lines=5, placeholder="repo1, repo2\nrepo3")
# Headers match the five columns written to repo_ids.csv (the original listed a
# sixth "Usecase" column that no handler ever populates).
df_output = gr.Dataframe(headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])

with gr.Blocks() as demo:
    gr.Markdown("## Repo ID Input")
    repo_id_box = repo_id_input.render()
    df_box = df_output.render()
    submit_btn = gr.Button("Submit Repo IDs")
    # Writes repo_ids.csv and shows its contents in the table.
    submit_btn.click(process_repo_input_and_store, inputs=repo_id_box, outputs=df_box)
    gr.Markdown("---")
    gr.Markdown("## Combine and Display Repo Files")
    combine_btn = gr.Button("Download, Combine & Show .py/.md Files from Next Repo and Analyze")
    combined_txt = gr.Textbox(label="Combined Repo Files", lines=20)
    llm_output_txt = gr.Textbox(label="LLM Analysis Output", lines=10)
    df_display = gr.Dataframe(
        headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"],
        # NOTE(review): highlight_row is not a documented gr.Dataframe
        # parameter in mainline Gradio — confirm against the pinned version.
        highlight_row=True
    )
    # Three outputs: the handler must return exactly three values.
    combine_btn.click(show_combined_repo_and_llm, inputs=None, outputs=[combined_txt, llm_output_txt, df_display])

demo.launch()