# Spaces: Sleeping  (page-status text captured together with the source; not part of the program)
import gradio as gr | |
import regex as re | |
import csv | |
import pandas as pd | |
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response | |
from hf_utils import download_space_repo | |
# from hf_utils import download_space_repo | |
def read_csv_as_text(csv_filename):
    """Load a CSV file into a DataFrame in which every cell is a plain string.

    All columns are read with ``dtype=str`` and pandas' default missing-value
    detection is disabled. An empty cell therefore comes back as ``""``
    instead of ``NaN``, and values that merely look like missing-value
    markers (``"NA"``, ``"null"``, ``"nan"`` ...) are kept verbatim.

    Args:
        csv_filename: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        pandas.DataFrame: The file contents, one string per cell.
    """
    # Without keep_default_na=False an empty result column is read as NaN
    # (which later stringifies to "nan") and a repo id spelled like an NA
    # marker can no longer be matched against the id the user typed.
    return pd.read_csv(csv_filename, dtype=str, keep_default_na=False)
def process_repo_input(text):
    """Turn a free-form list of repo ids into ``repo_ids.csv`` and return it.

    Args:
        text: Repo ids separated by commas and/or newlines. A falsy value
            (``None`` or ``""``) yields an empty table and writes no file.

    Returns:
        pandas.DataFrame: One row per repo id with the analysis columns left
        blank, as read back from the CSV file that was just written.
    """
    header = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
    if not text:
        return pd.DataFrame(columns=header)

    # Commas and newlines are both separators; entries that are empty after
    # trimming whitespace are discarded.
    trimmed = (piece.strip() for piece in re.split(r'[\n,]+', text))
    rows = [[name, "", "", "", ""] for name in trimmed if name]

    csv_filename = "repo_ids.csv"
    with open(csv_filename, mode="w", newline='', encoding="utf-8") as out:
        table = csv.writer(out)
        table.writerow(header)
        table.writerows(rows)

    # The table shown to the user is whatever is actually stored on disk.
    return read_csv_as_text(csv_filename)
# Module-level state shared between the two button handlers:
#   last_repo_ids    - repo ids from the most recent submission, in order
#   current_repo_idx - position in last_repo_ids of the next repo to process
# Both are rebound by process_repo_input_and_store(); current_repo_idx is
# advanced by show_combined_repo_and_llm().
last_repo_ids = []
current_repo_idx = 0
def process_repo_input_and_store(text):
    """Parse submitted repo ids, remember them for sequential processing, and
    write them to ``repo_ids.csv``.

    Side effects: rebinds the module globals ``last_repo_ids`` (the parsed
    ids) and ``current_repo_idx`` (reset to 0), and overwrites
    ``repo_ids.csv`` when ``text`` is non-empty.

    Repeated ids are collapsed to their first occurrence. The sequential
    processor updates the first CSV row whose id matches, so a repeated id
    would be downloaded and analyzed twice while its second row stayed blank.

    Args:
        text: Repo ids separated by commas and/or newlines. A falsy value
            clears the stored ids and leaves the CSV file untouched.

    Returns:
        pandas.DataFrame: One row per distinct repo id with the analysis
        columns blank, as read back from the CSV file (or an empty frame with
        the same columns when ``text`` is falsy).
    """
    global last_repo_ids, current_repo_idx
    columns = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]

    # Every submission restarts processing from the first repo.
    current_repo_idx = 0
    if not text:
        last_repo_ids = []
        return pd.DataFrame(columns=columns)

    stripped = (part.strip() for part in re.split(r'[\n,]+', text))
    # dict.fromkeys removes repeats while keeping first-seen order.
    repo_ids = list(dict.fromkeys(part for part in stripped if part))
    last_repo_ids = repo_ids

    csv_filename = "repo_ids.csv"
    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(columns)
        writer.writerows([repo_id, "", "", "", ""] for repo_id in repo_ids)

    return read_csv_as_text(csv_filename)
def _to_cell_text(value):
    """Return ``value`` as text suitable for a single CSV cell (None -> "")."""
    if value is None:
        return ""
    return value if isinstance(value, str) else str(value)


def show_combined_repo_and_llm():
    """Process the next pending repo id and record the analysis in the CSV.

    Steps, for the id at ``last_repo_ids[current_repo_idx]``:
      1. download the repo into ``repo_files``;
      2. combine its files into one text file and read that file;
      3. run the LLM analysis and parse its JSON response;
      4. write strength / weaknesses / speciality / relevance rating into the
         first row of ``repo_ids.csv`` whose "repo id" matches;
      5. advance ``current_repo_idx``.

    If the download or the read of the combined file fails, the function
    returns before step 5, so the next call tries the same repo id again.

    Returns:
        tuple: ``(combined_text_or_message, summary, dataframe)`` where the
        first item is the combined file content (or a status/error message),
        the second is the extraction status plus strengths and weaknesses
        (empty on early exits), and the third is the current CSV table.
    """
    global current_repo_idx
    csv_filename = "repo_ids.csv"
    result_columns = ["strength", "weaknesses", "speciality", "relevance rating"]

    if not last_repo_ids:
        return "No repo ID available. Please submit repo IDs first.", "", pd.DataFrame()
    if current_repo_idx >= len(last_repo_ids):
        return "All repo IDs have been processed.", "", read_csv_as_text(csv_filename)

    repo_id = last_repo_ids[current_repo_idx]
    try:
        download_space_repo(repo_id, local_dir="repo_files")
    except Exception as e:
        return f"Error downloading repo: {e}", "", read_csv_as_text(csv_filename)

    txt_path = combine_repo_files_for_llm()
    try:
        with open(txt_path, "r", encoding="utf-8") as f:
            combined_content = f.read()
    except Exception as e:
        return f"Error reading {txt_path}: {e}", "", read_csv_as_text(csv_filename)

    llm_output = analyze_combined_file(txt_path)
    llm_json = parse_llm_json_response(llm_output)

    # Update CSV for the current repo id
    extraction_status = ""
    strengths = ""
    weaknesses = ""
    try:
        df = read_csv_as_text(csv_filename)
        for col in result_columns:
            # fillna first: astype(str) alone would turn the blank cells of
            # not-yet-analyzed rows into the literal text "nan".
            df[col] = df[col].fillna("").astype(str)

        matches = df.index[df["repo id"] == repo_id]
        if len(matches) > 0:
            idx = matches[0]  # only the first matching row is updated
            if isinstance(llm_json, dict) and "error" not in llm_json:
                extraction_status = "JSON extraction: SUCCESS"
                strengths = llm_json.get("strength", "")
                weaknesses = llm_json.get("weaknesses", "")
                # Values are coerced to text so the assignment does not depend
                # on how pandas treats a list/dict/None placed into one cell.
                df.at[idx, "strength"] = _to_cell_text(strengths)
                df.at[idx, "weaknesses"] = _to_cell_text(weaknesses)
                df.at[idx, "speciality"] = _to_cell_text(llm_json.get("speciality", ""))
                df.at[idx, "relevance rating"] = _to_cell_text(llm_json.get("relevance rating", ""))
            else:
                extraction_status = f"JSON extraction: FAILED\nRaw: {llm_json.get('raw', '') if isinstance(llm_json, dict) else llm_json}"
        df.to_csv(csv_filename, index=False)
    except Exception as e:
        extraction_status = f"CSV update error: {e}"
        # Re-reading can fail for the same reason the update did; fall back
        # to an empty table rather than raising out of the error handler.
        try:
            df = read_csv_as_text(csv_filename)
        except Exception:
            df = pd.DataFrame()

    # Move to next repo for next click
    current_repo_idx += 1
    summary = f"{extraction_status}\n\nStrengths:\n{strengths}\n\nWeaknesses:\n{weaknesses}"
    return combined_content, summary, df
# Input and output widgets for the submission section. They are created
# before the Blocks context is opened and inserted into the layout further
# down through .render().
repo_id_input = gr.Textbox(label="Enter repo IDs (comma or newline separated)", lines=5, placeholder="repo1, repo2\nrepo3")
# NOTE(review): "Usecase" is listed as a header here, but the handlers in this
# file only write/return the other five columns - confirm whether it is still
# needed.
df_output = gr.Dataframe(headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating", "Usecase"])
with gr.Blocks() as demo:
    # Section 1: submit the list of repo ids and show the resulting table.
    gr.Markdown("## Repo ID Input")
    repo_id_box = repo_id_input.render()
    df_box = df_output.render()
    submit_btn = gr.Button("Submit Repo IDs")
    submit_btn.click(process_repo_input_and_store, inputs=repo_id_box, outputs=df_box)
    gr.Markdown("---")
    # Section 2: each click handles the next stored repo id and shows the
    # combined files, the analysis summary and the updated table.
    gr.Markdown("## Combine and Display Repo Files")
    combine_btn = gr.Button("Download, Combine & Show .py/.md Files from Next Repo and Analyze")
    combined_txt = gr.Textbox(label="Combined Repo Files", lines=20)
    llm_output_txt = gr.Textbox(label="LLM Analysis Output", lines=10)
    df_display = gr.Dataframe(
        headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating", "Usecase"]
    )
    # The handler takes no inputs; it reads the ids stored by the submit handler.
    combine_btn.click(show_combined_repo_and_llm, inputs=None, outputs=[combined_txt, llm_output_txt, df_display])
# Runs at import time: the app is started as soon as this module is executed.
demo.launch()