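"""Gradio app for reviewing Hugging Face Space repos.

Takes a list of repo IDs, downloads each Space, combines its .py/.md files
into a single text file, runs an LLM analysis over it, and records the
extracted strength/weaknesses/speciality/relevance rating in repo_ids.csv.
"""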
import gradio as gr
import regex as re  # third-party "regex" package, used as a drop-in for the stdlib re module
import csv
import pandas as pd
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
from hf_utils import download_space_repo

def read_csv_as_text(csv_filename):
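    """Read a CSV file with every column as a string (no pandas type inference)."""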
    return pd.read_csv(csv_filename, dtype=str)

def process_repo_input(text):
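    """Parse comma/newline-separated repo IDs into repo_ids.csv and return it as a
    DataFrame. Unused in the UI below; superseded by process_repo_input_and_store."""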
    if not text:
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
    # Split by newlines and commas, strip whitespace
    repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
    # Write to CSV
    csv_filename = "repo_ids.csv"
    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
        for repo_id in repo_ids:
            writer.writerow([repo_id, "", "", "", ""])
    # Read the CSV into a DataFrame to display
    df = read_csv_as_text(csv_filename)
    return df

# Store the last entered repo ids and the current index in global variables for button access
last_repo_ids = []
current_repo_idx = 0
def process_repo_input_and_store(text):
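    """Like process_repo_input, but also stash the parsed repo IDs and reset the
    processing index in module-level globals so the analyze button can step
    through the list one repo per click."""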
    global last_repo_ids, current_repo_idx
    if not text:
        last_repo_ids = []
        current_repo_idx = 0
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
    repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
    last_repo_ids = repo_ids
    current_repo_idx = 0
    csv_filename = "repo_ids.csv"
    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
        for repo_id in repo_ids:
            writer.writerow([repo_id, "", "", "", ""])
    df = read_csv_as_text(csv_filename)
    return df

def show_combined_repo_and_llm():
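    """Process the next repo in the stored list: download it, combine its .py/.md
    files into one text file, run the LLM analysis, and write the parsed fields
    back into repo_ids.csv. Returns (combined text, status summary, DataFrame)."""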
    global current_repo_idx
    if not last_repo_ids:
        return "No repo ID available. Please submit repo IDs first.", "", pd.DataFrame()
    if current_repo_idx >= len(last_repo_ids):
        return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")
    repo_id = last_repo_ids[current_repo_idx]
    try:
        download_space_repo(repo_id, local_dir="repo_files")
    except Exception as e:
        return f"Error downloading repo: {e}", "", read_csv_as_text("repo_ids.csv")
    txt_path = combine_repo_files_for_llm()
    try:
        with open(txt_path, "r", encoding="utf-8") as f:
            combined_content = f.read()
    except Exception as e:
        return f"Error reading {txt_path}: {e}", "", read_csv_as_text("repo_ids.csv")
    llm_output = analyze_combined_file(txt_path)
    llm_json = parse_llm_json_response(llm_output)
    # Update CSV for the current repo id
    csv_filename = "repo_ids.csv"
    extraction_status = ""
    strengths = ""
    weaknesses = ""
    try:
        df = read_csv_as_text(csv_filename)
        for col in ["strength", "weaknesses", "speciality", "relevance rating"]:
            df[col] = df[col].astype(str)
        for idx, row in df.iterrows():
            if row["repo id"] == repo_id:
                if isinstance(llm_json, dict) and "error" not in llm_json:
                    extraction_status = "JSON extraction: SUCCESS"
                    strengths = llm_json.get("strength", "")
                    weaknesses = llm_json.get("weaknesses", "")
                    df.at[idx, "strength"] = strengths
                    df.at[idx, "weaknesses"] = weaknesses
                    df.at[idx, "speciality"] = llm_json.get("speciality", "")
                    df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
                else:
                    extraction_status = f"JSON extraction: FAILED\nRaw: {llm_json.get('raw', '') if isinstance(llm_json, dict) else llm_json}"
                break
        df.to_csv(csv_filename, index=False)
    except Exception as e:
        df = read_csv_as_text(csv_filename)
        extraction_status = f"CSV update error: {e}"
    # Move to next repo for next click
    current_repo_idx += 1
    summary = f"{extraction_status}\n\nStrengths:\n{strengths}\n\nWeaknesses:\n{weaknesses}"
    return combined_content, summary, df
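
# --- Gradio UI: components are created here and rendered inside the Blocks context below ---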
repo_id_input = gr.Textbox(label="Enter repo IDs (comma or newline separated)", lines=5, placeholder="repo1, repo2\nrepo3")
# Headers match the columns actually written to repo_ids.csv
df_output = gr.Dataframe(headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])

with gr.Blocks() as demo:
    gr.Markdown("## Repo ID Input")
    repo_id_box = repo_id_input.render()
    df_box = df_output.render()
    submit_btn = gr.Button("Submit Repo IDs")
    submit_btn.click(process_repo_input_and_store, inputs=repo_id_box, outputs=df_box)

    gr.Markdown("---")
    gr.Markdown("## Combine and Display Repo Files")
    combine_btn = gr.Button("Download, Combine & Show .py/.md Files from Next Repo and Analyze")
    combined_txt = gr.Textbox(label="Combined Repo Files", lines=20)
    llm_output_txt = gr.Textbox(label="LLM Analysis Output", lines=10)
    df_display = gr.Dataframe(
        headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
    )
    combine_btn.click(show_combined_repo_and_llm, inputs=None, outputs=[combined_txt, llm_output_txt, df_display])

demo.launch()