import gradio as gr
import re
import csv
import pandas as pd
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
from hf_utils import download_space_repo, search_top_spaces
from chatbot_page import chat_with_user, extract_keywords_from_conversation

# Chatbot system prompt (not referenced in this file; presumably applied inside chatbot_page)
CHATBOT_SYSTEM_PROMPT = (
    "You are a helpful assistant. Your goal is to help the user describe their ideal open-source repo. "
    "Ask questions to clarify what they want, their use case, preferred language, features, etc. "
    "When the user clicks 'End Chat', analyze the conversation and return about 5 keywords for repo search. "
    "Return only the keywords as a comma-separated list."
)

def read_csv_as_text(csv_filename):
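    """Load a CSV with every column typed as string so blank analysis cells stay editable text."""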
    return pd.read_csv(csv_filename, dtype=str)

def process_repo_input(text):
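    """Split pasted text into repo IDs, seed repo_ids.csv with empty analysis
    columns, and return the CSV as a DataFrame.

    Note: not wired to any UI event; process_repo_input_and_store below is the
    variant the Submit button uses.
    """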
    if not text:
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
    # Split by newlines and commas, strip whitespace
    repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
    # Write to CSV
    csv_filename = "repo_ids.csv"
    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
        for repo_id in repo_ids:
            writer.writerow([repo_id, "", "", "", ""])
    # Read the CSV into a DataFrame to display
    df = read_csv_as_text(csv_filename)
    return df

# Store the last entered repo ids and the current index in global variables for button access
last_repo_ids = []
current_repo_idx = 0

# Store extracted keywords for the chatbot flow
generated_keywords = []

def process_repo_input_and_store(text):
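    """Parse repo IDs as in process_repo_input, but also cache them in
    last_repo_ids and reset current_repo_idx so analysis starts from the first repo."""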
    global last_repo_ids, current_repo_idx
    if not text:
        last_repo_ids = []
        current_repo_idx = 0
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
    repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
    last_repo_ids = repo_ids
    current_repo_idx = 0
    csv_filename = "repo_ids.csv"
    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
        for repo_id in repo_ids:
            writer.writerow([repo_id, "", "", "", ""])
    df = read_csv_as_text(csv_filename)
    return df

def keyword_search_and_update(keyword):
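    """Search Spaces for each comma/newline-separated keyword (top 5 hits per
    keyword), de-duplicate while preserving order, and rewrite repo_ids.csv."""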
    global last_repo_ids, current_repo_idx
    if not keyword:
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
    # Accept multiple keywords, comma or newline separated
    keyword_list = [k.strip() for k in re.split(r'[\n,]+', keyword) if k.strip()]
    repo_ids = []
    for kw in keyword_list:
        repo_ids.extend(search_top_spaces(kw, limit=5))
    # Remove duplicates while preserving order
    seen = set()
    unique_repo_ids = []
    for rid in repo_ids:
        if rid not in seen:
            unique_repo_ids.append(rid)
            seen.add(rid)
    last_repo_ids = unique_repo_ids
    current_repo_idx = 0
    csv_filename = "repo_ids.csv"
    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
        for repo_id in unique_repo_ids:
            writer.writerow([repo_id, "", "", "", ""])
    df = read_csv_as_text(csv_filename)
    return df

def show_combined_repo_and_llm():
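    """Analyze the next pending repo: download it, combine its .py/.md files into
    one text file, run the LLM analysis, and write the parsed fields back to
    repo_ids.csv. Returns (combined file text, status summary, updated DataFrame)."""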
    global current_repo_idx
    if not last_repo_ids:
        return "No repo ID available. Please submit repo IDs first.", "", pd.DataFrame()
    if current_repo_idx >= len(last_repo_ids):
        return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")
    repo_id = last_repo_ids[current_repo_idx]
    try:
        download_space_repo(repo_id, local_dir="repo_files")
    except Exception as e:
        return f"Error downloading repo: {e}", "", read_csv_as_text("repo_ids.csv")
    try:
        txt_path = combine_repo_files_for_llm()
    except Exception as e:
        return f"Error combining repo files: {e}", "", read_csv_as_text("repo_ids.csv")
    try:
        with open(txt_path, "r", encoding="utf-8") as f:
            combined_content = f.read()
    except Exception as e:
        return f"Error reading {txt_path}: {e}", "", read_csv_as_text("repo_ids.csv")
    llm_output = analyze_combined_file(txt_path)
    llm_json = parse_llm_json_response(llm_output)
    # Update CSV for the current repo id
    csv_filename = "repo_ids.csv"
    extraction_status = ""
    strengths = ""
    weaknesses = ""
    try:
        df = read_csv_as_text(csv_filename)
        for col in ["strength", "weaknesses", "speciality", "relevance rating"]:
            df[col] = df[col].astype(str)
        for idx, row in df.iterrows():
            if row["repo id"] == repo_id:
                if isinstance(llm_json, dict) and "error" not in llm_json:
                    extraction_status = "JSON extraction: SUCCESS"
                    strengths = llm_json.get("strength", "")
                    weaknesses = llm_json.get("weaknesses", "")
                    df.at[idx, "strength"] = strengths
                    df.at[idx, "weaknesses"] = weaknesses
                    df.at[idx, "speciality"] = llm_json.get("speciality", "")
                    df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
                else:
                    extraction_status = f"JSON extraction: FAILED\nRaw: {llm_json.get('raw', '') if isinstance(llm_json, dict) else llm_json}"
                break
        df.to_csv(csv_filename, index=False)
    except Exception as e:
        df = read_csv_as_text(csv_filename)
        extraction_status = f"CSV update error: {e}"
    # Move to next repo for next click
    current_repo_idx += 1
    summary = f"{extraction_status}\n\nStrengths:\n{strengths}\n\nWeaknesses:\n{weaknesses}"
    return combined_content, summary, df

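# --- Page navigation helpers ---
# Each returns one gr.update(visible=...) per page, in the same order as the
# outputs list it is wired to in the .click() calls inside the Blocks below.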
def go_to_analysis():
    return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)

def go_to_input():
    return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)

def go_back_to_input():
    # For outputs [input_page, analysis_page, chatbot_page, results_page]:
    # show the input page again and hide the others.
    return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)

def go_to_chatbot():
    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)

def go_to_start():
    return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)

def go_to_results():
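    # Currently unused: go_to_results_from_chatbot (defined inside the Blocks
    # below) performs this transition together with the results DataFrame.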
    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)

repo_id_input = gr.Textbox(label="Enter repo IDs (comma or newline separated)", lines=5, placeholder="repo1, repo2\nrepo3")
df_output = gr.Dataframe(
    headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"],
    datatype=["str", "str", "str", "str", "str"]
)




def use_keywords_to_search_and_update_csv(keywords):
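    """Search Spaces for the chatbot-extracted keywords (top 3 hits per keyword)
    and rebuild repo_ids.csv, mirroring keyword_search_and_update above."""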
    global last_repo_ids, current_repo_idx
    if not keywords:
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
    # Split keywords and search for each
    keyword_list = [k.strip() for k in keywords.split(",") if k.strip()]
    repo_ids = []
    for kw in keyword_list:
        repo_ids.extend(search_top_spaces(kw, limit=3))  # limit=3 per keyword
    # Remove duplicates while preserving order
    seen = set()
    unique_repo_ids = []
    for rid in repo_ids:
        if rid not in seen:
            unique_repo_ids.append(rid)
            seen.add(rid)
    last_repo_ids = unique_repo_ids
    current_repo_idx = 0
    csv_filename = "repo_ids.csv"
    with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
        for repo_id in unique_repo_ids:
            writer.writerow([repo_id, "", "", "", ""])
    df = read_csv_as_text(csv_filename)
    return df

with gr.Blocks() as demo:
    page_state = gr.State(0)
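    # page_state is currently unused; page visibility is driven directly by the
    # navigation callbacks below.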

    # --- Start Page: Option Selection ---
    with gr.Column(visible=True) as start_page:
        gr.Markdown("## Welcome! How would you like to proceed?")
        option_a_btn = gr.Button("A) I know which repos I want to search and research about")
        option_b_btn = gr.Button("B) I don't know exactly what I want (Chatbot)")

    # --- Page 1: Input ---
    with gr.Column(visible=False) as input_page:
        gr.Markdown("## Enter Keyword or Repo IDs")
        keyword_input = gr.Textbox(label="Enter keywords to search repos (comma or newline separated)", lines=2, placeholder="e.g. audio, vision\ntext")
        keyword_btn = gr.Button("Search and Update Repo List")
        repo_id_box = repo_id_input.render()
        df_box = df_output.render()
        submit_btn = gr.Button("Submit Repo IDs")
        next_btn = gr.Button("Next: Go to Analysis")
        back_to_start_btn = gr.Button("Back to Start")

    # --- Page 2: Analysis ---
    with gr.Column(visible=False) as analysis_page:
        gr.Markdown("## Combine and Display Repo Files")
        combine_btn = gr.Button("Download, Combine & Show .py/.md Files from Next Repo and Analyze")
        combined_txt = gr.Textbox(label="Combined Repo Files", lines=20)
        llm_output_txt = gr.Textbox(label="LLM Analysis Output", lines=10)
        df_display = gr.Dataframe(
            headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"],
            datatype=["str", "str", "str", "str", "str"]
        )
        back_btn = gr.Button("Back to Input")
        back_to_start_btn2 = gr.Button("Back to Start")

    # --- Page 3: Chatbot ---
    with gr.Column(visible=False) as chatbot_page:
        gr.Markdown("## Repo Recommendation Chatbot")
        chatbot = gr.Chatbot()
        state = gr.State([])
        user_input = gr.Textbox(label="Your message", placeholder="Describe your ideal repo or answer the assistant's questions...")
        send_btn = gr.Button("Send")
        end_btn = gr.Button("End Chat and Extract Keywords")
        keywords_output = gr.Textbox(label="Extracted Keywords for Repo Search", interactive=False)
        go_to_results_btn = gr.Button("Find Repos with These Keywords")
        back_to_start_btn3 = gr.Button("Back to Start")

    # --- Page 4: Results after Chatbot ---
    with gr.Column(visible=False) as results_page:
        gr.Markdown("## Repo Results Based on Your Conversation")
        results_df = gr.Dataframe(
            headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"],
            datatype=["str", "str", "str", "str", "str"]
        )
        analyze_next_btn = gr.Button("Download, Combine & Analyze Next Repo")
        combined_txt_results = gr.Textbox(label="Combined Repo Files", lines=20)
        llm_output_txt_results = gr.Textbox(label="LLM Analysis Output", lines=10)
        back_to_start_btn4 = gr.Button("Back to Start")

    # Navigation logic
    option_a_btn.click(go_to_input, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page])
    option_b_btn.click(go_to_chatbot, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page])
    next_btn.click(go_to_analysis, inputs=None, outputs=[input_page, analysis_page, chatbot_page, results_page])
    back_btn.click(go_back_to_input, inputs=None, outputs=[input_page, analysis_page, chatbot_page, results_page])
    back_to_start_btn.click(go_to_start, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page])
    back_to_start_btn2.click(go_to_start, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page])
    back_to_start_btn3.click(go_to_start, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page])
    back_to_start_btn4.click(go_to_start, inputs=None, outputs=[start_page, input_page, chatbot_page, results_page])

    # Keyword and repo input logic
    keyword_btn.click(keyword_search_and_update, inputs=keyword_input, outputs=df_box)
    submit_btn.click(process_repo_input_and_store, inputs=repo_id_box, outputs=df_box)

    # Analysis logic
    combine_btn.click(show_combined_repo_and_llm, inputs=None, outputs=[combined_txt, llm_output_txt, df_display])

    # Chatbot logic
    def user_send(user_message, history):
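        """Get the assistant's reply, append the turn to history, and clear the input box."""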
        assistant_reply = chat_with_user(user_message, history)
        history = history + [[user_message, assistant_reply]]
        return history, history, ""

    def end_chat(history):
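        """Condense the conversation into search keywords and cache them in generated_keywords."""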
        global generated_keywords
        keywords = extract_keywords_from_conversation(history)
        generated_keywords.clear()
        generated_keywords.extend([k.strip() for k in keywords.split(",") if k.strip()])
        return keywords

    def go_to_results_from_chatbot(keywords):
        # Use the keywords to search and update the CSV, then display the DataFrame
        df = use_keywords_to_search_and_update_csv(keywords)
        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), df

    send_btn.click(user_send, inputs=[user_input, state], outputs=[chatbot, state, user_input])
    end_btn.click(end_chat, inputs=state, outputs=keywords_output)
    go_to_results_btn.click(
        go_to_results_from_chatbot,
        inputs=keywords_output,
        outputs=[chatbot_page, input_page, analysis_page, results_page, results_df]
    )

    # Add logic for the new button on results_page
    analyze_next_btn.click(show_combined_repo_and_llm, inputs=None, outputs=[combined_txt_results, llm_output_txt_results, results_df])

if __name__ == "__main__":
    demo.launch()