File size: 24,056 Bytes
1efd29f
6245b3b
b138e3b
 
f367387
 
 
 
 
 
 
 
 
 
 
bd91ae0
f84e71c
885c1f9
f367387
 
 
 
 
 
 
 
885c1f9
 
 
 
 
 
1efd29f
f367387
 
 
 
10a33ac
f367387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b138e3b
5b7f342
f367387
 
 
 
 
 
 
 
 
 
d27a85c
f367387
 
5b7f342
f367387
 
5b7f342
f367387
5b7f342
f367387
 
 
 
 
 
 
 
5b7f342
3414412
f367387
 
5b7f342
791be58
f367387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b7f342
f367387
 
 
 
 
1a943f1
10a33ac
3d185cb
f367387
1a943f1
3414412
f367387
 
 
 
 
 
1a943f1
f367387
 
3d185cb
 
f367387
 
 
 
3d185cb
 
f367387
1a943f1
f367387
 
1a943f1
f367387
 
f84e71c
f367387
 
 
 
 
 
 
 
 
 
 
 
 
 
885c1f9
f367387
 
 
d27a85c
f367387
 
ac1a436
f367387
ac1a436
f367387
 
ac1a436
 
 
 
 
 
 
f367387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e410b86
 
f367387
 
 
 
 
 
 
 
e410b86
 
f367387
 
 
 
 
 
 
e410b86
f367387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e410b86
f367387
 
e410b86
f367387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9169bdf
 
f367387
 
 
 
 
 
 
 
e410b86
f367387
e410b86
f367387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc78434
f367387
 
 
 
 
bc78434
f367387
 
 
 
 
 
 
e410b86
2c45946
e410b86
2c45946
 
9169bdf
2c45946
e410b86
2c45946
 
9169bdf
2c45946
e410b86
2c45946
 
9169bdf
2c45946
e410b86
9169bdf
2c45946
9169bdf
 
 
2c45946
9169bdf
2c45946
9169bdf
 
2c45946
9169bdf
 
2c45946
9169bdf
2c45946
e410b86
2c45946
 
9169bdf
e410b86
f367387
2c45946
4336f6d
e410b86
4336f6d
e410b86
f367387
e410b86
f367387
 
 
2c45946
4336f6d
e410b86
4336f6d
e410b86
f367387
e410b86
f367387
 
 
2c45946
4336f6d
e410b86
4336f6d
e410b86
f367387
e410b86
f367387
 
 
2c45946
4336f6d
e410b86
4336f6d
e410b86
f367387
e410b86
f367387
 
 
2c45946
4336f6d
e410b86
4336f6d
e410b86
f367387
e410b86
9169bdf
 
 
2c45946
9169bdf
e410b86
9169bdf
e410b86
9169bdf
e410b86
f367387
 
 
 
 
 
 
 
 
 
 
 
 
 
bc78434
f367387
 
 
bc78434
f367387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc78434
f367387
 
 
 
 
e5959c0
f367387
 
 
 
 
d27a85c
f367387
 
 
 
 
 
 
 
3fa421f
f367387
 
 
 
bc78434
f367387
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
import gradio as gr
import regex as re
import csv
import pandas as pd
from typing import Dict, List, Tuple, Optional, Any
import logging
from pathlib import Path
import os

from analyzer import (
    combine_repo_files_for_llm,
    analyze_combined_file,
    parse_llm_json_response,
    analyze_code
)
from hf_utils import download_space_repo, search_top_spaces
from chatbot_page import chat_with_user, extract_keywords_from_conversation

# Configure logging
# The root logger is configured at import time: INFO level, with each record
# rendered as "<timestamp> - <logger name> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used by the helper functions below to report errors.
logger = logging.getLogger(__name__)

# Constants
# System prompt passed to chat_with_user() on every chat turn.
CHATBOT_SYSTEM_PROMPT = (
    "You are a helpful assistant. Your goal is to help the user describe their ideal open-source repo. "
    "Ask questions to clarify what they want, their use case, preferred language, features, etc. "
    "When the user clicks 'End Chat', analyze the conversation and return about 5 keywords for repo search. "
    "Return only the keywords as a comma-separated list."
)

# Greeting text for the chat assistant.
# NOTE(review): not referenced anywhere else in the visible part of this file.
CHATBOT_INITIAL_MESSAGE = (
    "Hello! Please tell me about your ideal Hugging Face repo. "
    "What use case, preferred language, or features are you looking for?"
)

# State management
class AppState:
    """Mutable bag of per-session data shared by the UI callbacks."""

    def __init__(self):
        # Conversation side: chat transcript and the keywords derived from it.
        self.chat_history: List[Tuple[str, str]] = []
        self.generated_keywords: List[str] = []

        # Analysis side: the repo queue, a cursor into it, and stored results.
        self.repo_ids: List[str] = []
        self.current_repo_idx: int = 0
        self.analysis_results: Dict[str, Dict[str, Any]] = {}

    def reset(self):
        """Put every attribute back to its freshly-constructed value."""
        # Re-running the initializer replaces all containers with new empty ones.
        self.__init__()

# Helper functions
def read_csv_as_text(csv_filename: str) -> pd.DataFrame:
    """Load *csv_filename* into a DataFrame with every column read as text.

    Any failure while reading is logged and an empty frame carrying the five
    analysis columns is returned in its place, so this function never raises
    for a bad or missing file.
    """
    fallback_columns = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
    try:
        frame = pd.read_csv(csv_filename, dtype=str)
    except Exception as e:
        logger.error(f"Error reading CSV file {csv_filename}: {e}")
        frame = pd.DataFrame(columns=fallback_columns)
    return frame

def write_repos_to_csv(repo_ids: List[str], csv_filename: str = "repo_ids.csv") -> None:
    """Overwrite *csv_filename* with a header row plus one row per repo ID.

    Each data row holds the repo ID followed by four empty cells, one for each
    analysis column. Errors are logged rather than raised.
    """
    header = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
    try:
        with open(csv_filename, mode="w", newline='', encoding="utf-8") as out:
            sheet = csv.writer(out)
            sheet.writerow(header)
            # The analysis columns start blank; they are filled in later.
            sheet.writerows([rid, "", "", "", ""] for rid in repo_ids)
    except Exception as e:
        logger.error(f"Error writing to CSV file {csv_filename}: {e}")

def process_repo_input(text: str, state: AppState) -> pd.DataFrame:
    """Parse repo IDs out of *text*, store them on *state*, and return the table.

    IDs may be separated by commas and/or newlines; surrounding whitespace is
    stripped and empty entries are dropped. The state's cursor is rewound to
    the first repo. For empty input the state's list is cleared and an empty
    frame with the analysis columns is returned without touching the CSV.
    """
    if not text:
        state.repo_ids = []
        state.current_repo_idx = 0
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])

    stripped = (piece.strip() for piece in re.split(r'[\n,]+', text))
    parsed_ids = [piece for piece in stripped if piece]

    state.repo_ids = parsed_ids
    state.current_repo_idx = 0

    # Persist the fresh list, then read it back so the caller sees exactly
    # what is on disk.
    write_repos_to_csv(parsed_ids)
    return read_csv_as_text("repo_ids.csv")

def analyze_single_repo(repo_id: str) -> Tuple[str, str, Dict[str, Any]]:
    """Download one repo, run the LLM analysis on it, and summarize the outcome.

    Returns a triple ``(combined_content, summary, llm_json)``: the text of the
    combined repo file, a human-readable summary string, and whatever
    ``parse_llm_json_response`` produced. If anything raises, the error is
    logged and ``("", "Error analyzing repo: ...", {"error": ...})`` is
    returned instead.
    """
    try:
        download_space_repo(repo_id, local_dir="repo_files")
        txt_path = combine_repo_files_for_llm()

        with open(txt_path, "r", encoding="utf-8") as handle:
            combined_content = handle.read()

        llm_output = analyze_combined_file(txt_path)

        # Slice from the last '{' to the last '}' when that yields a non-empty,
        # correctly ordered span; otherwise hand the raw output to the parser.
        open_pos = llm_output.rfind('{')
        close_pos = llm_output.rfind('}')
        if 0 <= open_pos < close_pos:
            json_candidate = llm_output[open_pos:close_pos + 1]
        else:
            json_candidate = llm_output
        llm_json = parse_llm_json_response(json_candidate)

        parsed_is_dict = isinstance(llm_json, dict)
        if parsed_is_dict and "error" not in llm_json:
            strengths = llm_json.get("strength", "")
            weaknesses = llm_json.get("weaknesses", "")
            summary = f"JSON extraction: SUCCESS\n\nStrengths:\n{strengths}\n\nWeaknesses:\n{weaknesses}"
        else:
            raw_text = llm_json.get('raw', '') if parsed_is_dict else llm_json
            summary = f"JSON extraction: FAILED\nRaw: {raw_text}"

        return combined_content, summary, llm_json

    except Exception as e:
        logger.error(f"Error analyzing repo {repo_id}: {e}")
        return "", f"Error analyzing repo: {e}", {"error": str(e)}

def update_csv_with_analysis(repo_id: str, analysis_results: Dict[str, Any], csv_filename: str = "repo_ids.csv") -> pd.DataFrame:
    """Merge one repo's analysis into the CSV and return the resulting table.

    The first row whose "repo id" equals *repo_id* receives the analysis
    fields. If no row matches, a new row is appended. Results that are not a
    dict, or that contain an "error" key, are never written. The file is
    rewritten in every non-failing case. On any exception the error is logged
    and the CSV is simply re-read and returned.
    """
    analysis_columns = ("strength", "weaknesses", "speciality", "relevance rating")
    try:
        df = read_csv_as_text(csv_filename)
        usable = isinstance(analysis_results, dict) and "error" not in analysis_results

        # Find the index label of the first matching row, if there is one.
        not_found = object()
        target = next(
            (label for label, record in df.iterrows() if record["repo id"] == repo_id),
            not_found,
        )

        if target is not not_found:
            if usable:
                for column in analysis_columns:
                    df.at[target, column] = analysis_results.get(column, "")
        elif usable:
            fresh_row = {"repo id": repo_id}
            for column in analysis_columns:
                fresh_row[column] = analysis_results.get(column, "")
            df = pd.concat([df, pd.DataFrame([fresh_row])], ignore_index=True)

        df.to_csv(csv_filename, index=False)
        return df

    except Exception as e:
        logger.error(f"Error updating CSV for repo {repo_id}: {e}")
        return read_csv_as_text(csv_filename)

def show_combined_repo_and_llm(state: AppState) -> Tuple[str, str, pd.DataFrame]:
    """Analyze the repo under the state's cursor and advance the cursor.

    Returns ``(combined_content, summary, table)``. When no repo IDs are stored
    or the cursor has passed the end of the list, a message is returned in the
    first slot instead and nothing is analyzed.
    """
    queue = state.repo_ids
    if not queue:
        return "No repo ID available. Please submit repo IDs first.", "", pd.DataFrame()

    cursor = state.current_repo_idx
    if cursor >= len(queue):
        return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")

    repo_id = queue[cursor]
    combined_content, summary, analysis_results = analyze_single_repo(repo_id)
    table = update_csv_with_analysis(repo_id, analysis_results)

    # Move on only after this repo's results have been written.
    state.current_repo_idx += 1
    return combined_content, summary, table

def keyword_search_and_update(keyword: str, state: AppState) -> pd.DataFrame:
    """Search spaces for each keyword, store the unique hits on *state*, return the table.

    *keyword* may hold several comma- or newline-separated terms. Each term is
    searched with a limit of 5 results. Duplicate repo IDs are dropped, keeping
    the first occurrence. Empty input returns an empty frame and leaves the
    state untouched.
    """
    if not keyword:
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])

    terms = [term for term in (part.strip() for part in re.split(r'[\n,]+', keyword)) if term]

    hits = [rid for term in terms for rid in search_top_spaces(term, limit=5)]

    # dict keys keep insertion order, so this de-duplicates while preserving
    # the order in which repos were first found.
    unique_repo_ids = list(dict.fromkeys(hits))

    state.repo_ids = unique_repo_ids
    state.current_repo_idx = 0

    write_repos_to_csv(unique_repo_ids)
    return read_csv_as_text("repo_ids.csv")

# UI Components
def create_ui() -> gr.Blocks:
    """Create the Gradio interface.

    The app is a single ``gr.Blocks`` in which every "page" is a ``gr.Group``;
    navigation handlers show exactly one group and hide the others.

    Page flow wired here:
      * start      -> input ("Start Analysis"), chatbot ("Chat with Assistant"),
                      help ("View Help Guide")
      * input      -> analysis ("Start Analysis"), comparison
                      ("Compare Repositories")
      * analysis   -> results ("Finish Analysis", which also loads the results
                      table from ``repo_ids.csv`` and appends a history row)
      * chatbot    -> results ("End Chat", after extracting keywords)
      * results    -> start ("Start New Analysis", which resets the session
                      state), history ("View History")
      * comparison -> analysis, history -> results, help -> start

    Returns:
        The assembled, not yet launched, ``gr.Blocks`` app.
    """
    page_order = ["start", "input", "analysis", "chatbot", "results", "help", "comparison", "history"]
    result_columns = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]

    with gr.Blocks(title="Hugging Face Repo Analyzer", theme=gr.themes.Soft()) as app:
        # The session state has to be created inside the Blocks context;
        # a component created outside it does not belong to this app and
        # cannot be used as an event input or output.
        state = gr.State(AppState())

        gr.Markdown("# Hugging Face Repository Analyzer")

        # Start Page
        with gr.Group(visible=True) as start_page:
            gr.Markdown("""
            # Welcome to the Hugging Face Repository Analyzer!
            
            This tool helps you analyze and understand Hugging Face repositories. You can:
            - Enter repository IDs directly
            - Search repositories using keywords
            - Chat with an AI assistant to find the perfect repository
            - Get detailed analysis of repositories
            
            Click 'Start Analysis' to begin!
            """)
            with gr.Row():
                start_btn = gr.Button("Start Analysis", variant="primary")
                # Entry point to the chatbot page, which was otherwise unreachable.
                chat_btn = gr.Button("Chat with Assistant", variant="secondary")
                help_btn = gr.Button("View Help Guide", variant="secondary")

        # Help Guide
        with gr.Group(visible=False) as help_page:
            gr.Markdown("""
            # Help Guide
            
            ## Quick Start
            1. Enter repository IDs or search by keywords
            2. Start the analysis
            3. Review the results
            
            ## Features
            - **Repository Analysis**: Get detailed insights about repositories
            - **Keyword Search**: Find repositories matching your criteria
            - **AI Assistant**: Chat to find the perfect repository
            - **Comparison**: Compare repositories side by side
            
            ## Keyboard Shortcuts
            - `Ctrl + Enter`: Send message in chat
            - `Ctrl + S`: Start new analysis
            - `Ctrl + H`: Toggle help guide
            """)
            back_btn = gr.Button("Back to Start", variant="primary")

        # Input Page
        with gr.Group(visible=False) as input_page:
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Enter Repository IDs")
                    repo_id_input = gr.Textbox(
                        label="Enter repo IDs (comma or newline separated)",
                        lines=5,
                        placeholder="repo1, repo2\nrepo3"
                    )
                    submit_btn = gr.Button("Submit Repo IDs", variant="primary")
                    submit_status = gr.Textbox(label="Status", visible=False)

                with gr.Column():
                    gr.Markdown("### Or Search by Keywords")
                    keyword_input = gr.Textbox(
                        label="Enter keywords to search",
                        lines=3,
                        placeholder="Enter keywords separated by commas"
                    )
                    search_btn = gr.Button("Search by Keywords", variant="primary")
                    search_status = gr.Textbox(label="Status", visible=False)

            df_output = gr.Dataframe(
                headers=result_columns,
                datatype=["str", "str", "str", "str", "str"]
            )
            with gr.Row():
                analyze_btn = gr.Button("Start Analysis", variant="primary")
                analyze_status = gr.Textbox(label="Status", visible=False)
                compare_btn = gr.Button("Compare Repositories", variant="secondary")

        # Analysis Page
        with gr.Group(visible=False) as analysis_page:
            gr.Markdown("### Repository Analysis")
            progress = gr.Slider(
                minimum=0,
                maximum=100,
                value=0,
                label="Analysis Progress",
                interactive=False
            )
            with gr.Row():
                with gr.Column():
                    content_output = gr.Textbox(label="Repository Content", lines=10)
                with gr.Column():
                    summary_output = gr.Textbox(label="Analysis Summary", lines=10)
            with gr.Row():
                next_btn = gr.Button("Analyze Next Repository", variant="primary")
                next_status = gr.Textbox(label="Status", visible=False)
                finish_btn = gr.Button("Finish Analysis", variant="secondary")
                export_btn = gr.Button("Export Results", variant="secondary")
                export_status = gr.Textbox(label="Status", visible=False)

        # Comparison Page
        with gr.Group(visible=False) as comparison_page:
            gr.Markdown("### Repository Comparison")
            with gr.Row():
                with gr.Column():
                    repo1_select = gr.Dropdown(
                        label="Select First Repository",
                        choices=[],
                        interactive=True
                    )
                    repo1_content = gr.Textbox(label="Repository 1 Content", lines=10)
                    repo1_summary = gr.Textbox(label="Repository 1 Summary", lines=10)
                with gr.Column():
                    repo2_select = gr.Dropdown(
                        label="Select Second Repository",
                        choices=[],
                        interactive=True
                    )
                    repo2_content = gr.Textbox(label="Repository 2 Content", lines=10)
                    repo2_summary = gr.Textbox(label="Repository 2 Summary", lines=10)
            # Distinct name: reusing `compare_btn` here would shadow the
            # input-page button and leave that one without a handler.
            run_compare_btn = gr.Button("Compare", variant="primary")
            back_to_analysis_btn = gr.Button("Back to Analysis", variant="secondary")

        # Chatbot Page
        with gr.Group(visible=False) as chatbot_page:
            gr.Markdown("### Chat with Assistant")
            gr.Markdown("""
            Tell me about your ideal repository. I'll help you find the perfect match!
            What are you looking for? Consider:
            - Your use case
            - Preferred programming language
            - Required features
            - Any specific requirements
            """)
            chatbot = gr.Chatbot(
                label="Chat with Assistant",
                height=400,
                type="messages"
            )
            msg = gr.Textbox(
                label="Message",
                placeholder="Type your message here...",
                lines=2
            )
            with gr.Row():
                send_btn = gr.Button("Send", variant="primary")
                send_status = gr.Textbox(label="Status", visible=False)
                end_chat_btn = gr.Button("End Chat", variant="secondary")
                end_chat_status = gr.Textbox(label="Status", visible=False)

        # Results Page
        with gr.Group(visible=False) as results_page:
            gr.Markdown("### Analysis Results")
            # Shown on the page the user lands on after "End Chat".
            keywords_output = gr.Textbox(label="Extracted Keywords")
            with gr.Row():
                with gr.Column():
                    results_df = gr.Dataframe(
                        headers=result_columns,
                        datatype=["str", "str", "str", "str", "str"]
                    )
                with gr.Column():
                    gr.Markdown("### Repository Metrics")
                    metrics_plot = gr.Plot(label="Repository Metrics")
            with gr.Row():
                restart_btn = gr.Button("Start New Analysis", variant="primary")
                # Distinct name so the analysis-page export button keeps its own handler.
                results_export_btn = gr.Button("Export Results", variant="secondary")
                history_btn = gr.Button("View History", variant="secondary")

        # History Page
        with gr.Group(visible=False) as history_page:
            gr.Markdown("### Analysis History")
            history_df = gr.Dataframe(
                headers=["Date", "Repositories", "Keywords", "Results"],
                datatype=["str", "str", "str", "str"]
            )
            back_to_results_btn = gr.Button("Back to Results", variant="primary")

        # Shared by both export buttons, so it lives outside the page groups.
        export_message = gr.Textbox(label="Export Status")

        # Page groups in the same order as `page_order`; every navigation
        # handler returns one visibility update per entry.
        pages = [
            start_page, input_page, analysis_page, chatbot_page,
            results_page, help_page, comparison_page, history_page,
        ]

        # ------------------------------------------------------------------
        # Handler functions
        # ------------------------------------------------------------------
        def navigate_to(page: str) -> List[Any]:
            """Return one visibility update per page, showing only *page*."""
            return [gr.update(visible=(name == page)) for name in page_order]

        def make_navigator(page: str):
            """Build a zero-argument click handler that shows *page*."""
            def handler() -> List[Any]:
                return navigate_to(page)
            return handler

        def update_progress(current: int, total: int) -> float:
            """Return the percentage of repos processed (0 when there are none)."""
            if total <= 0:
                return 0.0
            return (current / total) * 100

        def export_results(df: pd.DataFrame) -> str:
            """Export results to a timestamped CSV and return a status message."""
            try:
                filename = f"analysis_results_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv"
                df.to_csv(filename, index=False)
                return f"Results exported to {filename}"
            except Exception as e:
                return f"Error exporting results: {e}"

        def load_history() -> pd.DataFrame:
            """Load analysis history, or an empty table if none can be read."""
            empty = pd.DataFrame(columns=["Date", "Repositories", "Keywords", "Results"])
            try:
                return pd.read_csv("analysis_history.csv")
            except FileNotFoundError:
                # No history has been saved yet; this is the normal first-run case.
                return empty
            except Exception as e:
                logger.error(f"Error loading history: {e}")
                return empty

        def save_to_history(repos: List[str], keywords: List[str], results: pd.DataFrame) -> None:
            """Append the current analysis to the history CSV."""
            try:
                past_runs = load_history()
                new_row = {
                    "Date": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
                    "Repositories": ", ".join(repos),
                    "Keywords": ", ".join(keywords),
                    "Results": results.to_json()
                }
                past_runs = pd.concat([past_runs, pd.DataFrame([new_row])], ignore_index=True)
                past_runs.to_csv("analysis_history.csv", index=False)
            except Exception as e:
                logger.error(f"Error saving to history: {e}")

        def process_repo_input_with_status(text: str, state: AppState) -> Tuple[pd.DataFrame, float, str]:
            """Process repo input; also rewinds the progress bar and clears the status."""
            df = process_repo_input(text, state)
            return df, 0.0, ""

        def keyword_search_with_status(keyword: str, state: AppState) -> Tuple[pd.DataFrame, float, str]:
            """Search keywords; also rewinds the progress bar and clears the status."""
            df = keyword_search_and_update(keyword, state)
            return df, 0.0, ""

        def analyze_with_status(state: AppState) -> Tuple[str, str, pd.DataFrame, float, str]:
            """Analyze the next repo and report how far through the list we are."""
            content, summary, df = show_combined_repo_and_llm(state)
            done = update_progress(state.current_repo_idx, len(state.repo_ids))
            return content, summary, df, done, ""

        def send_message_with_status(user_message: str, history: List[Dict[str, str]], state: AppState) -> Tuple[List[Dict[str, str]], str, str]:
            """Append the user's message and the assistant's reply to the chat."""
            # The chatbot value may be None before the first message.
            history = list(history or [])
            if not user_message:
                return history, "", ""
            history.append({"role": "user", "content": user_message})
            response = chat_with_user(user_message, history, CHATBOT_SYSTEM_PROMPT)
            history.append({"role": "assistant", "content": response})
            return history, "", ""

        def end_chat_with_status(history: List[Dict[str, str]], state: AppState) -> Tuple[Any, ...]:
            """Extract keywords from the chat, store them, and show the results page."""
            keyword_list: List[str] = []
            if history:
                keywords = extract_keywords_from_conversation(history)
                # NOTE(review): the return type of extract_keywords_from_conversation
                # is not visible here; both a list and a comma-separated string
                # are accepted.
                if isinstance(keywords, str):
                    keyword_list = [k.strip() for k in re.split(r'[\n,]+', keywords) if k.strip()]
                else:
                    keyword_list = [str(k) for k in (keywords or [])]
                state.generated_keywords = keyword_list
            # A Textbox needs a string, not a list.
            return (", ".join(keyword_list), *navigate_to("results"), "")

        def export_with_status(df: pd.DataFrame) -> Tuple[str, str]:
            """Export the given table and clear the status field."""
            return export_results(df), ""

        def finish_analysis(state: AppState) -> Tuple[Any, ...]:
            """Load the stored results, record them in the history, show the results page."""
            df = read_csv_as_text("repo_ids.csv")
            save_to_history(state.repo_ids, state.generated_keywords, df)
            return (df, *navigate_to("results"))

        def restart(state: AppState) -> Tuple[Any, ...]:
            """Reset the session state and go back to the start page."""
            state.reset()
            return (state, *navigate_to("start"))

        def open_history() -> Tuple[Any, ...]:
            """Load the history table and show the history page."""
            return (load_history(), *navigate_to("history"))

        def open_comparison(state: AppState) -> Tuple[Any, ...]:
            """Refresh both repository dropdowns and show the comparison page."""
            first, second = update_repo_choices(state)
            return (
                gr.update(choices=list(first)),
                gr.update(choices=list(second)),
                *navigate_to("comparison"),
            )

        # ------------------------------------------------------------------
        # Event wiring
        # ------------------------------------------------------------------
        # Plain navigation buttons. Handlers must return a flat sequence with
        # exactly one value per output component.
        for button, target in (
            (start_btn, "input"),
            (chat_btn, "chatbot"),
            (help_btn, "help"),
            (back_btn, "start"),
            (analyze_btn, "analysis"),
            (back_to_results_btn, "results"),
            (back_to_analysis_btn, "analysis"),
        ):
            button.click(fn=make_navigator(target), inputs=[], outputs=pages)

        # Handlers with status updates: a quick first step sets the status
        # text, then the real work runs and clears it again.
        submit_btn.click(
            fn=lambda: "Processing...",
            inputs=[],
            outputs=[submit_status]
        ).then(
            fn=process_repo_input_with_status,
            inputs=[repo_id_input, state],
            outputs=[df_output, progress, submit_status]
        )

        search_btn.click(
            fn=lambda: "Searching...",
            inputs=[],
            outputs=[search_status]
        ).then(
            fn=keyword_search_with_status,
            inputs=[keyword_input, state],
            outputs=[df_output, progress, search_status]
        )

        next_btn.click(
            fn=lambda: "Analyzing...",
            inputs=[],
            outputs=[next_status]
        ).then(
            fn=analyze_with_status,
            inputs=[state],
            outputs=[content_output, summary_output, df_output, progress, next_status]
        )

        send_btn.click(
            fn=lambda: "Sending...",
            inputs=[],
            outputs=[send_status]
        ).then(
            fn=send_message_with_status,
            inputs=[msg, chatbot, state],
            outputs=[chatbot, msg, send_status]
        )

        end_chat_btn.click(
            fn=lambda: "Processing...",
            inputs=[],
            outputs=[end_chat_status]
        ).then(
            fn=end_chat_with_status,
            inputs=[chatbot, state],
            outputs=[keywords_output, *pages, end_chat_status]
        )

        # Each export button exports the table shown on its own page.
        for button, table in ((export_btn, df_output), (results_export_btn, results_df)):
            button.click(
                fn=lambda: "Exporting...",
                inputs=[],
                outputs=[export_status]
            ).then(
                fn=export_with_status,
                inputs=[table],
                outputs=[export_message, export_status]
            )

        finish_btn.click(
            fn=finish_analysis,
            inputs=[state],
            outputs=[results_df, *pages]
        )

        # The handler receives the session's AppState value (not the gr.State
        # component) and hands it back so the reset is stored explicitly.
        restart_btn.click(
            fn=restart,
            inputs=[state],
            outputs=[state, *pages]
        )

        history_btn.click(
            fn=open_history,
            inputs=[],
            outputs=[history_df, *pages]
        )

        # Both the input-page button and the comparison-page button refresh
        # the dropdown choices and show the comparison page.
        for button in (compare_btn, run_compare_btn):
            button.click(
                fn=open_comparison,
                inputs=[state],
                outputs=[repo1_select, repo2_select, *pages]
            )

    return app

def update_repo_choices(state: AppState) -> Tuple[List[str], List[str]]:
    """Return the state's repo ID list twice, one copy of the reference per comparison dropdown."""
    # Both slots refer to the same list object held by the state.
    return state.repo_ids, state.repo_ids

if __name__ == "__main__":
    # Build the interface and start the Gradio server only when this file is
    # executed as a script, not when it is imported as a module.
    app = create_ui()
    app.launch()