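"""Entry point for the LLM Response Comparator: a two-tab Gradio app that
collects a prompt and two model responses, then runs Bag of Words analysis."""
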
import gradio as gr
from ui.dataset_input import create_dataset_input, load_example_dataset
from ui.analysis_screen import create_analysis_screen, process_analysis_request
from visualization.bow_visualizer import process_and_visualize_analysis
import nltk
import os

# Download necessary NLTK data packages
def download_nltk_resources():
    """Download required NLTK resources if not already downloaded"""
    try:
        # Create nltk_data directory in the user's home directory if it doesn't exist
        nltk_data_path = os.path.expanduser("~/nltk_data")
        os.makedirs(nltk_data_path, exist_ok=True)
        
        # Add this path to NLTK's data path
        nltk.data.path.append(nltk_data_path)
        
        # Download required resources
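        # Note: recent NLTK releases resolve tokenizers via 'punkt_tab'
        # (a pickle-free replacement for 'punkt'), so both are listed here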
        resources = ['punkt', 'wordnet', 'stopwords', 'punkt_tab']
        for resource in resources:
            try:
                # NLTK files resources under different category directories, so try each likely location
                locations = [
                    f'tokenizers/{resource}',
                    f'corpora/{resource}',
                    f'taggers/{resource}',
                    f'{resource}'
                ]
                
                found = False
                for location in locations:
                    try:
                        nltk.data.find(location)
                        print(f"Resource {resource} already downloaded")
                        found = True
                        break
                    except LookupError:
                        continue
                
                if not found:
                    print(f"Downloading {resource}...")
                    nltk.download(resource, quiet=True)
            except Exception as e:
                print(f"Error with resource {resource}: {e}")
        
        print("NLTK resources check completed")
    except Exception as e:
        print(f"Error downloading NLTK resources: {e}")

def create_app():
    """
    Create a streamlined Gradio app for dataset input and Bag of Words analysis. 
    
    Returns:
        gr.Blocks: The Gradio application
    """
    with gr.Blocks(title="LLM Response Comparator") as app:
        # Application state to share data between tabs
        dataset_state = gr.State({})
        analysis_results_state = gr.State({})
        
        # Dataset Input Tab
        with gr.Tab("Dataset Input"):
            dataset_inputs, example_dropdown, load_example_btn, create_btn, prompt, response1, model1, response2, model2 = create_dataset_input()
            
            # Add status indicator to show when dataset is created
            dataset_status = gr.Markdown("*No dataset loaded*")
            
            # Load example dataset
            load_example_btn.click(
                fn=load_example_dataset,
                inputs=[example_dropdown],
                outputs=[prompt, response1, model1, response2, model2]  # Update all field values
            )

            # Save dataset to state and update status
            def create_dataset(p, r1, m1, r2, m2):
                if not p or not r1 or not r2:
                    return {}, "❌ **Error:** Please fill in at least the prompt and both responses"
                
                dataset = {
                    "entries": [
                        {"prompt": p, "response": r1, "model": m1 or "Model 1"},
                        {"prompt": p, "response": r2, "model": m2 or "Model 2"}
                    ]
                }
                return dataset, "✅ **Dataset created successfully!** You can now go to the Analysis tab"
                
            create_btn.click(
                fn=create_dataset,
                inputs=[prompt, response1, model1, response2, model2],
                outputs=[dataset_state, dataset_status]
            )
        
        # Analysis Tab
        with gr.Tab("Analysis"):
            # Use create_analysis_screen to get UI components including visualization container
            analysis_options, analysis_params, run_analysis_btn, analysis_output, bow_top_slider, visualization_container = create_analysis_screen()
            
            # Define a helper function to extract parameter values and call process_analysis_request
            def run_analysis(dataset, selected_analyses, bow_top):
                try:
                    parameters = {
                        "bow_top": bow_top,
                    }
                    print("Running analysis with parameters:", parameters)
                    
                    # Process the analysis request
                    analysis_results, output_update = process_analysis_request(dataset, selected_analyses, parameters)
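                    # output_update is not used here; the True returned below
                    # serves as the value for analysis_output instead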
                    
                    # Generate visualization components
                    print("Generating visualization components...")
                    # Instead of directly returning the components, we'll update the container
                    visualization_components = process_and_visualize_analysis(analysis_results)
                    
                    # Return the raw results for state, True to flag completion on
                    # analysis_output, and an update that swaps the generated
                    # components into the visualization container
                    return analysis_results, True, gr.update(value=visualization_components)
                except Exception as e:
                    import traceback
                    error_msg = f"Error in run_analysis: {str(e)}\n{traceback.format_exc()}"
                    print(error_msg)
                    return {"error": error_msg}, True, gr.update(value=[gr.Markdown(f"**Error:**\n\n```\n{error_msg}\n```")])

            # Wire the run button: read the dataset, selected analyses, and slider value
            run_analysis_btn.click(
                fn=run_analysis,
                inputs=[dataset_state, analysis_options, bow_top_slider],
                outputs=[analysis_results_state, analysis_output, visualization_container]
            )
    
    return app

if __name__ == "__main__":
    # Download required NLTK resources before launching the app
    download_nltk_resources()
    
    app = create_app()
    app.launch()
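    # Optional: launch(server_name="0.0.0.0") listens on all interfaces, and
    # launch(share=True) creates a temporary public Gradio link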