Ryan committed
Commit 8e34de3 · 1 Parent(s): c34967c
Files changed (2)
  1. app.py +43 -123
  2. processors/bow_analysis.py +225 -0
app.py CHANGED
@@ -1,138 +1,58 @@
 import gradio as gr
 import os
-import json
-
-# Import UI components
-from ui.main_screen import create_main_screen
-#from ui.dataset_input import create_dataset_input, process_dataset_submission, load_example_dataset
-#from ui.analysis_screen import create_analysis_screen, process_analysis_request
-#from ui.visualization_screen import create_visualization_screen, update_visualization
-#from ui.classification_screen import create_classification_screen, update_classification_results
-#from ui.report_screen import create_report_screen, update_report, update_with_llm_analysis
-
-# Import utility functions
-#from utils.llm_analyzer import run_llm_analysis
-#from utils.report_generator import create_report, export_report
-#from utils.text_dataset_parser import get_available_text_datasets
+from ui.dataset_input import create_dataset_input, load_example_dataset
+from ui.analysis_screen import process_analysis_request
 
 def create_app():
     """
-    Create the complete Gradio app with all tabs
+    Create a streamlined Gradio app for dataset input and Bag of Words analysis.
 
     Returns:
         gr.Blocks: The Gradio application
     """
-    with gr.Blocks(title="LLM Response Comparator", theme=gr.themes.Soft()) as app:
-        # Application states to share data between tabs
+    with gr.Blocks(title="LLM Response Comparator") as app:
+        # Application state to share data between tabs
         dataset_state = gr.State({})
         analysis_results_state = gr.State({})
-        visualization_state = gr.State({})
-        classification_results_state = gr.State({})
-        report_state = gr.State({})
-
-        # Create tabs
-        with gr.Tabs() as tabs:
-            with gr.Tab("Home", id="home_tab"):
-                welcome_msg, about_info, get_started_btn = create_main_screen()
-
-            with gr.Tab("Dataset Input", id="dataset_tab"):
-                dataset_inputs, example_dropdown, load_example_btn, analyze_btn = create_dataset_input()
-
-            with gr.Tab("Analysis", id="analysis_tab"):
-                analysis_options, analysis_params, run_analysis_btn, analysis_output = create_analysis_screen()
-
-            with gr.Tab("Visualization", id="viz_tab"):
-                viz_options, viz_params, viz_output = create_visualization_screen()
-
-            with gr.Tab("Classification", id="classification_tab"):
-                classifier_options, classifier_params, run_classifier_btn, classifier_output = create_classification_screen()
-
-            with gr.Tab("Report", id="report_tab"):
-                report_options, generate_report_btn, llm_analysis_btn, export_btn, report_output = create_report_screen()
-
-        # Set up event handlers
-
-        # Main screen navigation
-        get_started_btn.click(
-            fn=lambda: gr.Tabs.update(selected="dataset_tab"),
-            outputs=[tabs]
-        )
-
-        # Dataset processing
-        analyze_btn.click(
-            fn=process_dataset_submission,
-            inputs=dataset_inputs,
-            outputs=[dataset_state, gr.Tabs.update(selected="analysis_tab")]
-        )
-
-        # Load example dataset
-        load_example_btn.click(
-            fn=load_example_dataset,
-            inputs=[example_dropdown],
-            outputs=[dataset_inputs]
-        )
-
-        # Analysis
-        run_analysis_btn.click(
-            fn=process_analysis_request,
-            inputs=[dataset_state, analysis_options, analysis_params],
-            outputs=[analysis_results_state, analysis_output]
-        )
-
-        # Visualization updates based on analysis results
-        tabs.select(
-            fn=lambda tab, results: update_visualization(results, viz_options.value, viz_params.value) if tab == "viz_tab" and results else None,
-            inputs=["selected", analysis_results_state],
-            outputs=[viz_output]
-        )
-
-        viz_options.change(
-            fn=update_visualization,
-            inputs=[analysis_results_state, viz_options, viz_params],
-            outputs=[viz_output]
-        )
-
-        # Classification
-        run_classifier_btn.click(
-            fn=update_classification_results,
-            inputs=[dataset_state, classifier_options, classifier_params],
-            outputs=[classification_results_state, classifier_output]
-        )
-
-        # Report generation
-        generate_report_btn.click(
-            fn=lambda results, class_results, options: update_report(create_report(results, class_results), options),
-            inputs=[analysis_results_state, classification_results_state, report_options],
-            outputs=[report_state, report_output]
-        )
-
-        # LLM meta-analysis
-        llm_analysis_btn.click(
-            fn=lambda report: update_with_llm_analysis(report, run_llm_analysis(report)),
-            inputs=[report_state],
-            outputs=[report_state, report_output]
-        )
-
-        # Export report
-        export_btn.click(
-            fn=lambda report, format: export_report(report, format),
-            inputs=[report_state, gr.Dropdown(choices=["md", "html", "pdf"], value="md", label="Export Format")],
-            outputs=[]
-        )
-
-    return app
-
-def main():
-    """
-    Main function to launch the Gradio app
-    """
-    # Create necessary directories
-    os.makedirs(os.path.join("dataset", "text_datasets"), exist_ok=True)
-    os.makedirs("reports", exist_ok=True)
-
-    # Create and launch app
-    app = create_app()
-    app.launch(share=True)
+        # Dataset Input Tab
+        with gr.Tab("Dataset Input"):
+            dataset_inputs, example_dropdown, load_example_btn, create_btn, prompt, response1, model1, response2, model2 = create_dataset_input()
+
+            # Load example dataset
+            load_example_btn.click(
+                fn=load_example_dataset,
+                inputs=[example_dropdown],
+                outputs=[dataset_inputs]
+            )
+
+            # Save dataset to state
+            create_btn.click(
+                fn=lambda p, r1, m1, r2, m2: {"entries": [{"prompt": p, "response": r1, "model": m1}, {"prompt": p, "response": r2, "model": m2}]},
+                inputs=[prompt, response1, model1, response2, model2],
+                outputs=[dataset_state]
+            )
+
+        # Analysis Tab
+        with gr.Tab("Analysis"):
+            analysis_options = gr.CheckboxGroup(
+                choices=["Bag of Words"],
+                value=["Bag of Words"],
+                label="Select Analyses to Run"
+            )
+            run_analysis_btn = gr.Button("Run Analysis", variant="primary")
+            analysis_output = gr.JSON(label="Analysis Results", visible=False)
+
+            # Extra analysis parameters; a bare dict is not a valid Gradio
+            # input, so hold it in a State component
+            analysis_params_state = gr.State({})
+
+            # Run analysis
+            run_analysis_btn.click(
+                fn=process_analysis_request,
+                inputs=[dataset_state, analysis_options, analysis_params_state],
+                outputs=[analysis_results_state, analysis_output]
+            )
+
+    return app
 
 if __name__ == "__main__":
-    main()
+    # Create and launch the app
+    app = create_app()
+    app.launch()
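
For reference, a minimal sketch of the handler this wires up. The real process_analysis_request lives in ui/analysis_screen.py and is not part of this diff; only the call signature (dataset, selected analyses, params) and the two outputs (results state, JSON component update) are inferred from the wiring above, so treat every detail here as an assumption:

    # Hypothetical sketch; not the committed implementation.
    import gradio as gr
    from processors.bow_analysis import compare_bow_across_texts

    def process_analysis_request(dataset, analyses, params):
        results = {}
        if "Bag of Words" in analyses and dataset.get("entries"):
            texts = [e["response"] for e in dataset["entries"]]
            models = [e["model"] for e in dataset["entries"]]
            results["bag_of_words"] = compare_bow_across_texts(texts, models)
        # Reveal the hidden JSON component and fill it with the results
        return results, gr.update(visible=True, value=results)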
processors/bow_analysis.py ADDED
@@ -0,0 +1,225 @@
+from sklearn.feature_extraction.text import CountVectorizer
+import numpy as np
+from collections import Counter
+import re
+import nltk
+from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import word_tokenize
+
+# Download necessary NLTK data
+try:
+    nltk.data.find('tokenizers/punkt')
+except LookupError:
+    nltk.download('punkt')
+
+try:
+    nltk.data.find('corpora/stopwords')
+except LookupError:
+    nltk.download('stopwords')
+
+try:
+    nltk.data.find('corpora/wordnet')
+except LookupError:
+    nltk.download('wordnet')
+
+def preprocess_text(text):
+    """
+    Preprocess text for bag of words analysis
+
+    Args:
+        text (str): Input text
+
+    Returns:
+        str: Preprocessed text
+    """
+    # Convert to lowercase
+    text = text.lower()
+
+    # Remove special characters and digits
+    text = re.sub(r'[^a-zA-Z\s]', '', text)
+
+    # Tokenize
+    tokens = word_tokenize(text)
+
+    # Remove stopwords
+    stop_words = set(stopwords.words('english'))
+    tokens = [token for token in tokens if token not in stop_words]
+
+    # Lemmatize
+    lemmatizer = WordNetLemmatizer()
+    tokens = [lemmatizer.lemmatize(token) for token in tokens]
+
+    # Filter out short words (likely not meaningful)
+    tokens = [token for token in tokens if len(token) > 2]
+
+    # Join back to string
+    return ' '.join(tokens)
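
For a quick sanity check of the preprocessing pipeline (illustrative only; exact tokens depend on the installed NLTK corpora and the lemmatizer's noun-by-default behavior):

    print(preprocess_text("The 3 quick foxes were running over 2 lazy dogs!"))
    # roughly: "quick fox running lazy dog"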
+
+def create_bow(text):
+    """
+    Create bag of words representation
+
+    Args:
+        text (str): Input text
+
+    Returns:
+        dict: Bag of words representation with word counts
+    """
+    # Preprocess text
+    preprocessed_text = preprocess_text(text)
+
+    # Tokenize
+    tokens = preprocessed_text.split()
+
+    # Count occurrences
+    word_counts = Counter(tokens)
+
+    return dict(word_counts)
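
A minimal usage sketch for create_bow (counts reflect the preprocessing above):

    create_bow("Cats chase mice. Cats sleep.")
    # -> {'cat': 2, 'chase': 1, 'mouse': 1, 'sleep': 1}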
+
+# Note: renamed from compare_bow so it is not shadowed by the public
+# compare_bow(texts, model_names, top_n) wrapper defined at the bottom of
+# this module (a later def of the same name would override this one and
+# break the pairwise calls in compare_bow_across_texts).
+def compare_bow_pair(bow1, bow2):
+    """
+    Compare two bag of words representations
+
+    Args:
+        bow1 (dict): First bag of words
+        bow2 (dict): Second bag of words
+
+    Returns:
+        dict: Comparison metrics
+    """
+    # Get all unique words
+    all_words = set(bow1.keys()).union(set(bow2.keys()))
+
+    # Words in both
+    common_words = set(bow1.keys()).intersection(set(bow2.keys()))
+
+    # Words unique to each
+    unique_to_1 = set(bow1.keys()) - set(bow2.keys())
+    unique_to_2 = set(bow2.keys()) - set(bow1.keys())
+
+    # Calculate Jaccard similarity
+    jaccard = len(common_words) / len(all_words) if len(all_words) > 0 else 0
+
+    # Calculate cosine similarity
+    vec1 = np.zeros(len(all_words))
+    vec2 = np.zeros(len(all_words))
+
+    for i, word in enumerate(all_words):
+        vec1[i] = bow1.get(word, 0)
+        vec2[i] = bow2.get(word, 0)
+
+    # Normalize vectors
+    norm1 = np.linalg.norm(vec1)
+    norm2 = np.linalg.norm(vec2)
+
+    if norm1 == 0 or norm2 == 0:
+        cosine = 0.0
+    else:
+        # Cast to a plain float so the result stays JSON-serializable
+        cosine = float(np.dot(vec1, vec2) / (norm1 * norm2))
+
+    return {
+        "jaccard_similarity": jaccard,
+        "cosine_similarity": cosine,
+        "common_word_count": len(common_words),
+        "unique_to_first": list(unique_to_1)[:20],   # Limit for readability
+        "unique_to_second": list(unique_to_2)[:20]   # Limit for readability
+    }
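
A worked example of the two similarity metrics, with hand-checkable numbers:

    b1 = {"cat": 2, "dog": 1}
    b2 = {"cat": 1, "bird": 1}
    compare_bow_pair(b1, b2)
    # union = {cat, dog, bird}, intersection = {cat}
    # jaccard = 1/3 ≈ 0.333
    # over (cat, dog, bird): v1 = [2, 1, 0], v2 = [1, 0, 1]
    # cosine = (2*1 + 1*0 + 0*1) / (sqrt(5) * sqrt(2)) = 2/sqrt(10) ≈ 0.632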
+
+def important_words(bow, top_n=10):
+    """
+    Extract the most frequent words
+
+    Args:
+        bow (dict): Bag of words representation
+        top_n (int): Number of top words to return
+
+    Returns:
+        list: Top words with counts
+    """
+    # Sort by count, descending
+    sorted_words = sorted(bow.items(), key=lambda x: x[1], reverse=True)
+
+    # Return top N
+    return [{"word": word, "count": count} for word, count in sorted_words[:top_n]]
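
For example:

    important_words({"cat": 5, "dog": 2, "bird": 7}, top_n=2)
    # -> [{'word': 'bird', 'count': 7}, {'word': 'cat', 'count': 5}]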
+
+def compare_bow_across_texts(texts, model_names, top_n=25):
+    """
+    Compare bag of words across multiple texts
+
+    Args:
+        texts (list): List of text responses
+        model_names (list): List of model names corresponding to responses
+        top_n (int): Number of top words to include
+
+    Returns:
+        dict: Comparative bag of words analysis
+    """
+    # Create bag of words for each text
+    bows = [create_bow(text) for text in texts]
+
+    # Map to models
+    model_bows = {model: bow for model, bow in zip(model_names, bows)}
+
+    # Get important words for each model
+    model_important_words = {model: important_words(bow, top_n) for model, bow in model_bows.items()}
+
+    # Compare pairwise
+    comparisons = {}
+    for i, model1 in enumerate(model_names):
+        for j, model2 in enumerate(model_names):
+            if j <= i:  # Avoid duplicate comparisons
+                continue
+
+            comparison_key = f"{model1} vs {model2}"
+            comparisons[comparison_key] = compare_bow_pair(model_bows[model1], model_bows[model2])
+
+    # Create combined word list across all models
+    all_words = set()
+    for bow in bows:
+        all_words.update(bow.keys())
+
+    # Create a matrix of word counts across models
+    word_count_matrix = {}
+    for word in sorted(list(all_words)):
+        word_counts = [bow.get(word, 0) for bow in bows]
+        # Only include words that show up in at least one model
+        if any(count > 0 for count in word_counts):
+            word_count_matrix[word] = {model: bow.get(word, 0) for model, bow in zip(model_names, bows)}
+
+    # Sort matrix by most differential words (words with biggest variance across models)
+    word_variances = {}
+    for word, counts in word_count_matrix.items():
+        count_values = list(counts.values())
+        if len(count_values) > 1:
+            word_variances[word] = np.var(count_values)
+
+    # Get top differential words
+    top_diff_words = sorted(word_variances.items(), key=lambda x: x[1], reverse=True)[:top_n]
+    differential_words = [word for word, _ in top_diff_words]
+
+    # Format results
+    result = {
+        "model_word_counts": model_bows,
+        "important_words": model_important_words,
+        "comparisons": comparisons,
+        "differential_words": differential_words,
+        "word_count_matrix": {word: word_count_matrix[word] for word in differential_words},
+        "models": model_names
+    }
+
+    return result
+
+def compare_bow(texts, model_names, top_n=25):
+    """
+    Compare bag of words between different texts (public entry point; a
+    thin wrapper around compare_bow_across_texts)
+
+    Args:
+        texts (list): List of text responses to compare
+        model_names (list): Names of models corresponding to responses
+        top_n (int): Number of top words to consider
+
+    Returns:
+        dict: Comparative analysis
+    """
+    return compare_bow_across_texts(texts, model_names, top_n)
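
End to end, the module can be exercised like this (model names are placeholders):

    texts = [
        "The cat sat on the mat and purred.",
        "A dog sat on the rug and barked loudly."
    ]
    results = compare_bow(texts, ["model_a", "model_b"])
    print(results["comparisons"]["model_a vs model_b"]["jaccard_similarity"])
    print(results["differential_words"][:5])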