Ryan committed on
Commit
e633a26
·
1 Parent(s): fecdfa0
Files changed (3) hide show
  1. app.py +4 -2
  2. processors/__init__.py +26 -5
  3. processors/bow_analysis.py +83 -195
app.py CHANGED
@@ -233,19 +233,21 @@ def create_app():
233
  similarity_text = "No similarity metrics found"
234
  comparisons = bow_results.get("comparisons", {})
235
  comparison_key = f"{model1_name} vs {model2_name}"
236
-
237
  if comparison_key in comparisons:
238
  metrics = comparisons[comparison_key]
239
  cosine = metrics.get("cosine_similarity", 0)
240
  jaccard = metrics.get("jaccard_similarity", 0)
 
241
  common_words = metrics.get("common_word_count", 0)
242
 
243
  similarity_text = f"""
244
  - **Cosine Similarity**: {cosine:.2f} (higher means more similar word frequency patterns)
245
  - **Jaccard Similarity**: {jaccard:.2f} (higher means more word overlap)
 
246
  - **Common Words**: {common_words} words appear in both responses
247
  """
248
-
249
  # Return all updated component values
250
  return (
251
  analysis_results, # analysis_results_state
 
233
  similarity_text = "No similarity metrics found"
234
  comparisons = bow_results.get("comparisons", {})
235
  comparison_key = f"{model1_name} vs {model2_name}"
236
+
237
  if comparison_key in comparisons:
238
  metrics = comparisons[comparison_key]
239
  cosine = metrics.get("cosine_similarity", 0)
240
  jaccard = metrics.get("jaccard_similarity", 0)
241
+ semantic = metrics.get("semantic_similarity", 0) # Add semantic similarity
242
  common_words = metrics.get("common_word_count", 0)
243
 
244
  similarity_text = f"""
245
  - **Cosine Similarity**: {cosine:.2f} (higher means more similar word frequency patterns)
246
  - **Jaccard Similarity**: {jaccard:.2f} (higher means more word overlap)
247
+ - **Semantic Similarity**: {semantic:.2f} (higher means more similar meaning)
248
  - **Common Words**: {common_words} words appear in both responses
249
  """
250
+
251
  # Return all updated component values
252
  return (
253
  analysis_results, # analysis_results_state
processors/__init__.py CHANGED
@@ -1,8 +1,29 @@
 
 
 
1
  # processors/__init__.py
2
- # Empty file to make the directory a Python package
3
 
4
- # ui/__init__.py
5
- # Empty file to make the directory a Python package
 
6
 
7
- # utils/__init__.py
8
- # Empty file to make the directory a Python package
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Implementation of the processors package structure to ensure metrics.py is properly integrated
3
+ """
4
  # processors/__init__.py
5
+ # This file ensures the processors directory is treated as a Python package
6
 
7
+ # processors/metrics.py
8
+ # This file is already included in your project, but we need to make sure it's properly imported
9
+ # The path should be: processors/metrics.py
10
 
11
+ # processors/bow_analysis.py
12
+ # This is your existing file with the updated code to include similarity metrics
13
+
14
+ # Ensure the package structure is correct:
15
+ # - Project directory/
16
+ # - processors/
17
+ # - __init__.py
18
+ # - metrics.py
19
+ # - bow_analysis.py
20
+
21
+ # Package initialisation for processors:
22
+ """
23
+ LLM Response Comparator processor modules
24
+ """
25
+ # Import key functions to make them available from the package
26
+ from processors.metrics import calculate_similarity
27
+ from processors.bow_analysis import compare_bow
28
+
29
+ # You can add more imports as needed when implementing other analysis types
processors/bow_analysis.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  from sklearn.feature_extraction.text import CountVectorizer
2
  import numpy as np
3
  from collections import Counter
@@ -6,209 +9,67 @@ import nltk
6
  from nltk.corpus import stopwords
7
  from nltk.stem import WordNetLemmatizer
8
  from nltk.tokenize import word_tokenize
 
9
 
10
- # Download necessary NLTK data
11
- try:
12
- nltk.data.find('tokenizers/punkt')
13
- except LookupError:
14
- nltk.download('punkt')
15
-
16
- try:
17
- nltk.data.find('corpora/stopwords')
18
- except LookupError:
19
- nltk.download('stopwords')
20
-
21
- try:
22
- nltk.data.find('corpora/wordnet')
23
- except LookupError:
24
- nltk.download('wordnet')
25
-
26
- def preprocess_text(text):
27
- """
28
- Preprocess text for bag of words analysis
29
-
30
- Args:
31
- text (str): Input text
32
-
33
- Returns:
34
- str: Preprocessed text
35
- """
36
- # Convert to lowercase
37
- text = text.lower()
38
-
39
- # Remove special characters and digits
40
- text = re.sub(r'[^a-zA-Z\s]', '', text)
41
-
42
- # Tokenize
43
- tokens = word_tokenize(text)
44
-
45
- # Remove stopwords
46
- stop_words = set(stopwords.words('english'))
47
- tokens = [token for token in tokens if token not in stop_words]
48
-
49
- # Lemmatize
50
- lemmatizer = WordNetLemmatizer()
51
- tokens = [lemmatizer.lemmatize(token) for token in tokens]
52
-
53
- # Filter out short words (likely not meaningful)
54
- tokens = [token for token in tokens if len(token) > 2]
55
-
56
- # Join back to string
57
- return ' '.join(tokens)
58
-
59
- def create_bow(text):
60
- """
61
- Create bag of words representation
62
-
63
- Args:
64
- text (str): Input text
65
-
66
- Returns:
67
- dict: Bag of words representation with word counts
68
- """
69
- # Preprocess text
70
- preprocessed_text = preprocess_text(text)
71
-
72
- # Tokenize
73
- tokens = preprocessed_text.split()
74
-
75
- # Count occurrences
76
- word_counts = Counter(tokens)
77
-
78
- return dict(word_counts)
79
-
80
- def compare_bow(bow1, bow2):
81
- """
82
- Compare two bag of words representations
83
-
84
- Args:
85
- bow1 (dict): First bag of words
86
- bow2 (dict): Second bag of words
87
-
88
- Returns:
89
- dict: Comparison metrics
90
- """
91
- # Get all unique words
92
- all_words = set(bow1.keys()).union(set(bow2.keys()))
93
-
94
- # Words in both
95
- common_words = set(bow1.keys()).intersection(set(bow2.keys()))
96
-
97
- # Words unique to each
98
- unique_to_1 = set(bow1.keys()) - set(bow2.keys())
99
- unique_to_2 = set(bow2.keys()) - set(bow1.keys())
100
-
101
- # Calculate Jaccard similarity
102
- jaccard = len(common_words) / len(all_words) if len(all_words) > 0 else 0
103
-
104
- # Calculate cosine similarity
105
- vec1 = np.zeros(len(all_words))
106
- vec2 = np.zeros(len(all_words))
107
-
108
- for i, word in enumerate(all_words):
109
- vec1[i] = bow1.get(word, 0)
110
- vec2[i] = bow2.get(word, 0)
111
-
112
- # Normalize vectors
113
- norm1 = np.linalg.norm(vec1)
114
- norm2 = np.linalg.norm(vec2)
115
-
116
- if norm1 == 0 or norm2 == 0:
117
- cosine = 0
118
- else:
119
- cosine = np.dot(vec1, vec2) / (norm1 * norm2)
120
-
121
- return {
122
- "jaccard_similarity": jaccard,
123
- "cosine_similarity": cosine,
124
- "common_word_count": len(common_words),
125
- "unique_to_first": list(unique_to_1)[:20], # Limit for readability
126
- "unique_to_second": list(unique_to_2)[:20] # Limit for readability
127
- }
128
-
129
- def important_words(bow, top_n=10):
130
- """
131
- Extract most important/distinctive words
132
-
133
- Args:
134
- bow (dict): Bag of words representation
135
- top_n (int): Number of top words to return
136
-
137
- Returns:
138
- list: Top words with counts
139
- """
140
- # Sort by count
141
- sorted_words = sorted(bow.items(), key=lambda x: x[1], reverse=True)
142
-
143
- # Return top N
144
- return [{"word": word, "count": count} for word, count in sorted_words[:top_n]]
145
 
146
- def compare_bow_across_texts(texts, model_names, top_n=25):
147
  """
148
- Compare bag of words across multiple texts
149
 
150
  Args:
151
- texts (list): List of text responses
 
152
  model_names (list): List of model names corresponding to responses
153
- top_n (int): Number of top words to include
154
 
155
  Returns:
156
- dict: Comparative bag of words analysis
157
  """
158
- # Create bag of words for each text
159
- bows = [create_bow(text) for text in texts]
160
-
161
- # Map to models
162
- model_bows = {model: bow for model, bow in zip(model_names, bows)}
163
-
164
- # Get important words for each model
165
- model_important_words = {model: important_words(bow, top_n) for model, bow in model_bows.items()}
166
-
167
- # Compare pairwise
168
- comparisons = {}
169
- for i, model1 in enumerate(model_names):
170
- for j, model2 in enumerate(model_names):
171
- if j <= i: # Avoid duplicate comparisons
172
- continue
173
-
174
- comparison_key = f"{model1} vs {model2}"
175
- comparisons[comparison_key] = compare_bow(model_bows[model1], model_bows[model2])
176
-
177
- # Create combined word list across all models
178
- all_words = set()
179
- for bow in bows:
180
- all_words.update(bow.keys())
181
-
182
- # Create a matrix of word counts across models
183
- word_count_matrix = {}
184
- for word in sorted(list(all_words)):
185
- word_counts = [bow.get(word, 0) for bow in bows]
186
- # Only include words that show up in at least one model
187
- if any(count > 0 for count in word_counts):
188
- word_count_matrix[word] = {model: bow.get(word, 0) for model, bow in zip(model_names, bows)}
189
-
190
- # Sort matrix by most differential words (words with biggest variance across models)
191
- word_variances = {}
192
- for word, counts in word_count_matrix.items():
193
- count_values = list(counts.values())
194
- if len(count_values) > 1:
195
- word_variances[word] = np.var(count_values)
196
-
197
- # Get top differential words
198
- top_diff_words = sorted(word_variances.items(), key=lambda x: x[1], reverse=True)[:top_n]
199
- differential_words = [word for word, _ in top_diff_words]
200
-
201
- # Format results
202
- result = {
203
- "model_word_counts": model_bows,
204
- "important_words": model_important_words,
205
- "comparisons": comparisons,
206
- "differential_words": differential_words,
207
- "word_count_matrix": {word: word_count_matrix[word] for word in differential_words},
208
- "models": model_names
209
- }
210
-
211
- return result
212
 
213
  def compare_bow(texts, model_names, top_n=25):
214
  """
@@ -222,4 +83,31 @@ def compare_bow(texts, model_names, top_n=25):
222
  Returns:
223
  dict: Comparative analysis
224
  """
225
- return compare_bow_across_texts(texts, model_names, top_n)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Updated bow_analysis.py to include similarity metrics
3
+ """
4
  from sklearn.feature_extraction.text import CountVectorizer
5
  import numpy as np
6
  from collections import Counter
 
9
  from nltk.corpus import stopwords
10
  from nltk.stem import WordNetLemmatizer
11
  from nltk.tokenize import word_tokenize
12
+ from processors.metrics import calculate_similarity
13
 
14
+ """
15
+ Implementation of the similarity metrics integration for LLM Response Comparator
16
+ """
17
+ from processors.metrics import calculate_similarity
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
def add_similarity_metrics(bow_results, response_texts, model_names):
    """
    Add similarity metrics to the bag of words analysis results.

    Only the first two entries of ``response_texts`` / ``model_names`` are
    compared; any further entries are ignored. The metrics are stored under
    ``bow_results["comparisons"]["<model1> vs <model2>"]``, creating the
    intermediate dictionaries when they are missing.

    Args:
        bow_results (dict): The bag of words analysis results. Updated in place.
        response_texts (list): List of response texts to compare
        model_names (list): List of model names corresponding to responses

    Returns:
        dict: The same ``bow_results`` object, with similarity metrics added.
            Returned untouched when fewer than two responses or model names
            are supplied.
    """
    # Make sure we have at least two responses to compare
    if len(response_texts) < 2 or len(model_names) < 2:
        print("Need at least two responses to calculate similarity metrics")
        return bow_results

    # Current implementation only handles a two-way comparison
    text1, text2 = response_texts[0], response_texts[1]
    model1, model2 = model_names[0], model_names[1]

    # Fetch (or create) the entry for this model pair once and reuse it
    comparison_key = f"{model1} vs {model2}"
    comparison = bow_results.setdefault("comparisons", {}).setdefault(comparison_key, {})

    # Calculate similarity metrics and copy the three scores we report,
    # defaulting to 0 for any score the metrics helper did not provide
    metrics = calculate_similarity(text1, text2)
    comparison.update({
        "cosine_similarity": metrics.get("cosine_similarity", 0),
        "jaccard_similarity": metrics.get("jaccard_similarity", 0),
        "semantic_similarity": metrics.get("semantic_similarity", 0)
    })

    # Keep an existing common_word_count; otherwise fall back to the overlap
    # between the words listed under "important_words" for the two models.
    # That fallback only sees the words recorded there, not the full texts.
    if "common_word_count" not in comparison and "important_words" in bow_results:
        important = bow_results["important_words"]
        words1 = {item["word"] for item in important.get(model1, [])}
        words2 = {item["word"] for item in important.get(model2, [])}
        comparison["common_word_count"] = len(words1 & words2)

    return bow_results
71
+
72
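+ # NOTE: compare_bow below calls compare_bow_across_texts, but this commit removes that function and the preprocessing helpers from this file; they must be restored for compare_bow to run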
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  def compare_bow(texts, model_names, top_n=25):
75
  """
 
83
  Returns:
84
  dict: Comparative analysis
85
  """
86
+ bow_results = compare_bow_across_texts(texts, model_names, top_n)
87
+
88
+ # Add similarity metrics to the results
89
+ if len(texts) >= 2 and len(model_names) >= 2:
90
+ # Generate comparison key for first two models
91
+ model1, model2 = model_names[0], model_names[1]
92
+ comparison_key = f"{model1} vs {model2}"
93
+
94
+ # Initialize comparisons dict if needed
95
+ if "comparisons" not in bow_results:
96
+ bow_results["comparisons"] = {}
97
+
98
+ # Initialize comparison entry if needed
99
+ if comparison_key not in bow_results["comparisons"]:
100
+ bow_results["comparisons"][comparison_key] = {}
101
+
102
+ # Calculate similarity metrics
103
+ text1, text2 = texts[0], texts[1]
104
+ metrics = calculate_similarity(text1, text2)
105
+
106
+ # Add metrics to the comparison
107
+ bow_results["comparisons"][comparison_key].update({
108
+ "cosine_similarity": metrics.get("cosine_similarity", 0),
109
+ "jaccard_similarity": metrics.get("jaccard_similarity", 0),
110
+ "semantic_similarity": metrics.get("semantic_similarity", 0)
111
+ })
112
+
113
+ return bow_results