Spaces:
Sleeping
Sleeping
Ryan
committed on
Commit
·
e633a26
1
Parent(s):
fecdfa0
update
Browse files- app.py +4 -2
- processors/__init__.py +26 -5
- processors/bow_analysis.py +83 -195
app.py
CHANGED
@@ -233,19 +233,21 @@ def create_app():
|
|
233 |
similarity_text = "No similarity metrics found"
|
234 |
comparisons = bow_results.get("comparisons", {})
|
235 |
comparison_key = f"{model1_name} vs {model2_name}"
|
236 |
-
|
237 |
if comparison_key in comparisons:
|
238 |
metrics = comparisons[comparison_key]
|
239 |
cosine = metrics.get("cosine_similarity", 0)
|
240 |
jaccard = metrics.get("jaccard_similarity", 0)
|
|
|
241 |
common_words = metrics.get("common_word_count", 0)
|
242 |
|
243 |
similarity_text = f"""
|
244 |
- **Cosine Similarity**: {cosine:.2f} (higher means more similar word frequency patterns)
|
245 |
- **Jaccard Similarity**: {jaccard:.2f} (higher means more word overlap)
|
|
|
246 |
- **Common Words**: {common_words} words appear in both responses
|
247 |
"""
|
248 |
-
|
249 |
# Return all updated component values
|
250 |
return (
|
251 |
analysis_results, # analysis_results_state
|
|
|
233 |
similarity_text = "No similarity metrics found"
|
234 |
comparisons = bow_results.get("comparisons", {})
|
235 |
comparison_key = f"{model1_name} vs {model2_name}"
|
236 |
+
|
237 |
if comparison_key in comparisons:
|
238 |
metrics = comparisons[comparison_key]
|
239 |
cosine = metrics.get("cosine_similarity", 0)
|
240 |
jaccard = metrics.get("jaccard_similarity", 0)
|
241 |
+
semantic = metrics.get("semantic_similarity", 0) # Add semantic similarity
|
242 |
common_words = metrics.get("common_word_count", 0)
|
243 |
|
244 |
similarity_text = f"""
|
245 |
- **Cosine Similarity**: {cosine:.2f} (higher means more similar word frequency patterns)
|
246 |
- **Jaccard Similarity**: {jaccard:.2f} (higher means more word overlap)
|
247 |
+
- **Semantic Similarity**: {semantic:.2f} (higher means more similar meaning)
|
248 |
- **Common Words**: {common_words} words appear in both responses
|
249 |
"""
|
250 |
+
|
251 |
# Return all updated component values
|
252 |
return (
|
253 |
analysis_results, # analysis_results_state
|
processors/__init__.py
CHANGED
@@ -1,8 +1,29 @@
|
|
|
|
|
|
|
|
1 |
# processors/__init__.py
|
2 |
-
#
|
3 |
|
4 |
-
#
|
5 |
-
#
|
|
|
6 |
|
7 |
-
#
|
8 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Implementation of the processors package structure to ensure metrics.py is properly integrated
|
3 |
+
"""
|
4 |
# processors/__init__.py
|
5 |
+
# This file ensures the processors directory is treated as a Python package
|
6 |
|
7 |
+
# processors/metrics.py
|
8 |
+
# This file is already included in your project, but we need to make sure it's properly imported
|
9 |
+
# The path should be: processors/metrics.py
|
10 |
|
11 |
+
# processors/bow_analysis.py
|
12 |
+
# This is your existing file with the updated code to include similarity metrics
|
13 |
+
|
14 |
+
# Ensure the package structure is correct:
|
15 |
+
# - Project directory/
|
16 |
+
# - processors/
|
17 |
+
# - __init__.py
|
18 |
+
# - metrics.py
|
19 |
+
# - bow_analysis.py
|
20 |
+
|
21 |
+
# Here's a quick implementation of the __init__.py file:
|
22 |
+
"""
|
23 |
+
LLM Response Comparator processor modules
|
24 |
+
"""
|
25 |
+
# Import key functions to make them available from the package
|
26 |
+
from processors.metrics import calculate_similarity
|
27 |
+
from processors.bow_analysis import compare_bow
|
28 |
+
|
29 |
+
# You can add more imports as needed when implementing other analysis types
|
processors/bow_analysis.py
CHANGED
@@ -1,3 +1,6 @@
|
|
|
|
|
|
|
|
1 |
from sklearn.feature_extraction.text import CountVectorizer
|
2 |
import numpy as np
|
3 |
from collections import Counter
|
@@ -6,209 +9,67 @@ import nltk
|
|
6 |
from nltk.corpus import stopwords
|
7 |
from nltk.stem import WordNetLemmatizer
|
8 |
from nltk.tokenize import word_tokenize
|
|
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
nltk.download('punkt')
|
15 |
-
|
16 |
-
try:
|
17 |
-
nltk.data.find('corpora/stopwords')
|
18 |
-
except LookupError:
|
19 |
-
nltk.download('stopwords')
|
20 |
-
|
21 |
-
try:
|
22 |
-
nltk.data.find('corpora/wordnet')
|
23 |
-
except LookupError:
|
24 |
-
nltk.download('wordnet')
|
25 |
-
|
26 |
-
def preprocess_text(text):
|
27 |
-
"""
|
28 |
-
Preprocess text for bag of words analysis
|
29 |
-
|
30 |
-
Args:
|
31 |
-
text (str): Input text
|
32 |
-
|
33 |
-
Returns:
|
34 |
-
str: Preprocessed text
|
35 |
-
"""
|
36 |
-
# Convert to lowercase
|
37 |
-
text = text.lower()
|
38 |
-
|
39 |
-
# Remove special characters and digits
|
40 |
-
text = re.sub(r'[^a-zA-Z\s]', '', text)
|
41 |
-
|
42 |
-
# Tokenize
|
43 |
-
tokens = word_tokenize(text)
|
44 |
-
|
45 |
-
# Remove stopwords
|
46 |
-
stop_words = set(stopwords.words('english'))
|
47 |
-
tokens = [token for token in tokens if token not in stop_words]
|
48 |
-
|
49 |
-
# Lemmatize
|
50 |
-
lemmatizer = WordNetLemmatizer()
|
51 |
-
tokens = [lemmatizer.lemmatize(token) for token in tokens]
|
52 |
-
|
53 |
-
# Filter out short words (likely not meaningful)
|
54 |
-
tokens = [token for token in tokens if len(token) > 2]
|
55 |
-
|
56 |
-
# Join back to string
|
57 |
-
return ' '.join(tokens)
|
58 |
-
|
59 |
-
def create_bow(text):
|
60 |
-
"""
|
61 |
-
Create bag of words representation
|
62 |
-
|
63 |
-
Args:
|
64 |
-
text (str): Input text
|
65 |
-
|
66 |
-
Returns:
|
67 |
-
dict: Bag of words representation with word counts
|
68 |
-
"""
|
69 |
-
# Preprocess text
|
70 |
-
preprocessed_text = preprocess_text(text)
|
71 |
-
|
72 |
-
# Tokenize
|
73 |
-
tokens = preprocessed_text.split()
|
74 |
-
|
75 |
-
# Count occurrences
|
76 |
-
word_counts = Counter(tokens)
|
77 |
-
|
78 |
-
return dict(word_counts)
|
79 |
-
|
80 |
-
def compare_bow(bow1, bow2):
|
81 |
-
"""
|
82 |
-
Compare two bag of words representations
|
83 |
-
|
84 |
-
Args:
|
85 |
-
bow1 (dict): First bag of words
|
86 |
-
bow2 (dict): Second bag of words
|
87 |
-
|
88 |
-
Returns:
|
89 |
-
dict: Comparison metrics
|
90 |
-
"""
|
91 |
-
# Get all unique words
|
92 |
-
all_words = set(bow1.keys()).union(set(bow2.keys()))
|
93 |
-
|
94 |
-
# Words in both
|
95 |
-
common_words = set(bow1.keys()).intersection(set(bow2.keys()))
|
96 |
-
|
97 |
-
# Words unique to each
|
98 |
-
unique_to_1 = set(bow1.keys()) - set(bow2.keys())
|
99 |
-
unique_to_2 = set(bow2.keys()) - set(bow1.keys())
|
100 |
-
|
101 |
-
# Calculate Jaccard similarity
|
102 |
-
jaccard = len(common_words) / len(all_words) if len(all_words) > 0 else 0
|
103 |
-
|
104 |
-
# Calculate cosine similarity
|
105 |
-
vec1 = np.zeros(len(all_words))
|
106 |
-
vec2 = np.zeros(len(all_words))
|
107 |
-
|
108 |
-
for i, word in enumerate(all_words):
|
109 |
-
vec1[i] = bow1.get(word, 0)
|
110 |
-
vec2[i] = bow2.get(word, 0)
|
111 |
-
|
112 |
-
# Normalize vectors
|
113 |
-
norm1 = np.linalg.norm(vec1)
|
114 |
-
norm2 = np.linalg.norm(vec2)
|
115 |
-
|
116 |
-
if norm1 == 0 or norm2 == 0:
|
117 |
-
cosine = 0
|
118 |
-
else:
|
119 |
-
cosine = np.dot(vec1, vec2) / (norm1 * norm2)
|
120 |
-
|
121 |
-
return {
|
122 |
-
"jaccard_similarity": jaccard,
|
123 |
-
"cosine_similarity": cosine,
|
124 |
-
"common_word_count": len(common_words),
|
125 |
-
"unique_to_first": list(unique_to_1)[:20], # Limit for readability
|
126 |
-
"unique_to_second": list(unique_to_2)[:20] # Limit for readability
|
127 |
-
}
|
128 |
-
|
129 |
-
def important_words(bow, top_n=10):
|
130 |
-
"""
|
131 |
-
Extract most important/distinctive words
|
132 |
-
|
133 |
-
Args:
|
134 |
-
bow (dict): Bag of words representation
|
135 |
-
top_n (int): Number of top words to return
|
136 |
-
|
137 |
-
Returns:
|
138 |
-
list: Top words with counts
|
139 |
-
"""
|
140 |
-
# Sort by count
|
141 |
-
sorted_words = sorted(bow.items(), key=lambda x: x[1], reverse=True)
|
142 |
-
|
143 |
-
# Return top N
|
144 |
-
return [{"word": word, "count": count} for word, count in sorted_words[:top_n]]
|
145 |
|
146 |
-
def
|
147 |
"""
|
148 |
-
|
149 |
|
150 |
Args:
|
151 |
-
|
|
|
152 |
model_names (list): List of model names corresponding to responses
|
153 |
-
top_n (int): Number of top words to include
|
154 |
|
155 |
Returns:
|
156 |
-
dict:
|
157 |
"""
|
158 |
-
#
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
#
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
# Format results
|
202 |
-
result = {
|
203 |
-
"model_word_counts": model_bows,
|
204 |
-
"important_words": model_important_words,
|
205 |
-
"comparisons": comparisons,
|
206 |
-
"differential_words": differential_words,
|
207 |
-
"word_count_matrix": {word: word_count_matrix[word] for word in differential_words},
|
208 |
-
"models": model_names
|
209 |
-
}
|
210 |
-
|
211 |
-
return result
|
212 |
|
213 |
def compare_bow(texts, model_names, top_n=25):
|
214 |
"""
|
@@ -222,4 +83,31 @@ def compare_bow(texts, model_names, top_n=25):
|
|
222 |
Returns:
|
223 |
dict: Comparative analysis
|
224 |
"""
|
225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Updated bow_analysis.py to include similarity metrics
|
3 |
+
"""
|
4 |
from sklearn.feature_extraction.text import CountVectorizer
|
5 |
import numpy as np
|
6 |
from collections import Counter
|
|
|
9 |
from nltk.corpus import stopwords
|
10 |
from nltk.stem import WordNetLemmatizer
|
11 |
from nltk.tokenize import word_tokenize
|
12 |
+
from processors.metrics import calculate_similarity
|
13 |
|
14 |
+
"""
|
15 |
+
Implementation of the similarity metrics integration for LLM Response Comparator
|
16 |
+
"""
|
17 |
+
from processors.metrics import calculate_similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
+
def add_similarity_metrics(bow_results, response_texts, model_names):
    """
    Add similarity metrics to the bag of words analysis results

    Only the first two responses/model names are compared. ``bow_results`` is
    updated in place and the same object is returned.

    Args:
        bow_results (dict): The bag of words analysis results
        response_texts (list): List of response texts to compare
        model_names (list): List of model names corresponding to responses

    Returns:
        dict: Updated bag of words results with similarity metrics
    """
    # Make sure we have at least two responses to compare
    if len(response_texts) < 2 or len(model_names) < 2:
        print("Need at least two responses to calculate similarity metrics")
        return bow_results

    # Current implementation only handles a two-way comparison
    text1, text2 = response_texts[0], response_texts[1]
    model1, model2 = model_names[0], model_names[1]

    # Fetch (creating when absent) the entry keyed "<model1> vs <model2>"
    comparison = bow_results.setdefault("comparisons", {}).setdefault(
        f"{model1} vs {model2}", {}
    )

    # calculate_similarity comes from processors.metrics; its result is read
    # with .get(), so any metric it does not provide defaults to 0
    metrics = calculate_similarity(text1, text2)
    comparison.update({
        "cosine_similarity": metrics.get("cosine_similarity", 0),
        "jaccard_similarity": metrics.get("jaccard_similarity", 0),
        "semantic_similarity": metrics.get("semantic_similarity", 0),
    })

    # Keep an existing common_word_count; otherwise fall back to the overlap
    # of the words listed under "important_words" for the two models
    # (this counts only the listed important words, not the full vocabulary)
    if "common_word_count" not in comparison and "important_words" in bow_results:
        important = bow_results["important_words"]
        words1 = {item["word"] for item in important.get(model1, [])}
        words2 = {item["word"] for item in important.get(model2, [])}
        comparison["common_word_count"] = len(words1 & words2)

    return bow_results
|
71 |
+
|
72 |
+
# All existing imports and preprocessing functions remain unchanged
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
|
74 |
def compare_bow(texts, model_names, top_n=25):
|
75 |
"""
|
|
|
83 |
Returns:
|
84 |
dict: Comparative analysis
|
85 |
"""
|
86 |
+
bow_results = compare_bow_across_texts(texts, model_names, top_n)
|
87 |
+
|
88 |
+
# Add similarity metrics to the results
|
89 |
+
if len(texts) >= 2 and len(model_names) >= 2:
|
90 |
+
# Generate comparison key for first two models
|
91 |
+
model1, model2 = model_names[0], model_names[1]
|
92 |
+
comparison_key = f"{model1} vs {model2}"
|
93 |
+
|
94 |
+
# Initialize comparisons dict if needed
|
95 |
+
if "comparisons" not in bow_results:
|
96 |
+
bow_results["comparisons"] = {}
|
97 |
+
|
98 |
+
# Initialize comparison entry if needed
|
99 |
+
if comparison_key not in bow_results["comparisons"]:
|
100 |
+
bow_results["comparisons"][comparison_key] = {}
|
101 |
+
|
102 |
+
# Calculate similarity metrics
|
103 |
+
text1, text2 = texts[0], texts[1]
|
104 |
+
metrics = calculate_similarity(text1, text2)
|
105 |
+
|
106 |
+
# Add metrics to the comparison
|
107 |
+
bow_results["comparisons"][comparison_key].update({
|
108 |
+
"cosine_similarity": metrics.get("cosine_similarity", 0),
|
109 |
+
"jaccard_similarity": metrics.get("jaccard_similarity", 0),
|
110 |
+
"semantic_similarity": metrics.get("semantic_similarity", 0)
|
111 |
+
})
|
112 |
+
|
113 |
+
return bow_results
|