jharrison27 committed on
Commit
fce83cf
·
1 Parent(s): 9097dd4

Initial commit

Browse files
Files changed (2) hide show
  1. app.py +38 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ from sklearn.cluster import KMeans
4
+ import numpy as np
5
+
6
# Mock data: a sample puzzle of sixteen words, four per category.
mock_words = [
    # Fruits
    "apple",
    "banana",
    "cherry",
    "date",
    # Vehicles
    "car",
    "truck",
    "bus",
    "bicycle",
    # Colors
    "red",
    "blue",
    "green",
    "yellow",
    # Pets
    "cat",
    "dog",
    "rabbit",
    "hamster",
]
13
+
14
# Embedding model
@st.cache_resource
def _load_embedder():
    """Build the DistilBERT feature-extraction pipeline once and reuse it.

    Streamlit re-executes this script from the top on every user
    interaction. Caching the pipeline as a resource keeps the loaded model
    alive across those reruns instead of rebuilding it on each button click.
    """
    return pipeline('feature-extraction', model='distilbert-base-uncased')


# Module-level handle used by embed_words(); the name is kept for callers.
embedder = _load_embedder()
16
+
17
def embed_words(words):
    """Return a NumPy array holding one averaged embedding per input word.

    Each word is passed through the module-level ``embedder`` pipeline and
    the first element of its result is averaged along axis 0.

    Args:
        words: The strings to embed; handed to ``embedder`` as-is.

    Returns:
        ``np.ndarray`` built from the per-word averaged vectors, in input
        order.
    """
    raw_outputs = embedder(words)

    pooled_vectors = []
    for raw in raw_outputs:
        # raw[0] is presumably the (tokens, hidden) matrix for this word;
        # averaging over axis 0 collapses it to a single vector. TODO confirm
        # the pipeline's output nesting against the transformers docs.
        pooled_vectors.append(np.mean(raw[0], axis=0))

    return np.array(pooled_vectors)
20
+
21
def cluster_words(words, n_clusters=4):
    """Group words into clusters by running K-means on their embeddings.

    Args:
        words: Iterable of strings to cluster.
        n_clusters: Number of groups to form. Defaults to 4, the value that
            was previously hard-coded in two places.

    Returns:
        dict mapping each cluster index in ``range(n_clusters)`` to the list
        of words assigned to it, in input order.

    Raises:
        ValueError: If there are fewer words than requested clusters.
    """
    words = list(words)
    # Fail early with a clear message instead of after running the model;
    # KMeans itself rejects n_samples < n_clusters with a ValueError.
    if len(words) < n_clusters:
        raise ValueError(
            f"Need at least {n_clusters} words to form {n_clusters} clusters, "
            f"got {len(words)}"
        )

    embeddings = embed_words(words)
    # random_state is fixed so repeated runs give the same grouping.
    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(embeddings)

    clusters = {i: [] for i in range(n_clusters)}
    for word, label in zip(words, kmeans.labels_):
        # labels_ holds NumPy integers; convert so keys stay plain ints.
        clusters[int(label)].append(word)
    return clusters
28
+
29
def main():
    """Render the page: a title and a button that clusters the mock words.

    When the "Generate Clusters" button is pressed, ``mock_words`` is
    clustered and each resulting group is written out on its own line,
    numbered from 1.
    """
    st.title("NYT Connections Solver")

    # Nothing more to draw until the user asks for clusters.
    if not st.button("Generate Clusters"):
        return

    grouped = cluster_words(mock_words)
    for index, members in grouped.items():
        joined = ', '.join(members)
        st.write(f"Group {index + 1}: {joined}")
36
+
37
# Run the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ transformers
2
+ scikit-learn
3
+ streamlit