Spaces:

GroNLP
/

agalma

Running

App Files Community

Mark7549 committed on Mar 12, 2024

Commit

bdf0a5e

1 Parent(s): 169869e

Added cosine similarity front-end

Browse files

Files changed (2) hide show

app.py +24 -6
word2vec.py +30 -7

app.py CHANGED Viewed

@@ -15,7 +15,7 @@ if active_tab == "Nearest neighbours":
     col1, col2 = st.columns(2)
     with st.container():
         with col1:
-            word = st.text_input("Enter a word", placeholder="ἀνήρ")
         with col2:
             time_slice = st.selectbox("Time slice", ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"])
@@ -52,14 +52,32 @@ if active_tab == "Nearest neighbours":
                 df = pd.DataFrame(nearest_neighbours, columns=["Word", "Time slice", "Similarity"])
                 st.table(df)
 # Cosine similarity tab
 elif active_tab == "Cosine similarity":
     with st.container():
-        st.write("Cosine similarity tab")
 # 3D graph tab
 elif active_tab == "3D graph":

     col1, col2 = st.columns(2)
     with st.container():
         with col1:
+            word = st.text_input("Enter a word", placeholder="πατήρ")
         with col2:
             time_slice = st.selectbox("Time slice", ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"])
                 df = pd.DataFrame(nearest_neighbours, columns=["Word", "Time slice", "Similarity"])
                 st.table(df)
 # Cosine similarity tab
 elif active_tab == "Cosine similarity":
+    col1, col2 = st.columns(2)
+    col3, col4 = st.columns(2)
     with st.container():
+        with col1:
+            word_1 = st.text_input("Enter a word", placeholder="πατήρ")
+        with col2:
+            time_slice_1 = st.selectbox("Time slice word 1", ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"])
+    with st.container():
+        with col3:
+            word_2 = st.text_input("Enter a word", placeholder="μήτηρ")
+        with col4:
+            time_slice_2 = st.selectbox("Time slice word 2", ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"])
+    # Create button for calculating cosine similarity
+    cosine_similarity_button = st.button("Calculate cosine similarity")
+    # If the button is clicked, execute calculation
+    if cosine_similarity_button:
+        cosine_simularity_score = get_cosine_similarity(word_1, time_slice_1, word_2, time_slice_2)
+        st.write(cosine_simularity_score)
 # 3D graph tab
 elif active_tab == "3D graph":

word2vec.py CHANGED Viewed

@@ -104,19 +104,27 @@ def cosine_similarity(vector_a, vector_b):
     return "{:.2f}".format(similarity)
-def get_cosine_similarity(word1, word2, time_slice):
     '''
         Return the cosine similarity of two words
     '''
     # TODO: needs to be made neater
     # Return if path does not exist
-    if not os.path.exists(f'models/{time_slice}.model'):
-        return
-    model = load_word2vec_model(f'models/{time_slice}.model')
-    dict = model_dictionary(model)
-    return cosine_similarity(dict[word1], dict[word2])
 def get_cosine_similarity_one_word(word, time_slice1, time_slice2):
@@ -163,6 +171,21 @@ def convert_model_to_time_name(model_name):
         return 'Late Roman'
 def get_nearest_neighbours(word, time_slice_model, n=10, models=load_all_models()):
     '''
         Return the nearest neighbours of a word
@@ -241,7 +264,7 @@ def main():
     late_roman = ('late_roman', load_word2vec_model('models/late_roman_cbow.model'))
     models = [archaic, classical, early_roman, hellen, late_roman]
-    nearest_neighbours = get_nearest_neighbours('πατήρ', archaic[1], models, n=5)
     print(nearest_neighbours)
     # vector = get_word_vector(model, 'ἀνήρ')
     # print(vector)

     return "{:.2f}".format(similarity)
+def get_cosine_similarity(word1, time_slice_1, word2, time_slice_2):
     '''
         Return the cosine similarity of two words
     '''
     # TODO: needs to be made neater
     # Return if path does not exist
+    time_slice_1 = convert_time_name_to_model(time_slice_1)
+    time_slice_2 = convert_time_name_to_model(time_slice_2)
+    if not os.path.exists(f'models/{time_slice_1}.model'):
+        return
+    model_1 = load_word2vec_model(f'models/{time_slice_1}.model')
+    model_2 = load_word2vec_model(f'models/{time_slice_2}.model')
+    dict_1 = model_dictionary(model_1)
+    dict_2 = model_dictionary(model_2)
+    return cosine_similarity(dict_1[word1], dict_2[word2])
 def get_cosine_similarity_one_word(word, time_slice1, time_slice2):
         return 'Late Roman'
+def convert_time_name_to_model(time_name):
+    '''
+        Convert the time slice name to the model name
+    '''
+    if time_name == 'Archaic':
+        return 'archaic_cbow'
+    elif time_name == 'Classical':
+        return 'classical_cbow'
+    elif time_name == 'Early Roman':
+        return 'early_roman_cbow'
+    elif time_name == 'Hellenistic':
+        return 'hellen_cbow'
+    elif time_name == 'Late Roman':
+        return 'late_roman_cbow'
 def get_nearest_neighbours(word, time_slice_model, n=10, models=load_all_models()):
     '''
         Return the nearest neighbours of a word
     late_roman = ('late_roman', load_word2vec_model('models/late_roman_cbow.model'))
     models = [archaic, classical, early_roman, hellen, late_roman]
+    nearest_neighbours = get_nearest_neighbours('πατήρ', 'archaic_cbow', n=5)
     print(nearest_neighbours)
     # vector = get_word_vector(model, 'ἀνήρ')
     # print(vector)