Spaces:

flax-sentence-embeddings
/

sentence-embeddings

Runtime error

App Files Files Community

Trent committed on Jul 18, 2021

Commit

a41bdbc

1 Parent(s): 49438d6

Multi model select and local model loading

Browse files

Files changed (8) hide show

__init__.py +0 -0
app.py +12 -30
backend/__init__.py +0 -0
backend/config.py +1 -0
backend/inference.py +9 -20
backend/main.py +0 -19
backend/utils.py +11 -0
requirements.txt +1 -1

__init__.py ADDED Viewed

File without changes

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import streamlit as st
 import pandas as pd
-import base64
-import requests
 st.title('Demo using Flax-Sentence-Tranformers')
@@ -20,12 +21,12 @@ For more cool information on sentence embeddings, see the [sBert project](https:
 Please enjoy!!
 ''')
 anchor = st.text_input(
     'Please enter here the main text you want to compare:'
 )
 if anchor:
     n_texts = st.sidebar.number_input(
         f'''How many texts you want to compare with: '{anchor}'?''',
         value=2,
@@ -34,40 +35,21 @@ if anchor:
     inputs = []
     for i in range(n_texts):
-        input = st.sidebar.text_input(f'Text {i+1}:')
         inputs.append(input)
-api_base_url = 'http://127.0.0.1:8000/similarity'
 if anchor:
     if st.sidebar.button('Tell me the similarity.'):
-        res_distilroberta = requests.get(url = api_base_url, params = dict(anchor = anchor,
-                                                                           inputs = inputs,
-                                                                           model = 'distilroberta'))
-        res_mpnet = requests.get(url = api_base_url, params = dict(anchor = anchor,
-                                                                   inputs = inputs,
-                                                                   model = 'mpnet'))
-        res_minilm_l6 = requests.get(url = api_base_url, params = dict(anchor = anchor,
-                                                                       inputs = inputs,
-                                                                       model = 'minilm_l6'))
-        d_distilroberta = res_distilroberta.json()['dataframe']
-        d_mpnet = res_mpnet.json()['dataframe']
-        d_minilm_l6 = res_minilm_l6.json()['dataframe']
-        index = list(d_distilroberta['inputs'].values())
         df_total = pd.DataFrame(index=index)
-        df_total['distilroberta'] = list(d_distilroberta['score'].values())
-        df_total['mpnet'] = list(d_mpnet['score'].values())
-        df_total['minilm_l6'] = list(d_minilm_l6['score'].values())
-        st.write('Here are the results for our three models:')
         st.write(df_total)
         st.write('Visualize the results of each model:')
         st.area_chart(df_total)

 import streamlit as st
 import pandas as pd
+from backend import inference
+from backend.config import MODELS_ID
 st.title('Demo using Flax-Sentence-Tranformers')
 Please enjoy!!
 ''')
 anchor = st.text_input(
     'Please enter here the main text you want to compare:'
 )
 if anchor:
+    select_models = st.sidebar.multiselect("Choose models", options=MODELS_ID.keys())
     n_texts = st.sidebar.number_input(
         f'''How many texts you want to compare with: '{anchor}'?''',
         value=2,
     inputs = []
     for i in range(n_texts):
+        input = st.sidebar.text_input(f'Text {i + 1}:')
         inputs.append(input)
 if anchor:
     if st.sidebar.button('Tell me the similarity.'):
+        results = {model: inference.text_similarity(anchor, inputs, model) for model in select_models}
+        df_results = {model: results[model] for model in results}
+        index = inputs
         df_total = pd.DataFrame(index=index)
+        for key, value in df_results.items():
+            df_total[key] = list(value['score'].values)
+        st.write('Here are the results for selected models:')
         st.write(df_total)
         st.write('Visualize the results of each model:')
         st.area_chart(df_total)

backend/__init__.py ADDED Viewed

File without changes

backend/config.py CHANGED Viewed

@@ -1,3 +1,4 @@
 MODELS_ID = dict(distilroberta = 'flax-sentence-embeddings/st-codesearch-distilroberta-base',
                  mpnet = 'flax-sentence-embeddings/all_datasets_v3_mpnet-base',
                  minilm_l6 = 'flax-sentence-embeddings/all_datasets_v3_MiniLM-L6')

 MODELS_ID = dict(distilroberta = 'flax-sentence-embeddings/st-codesearch-distilroberta-base',
                  mpnet = 'flax-sentence-embeddings/all_datasets_v3_mpnet-base',
+                 mpnet_qa = 'flax-sentence-embeddings/mpnet_stackexchange_v1',
                  minilm_l6 = 'flax-sentence-embeddings/all_datasets_v3_MiniLM-L6')

backend/inference.py CHANGED Viewed

@@ -1,41 +1,30 @@
-from sentence_transformers import SentenceTransformer
 import pandas as pd
 import jax.numpy as jnp
 from typing import List
-import config
-# We download the models we will be using.
-# If you do not want to use all, you can comment the unused ones.
-distilroberta_model = SentenceTransformer(config.MODELS_ID['distilroberta'])
-mpnet_model = SentenceTransformer(config.MODELS_ID['mpnet'])
-minilm_l6_model = SentenceTransformer(config.MODELS_ID['minilm_l6'])
 # Defining cosine similarity using flax.
 def cos_sim(a, b):
-    return jnp.matmul(a, jnp.transpose(b))/(jnp.linalg.norm(a)*jnp.linalg.norm(b))
 # We get similarity between embeddings.
-def text_similarity(anchor: str, inputs: List[str], model: str = 'distilroberta'):
     # Creating embeddings
-    if model == 'distilroberta':
-        anchor_emb = distilroberta_model.encode(anchor)[None, :]
-        inputs_emb = distilroberta_model.encode([input for input in inputs])
-    elif model == 'mpnet':
-        anchor_emb = mpnet_model.encode(anchor)[None, :]
-        inputs_emb = mpnet_model.encode([input for input in inputs])
-    elif model == 'minilm_l6':
-        anchor_emb = minilm_l6_model.encode(anchor)[None, :]
-        inputs_emb = minilm_l6_model.encode([input for input in inputs])
     # Obtaining similarity
     similarity = list(jnp.squeeze(cos_sim(anchor_emb, inputs_emb)))
     # Returning a Pandas' dataframe
     d = {'inputs': [input for input in inputs],
-         'score': [round(similarity[i],3) for i in range(len(similarity))]}
     df = pd.DataFrame(d, columns=['inputs', 'score'])
     return df.sort_values('score', ascending=False)

 import pandas as pd
 import jax.numpy as jnp
 from typing import List
 # Defining cosine similarity using flax.
+from backend.utils import load_model
 def cos_sim(a, b):
+    return jnp.matmul(a, jnp.transpose(b)) / (jnp.linalg.norm(a) * jnp.linalg.norm(b))
 # We get similarity between embeddings.
+def text_similarity(anchor: str, inputs: List[str], model_name: str):
+    model = load_model(model_name)
     # Creating embeddings
+    anchor_emb = model.encode(anchor)[None, :]
+    inputs_emb = model.encode([input for input in inputs])
     # Obtaining similarity
     similarity = list(jnp.squeeze(cos_sim(anchor_emb, inputs_emb)))
     # Returning a Pandas' dataframe
     d = {'inputs': [input for input in inputs],
+         'score': [round(similarity[i], 3) for i in range(len(similarity))]}
     df = pd.DataFrame(d, columns=['inputs', 'score'])
     return df.sort_values('score', ascending=False)

backend/main.py DELETED Viewed

@@ -1,19 +0,0 @@
-from fastapi import Query, FastAPI
-import config
-import inference
-from typing import List
-app = FastAPI()
-@app.get("/")
-def read_root():
-    return {"message": "Welcome to the API of flax-sentence-embeddings."}
-@app.get('/similarity')
-def get_similarity(anchor: str, inputs: List[str] = Query([]), model: str = 'distilroberta'):
-    return {'dataframe': inference.text_similarity(anchor, inputs, model)}
-#if __name__ == "__main__":
-#    uvicorn.run("main:app", host="0.0.0.0", port=8080)

backend/utils.py ADDED Viewed

	@@ -0,0 +1,11 @@

+import streamlit as st
+from sentence_transformers import SentenceTransformer
+from .config import MODELS_ID
+@st.cache(allow_output_mutation=True)
+def load_model(model_name):
+    assert model_name in MODELS_ID.keys()
+    # Lazy downloading
+    model = SentenceTransformer(MODELS_ID[model_name])
+    return model

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
-fastapi
 sentence_transformers
 pandas
 jax
 streamlit

 sentence_transformers
 pandas
 jax
+jaxlib
 streamlit