Spaces:
Running
Running
Vela
committed on
Commit
·
e3cf75b
1
Parent(s):
60d1a5c
added function for cosine similarity
Browse files
src/api/__pycache__/main.cpython-312.pyc
CHANGED
Binary files a/src/api/__pycache__/main.cpython-312.pyc and b/src/api/__pycache__/main.cpython-312.pyc differ
|
|
src/api/main.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
-
from fastapi import FastAPI
|
2 |
import os
|
3 |
import sys
|
4 |
src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "src"))
|
5 |
sys.path.append(src_directory)
|
6 |
from modules import encoding_model
|
|
|
7 |
|
8 |
app = FastAPI()
|
9 |
|
@@ -15,15 +16,33 @@ def home():
|
|
15 |
@app.get("/dimention")
|
16 |
def display_dimention(message : str = "Hello World"):
|
17 |
try:
|
18 |
-
|
19 |
-
|
|
|
20 |
except Exception as e:
|
21 |
return f"Unable to fetch the data {e}"
|
22 |
|
23 |
@app.get("/prediction")
|
24 |
def display_prediction(message : str = "Give me a sms to predict"):
|
25 |
try:
|
26 |
-
prediction = encoding_model.get_prediction(message)
|
27 |
return {"message" : f"Given sms is a {prediction}"}
|
28 |
except Exception as e:
|
29 |
-
return f"Unable to fetch the data {e}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, HTTPException
|
2 |
import os
|
3 |
import sys
|
4 |
src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "src"))
|
5 |
sys.path.append(src_directory)
|
6 |
from modules import encoding_model
|
7 |
+
from schemas.schemas import CosineSimilarity
|
8 |
|
9 |
app = FastAPI()
|
10 |
|
|
|
16 |
@app.get("/dimention")
|
17 |
def display_dimention(message : str = "Hello World"):
|
18 |
try:
|
19 |
+
no_of_dimention = encoding_model.get_prediction(message)[0]
|
20 |
+
dimentions = encoding_model.get_prediction(message)[1]
|
21 |
+
return {"message" : {"Prediction":{no_of_dimention:dimentions}}}
|
22 |
except Exception as e:
|
23 |
return f"Unable to fetch the data {e}"
|
24 |
|
25 |
@app.get("/prediction")
|
26 |
def display_prediction(message : str = "Give me a sms to predict"):
|
27 |
try:
|
28 |
+
prediction = encoding_model.get_prediction(message)[2]
|
29 |
return {"message" : f"Given sms is a {prediction}"}
|
30 |
except Exception as e:
|
31 |
+
return f"Unable to fetch the data {e}"
|
32 |
+
|
33 |
+
@app.post("/cosine_similarity")
|
34 |
+
def display_similarity(similarity: CosineSimilarity):
|
35 |
+
try:
|
36 |
+
if not similarity.message_1 or not similarity.message_2:
|
37 |
+
raise HTTPException(status_code=400, detail="Both messages must be non-empty strings.")
|
38 |
+
|
39 |
+
cosine_similarity = encoding_model.get_cosine_similarity(similarity.message_1, similarity.message_2)
|
40 |
+
|
41 |
+
return {
|
42 |
+
"message_1": similarity.message_1,
|
43 |
+
"message_2": similarity.message_2,
|
44 |
+
"cosine_similarity": cosine_similarity
|
45 |
+
}
|
46 |
+
|
47 |
+
except Exception as e:
|
48 |
+
raise HTTPException(status_code=500, detail=f"Unable to calculate cosine similarity: {str(e)}")
|
src/modules/__pycache__/encoding_model.cpython-312.pyc
CHANGED
Binary files a/src/modules/__pycache__/encoding_model.cpython-312.pyc and b/src/modules/__pycache__/encoding_model.cpython-312.pyc differ
|
|
src/modules/encoding_model.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from sentence_transformers import SentenceTransformer
|
2 |
from sklearn.model_selection import train_test_split
|
3 |
from sklearn.linear_model import LogisticRegression
|
4 |
import pandas as pd
|
@@ -28,24 +28,20 @@ def train_model():
|
|
28 |
logreg_model = LogisticRegression(max_iter=100)
|
29 |
logreg_model.fit(X_train_embeddings, y_train)
|
30 |
|
31 |
-
def
|
32 |
if logreg_model is None:
|
33 |
raise ValueError("Model has not been trained yet. Please call train_model first.")
|
34 |
|
35 |
new_embeddings = encoding_model.encode([message])
|
36 |
array = np.array(new_embeddings)[0].tolist()
|
37 |
|
38 |
-
prediction = logreg_model.predict(new_embeddings).tolist()
|
39 |
-
|
40 |
no_of_dimensions = len(new_embeddings[0])
|
41 |
dimension_df = pd.DataFrame(array, columns=["Dimension"])
|
|
|
42 |
|
43 |
-
return
|
44 |
-
|
45 |
-
def get_prediction(message):
|
46 |
-
if logreg_model is None:
|
47 |
-
raise ValueError("Model has not been trained yet. Please call train_model first.")
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
|
1 |
+
from sentence_transformers import SentenceTransformer,util
|
2 |
from sklearn.model_selection import train_test_split
|
3 |
from sklearn.linear_model import LogisticRegression
|
4 |
import pandas as pd
|
|
|
28 |
logreg_model = LogisticRegression(max_iter=100)
|
29 |
logreg_model.fit(X_train_embeddings, y_train)
|
30 |
|
31 |
+
def get_prediction(message):
|
32 |
if logreg_model is None:
|
33 |
raise ValueError("Model has not been trained yet. Please call train_model first.")
|
34 |
|
35 |
new_embeddings = encoding_model.encode([message])
|
36 |
array = np.array(new_embeddings)[0].tolist()
|
37 |
|
|
|
|
|
38 |
no_of_dimensions = len(new_embeddings[0])
|
39 |
dimension_df = pd.DataFrame(array, columns=["Dimension"])
|
40 |
+
prediction = logreg_model.predict(new_embeddings).tolist()
|
41 |
|
42 |
+
return no_of_dimensions, dimension_df, prediction
|
|
|
|
|
|
|
|
|
43 |
|
44 |
+
def get_cosine_similarity(msg_1: str, msg_2: str):
|
45 |
+
embeddings = encoding_model.encode([msg_1, msg_2])
|
46 |
+
similarity = util.cos_sim(embeddings[0], embeddings[1]).item()
|
47 |
+
return round(similarity, 4)
|
src/schemas/__pycache__/schemas.cpython-312.pyc
ADDED
Binary file (492 Bytes). View file
|
|
src/schemas/schemas.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
from pydantic import BaseModel
|
2 |
|
3 |
class CosineSimilarity(BaseModel):
|
4 |
-
|
5 |
-
|
|
|
1 |
from pydantic import BaseModel
|
2 |
|
3 |
class CosineSimilarity(BaseModel):
|
4 |
+
message_1 : str
|
5 |
+
message_2 : str
|