Vela committed on
Commit
e3cf75b
·
1 Parent(s): 60d1a5c

added function for cosine similarity

Browse files
src/api/__pycache__/main.cpython-312.pyc CHANGED
Binary files a/src/api/__pycache__/main.cpython-312.pyc and b/src/api/__pycache__/main.cpython-312.pyc differ
 
src/api/main.py CHANGED
@@ -1,9 +1,10 @@
1
- from fastapi import FastAPI
2
  import os
3
  import sys
4
  src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "src"))
5
  sys.path.append(src_directory)
6
  from modules import encoding_model
 
7
 
8
  app = FastAPI()
9
 
@@ -15,15 +16,33 @@ def home():
15
  @app.get("/dimention")
16
  def display_dimention(message : str = "Hello World"):
17
  try:
18
- dimention = encoding_model.get_label(message)
19
- return dimention
 
20
  except Exception as e:
21
  return f"Unable to fetch the data {e}"
22
 
23
  @app.get("/prediction")
24
  def display_prediction(message : str = "Give me a sms to predict"):
25
  try:
26
- prediction = encoding_model.get_prediction(message)
27
  return {"message" : f"Given sms is a {prediction}"}
28
  except Exception as e:
29
- return f"Unable to fetch the data {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
  import os
3
  import sys
4
  src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "src"))
5
  sys.path.append(src_directory)
6
  from modules import encoding_model
7
+ from schemas.schemas import CosineSimilarity
8
 
9
  app = FastAPI()
10
 
 
16
  @app.get("/dimention")
17
  def display_dimention(message : str = "Hello World"):
18
  try:
19
+ no_of_dimention = encoding_model.get_prediction(message)[0]
20
+ dimentions = encoding_model.get_prediction(message)[1]
21
+ return {"message" : {"Prediction":{no_of_dimention:dimentions}}}
22
  except Exception as e:
23
  return f"Unable to fetch the data {e}"
24
 
25
  @app.get("/prediction")
26
  def display_prediction(message : str = "Give me a sms to predict"):
27
  try:
28
+ prediction = encoding_model.get_prediction(message)[2]
29
  return {"message" : f"Given sms is a {prediction}"}
30
  except Exception as e:
31
+ return f"Unable to fetch the data {e}"
32
+
33
+ @app.post("/cosine_similarity")
34
+ def display_similarity(similarity: CosineSimilarity):
35
+ try:
36
+ if not similarity.message_1 or not similarity.message_2:
37
+ raise HTTPException(status_code=400, detail="Both messages must be non-empty strings.")
38
+
39
+ cosine_similarity = encoding_model.get_cosine_similarity(similarity.message_1, similarity.message_2)
40
+
41
+ return {
42
+ "message_1": similarity.message_1,
43
+ "message_2": similarity.message_2,
44
+ "cosine_similarity": cosine_similarity
45
+ }
46
+
47
+ except Exception as e:
48
+ raise HTTPException(status_code=500, detail=f"Unable to calculate cosine similarity: {str(e)}")
src/modules/__pycache__/encoding_model.cpython-312.pyc CHANGED
Binary files a/src/modules/__pycache__/encoding_model.cpython-312.pyc and b/src/modules/__pycache__/encoding_model.cpython-312.pyc differ
 
src/modules/encoding_model.py CHANGED
@@ -1,4 +1,4 @@
1
- from sentence_transformers import SentenceTransformer
2
  from sklearn.model_selection import train_test_split
3
  from sklearn.linear_model import LogisticRegression
4
  import pandas as pd
@@ -28,24 +28,20 @@ def train_model():
28
  logreg_model = LogisticRegression(max_iter=100)
29
  logreg_model.fit(X_train_embeddings, y_train)
30
 
31
- def get_label(message):
32
  if logreg_model is None:
33
  raise ValueError("Model has not been trained yet. Please call train_model first.")
34
 
35
  new_embeddings = encoding_model.encode([message])
36
  array = np.array(new_embeddings)[0].tolist()
37
 
38
- prediction = logreg_model.predict(new_embeddings).tolist()
39
-
40
  no_of_dimensions = len(new_embeddings[0])
41
  dimension_df = pd.DataFrame(array, columns=["Dimension"])
 
42
 
43
- return {"Prediction_Dimension": {no_of_dimensions: dimension_df}}
44
-
45
- def get_prediction(message):
46
- if logreg_model is None:
47
- raise ValueError("Model has not been trained yet. Please call train_model first.")
48
 
49
- new_embeddings = encoding_model.encode([message])
50
- prediction = logreg_model.predict(new_embeddings).tolist()
51
- return prediction
 
 
1
+ from sentence_transformers import SentenceTransformer,util
2
  from sklearn.model_selection import train_test_split
3
  from sklearn.linear_model import LogisticRegression
4
  import pandas as pd
 
28
  logreg_model = LogisticRegression(max_iter=100)
29
  logreg_model.fit(X_train_embeddings, y_train)
30
 
31
+ def get_prediction(message):
32
  if logreg_model is None:
33
  raise ValueError("Model has not been trained yet. Please call train_model first.")
34
 
35
  new_embeddings = encoding_model.encode([message])
36
  array = np.array(new_embeddings)[0].tolist()
37
 
 
 
38
  no_of_dimensions = len(new_embeddings[0])
39
  dimension_df = pd.DataFrame(array, columns=["Dimension"])
40
+ prediction = logreg_model.predict(new_embeddings).tolist()
41
 
42
+ return no_of_dimensions, dimension_df, prediction
 
 
 
 
43
 
44
+ def get_cosine_similarity(msg_1: str, msg_2: str):
45
+ embeddings = encoding_model.encode([msg_1, msg_2])
46
+ similarity = util.cos_sim(embeddings[0], embeddings[1]).item()
47
+ return round(similarity, 4)
src/schemas/__pycache__/schemas.cpython-312.pyc ADDED
Binary file (492 Bytes). View file
 
src/schemas/schemas.py CHANGED
@@ -1,5 +1,5 @@
1
  from pydantic import BaseModel
2
 
3
  class CosineSimilarity(BaseModel):
4
- text_1 : str
5
- text_1 : str
 
1
  from pydantic import BaseModel
2
 
3
  class CosineSimilarity(BaseModel):
4
+ message_1 : str
5
+ message_2 : str