Spaces:

OrganizedProgrammers
/

Search-Technologies-API

Sleeping

ALLOUNE committed 25 days ago

Commit

62151ed

1 Parent(s): 290a90d

add purpose search

Files changed (2) hide show

main.py CHANGED Viewed

@@ -28,6 +28,7 @@ dataset = load_dataset("heymenn/Technologies", streaming=True, split="train")
 class SearchInput(BaseModel):
     title: str
 class SearchOutput(BaseModel):
     title: str
@@ -53,7 +54,7 @@ def post_search(payload: SearchInput):
     """
     Endpoint that returns a search result.
     """
-    config = {"dataset": dataset, "model": model}
     res = search_and_retrieve(payload.title, config)
     return res
@@ -63,7 +64,7 @@ def post_generate_and_push(payload: GenerateInput):
     Endpoint to generate a technology and push it to the dataset
     """
-    config = {"dataset": dataset, "model": model}
     res = search_and_retrieve(payload.title, config)
     if res["score"] >= 0.7 and not payload.force:
         raise HTTPException(status_code=500, detail=f"Cannot generate the technology a high score of {res['score']} have been found for the technology : {res['title']}")

 class SearchInput(BaseModel):
     title: str
+    type: str = "title"
 class SearchOutput(BaseModel):
     title: str
     """
     Endpoint that returns a search result.
     """
+    config = {"dataset": dataset, "model": model, "type": payload.type}
     res = search_and_retrieve(payload.title, config)
     return res
     Endpoint to generate a technology and push it to the dataset
     """
+    config = {"dataset": dataset, "model": model, "type": "title"}
     res = search_and_retrieve(payload.title, config)
     if res["score"] >= 0.7 and not payload.force:
         raise HTTPException(status_code=500, detail=f"Cannot generate the technology a high score of {res['score']} have been found for the technology : {res['title']}")

src/processor.py CHANGED Viewed

@@ -18,8 +18,11 @@ def search_and_retrieve(user_input, config):
         purpose = row["purpose"]
         cosim = model.similarity(row["embeddings"], user_embedding)
-        token_set_ratio = fuzz.token_set_ratio(user_input, name)
         fuzzy_score = token_set_ratio / 100
         alpha = 0.6
         combined_score = alpha * cosim + (1 - alpha) * fuzzy_score
@@ -77,9 +80,11 @@ def generate_tech(user_input, user_instructions):
     <USER_INPUT>
     {user_input}
-    </USER_INPUT>
     """
     client = Client(api_key=os.getenv("GEMINI_API_KEY"))
     # Define the grounding tool
@@ -111,4 +116,4 @@ def send_to_dataset(data, model):
     dataset = load_dataset("heymenn/Technologies", split="train")
     updated_dataset = dataset.add_item(data)
-    updated_dataset.push_to_hub("heymenn/Technologies")

         purpose = row["purpose"]
         cosim = model.similarity(row["embeddings"], user_embedding)
+        if config["type"] == "purpose":
+            token_set_ratio = fuzz.token_set_ratio(user_input, purpose)
+        else:
+            token_set_ratio = fuzz.token_set_ratio(user_input, name)
         fuzzy_score = token_set_ratio / 100
         alpha = 0.6
         combined_score = alpha * cosim + (1 - alpha) * fuzzy_score
     <USER_INPUT>
     {user_input}
+    </USER_INPUT>
     """
+    client = Client(api_key=os.getenv("GEMINI_API_KEY"))
     client = Client(api_key=os.getenv("GEMINI_API_KEY"))
     # Define the grounding tool
     dataset = load_dataset("heymenn/Technologies", split="train")
     updated_dataset = dataset.add_item(data)
+    updated_dataset.push_to_hub("heymenn/Technologies")