Spaces:
Sleeping
Sleeping
Commit
·
b30d739
1
Parent(s):
6c6e6a2
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

import pandas as pd
import requests
import torch
from datasets import load_dataset
from sentence_transformers.util import semantic_search
|
6 |
+
|
7 |
+
|
8 |
+
# Installable
|
9 |
+
# pip install datasets
|
10 |
+
# !pip install retry
|
11 |
+
# !pip install -U sentence-transformers
|
12 |
+
|
13 |
+
|
14 |
+
# FAQ questions. The search results further down are mapped back into this
# list by `corpus_id`, so the order here presumably mirrors the row order of
# the embedded dataset -- do not reorder without re-embedding.
texts = [
    "How do I get a replacement Medicare card?",
    "What is the monthly premium for Medicare Part B?",
    "How do I terminate my Medicare Part B (medical insurance)?",
    "How do I sign up for Medicare?",
    "Can I sign up for Medicare Part B if I am working and have health insurance through an employer?",
    "How do I sign up for Medicare Part B if I already have Part A?",
    "What are Medicare late enrollment penalties?",
    "What is Medicare and who can get it?",
    "How can I get help with my Medicare Part A and Part B premiums?",
    "What are the different parts of Medicare?",
    "Will my Medicare premiums be higher because of my higher income?",
    "What is TRICARE ?",
    "Should I sign up for Medicare Part B if I have Veterans' Benefits?",
]

model_id = "sentence-transformers/all-MiniLM-L6-v2"

# Never commit API tokens: the token is read from the HF_TOKEN environment
# variable (on a Space, configure it as a secret). Any token that was
# previously committed in this file should be revoked.
hf_token = os.environ.get("HF_TOKEN")

api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
# Only send an Authorization header when a token is actually configured.
# NOTE(review): confirm the endpoint accepts anonymous requests for this model.
headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}
|
33 |
+
|
34 |
+
# def query(texts):
|
35 |
+
# response = requests.post(api_url, headers=headers, json={"inputs": texts, "options":{"wait_for_model":True}})
|
36 |
+
# return response.json()
|
37 |
+
|
38 |
+
def query(texts):
    """Request embeddings for *texts* from the Hugging Face Inference API.

    Posts ``{"inputs": texts}`` to ``api_url`` using the module-level
    ``headers``. A JSON list response is returned as-is. Any other outcome
    (transport error, non-JSON body, or a JSON object such as
    ``{"error": ...}``) counts as a failed attempt; up to three attempts are
    made, ten seconds apart.

    Args:
        texts: The input passed through unchanged as the ``inputs`` field.

    Returns:
        The decoded JSON list returned by the API.

    Raises:
        RuntimeError: The last attempt returned something other than a list.
        requests.RequestException, ValueError: The last attempt failed at the
            transport or JSON-decoding level.
    """
    # The original relied on a `retry` decorator that was never imported
    # (NameError at definition time); the same policy is implemented here
    # with the standard library only.
    import time

    tries, delay = 3, 10
    failure = None
    for attempt in range(tries):
        if attempt:
            # Pause between attempts, not before the first one.
            time.sleep(delay)
        try:
            response = requests.post(api_url, headers=headers, json={"inputs": texts})
            result = response.json()
        except (requests.RequestException, ValueError) as exc:
            failure = exc
            continue
        if isinstance(result, list):
            return result
        # Surface what the API actually reported instead of assuming that
        # the model is still loading.
        detail = result.get("error", result) if isinstance(result, dict) else result
        failure = RuntimeError(
            f"The Inference API did not return embeddings: {detail}"
        )
    raise failure
|
48 |
+
|
49 |
+
# Write the FAQ texts to disk as a single-column ("inputs") CSV.
# NOTE(review): despite the variable and file names, no embeddings are
# computed here -- the CSV holds the raw texts. Presumably this was meant to
# be `query(texts)`; confirm before relying on embeddings.csv.
output = {"inputs": texts}
embeddings = pd.DataFrame(output)
embeddings.to_csv("embeddings.csv", index=False)

# Load the pre-computed FAQ embeddings and convert the train split into a
# float tensor.
faqs_embeddings = load_dataset('ITESM/embedded_faqs_medicare')
dataset_embeddings = torch.from_numpy(
    faqs_embeddings["train"].to_pandas().to_numpy()
).to(torch.float)

# Embed the user question through the Inference API.
question = ["How can Medicare help me?"]
output = query(question)
query_embeddings = torch.FloatTensor(output)
print(f"The size of our embedded dataset is {dataset_embeddings.shape} and of our embedded query is {query_embeddings.shape}.")

# Search the top 5 matches for the (single) query and print the matching
# FAQ texts, looked up by each hit's corpus_id.
hits = semantic_search(query_embeddings, dataset_embeddings, top_k=5)
print([texts[hit['corpus_id']] for hit in hits[0]])
|