Spaces:
Runtime error
Runtime error
File size: 2,477 Bytes
e112463 05dabf4 e112463 05dabf4 e112463 05dabf4 e112463 05dabf4 e112463 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import ast
import logging
import pickle

import numpy as np
import openai
import pandas as pd
from openai.embeddings_utils import cosine_similarity, get_embedding

from buster.docparser import EMBEDDING_MODEL
# Module-level logger, named after this module; used by the functions below.
logger = logging.getLogger(__name__)
# NOTE: this configures the root logger as a side effect of importing the
# module, setting its level to INFO.
logging.basicConfig(level=logging.INFO)
def rank_documents(df: pd.DataFrame, query: str, top_k: int = 3) -> pd.DataFrame:
    """Rank the rows of ``df`` by embedding similarity to ``query``.

    The query is embedded with ``get_embedding`` (engine ``EMBEDDING_MODEL``)
    and compared to every entry of ``df.embedding`` using
    ``cosine_similarity``.

    Side effect: a ``similarity`` column is written to ``df`` in place.

    Args:
        df: Documents to rank; must provide an ``embedding`` column.
        query: Text the documents are ranked against.
        top_k: Number of rows to return. ``-1`` means "return every row".

    Returns:
        The ``top_k`` most similar rows, sorted by descending ``similarity``.
    """
    query_embedding = get_embedding(
        query,
        engine=EMBEDDING_MODEL,
    )
    df["similarity"] = df.embedding.apply(lambda x: cosine_similarity(x, query_embedding))

    if top_k == -1:
        # DataFrame.head(-1) returns all rows *except the last one*, so the
        # "return everything" sentinel must be translated to the real length.
        top_k = len(df)

    return df.sort_values("similarity", ascending=False).head(top_k)
def engineer_prompt(question: str, documents: list[str]) -> str:
    """Assemble the completion prompt from context documents and a question.

    Args:
        question: The user's question, placed at the end of the prompt.
        documents: Context passages; they are joined with single spaces and
            placed at the start of the prompt.

    Returns:
        The joined documents, followed by the instruction line and the
        question.
    """
    context = " ".join(documents)
    instruction = "\nNow answer the following question:\n"
    return "".join((context, instruction, question))
def get_gpt_response(question: str, df) -> str:
    """Answer ``question`` with GPT, using the best-matching document as context.

    The documents in ``df`` are ranked against the question with
    ``rank_documents`` (``top_k=1``), the ``text`` of the top candidate is
    turned into a prompt by ``engineer_prompt``, and that prompt is sent to
    ``openai.Completion.create``.

    Args:
        question: The user's question.
        df: Document table passed to ``rank_documents``; the ranked result
            must expose a ``text`` column.

    Returns:
        The text of the first completion choice, or a generic apology message
        if the completion request or the parsing of its response raises.
        Errors raised while ranking documents are not caught here.
    """
    # Rank the documents, keep the highest scoring one and build the prompt.
    candidates = rank_documents(df, query=question, top_k=1)
    documents = candidates.text.to_list()
    prompt = engineer_prompt(question, documents)

    logger.info("querying GPT...")
    logger.info(f"User Question:\n{question}")

    # Only the API call and the response lookup are guarded, so the fallback
    # message is returned for request/response failures specifically.
    try:
        response = openai.Completion.create(
            engine="text-davinci-003",
            prompt=prompt,
            max_tokens=200,
            # temperature and top_p are intentionally not set here.
            frequency_penalty=1,
            presence_penalty=1,
        )
        response_text = response["choices"][0]["text"]
    except Exception:
        # Deliberate best-effort boundary: record the full traceback on the
        # module logger and hand the user a friendly message instead.
        logger.exception("GPT completion request failed")
        return "Oops, something went wrong. Try again later!"

    logger.info(f"\nGPT Response:\n{response_text}\n")
    return response_text
def load_embeddings(path: str) -> pd.DataFrame:
    """Read a document CSV and decode its ``embedding`` column.

    Args:
        path: Path of a CSV file with an ``embedding`` column whose cells are
            Python literals (presumably lists of floats as written by
            docparser.py -- confirm against that module).

    Returns:
        The DataFrame read from ``path``, with every ``embedding`` cell
        converted to a ``numpy`` array.

    Raises:
        ValueError, SyntaxError: If an ``embedding`` cell is not a valid
            Python literal.
    """
    logger.info(f"loading embeddings from {path}...")
    df = pd.read_csv(path)
    # SECURITY: cells were previously decoded with eval(), which executes any
    # expression found in the file. ast.literal_eval only accepts literals, so
    # a tampered CSV cannot run code while list-of-float cells parse the same.
    df["embedding"] = df.embedding.apply(ast.literal_eval).apply(np.array)
    logger.info("embeddings loaded.")
    return df
if __name__ == "__main__":
    # The embeddings file is produced ahead of time by docparser.py.
    embeddings = load_embeddings("data/document_embeddings.csv")
    # The answer is logged inside get_gpt_response; the return value is unused.
    get_gpt_response("Where should I put my datasets when I am running a job?", embeddings)
|