docu-searcher / app.py
nateevo's picture
Add app and embeddings
6e3e882
raw
history blame
2.34 kB
import os

import gradio as gr
import numpy as np
import openai
import pandas as pd
from openai.embeddings_utils import get_embedding, cosine_similarity
# The API key is read from the environment so that no secret lives in the
# source tree. Set OPENAI_API_KEY (for example through the hosting platform's
# secret settings) before launching the app.
# NOTE(review): the key that used to be hard-coded on this line was published
# with the source and should be treated as compromised — revoke it.
openai.api_key = os.getenv("OPENAI_API_KEY")
# Maps each platform name offered in the UI to the CSV file holding its
# precomputed document embeddings.
_EMBEDDING_FILES = {
    "Salesforce Marketing Cloud Intelligence": "(sfmci)doc_embeddings.csv",
    "Salesforce Marketing Cloud CDP": "(sfmcdp)doc_embeddings.csv",
}

# Number of results returned by get_documentation (always exactly this many).
_TOP_K = 3

# Parsed embedding tables keyed by CSV path, filled lazily on first use so the
# file is read and its vectors parsed once instead of on every request.
_EMBEDDING_CACHE = {}


def _load_embeddings(csv_path):
    """Return the embeddings table for *csv_path*, parsing it only once."""
    df = _EMBEDDING_CACHE.get(csv_path)
    if df is None:
        df = pd.read_csv(csv_path)
        # Each "ada_search" cell is a string; drop its first and last
        # character and split on commas to recover the float vector.
        df["ada_search"] = df["ada_search"].apply(
            lambda raw: np.array(raw[1:-1].split(","), dtype=np.float32))
        _EMBEDDING_CACHE[csv_path] = df
    return df


def _pad_results(results):
    """Return *results* as a tuple of exactly _TOP_K strings, padded with ""."""
    results = list(results)[:_TOP_K]
    return tuple(results) + ("",) * (_TOP_K - len(results))


def get_documentation(query, platform):
    """Return the three documents most similar to *query* for *platform*.

    Args:
        query: Free-text question to search for.
        platform: One of the keys of ``_EMBEDDING_FILES``.

    Returns:
        A tuple of three strings. Each match is formatted with its title,
        body and URL; missing matches are empty strings. If the query is
        blank or the platform is not recognised (including ``None``), the
        first string is a short message and the other two are empty.
    """
    # Validate before calling the embedding API so bad input costs nothing
    # and the caller always receives three values.
    if not query or not query.strip():
        return _pad_results(["Please enter a question."])
    csv_path = _EMBEDDING_FILES.get(platform)
    if csv_path is None:
        return _pad_results(["Please select a platform."])

    embedding = get_embedding(query, engine="text-embedding-ada-002")
    df = _load_embeddings(csv_path)

    # Scores are kept in a separate Series so the cached table is never
    # mutated between requests.
    scores = df["ada_search"].apply(
        lambda vector: cosine_similarity(vector, embedding))
    best = scores.sort_values(ascending=False).index[:_TOP_K]
    top = df.loc[best]

    results = [
        f"Título: {title}\n\nContenido: {body}\n\nURL: {link}"
        for title, body, link in zip(top["title"], top["body"], top["link"])
    ]
    return _pad_results(results)
# Build the Gradio UI: a free-text question plus a platform selector as
# inputs, and three text outputs, one per value returned by
# get_documentation.
demo = gr.Interface(
    fn=get_documentation,
    inputs=[
        gr.Textbox(label="Question", lines=3),
        # Labelled so the user knows what the two options select.
        gr.Radio(
            ["Salesforce Marketing Cloud Intelligence",
             "Salesforce Marketing Cloud CDP"],
            label="Platform",
        ),
    ],
    outputs=["text", "text", "text"],
    title="Salesforce Documentation Search",
)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()