Spaces:
Runtime error
Runtime error
File size: 3,488 Bytes
bed05fc 79ad113 ecb8e22 bed05fc 79ad113 bed05fc 79ad113 cd8cf8a 79ad113 e8d06a3 ecb8e22 bed05fc 79ad113 cd8cf8a bed05fc d332383 bed05fc 79ad113 bed05fc 79ad113 bed05fc 17f29f2 79ad113 17f29f2 79ad113 17f29f2 79ad113 bed05fc 79ad113 bed05fc 9675a20 bed05fc 17f29f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import weaviate
import langchain
import gradio as gr
from langchain.embeddings import CohereEmbeddings
from langchain.document_loaders import UnstructuredFileLoader
from langchain.vectorstores import Weaviate
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
import os
import urllib.request
import ssl
import mimetypes
from dotenv import load_dotenv
import cohere
# Read API credentials and the Weaviate endpoint from the environment
# (python-dotenv first merges in any local .env file).
load_dotenv()
openai_api_key = os.environ.get('OPENAI')
cohere_api_key = os.environ.get('COHERE')
weaviate_api_key = os.environ.get('WEAVIATE')
weaviate_url = os.environ.get('WEAVIATE_URL')

# Authenticated Weaviate client; the Cohere key is sent along with every
# request in the X-Cohere-Api-Key header.
auth_config = weaviate.auth.AuthApiKey(api_key=weaviate_api_key)
client = weaviate.Client(
    url=weaviate_url,
    auth_client_secret=auth_config,
    additional_headers={"X-Cohere-Api-Key": cohere_api_key},
)

# LangChain vector store over the "HereChat" class, reading page text from
# the "text" property.
vectorstore = Weaviate(client, index_name="HereChat", text_key="text")
# Overrides a private attribute to widen the set of properties fetched per hit.
vectorstore._query_attrs = [
    "text",
    "title",
    "url",
    "views",
    "lang",
    "_additional {distance}",
]
# NOTE(review): presumably meant to make the store embed queries with Cohere;
# confirm the installed LangChain version actually reads this attribute.
vectorstore.embedding = CohereEmbeddings(
    model="embed-multilingual-v2.0",
    cohere_api_key=cohere_api_key,
)

# Cohere SDK client used for reranking.
co = cohere.Client(api_key=cohere_api_key)
def embed_pdf(file, collection_name):
    """Parse an uploaded document, embed its text with Cohere and store it in Weaviate.

    The upload is copied to a private temporary file, parsed with
    ``UnstructuredFileLoader``, and each non-empty parsed document is written
    to Weaviate as an object with a ``text`` property and its embedding as
    the object's vector.

    Args:
        file: The upload to ingest. Either a filesystem path, or an object
            exposing ``name`` (a path on disk) and/or ``read()`` returning
            the raw bytes.
        collection_name: Name of the Weaviate class the objects are created in.

    Returns:
        A dict with a single ``"message"`` key describing the outcome.

    Raises:
        ValueError: If ``file`` is ``None`` (nothing was uploaded).
        FileNotFoundError: If ``file`` offers neither a readable path nor
            a ``read()`` method.
    """
    import shutil
    import tempfile

    if file is None:
        raise ValueError("No file was provided")

    if isinstance(file, (str, os.PathLike)):
        source_path = os.fspath(file)
    else:
        name = getattr(file, "name", None)
        source_path = name if isinstance(name, str) else ""

    # Copy into a fresh temporary file instead of a path derived from the
    # upload's name: that name may be absolute (possibly the upload itself),
    # so writing to it could truncate the source or escape the working
    # directory. Only the extension is kept, as a hint for the parser.
    suffix = os.path.splitext(source_path)[1]
    fd, file_path = tempfile.mkstemp(suffix=suffix)
    try:
        with os.fdopen(fd, "wb") as tmp:
            if hasattr(file, "read") and not os.path.isfile(source_path):
                tmp.write(file.read())
            else:
                # Prefer the on-disk copy: a handle's read position is unknown.
                with open(source_path, "rb") as src:
                    shutil.copyfileobj(src, tmp)
        docs = UnstructuredFileLoader(file_path).load()
    finally:
        # Always clean up, even when copying or parsing fails.
        os.remove(file_path)

    # LangChain documents carry their text in ``page_content``.
    texts = [doc.page_content for doc in docs if doc.page_content]
    if texts:
        embeddings = CohereEmbeddings(
            model="embed-multilingual-v2.0",
            cohere_api_key=cohere_api_key,
        )
        # One batched embedding call for the whole upload.
        vectors = embeddings.embed_documents(texts)
        for text, vector in zip(texts, vectors):
            # The embedding is passed as the object's vector, not as a property.
            client.data_object.create(
                data_object={"text": text},
                class_name=collection_name,
                vector=vector,
            )

    return {"message": f"Documents embedded in Weaviate collection '{collection_name}'"}
def retrieve_info(query):
    """Fetch candidate passages for *query* from Weaviate and rerank them with Cohere.

    Up to 25 candidates are pulled from the vector store by similarity
    search, then Cohere's reranker keeps the 3 most relevant. No LLM call is
    made: the function returns ranked passages only.

    Args:
        query: The user's search text.

    Returns:
        ``{"results": [...]}`` where each entry has the keys
        ``"Document Rank"`` (1-based), ``"Document Index"`` (position in the
        candidate list), ``"Document"`` (the passage text) and
        ``"Relevance Score"`` (formatted to two decimals). The list is empty
        for a blank query or when nothing is retrieved.
    """
    candidate_limit = 25
    top_n = 3

    # A blank query has nothing to search for; skip the remote calls.
    if not query or not query.strip():
        return {"results": []}

    candidates = vectorstore.similarity_search(query, k=candidate_limit)
    texts = [doc.page_content for doc in candidates if doc.page_content]
    if not texts:
        return {"results": []}

    response = co.rerank(
        query=query,
        documents=texts,
        top_n=min(top_n, len(texts)),
        model='rerank-english-v2.0',
    )
    # Use ``.results`` when the response object exposes it; otherwise iterate
    # the response itself.
    ranked = getattr(response, "results", response)

    # Look the text up by index in our own list so the output does not
    # depend on the SDK echoing the documents back.
    formatted_results = [
        {
            "Document Rank": rank,
            "Document Index": hit.index,
            "Document": texts[hit.index],
            "Relevance Score": f"{hit.relevance_score:.2f}",
        }
        for rank, hit in enumerate(ranked, start=1)
    ]
    return {"results": formatted_results}
# Gradio UI: each function gets its own Interface, and a TabbedInterface
# serves both from a single app.
iface = gr.Interface(
    fn=retrieve_info,
    inputs=[gr.Textbox(label="Query")],
    outputs="text",
    allow_flagging="never",
)

# Document ingestion form.
# NOTE(review): type="file" is the Gradio 3.x spelling; newer releases
# expect "filepath" -- confirm against the pinned Gradio version.
embed_iface = gr.Interface(
    fn=embed_pdf,
    inputs=[
        gr.File(label="PDF File", type="file"),
        gr.Textbox(label="Collection Name"),
    ],
    outputs="text",
)

demo = gr.TabbedInterface([iface, embed_iface], ["Query", "Embed PDF"])
demo.launch()
|