Spaces:
Paused
Paused
retriever
Browse files- app.py +6 -0
- retriever.py +11 -0
app.py
CHANGED
|
@@ -3,9 +3,11 @@ from huggingface_hub import InferenceClient, login
|
|
| 3 |
import random
|
| 4 |
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFacePipeline
|
| 5 |
from langchain.schema import AIMessage, HumanMessage
|
|
|
|
| 6 |
import os
|
| 7 |
import datasets
|
| 8 |
from langchain.docstore.document import Document
|
|
|
|
| 9 |
|
| 10 |
login(token=os.environ["HUGGINGFACEHUB_API_TOKEN"])
|
| 11 |
|
|
@@ -26,6 +28,10 @@ docs = [
|
|
| 26 |
for guest in guest_dataset
|
| 27 |
]
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
llm = HuggingFaceEndpoint(
|
| 30 |
repo_id="HuggingFaceH4/zephyr-7b-beta",
|
| 31 |
task="text-generation",
|
|
|
|
| 3 |
import random
|
| 4 |
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFacePipeline
|
| 5 |
from langchain.schema import AIMessage, HumanMessage
|
| 6 |
+
from langchain.tools import Tool
|
| 7 |
import os
|
| 8 |
import datasets
|
| 9 |
from langchain.docstore.document import Document
|
| 10 |
+
from retriever import extract_text
|
| 11 |
|
| 12 |
login(token=os.environ["HUGGINGFACEHUB_API_TOKEN"])
|
| 13 |
|
|
|
|
| 28 |
for guest in guest_dataset
|
| 29 |
]
|
| 30 |
|
| 31 |
+
|
| 32 |
+
bm25_retriever = BM25Retriever.from_documents(docs)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
llm = HuggingFaceEndpoint(
|
| 36 |
repo_id="HuggingFaceH4/zephyr-7b-beta",
|
| 37 |
task="text-generation",
|
retriever.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_community.retrievers import BM25Retriever
|
| 2 |
+
from langchain.tools import Tool
|
| 3 |
+
|
| 4 |
+
def extract_text(query: str, retriever=None) -> str:
    """Retrieves detailed information about gala guests based on their name or relation.

    Args:
        query: Free-text search string passed straight to the retriever.
        retriever: Optional object exposing ``invoke(query)`` that returns a
            sequence of documents, each with a ``page_content`` attribute.
            When omitted, the module-level ``bm25_retriever`` is used.

    Returns:
        The ``page_content`` of at most the first three results, separated
        by blank lines, or a fixed "not found" message when the retriever
        returns nothing.

    Raises:
        NameError: If no ``retriever`` is passed and this module has no
            ``bm25_retriever`` global.
    """
    if retriever is None:
        # This module does not assign ``bm25_retriever`` itself, so a bare
        # reference would fail with an unhelpful NameError. Look it up
        # explicitly and keep the same exception type, but say what to do.
        retriever = globals().get("bm25_retriever")
        if retriever is None:
            raise NameError(
                "bm25_retriever is not defined in this module; pass "
                "retriever=... or assign retriever.bm25_retriever before "
                "calling extract_text()"
            )

    results = retriever.invoke(query)
    if not results:
        return "No matching guest information found."

    # Cap the answer at the three leading documents.
    return "\n\n".join(doc.page_content for doc in results[:3])
|
| 11 |
+
|