from langchain_community.retrievers import BM25Retriever
import datasets
from langchain.docstore.document import Document


class GuestInfoRetriever:
    """A class to retrieve information about gala guests.

    Wraps a ``BM25Retriever`` built from the supplied documents and exposes
    a single ``retrieve`` method that returns plain text.

    Attributes are set in ``__init__``:
        docs: the documents passed in by the caller, kept as-is.
        dataset: the ``BM25Retriever`` built from ``docs`` (the attribute name
            is historical; it holds the retriever, not the raw dataset).
    """

    def __init__(self, docs):
        """Build the retriever over ``docs``.

        Args:
            docs: Documents to index; passed straight to
                ``BM25Retriever.from_documents``.
        """
        self.docs = docs
        self.dataset = BM25Retriever.from_documents(docs)

    def retrieve(self, query: str) -> str:
        """Retrieves detailed information about gala guests based on their name or relation.

        Args:
            query: Free-text search string handed to the retriever.

        Returns:
            The ``page_content`` of up to the first three results, separated
            by blank lines, or a fixed "not found" message when the retriever
            returns nothing.
        """
        results = self.dataset.invoke(query)
        if not results:
            return "No matching guest information found."

        # Only the first three hits are reported, whatever the retriever returned.
        top_hits = results[:3]
        return "\n\n".join(doc.page_content for doc in top_hits)


def load_guest_dataset() -> GuestInfoRetriever:
    """Load the invitee dataset and return a retriever over it.

    Loads the ``train`` split of ``agents-course/unit3-invitees`` via
    ``datasets.load_dataset`` and turns every entry into one ``Document``
    whose text lists the guest's name, relation, description and email.

    Returns:
        A ``GuestInfoRetriever`` built from those documents.
    """
    guest_dataset = datasets.load_dataset("agents-course/unit3-invitees", split="train")

    # Convert dataset entries into Document objects
    docs = [
        Document(
            page_content="\n".join([
                f"Name: {guest['name']}",
                f"Relation: {guest['relation']}",
                f"Description: {guest['description']}",
                f"Email: {guest['email']}"
            ]),
            # The name is also stored as metadata alongside the text.
            metadata={"name": guest["name"]}
        )
        for guest in guest_dataset
    ]

    return GuestInfoRetriever(docs)