Update app.py
app.py
CHANGED
@@ -1,50 +1,47 @@
-from haystack
-from
-from
-from haystack.pipelines import Pipeline
+from haystack import Document, Pipeline, component
+from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
+from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
 
-from
-from haystack.schema import Document
+from typing import List
 
-from
-
-from huggingface_hub import get_inference_endpoint
+from huggingface_hub import get_inference_endpoint, get_token
 from datasets import load_dataset
 from time import perf_counter
 import gradio as gr
-import
+import shutil
 import requests
 import os
 
-
-
+RETRIEVER_TOP_K = 5
+RANKER_TOP_K = 2
 
 HF_TOKEN = os.getenv("HF_TOKEN")
 RANKER_URL = os.getenv("RANKER_URL")
-
+EMBEDDER_URL = os.getenv("EMBEDDER_URL")
 
-
-    "fastrag-
+EMBEDDER_IE = get_inference_endpoint(
+    "fastrag-embedder", namespace="optimum-intel", token=HF_TOKEN
 )
 RANKER_IE = get_inference_endpoint(
     "fastrag-ranker", namespace="optimum-intel", token=HF_TOKEN
 )
 
+
 def check_inference_endpoints():
-
+    EMBEDDER_IE.update()
     RANKER_IE.update()
 
     messages = []
 
-    if
+    if EMBEDDER_IE.status in ["initializing", "pending"]:
         messages += [
-            f"
+            f"Embedder Inference Endpoint is {EMBEDDER_IE.status}. Please wait a few seconds and try again."
         ]
-    elif
+    elif EMBEDDER_IE.status in ["paused", "scaledToZero"]:
         messages += [
-            f"
+            f"Embedder Inference Endpoint is {EMBEDDER_IE.status}. Resuming it. Please wait a few seconds and try again."
         ]
-
+        EMBEDDER_IE.resume()
 
     if RANKER_IE.status in ["initializing", "pending"]:
         messages += [
@@ -62,7 +59,6 @@ def check_inference_endpoints():
     return None
 
 
-
 def post(url, payload):
     response = requests.post(
         url,
@@ -85,81 +81,47 @@ def method_timer(method):
     return timed
 
 
-
-
-
-
-
-
-        scale_score: bool = True,
-    ):
-        self.document_store = document_store
-        self.top_k = top_k
-        self.batch_size = batch_size
-        self.scale_score = scale_score
-
-    @method_timer
-    def embed_queries(self, queries: List[str]) -> np.ndarray:
-        payload = {"queries": queries, "inputs": ""}
-        response = post(RETRIEVER_URL, payload)
+@component
+class InferenceEndpointTextEmbedder:
+    @component.output_types(embedding=List[float])
+    def run(self, text: str):
+        payload = {"text": text, "inputs": ""}
+        response = post(EMBEDDER_URL, payload)
 
         if "error" in response:
             raise gr.Error(response["error"])
 
-
-
+        return {"embedding": response["embedding"]}
+
 
-
-
+@component
+class InferenceEndpointDocumentEmbedder:
+    @component.output_types(documents=List[Document])
+    def run(self, documents: List[Document]):
         documents = [d.to_dict() for d in documents]
-        for doc in documents:
-            doc["embedding"] = None
 
         payload = {"documents": documents, "inputs": ""}
-        response = post(
+        response = post(EMBEDDER_URL, payload)
 
         if "error" in response:
             raise gr.Error(response["error"])
 
-
-        return arrays
+        return {"documents": [Document.from_dict(doc) for doc in response["documents"]]}
 
 
-
-
-    def
-        self
-    ) -> List[Document]:
-        documents = [d.to_dict() for d in documents]
-        for doc in documents:
-            doc["embedding"] = None
-
-        payload = {"query": query, "documents": documents, "top_k": top_k, "inputs": ""}
-        response = post(RANKER_URL, payload)
-
-        if "error" in response:
-            raise gr.Error(response["error"])
+@component
+class InferenceEndpointRanker:
+    def __init__(self, top_k: int):
+        self.top_k = top_k
 
-
-
-
-    def predict_batch(
-        self,
-        queries: List[str],
-        documents: List[List[Document]],
-        batch_size: Optional[int] = None,
-        top_k: Optional[int] = None,
-    ) -> List[List[Document]]:
-        documents = [[d.to_dict() for d in docs] for docs in documents]
-        for docs in documents:
-            for doc in docs:
-                doc["embedding"] = None
+    @component.output_types(documents=List[Document])
+    def run(self, query: str, documents: List[Document]):
+        documents = [d.to_dict() for d in documents]
 
         payload = {
-            "
+            "query": query,
             "documents": documents,
-            "
-            "top_k": top_k,
+            "top_k": self.top_k,
             "inputs": "",
         }
         response = post(RANKER_URL, payload)
@@ -167,49 +129,58 @@ class Ranker(BaseRanker):
         if "error" in response:
             raise gr.Error(response["error"])
 
-        return [
+        return {"documents": [Document.from_dict(doc) for doc in response["documents"]]}
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    except FileNotFoundError:
-        pass
-
-    document_store = FAISSDocumentStore(
-        sql_url="sqlite:////data/faiss_document_store.db",
+document_store = None
+
+if os.path.exists("data/qdrant"):
+    try:
+        document_store = QdrantDocumentStore(
+            path="./data/qdrant",
+            return_embedding=True,
+            recreate_index=False,
+            embedding_dim=384,
+        )
+    except Exception:
+        shutil.rmtree("data/qdrant", ignore_errors=True)
+
+if document_store is None:
+    shutil.rmtree("data/qdrant", ignore_errors=True)
+
+    document_store = QdrantDocumentStore(
+        path="./data/qdrant",
         return_embedding=True,
+        recreate_index=True,
        embedding_dim=384,
     )
-
-
-    )
-
-
-
-
-
-
-
+    dataset = load_dataset("bilgeyucel/seven-wonders")
+    documents = [Document(**doc) for doc in dataset["train"]]
+    documents_embedder = InferenceEndpointDocumentEmbedder()
+    documents_with_embedding = documents_embedder.run(documents)["documents"]
+    document_store.write_documents(documents_with_embedding)
+
+    print(
+        "Number of embedded documents in DocumentStore:",
+        document_store.count_documents(),
+    )
 
 pipe = Pipeline()
-
-
+
+embedder = InferenceEndpointTextEmbedder()
+ranker = InferenceEndpointRanker(top_k=RANKER_TOP_K)
+retriever = QdrantEmbeddingRetriever(
+    document_store=document_store, top_k=RETRIEVER_TOP_K
+)
+
+pipe.add_component("retriever", retriever)
+pipe.add_component("embedder", embedder)
+pipe.add_component("ranker", ranker)
+
+pipe.connect("retriever", "ranker.documents")
+pipe.connect("embedder", "retriever")
+
+print(pipe)
 
 
 def run(query: str) -> dict:
@@ -221,11 +192,12 @@ def run(query: str) -> dict:
         <p>{message}</p>
         """
 
-    pipe_output = pipe.run(query
+    pipe_output = pipe.run({"embedder": {"text": query}, "ranker": {"query": query}})
 
-    output =
+    output = """<h2>Top Ranked Documents</h2>"""
 
-    for i, doc in enumerate(pipe_output["documents"]):
+    for i, doc in enumerate(pipe_output["ranker"]["documents"]):
+        # limit content to 100 characters
         output += f"""
         <h3>Document {i + 1}</h3>
         <p><strong>ID:</strong> {doc.id}</p>
@@ -251,6 +223,7 @@ input_text = gr.components.Textbox(
 )
 output_html = gr.components.HTML(label="Documents")
 
+
 gr.Interface(
     fn=run,
     inputs=input_text,
@@ -259,9 +232,9 @@ gr.Interface(
     cache_examples=False,
     allow_flagging="never",
     title="End-to-End Retrieval & Ranking with Hugging Face Inference Endpoints and Spaces",
-    description="""## A [haystack](https://haystack.deepset.ai/) pipeline with the following components
-    - <strong>Document Store</strong>: A [
-    - <strong>
+    description="""## A [haystack](https://haystack.deepset.ai/) V2 pipeline with the following components
+    - <strong>Document Store</strong>: A [Qdrant document store](https://github.com/qdrant/qdrant) containing the [`seven-wonders` dataset](https://huggingface.co/datasets/bilgeyucel/seven-wonders), created on this Space's [persistent storage](https://huggingface.co/docs/hub/en/spaces-storage).
+    - <strong>Embedder</strong>: [Quantized FastRAG Embedder](https://huggingface.co/optimum-intel/fastrag-embedder) deployed on [Inference Endpoints](https://huggingface.co/docs/inference-endpoints/index) + Intel Sapphire Rapids CPU.
     - <strong>Ranker</strong>: [Quantized FastRAG Retriever](https://huggingface.co/optimum-intel/fastrag-ranker) deployed on [Inference Endpoints](https://huggingface.co/docs/inference-endpoints/index) + Intel Sapphire Rapids CPU.
 
     This Space is based on the optimizations demonstrated in the blog [CPU Optimized Embeddings with 🤗 Optimum Intel and fastRAG](https://huggingface.co/blog/intel-fast-embedding)
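
The heart of this change is the migration from Haystack v1 (`FAISSDocumentStore`, a `Ranker(BaseRanker)` subclass with `embed_queries` and `predict_batch`) to Haystack 2.x `@component` classes wired into a `Pipeline`. As a reference for the v2 contract the new code relies on, here is a minimal, self-contained sketch; the component names and toy logic are illustrative only, not code from this Space:

from typing import List

from haystack import Pipeline, component


@component
class ToyEmbedder:
    @component.output_types(embedding=List[float])
    def run(self, text: str):
        # A real component would call a model or an Inference Endpoint here.
        return {"embedding": [float(len(text)), 0.0]}


@component
class ToyScorer:
    @component.output_types(score=float)
    def run(self, embedding: List[float]):
        return {"score": sum(embedding)}


pipe = Pipeline()
pipe.add_component("embedder", ToyEmbedder())
pipe.add_component("scorer", ToyScorer())

# connect() wires an output socket to an input socket by name; when the
# match is unambiguous the socket name may be omitted, which is why
# app.py can write pipe.connect("embedder", "retriever").
pipe.connect("embedder.embedding", "scorer.embedding")

# Inputs are routed per component, mirroring app.py's
# pipe.run({"embedder": {"text": query}, "ranker": {"query": query}}).
result = pipe.run({"embedder": {"text": "seven wonders"}})
print(result["scorer"]["score"])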
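
One helper the diff context elides is `post`: the hunk cuts off right after `requests.post(url,`. A plausible reconstruction, given that both components send JSON payloads to Inference Endpoints and then check the decoded response for an "error" key, is sketched below; the bearer-token header and the `.json()` decoding are assumptions, not the committed code:

import os

import requests

HF_TOKEN = os.getenv("HF_TOKEN")


def post(url, payload):
    # Assumed shape: Inference Endpoints take a bearer token and a JSON
    # body, and the custom handlers here return JSON that carries either
    # the result or an {"error": ...} object, which app.py checks for.
    response = requests.post(
        url,
        json=payload,
        headers={
            "Authorization": f"Bearer {HF_TOKEN}",
            "Content-Type": "application/json",
        },
    )
    return response.json()

With such a helper, calling the ranker endpoint directly would look like post(RANKER_URL, {"query": "...", "documents": [...], "top_k": 2, "inputs": ""}), matching the payload that InferenceEndpointRanker.run builds.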