Upload folder using huggingface_hub
- backend/colpali.py +29 -16
- colpali-with-snippets/schemas/pdf_page.sd +30 -9
- frontend/app.py +15 -7
- globals.css +5 -0
- main.py +34 -11
- output.css +9 -0
backend/colpali.py
CHANGED
@@ -3,7 +3,7 @@
 import torch
 from PIL import Image
 import numpy as np
-from typing import cast
+from typing import cast, Generator
 from pathlib import Path
 import base64
 from io import BytesIO
@@ -119,7 +119,7 @@ def gen_similarity_maps(
     token_idx_map: dict,
     images: List[Union[Path, str]],
     vespa_sim_maps: List[str],
-) ->
+) -> Generator[Tuple[int, str, str], None, None]:
     """
     Generate similarity maps for the given images and query, and return base64-encoded blended images.
 
@@ -134,8 +134,9 @@ def gen_similarity_maps(
         images (List[Union[Path, str]]): List of image paths or base64-encoded strings.
         vespa_sim_maps (List[str]): List of Vespa similarity maps.
 
-
-
+    Yields:
+        Tuple[int, str, str]: A tuple containing the image index, the selected token, and the base64-encoded image.
+
     """
 
     start = time.perf_counter()
@@ -302,11 +303,7 @@ def gen_similarity_maps(
 
             # Store the base64-encoded image
             result_per_image[token] = blended_img_base64
-
-    end3 = time.perf_counter()
-    print(f"Collecting blended images took: {end3 - start3} s")
-    print(f"Total heatmap generation took: {end3 - start} s")
-    return results
+            yield idx, token, blended_img_base64
 
 
 def get_query_embeddings_and_token_map(
@@ -369,23 +366,32 @@ async def query_vespa_default(
 async def query_vespa_bm25(
     app: Vespa,
     query: str,
+    q_emb: torch.Tensor,
     hits: int = 3,
     timeout: str = "10s",
     **kwargs,
 ) -> dict:
     async with app.asyncio(connections=1, total_timeout=120) as session:
+        query_embedding = format_q_embs(q_emb)
+
+        start = time.perf_counter()
         response: VespaQueryResponse = await session.query(
             body={
-                "yql": "select id,title,url,full_image,page_number,snippet,text from pdf_page where userQuery();",
+                "yql": "select id,title,url,full_image,page_number,snippet,text,summaryfeatures from pdf_page where userQuery();",
                 "ranking": "bm25",
                 "query": query,
                 "timeout": timeout,
                 "hits": hits,
+                "input.query(qt)": query_embedding,
                 "presentation.timing": True,
                 **kwargs,
             },
         )
         assert response.is_successful(), response.json
+        stop = time.perf_counter()
+        print(
+            f"Query time + data transfer took: {stop - start} s, vespa said searchtime was {response.json.get('timing', {}).get('searchtime', -1)} s"
+        )
         return format_query_results(query, response)
@@ -451,7 +457,7 @@ async def query_vespa_nearest_neighbor(
                 **query_tensors,
                 "presentation.timing": True,
                 # if we use rank({nn_string}, userQuery()), dynamic summary doesn't work, see https://github.com/vespa-engine/vespa/issues/28704
-                "yql": f"select id,title,snippet,text,url,full_image,page_number from pdf_page where {nn_string} or userQuery()",
+                "yql": f"select id,title,snippet,text,url,full_image,page_number,summaryfeatures from pdf_page where {nn_string} or userQuery()",
                 "ranking.profile": "retrieval-and-rerank",
                 "timeout": timeout,
                 "hits": hits,
@@ -489,7 +495,7 @@ async def get_result_from_query(
     elif ranking == "bm25+colpali":
         result = await query_vespa_default(app, query, q_embs)
     elif ranking == "bm25":
-        result = await query_vespa_bm25(app, query)
+        result = await query_vespa_bm25(app, query, q_embs)
     else:
         raise ValueError(f"Unsupported ranking: {ranking}")
     # Print score, title id, and text of the results
@@ -509,6 +515,8 @@ def add_sim_maps_to_result(
     query: str,
     q_embs: Any,
     token_to_idx: Dict[str, int],
+    query_id: str,
+    result_cache,
 ) -> Dict[str, Any]:
     vit_config = load_vit_config(model)
     imgs: List[str] = []
@@ -520,7 +528,7 @@ def add_sim_maps_to_result(
         vespa_sim_map = single_result["fields"].get("summaryfeatures", None)
         if vespa_sim_map:
             vespa_sim_maps.append(vespa_sim_map)
-
+    sim_map_imgs_generator = gen_similarity_maps(
         model=model,
         processor=processor,
         device=model.device,
@@ -531,9 +539,14 @@ def add_sim_maps_to_result(
         images=imgs,
        vespa_sim_maps=vespa_sim_maps,
     )
-    for
-    for
-
+    for img_idx, token, sim_mapb64 in sim_map_imgs_generator:
+        print(f"Created sim map for image {img_idx} and token {token}")
+        result["root"]["children"][img_idx]["fields"][f"sim_map_{token}"] = sim_mapb64
+        # Update result_cache with the new sim_map
+        result_cache.set(query_id, result)
+    # for single_result, sim_map_dict in zip(result["root"]["children"], sim_map_imgs):
+    #     for token, sim_mapb64 in sim_map_dict.items():
+    #         single_result["fields"][f"sim_map_{token}"] = sim_mapb64
     return result
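For context, the change above turns gen_similarity_maps into a generator that yields (image_index, token, base64_image) tuples as each heat map is produced, so add_sim_maps_to_result can attach maps to the result incrementally instead of waiting for the full batch. A minimal sketch of that streaming pattern, with a stub generator standing in for the real model-backed function (names below are illustrative, not the Space's code):

from typing import Generator, Tuple

def stub_sim_maps() -> Generator[Tuple[int, str, str], None, None]:
    # Stand-in for gen_similarity_maps: yields (image_index, token, base64_image).
    for idx, token in [(0, "cat"), (0, "dog"), (1, "cat")]:
        yield idx, token, f"<base64 blend for image {idx}, token {token}>"

result = {"root": {"children": [{"fields": {}}, {"fields": {}}]}}

for img_idx, token, b64 in stub_sim_maps():
    # Attach each similarity map to its hit as soon as it is ready,
    # mirroring how add_sim_maps_to_result updates the cached result.
    result["root"]["children"][img_idx]["fields"][f"sim_map_{token}"] = b64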
colpali-with-snippets/schemas/pdf_page.sd
CHANGED
@@ -67,15 +67,28 @@ schema pdf_page {
     }
 
     rank-profile bm25 {
+
+        inputs {
+            query(qt) tensor<float>(querytoken{}, v[128])  # only used here to generate image similarity map
+        }
+
         first-phase {
             expression: bm25(title) + bm25(text)
         }
+
+        function similarities() {
+            expression {
+                sum(
+                    query(qt) * unpack_bits(attribute(embedding)), v
+                )
+            }
+        }
+        summary-features: similarities
     }
 
     rank-profile default {
         inputs {
-            query(qt) tensor<float>(querytoken{}, v[128])
-
+            query(qt) tensor<float>(querytoken{}, v[128])
         }
         function max_sim() {
             expression {
@@ -92,13 +105,6 @@ schema pdf_page {
 
             }
         }
-        function similarities() {
-            expression {
-                sum(
-                    query(qt) * unpack_bits(attribute(embedding)), v
-                )
-            }
-        }
         function bm25_score() {
             expression {
                 bm25(title) + bm25(text)
@@ -115,6 +121,13 @@ schema pdf_page {
                 max_sim
             }
         }
+        function similarities() {
+            expression {
+                sum(
+                    query(qt) * unpack_bits(attribute(embedding)), v
+                )
+            }
+        }
         summary-features: similarities
     }
     rank-profile retrieval-and-rerank {
@@ -229,5 +242,13 @@ schema pdf_page {
                 max_sim
             }
         }
+        function similarities() {
+            expression {
+                sum(
+                    query(qt) * unpack_bits(attribute(embedding)), v
+                )
+            }
+        }
+        summary-features: similarities
     }
 }
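Each rank profile above now exposes the per-token similarities tensor through summary-features, which is what the backend reads back from every hit. A minimal sketch of that read path, assuming a hit dict shaped like the ones handled in backend/colpali.py (the placeholder values are illustrative, not real response data):

# Illustrative only - placeholder hit; real hits come from the Vespa response.
hit = {
    "fields": {
        "title": "Example PDF",
        "summaryfeatures": {"similarities": {"cells": []}},  # placeholder tensor value
    }
}

vespa_sim_map = hit["fields"].get("summaryfeatures", None)
if vespa_sim_map:
    # Present only when the rank profile declares "summary-features: similarities"
    # and the query supplied "input.query(qt)".
    print("per-token similarity tensor available for this hit")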
frontend/app.py
CHANGED
@@ -1,7 +1,7 @@
-from urllib.parse import quote_plus
 from typing import Optional
+from urllib.parse import quote_plus
 
-from fasthtml.components import H1, H2, Div, Form, Img, P, Span
+from fasthtml.components import H1, H2, Div, Form, Img, NotStr, P, Span
 from fasthtml.xtend import A, Script
 from lucide_fasthtml import Lucide
 from shad4fast import Badge, Button, Input, Label, RadioGroup, RadioGroupItem
@@ -275,14 +275,22 @@ def SearchResult(results: list, query_id: Optional[str] = None):
                     H2(fields["title"], cls="text-xl font-semibold"),
                     P(
                         "Page " + str(fields["page_number"]),
-                        cls="text-
+                        cls="text-foreground font-mono bold",
+                    ),
+                    Div(
+                        Badge(
+                            "Relevance score: " + str(result["relevance"]),
+                            cls="flex gap-1.5 items-center justify-center",
+                        ),
+                    ),
+                    P(
+                        NotStr(fields.get("snippet", "")),
+                        cls="text-highlight text-muted-foreground",
                     ),
                     P(
-                        "
-                        cls="text-muted-foreground",
+                        NotStr(fields.get("text", "")),
+                        cls="text-highlight text-muted-foreground",
                     ),
-                    P(NotStr(fields["snippet"]), cls="text-muted-foreground"),
-                    P(NotStr(fields["text"]), cls="text-muted-foreground"),
                     cls="text-sm grid gap-y-4",
                 ),
                 cls="bg-background px-3 py-5 hidden md:block",
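The snippet and text fields above are wrapped in NotStr so the <strong> tags that Vespa's dynamic snippets place around matched terms render as real markup (which the new .text-highlight strong rule then styles) instead of being escaped. A small illustration, using a made-up snippet string:

from fasthtml.components import P, NotStr

# Made-up example of a dynamic snippet with highlight tags.
snippet = "The <strong>Manhattan Project</strong> produced the first nuclear weapons."

escaped = P(snippet, cls="text-muted-foreground")  # tags would appear as literal text
highlighted = P(NotStr(snippet), cls="text-highlight text-muted-foreground")  # tags rendered, matches highlighted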
globals.css
CHANGED
@@ -165,6 +165,11 @@
   }
 }
 
+.text-highlight strong {
+  background-color: #61D790;
+  color: #2E2F27;
+}
+
 .tokens-button {
   background-color: #B7E2F1;
   color: #2E2F27;
main.py
CHANGED
@@ -40,6 +40,9 @@ app, rt = fast_app(
 vespa_app: Vespa = get_vespa_app()
 
 result_cache = LRUCache(max_size=20)  # Each result can be ~10MB
+task_cache = LRUCache(
+    max_size=1000
+)  # Map from query_id to boolean value - False if not all results are ready.
 thread_pool = ThreadPoolExecutor()
 
 
@@ -97,7 +100,17 @@ async def get(request, query: str, nn: bool = True):
     )
     # Generate a unique query_id based on the query and ranking value
     query_id = generate_query_id(query + ranking_value)
-
+    # See if results are already in cache
+    if result_cache.get(query_id):
+        print(f"Results for query_id {query_id} already in cache")
+        result = result_cache.get(query_id)
+        search_results = get_results_children(result)
+        # If task is completed, return the results, but no query_id
+        if task_cache.get(query_id):
+            return SearchResult(search_results, None)
+        # If task is not completed, return the results with query_id
+        return SearchResult(search_results, query_id)
+    task_cache.set(query_id, False)
     # Fetch model and processor
     manager = ModelManager.get_instance()
     model = manager.model
@@ -116,19 +129,26 @@ async def get(request, query: str, nn: bool = True):
         ranking=ranking_value,
     )
     end = time.perf_counter()
-    print(
+    print(
+        f"Search results fetched in {end - start:.2f} seconds, Vespa says searchtime was {result['timing']['searchtime']} seconds"
+    )
     # Start generating the similarity map in the background
     asyncio.create_task(
         generate_similarity_map(
             model, processor, query, q_embs, token_to_idx, result, query_id
         )
     )
+    search_results = get_results_children(result)
+    return SearchResult(search_results, query_id)
+
+
+def get_results_children(result):
     search_results = (
         result["root"]["children"]
         if "root" in result and "children" in result["root"]
         else []
     )
-    return
+    return search_results
 
 
 async def generate_similarity_map(
@@ -143,22 +163,25 @@ async def generate_similarity_map(
         query=query,
         q_embs=q_embs,
         token_to_idx=token_to_idx,
+        query_id=query_id,
+        result_cache=result_cache,
     )
     sim_map_result = await loop.run_in_executor(thread_pool, sim_map_task)
     result_cache.set(query_id, sim_map_result)
+    task_cache.set(query_id, True)
 
 
 @app.get("/updated_search_results")
 async def updated_search_results(query_id: str):
-
-    if
+    result = result_cache.get(query_id)
+    if result is None:
         return HTMLResponse(status_code=204)
-    search_results = (
-
-
-
-
-
+    search_results = get_results_children(result)
+    # Check if task is completed - Stop polling if it is
+    if task_cache.get(query_id):
+        updated_content = SearchResult(results=search_results, query_id=None)
+    else:
+        updated_content = SearchResult(results=search_results, query_id=query_id)
     return updated_content
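main.py relies on an LRUCache exposing get/set for both result_cache and task_cache; its implementation is not part of this diff. A minimal sketch of a cache with that interface, for reference only (not the Space's actual class):

from collections import OrderedDict

class LRUCache:
    """Minimal get/set LRU cache matching how main.py uses result_cache and task_cache.
    Sketch only - the project's real LRUCache is defined elsewhere in the repo."""

    def __init__(self, max_size: int = 20):
        self.max_size = max_size
        self._data: OrderedDict = OrderedDict()

    def get(self, key):
        if key not in self._data:
            return None
        self._data.move_to_end(key)  # mark as most recently used
        return self._data[key]

    def set(self, key, value):
        self._data[key] = value
        self._data.move_to_end(key)
        if len(self._data) > self.max_size:
            self._data.popitem(last=False)  # evict least recently used entry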
output.css
CHANGED
@@ -1117,6 +1117,10 @@ body {
   justify-items: center;
 }
 
+.gap-1\.5 {
+  gap: 0.375rem;
+}
+
 .gap-2 {
   gap: 0.5rem;
 }
@@ -1949,6 +1953,11 @@ body {
   }
 }
 
+.text-highlight strong {
+  background-color: #61D790;
+  color: #2E2F27;
+}
+
 .tokens-button {
   background-color: #B7E2F1;
   color: #2E2F27;