from typing import List, Optional, TypedDict

import gradio as gr
import pandas as pd
|
|
class Hit(TypedDict):
    """One retrieval result, shaped as a plain JSON-friendly dict."""

    # Identifier of the retrieved item, as keyed in the retriever's ranking.
    cid: str
    # Relevance score that the retriever assigned to this item.
    score: float
    # Text of the item, looked up from the retriever's index.
    text: str
|
|
|
# Placeholder for the Gradio app; it is rebound to the real interface once
# the retrieval function has been defined.
demo: Optional[gr.Interface] = None

# Alias used as the return annotation of `retrieve`: a list of `Hit` records.
return_type = List[Hit]
|
|
|
|
|
def retrieve(query: str, topk: int = 10) -> return_type:
    """Return the top-ranked hits for *query* from the BM25 retriever.

    Args:
        query: Free-text query forwarded to the retriever.
        topk: Maximum number of results requested from the retriever. This
            value is now actually forwarded; a hard-coded ``3`` previously
            overrode whatever the caller asked for.

    Returns:
        A list of ``{"cid", "score", "text"}`` dicts, in the iteration order
        of the ranking returned by the retriever.

    Note:
        ``bm25_retriever`` is a module-level object that is not defined in
        this part of the file; it must exist before this function is called.
    """
    ranking = bm25_retriever.retrieve(query=query, topk=topk)

    # Resolve the index once instead of on every loop iteration.
    index = bm25_retriever.index

    return [
        {
            "cid": cid,
            "score": score,
            # cid -> internal doc id -> stored document text.
            "text": index.doc_texts[index.cid2docid[cid]],
        }
        for cid, score in ranking.items()
    ]
|
|
|
# Build the web UI: one multi-line text box in, the raw list of hits out as JSON.
demo = gr.Interface(
    title="CSC BM25 Retriever",
    description="Retrieve documents based on the query using CSC BM25 Retriever",
    fn=retrieve,
    inputs=gr.Textbox(placeholder="Enter your query here...", lines=3),
    outputs="json",
    # Each example is a single-element row because the interface has one input.
    examples=[
        ["What are the differences between immunodeficiency and autoimmune diseases?"],
        ["What are the causes of immunodeficiency?"],
        ["What are the symptoms of immunodeficiency?"],
    ],
)

# Start serving the app as soon as this module is executed.
demo.launch()