File size: 1,752 Bytes
426db28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import logging

import datasets
from findkit import indexes


import gradio as gr

# Configure the root logger at INFO level for the whole script.
logging.basicConfig(level="INFO")


def get_html_retrieval_results(retrieval_result, show_only_one_match_per_episode):
    """Render a frame of retrieval hits as an HTML table.

    Args:
        retrieval_result: DataFrame of hits; must have an ``episode`` column
            when deduplication is requested.
        show_only_one_match_per_episode: When truthy, keep only the first
            row for each distinct ``episode`` value.

    Returns:
        The HTML table (links rendered as anchors, index column omitted),
        or an empty string when there are no rows to show.
    """
    rows = (
        retrieval_result.drop_duplicates(subset=["episode"])
        if show_only_one_match_per_episode
        else retrieval_result
    )
    # No rows: return an empty string instead of a header-only table.
    if len(rows) == 0:
        return ""
    return rows.to_html(render_links=True, index=False)


def get_retrieval_results(findkit_index, query, n_retrieved_results):
    """Fetch the hits most similar to *query* from the index.

    Args:
        findkit_index: Object exposing ``find_similar(query, n)`` that
            returns a DataFrame.
        query: Search text passed through to the index.
        n_retrieved_results: Number of hits to request from the index.

    Returns:
        The DataFrame returned by ``find_similar`` with its ``distance``
        column renamed to ``bm25_score``. Frames without a ``distance``
        column are returned with their columns unchanged.
    """
    retrieval_results_df = findkit_index.find_similar(query, n_retrieved_results)
    # DataFrame.rename maps *index labels* when given a bare mapping, so the
    # column has to be targeted explicitly for the rename to take effect.
    return retrieval_results_df.rename(columns={"distance": "bm25_score"})


def setup_df():
    """Load the podcast dataset's ``train`` split as a DataFrame.

    Returns:
        The split converted to pandas, with ``dropna()`` applied.
    """
    train_split = datasets.load_dataset("lambdaofgod/lex_fridman_podcast")["train"]
    return train_split.to_pandas().dropna()


def setup_index():
    """Build an in-memory BM25 index from the podcast DataFrame.

    The ``text`` column and the full frame are both handed to
    ``InMemoryBM25Index.build``.
    NOTE(review): presumably the first argument is the text to index and the
    second the data returned with each hit -- confirm against findkit.
    """
    podcast_df = setup_df()
    text_column = podcast_df["text"]
    return indexes.InMemoryBM25Index.build(text_column, podcast_df)


# Built once when the module runs; show_retrieval_results reads this global
# on every call instead of rebuilding the index.
findkit_index = setup_index()


def show_retrieval_results(query, n_retrieved_results, show_only_one_match_per_episode):
    """Callback wired into the Gradio interface: run a query, return HTML.

    Args:
        query: Search text entered by the user.
        n_retrieved_results: Number of hits to request from the index.
        show_only_one_match_per_episode: Whether to keep a single hit per
            episode before rendering.

    Returns:
        The value produced by ``get_html_retrieval_results`` for the hits
        found in the module-level ``findkit_index``.
    """
    matches = get_retrieval_results(findkit_index, query, n_retrieved_results)
    return get_html_retrieval_results(matches, show_only_one_match_per_episode)


# Input widgets. Their order in `inputs` below matches the positional
# parameters of show_retrieval_results.
show_only_one_match_per_episode = gr.Checkbox(
    label="show only one match per episode", value=False
)
# NOTE(review): precision=0 is presumably what makes this field yield a whole
# number -- confirm against the gr.Number documentation.
n_retrieved_results = gr.Number(label="number of results", value=10, precision=0)
query = gr.Textbox(label="input query", value="artificial life")

# Connect the callback to the widgets, with "html" as the declared output type.
demo = gr.Interface(
    fn=show_retrieval_results,
    inputs=[query, n_retrieved_results, show_only_one_match_per_episode],
    outputs="html",
)

# Runs as soon as this module is executed; there is no __main__ guard.
demo.launch()