jerpint committed on
Commit
444aa3f
·
1 Parent(s): 6a4ac5a

update apps to work with latest refactor

Browse files
buster/apps/gradio_app.py CHANGED
@@ -1,35 +1,73 @@
1
  import os
2
- import pathlib
3
 
4
  import gradio as gr
 
 
5
 
6
  from buster.apps.bot_configs import available_configs
7
  from buster.busterbot import Buster, BusterConfig
8
  from buster.retriever import Retriever
9
  from buster.utils import download_db, get_retriever_from_extension
10
 
 
 
 
11
  DEFAULT_CONFIG = "huggingface"
12
- DB_URL = "https://huggingface.co/datasets/jerpint/buster-data/resolve/main/documents.db"
13
 
14
- # Download the db...
15
- documents_filepath = download_db(db_url=DB_URL, output_dir="./data")
16
- retriever: Retriever = get_retriever_from_extension(documents_filepath)(documents_filepath)
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  # initialize buster with the default config...
19
  default_cfg: BusterConfig = available_configs.get(DEFAULT_CONFIG)
20
  buster = Buster(cfg=default_cfg, retriever=retriever)
21
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def chat(question, history, bot_source):
24
  history = history or []
25
  cfg = available_configs.get(bot_source)
26
  buster.update_cfg(cfg)
27
- answer = buster.process_input(question)
28
 
29
  # formatting hack for code blocks to render properly every time
30
- answer = answer.replace("```", "\n```\n")
 
 
 
 
 
 
 
 
 
31
 
32
- history.append((question, answer))
33
  return history, history
34
 
35
 
 
1
  import os
2
+ import logging
3
 
4
  import gradio as gr
5
+ from huggingface_hub import hf_hub_download
6
+ import pandas as pd
7
 
8
  from buster.apps.bot_configs import available_configs
9
  from buster.busterbot import Buster, BusterConfig
10
  from buster.retriever import Retriever
11
  from buster.utils import download_db, get_retriever_from_extension
12
 
13
+ logger = logging.getLogger(__name__)
14
+ logging.basicConfig(level=logging.INFO)
15
+
16
  DEFAULT_CONFIG = "huggingface"
 
17
 
18
+ # DOWNLOAD FROM HF HUB
19
+ HUB_TOKEN = os.getenv("HUB_TOKEN")
20
+ REPO_ID = "jerpint/buster-data"
21
+ HUB_DB_FILE = "documents.db"
22
+ logger.info(f"Downloading {HUB_DB_FILE} from hub...")
23
+ hf_hub_download(
24
+ repo_id=REPO_ID,
25
+ repo_type="dataset",
26
+ filename=HUB_DB_FILE,
27
+ token=HUB_TOKEN,
28
+ local_dir=".",
29
+ )
30
+ logger.info(f"Downloaded.")
31
+ retriever: Retriever = get_retriever_from_extension(HUB_DB_FILE)(HUB_DB_FILE)
32
 
33
  # initialize buster with the default config...
34
  default_cfg: BusterConfig = available_configs.get(DEFAULT_CONFIG)
35
  buster = Buster(cfg=default_cfg, retriever=retriever)
36
 
37
 
38
+ def format_sources(matched_documents: pd.DataFrame) -> str:
39
+ if len(matched_documents) == 0:
40
+ return ""
41
+
42
+ sourced_answer_template: str = (
43
+ """📝 Here are the sources I used to answer your question:<br>""" """{sources}<br><br>""" """{footnote}"""
44
+ )
45
+ source_template: str = """[🔗 {source.title}]({source.url}), relevance: {source.similarity:2.1f} %"""
46
+
47
+ matched_documents.similarity = matched_documents.similarity * 100
48
+ sources = "<br>".join([source_template.format(source=source) for _, source in matched_documents.iterrows()])
49
+ footnote: str = "I'm a bot 🤖 and not always perfect."
50
+
51
+ return sourced_answer_template.format(sources=sources, footnote=footnote)
52
+
53
+
54
  def chat(question, history, bot_source):
55
  history = history or []
56
  cfg = available_configs.get(bot_source)
57
  buster.update_cfg(cfg)
 
58
 
59
  # formatting hack for code blocks to render properly every time
60
+ # answer = answer.replace("```", "\n```\n")
61
+ response = buster.process_input(question)
62
+
63
+ # formatted_sources = source_formatter(sources)
64
+ matched_documents = response.matched_documents
65
+
66
+ formatted_sources = format_sources(matched_documents)
67
+ formatted_response = f"{response.completion.text}<br><br>" + formatted_sources
68
+
69
+ history.append((question, formatted_response))
70
 
 
71
  return history, history
72
 
73
 
buster/examples/gradio_app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import cfg
2
  import gradio as gr
3
 
@@ -10,14 +11,34 @@ retriever: Retriever = get_retriever_from_extension(cfg.documents_filepath)(cfg.
10
  buster: Buster = Buster(cfg=cfg.buster_cfg, retriever=retriever)
11
 
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def chat(question, history):
14
  history = history or []
15
- answer = buster.process_input(question)
 
 
 
 
 
 
16
 
17
- # formatting hack for code blocks to render properly every time
18
- answer = answer.replace("```", "\n```\n")
19
 
20
- history.append((question, answer))
21
  return history, history
22
 
23
 
 
1
+ import pandas as pd
2
  import cfg
3
  import gradio as gr
4
 
 
11
  buster: Buster = Buster(cfg=cfg.buster_cfg, retriever=retriever)
12
 
13
 
14
+ def format_sources(matched_documents: pd.DataFrame) -> str:
15
+ if len(matched_documents) == 0:
16
+ return ""
17
+
18
+ sourced_answer_template: str = (
19
+ """📝 Here are the sources I used to answer your question:<br>""" """{sources}<br><br>""" """{footnote}"""
20
+ )
21
+ source_template: str = """[🔗 {source.title}]({source.url}), relevance: {source.similarity:2.1f} %"""
22
+
23
+ matched_documents.similarity = matched_documents.similarity * 100
24
+ sources = "<br>".join([source_template.format(source=source) for _, source in matched_documents.iterrows()])
25
+ footnote: str = "I'm a bot 🤖 and not always perfect."
26
+
27
+ return sourced_answer_template.format(sources=sources, footnote=footnote)
28
+
29
+
30
  def chat(question, history):
31
  history = history or []
32
+ response = buster.process_input(question)
33
+
34
+ # formatted_sources = source_formatter(sources)
35
+ matched_documents = response.matched_documents
36
+
37
+ formatted_sources = format_sources(matched_documents)
38
+ formatted_response = f"{response.completion.text}<br><br>" + formatted_sources
39
 
40
+ history.append((question, formatted_response))
 
41
 
 
42
  return history, history
43
 
44