"""Gradio chat front-end for the Buster question-answering bot.

On import this module downloads the documents database from the Hugging Face
hub, builds a retriever and a ``Buster`` instance from it, assembles the
Gradio UI and launches it.
"""
import logging
import os

import gradio as gr
import pandas as pd
from huggingface_hub import hf_hub_download

from buster.apps.bot_configs import available_configs
from buster.busterbot import Buster, BusterConfig
from buster.retriever import Retriever
from buster.utils import get_retriever_from_extension

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

DEFAULT_CONFIG = "huggingface"

# DOWNLOAD FROM HF HUB
HUB_TOKEN = os.getenv("HUB_TOKEN")
REPO_ID = "jerpint/buster-data"
HUB_DB_FILE = "documents.db"

logger.info("Downloading %s from hub...", HUB_DB_FILE)
hf_hub_download(
    repo_id=REPO_ID,
    repo_type="dataset",
    filename=HUB_DB_FILE,
    token=HUB_TOKEN,
    # Download next to the app so the retriever can open HUB_DB_FILE by its bare name.
    local_dir=".",
)
logger.info("Downloaded.")

# The retriever class is picked from the file name, then instantiated on the same file.
retriever: Retriever = get_retriever_from_extension(HUB_DB_FILE)(HUB_DB_FILE)

# initialize buster with the default config...
default_cfg: BusterConfig = available_configs.get(DEFAULT_CONFIG)
buster = Buster(cfg=default_cfg, retriever=retriever)


def format_sources(matched_documents: pd.DataFrame) -> str:
    """Render the matched documents as a markdown list of sources.

    Args:
        matched_documents: One row per retrieved document. The rows are read
            through their ``title``, ``url`` and ``similarity`` fields.

    Returns:
        An empty string when there are no matched documents; otherwise a
        header line, one markdown link per document with its relevance, and
        a closing footnote.
    """
    if len(matched_documents) == 0:
        return ""

    sourced_answer_template: str = (
        "📝 Here are the sources I used to answer your question:\n"
        "{sources}\n\n"
        "{footnote}"
    )
    source_template: str = "[🔗 {source.title}]({source.url}), relevance: {source.similarity:2.1f} %"

    # Scale the score for display as a percentage on a *copy*: assigning back
    # onto `matched_documents` would alter the caller's DataFrame and scale
    # the values again on every subsequent call with the same frame.
    # NOTE(review): presumably similarity is a 0-1 score — confirm with the retriever.
    displayed = matched_documents.assign(similarity=matched_documents.similarity * 100)

    sources = "\n".join(source_template.format(source=source) for _, source in displayed.iterrows())
    footnote: str = "I'm a bot 🤖 and not always perfect."

    return sourced_answer_template.format(sources=sources, footnote=footnote)


def chat(question, history, bot_source):
    """Answer one question and append the exchange to the chat history.

    Args:
        question: Text entered by the user in the question box.
        history: List of ``(question, response)`` pairs accumulated so far,
            or a falsy value (e.g. ``None``) on the first call.
        bot_source: Key into ``available_configs`` chosen in the dropdown.

    Returns:
        The updated history twice: once for the chatbot display and once for
        the session state (see the ``outputs`` of the event handlers below).
    """
    history = history or []

    # Re-point the shared Buster instance at the documentation source selected for this call.
    cfg = available_configs.get(bot_source)
    buster.update_cfg(cfg)

    response = buster.process_input(question)

    formatted_sources = format_sources(response.matched_documents)
    formatted_response = f"{response.completion.text}\n\n" + formatted_sources

    history.append((question, formatted_response))

    return history, history


block = gr.Blocks(css="#chatbot .overflow-y-auto{height:500px}")

with block:
    with gr.Row():
        gr.Markdown("\n\nBuster 🤖: A Question-Answering Bot for open-source libraries\n\n")

    doc_source = gr.Dropdown(
        choices=sorted(available_configs),
        value=DEFAULT_CONFIG,
        interactive=True,
        multiselect=False,
        label="Source of Documentation",
        info="The source of documentation to select from",
    )

    chatbot = gr.Chatbot()

    with gr.Row():
        message = gr.Textbox(
            label="What's your question?",
            placeholder="What kind of model should I use for sentiment analysis?",
            lines=1,
        )
        submit = gr.Button(value="Send", variant="secondary").style(full_width=False)

    examples = gr.Examples(
        # TODO: seems not possible (for now) to update examples on change...
        examples=[
            "What kind of models should I use for images and text?",
            "When should I finetune a model vs. training it form scratch?",
            "Can you give me some python code to quickly finetune a model on my sentiment analysis dataset?",
        ],
        inputs=message,
    )

    gr.Markdown(
        "This simple application uses GPT to search the huggingface 🤗 transformers docs and answer questions. "
        "For more info on huggingface transformers view the "
        "[full documentation.](https://huggingface.co/docs/transformers/index)."
    )

    gr.HTML("️\nCreated with ❤️ by @jerpint and @hadrienbertrand")

    state = gr.State()
    # Not wired to any event in this file; kept so the set of registered components is unchanged.
    agent_state = gr.State()

    # Both the Send button and pressing Enter in the textbox trigger the same handler.
    submit.click(chat, inputs=[message, state, doc_source], outputs=[chatbot, state])
    message.submit(chat, inputs=[message, state, doc_source], outputs=[chatbot, state])


block.launch(debug=True)