from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration
import torch
import gradio as gr
import os
import csv
from os.path import exists
from datetime import datetime
import pandas as pd

UseMemory = True
HF_TOKEN = os.environ.get("HF_TOKEN")  # read from the environment; not used below


def SaveResult(text, outputfileName):
    """Append one line of text to outputfileName, creating the file on first use."""
    savePath = outputfileName
    print("Saving: " + text + " to " + savePath)
    if exists(savePath):
        with open(outputfileName, "a") as f:  # append
            f.write(str(text.replace("\n", " ")))
            f.write("\n")
    else:
        with open(outputfileName, "w") as f:  # write
            f.write(str(text.replace("\n", " ")))
            f.write("\n")
    return


def store_message(name: str, message: str, outputfileName: str):
    """Append one (time, message, name) row to the CSV memory file and return it, newest first."""
    savePath = outputfileName
    df = pd.DataFrame()  # returned unchanged if there is nothing to store
    if name and message:
        file_exists = exists(savePath)
        with open(savePath, "a") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["time", "message", "name"])
            if not file_exists:
                writer.writeheader()  # header row so pd.read_csv sees named columns
            writer.writerow(
                {"time": str(datetime.now()), "message": message.strip(), "name": name.strip()}
            )
        df = pd.read_csv(savePath)
        df = df.sort_values(df.columns[0], ascending=False)
    return df


mname = "facebook/blenderbot-400M-distill"
model = BlenderbotForConditionalGeneration.from_pretrained(mname)
tokenizer = BlenderbotTokenizer.from_pretrained(mname)


def take_last_tokens(inputs, note_history, history):
    """Keep only the last 128 tokens so the input fits the model's context window."""
    if inputs["input_ids"].shape[1] > 128:
        inputs["input_ids"] = torch.tensor([inputs["input_ids"][0][-128:].tolist()])
        inputs["attention_mask"] = torch.tensor([inputs["attention_mask"][0][-128:].tolist()])
        # also drop the oldest (user, bot) exchange from the running transcript
        note_history = ["</s> <s>".join(note_history[0].split("</s> <s>")[2:])]
        history = history[1:]
    return inputs, note_history, history


def add_note_to_history(note, note_history):
    # good example of non-async, since we wait around until we know it went okay
    note_history.append(note)
    note_history = "</s> <s>".join(note_history)  # "</s> <s>" is Blenderbot's turn separator
    return [note_history]


title = "💬ChatBack🧠💾"
description = """Chatbot with a persistent memory dataset, letting a multi-agent AI system share one dataset as a memory pool of stored interactions.
Current best SOTA chatbot: https://huggingface.co/facebook/blenderbot-400M-distill?text=Hey+my+name+is+ChatBack%21+Are+you+ready+to+rock%3F"""


def chat(message, history):
    history = history or []
    if history:
        history_useful = ["</s> <s>".join([str(a[0]) + "</s> <s>" + str(a[1]) for a in history])]
    else:
        history_useful = []
    history_useful = add_note_to_history(message, history_useful)
    inputs = tokenizer(history_useful, return_tensors="pt")
    inputs, history_useful, history = take_last_tokens(inputs, history_useful, history)
    reply_ids = model.generate(**inputs)
    response = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
    history_useful = add_note_to_history(response, history_useful)
    list_history = history_useful[0].split("</s> <s>")
    history.append((list_history[-2], list_history[-1]))

    df = pd.DataFrame()
    if UseMemory:
        outputfileName = "File.csv"  # Save to dataset
        # the user message is logged under "name" and the reply under "message"
        df = store_message(message, response, outputfileName)
    # return history, df, outputfileName
    return history, df
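
# A minimal smoke test (a sketch, not part of the original app; the helper name
# _smoke_test is hypothetical): calling chat() directly from a REPL exercises
# the model and the CSV memory without launching the UI.
def _smoke_test():
    history, df = chat("Hi there! How are you today?", [])
    print(history[-1])    # the latest (user utterance, bot reply) pair
    if not df.empty:
        print(df.head())  # newest rows of the memory log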

with gr.Blocks() as demo:
    gr.Markdown("🍰Gradio chatbot backed by memory in a dataset repository.🎨")

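    # Layout: a textbox plus send button, hidden chat state, and a paginated
    # DataFrame that shows the CSV-backed memory after each exchange.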
") #gr.Markdown("The memory dataset for saves is [{DATASET_REPO_URL}]({DATASET_REPO_URL}) And here: https://huggingface.co/spaces/awacke1/DatasetAnalyzer Code and datasets on chat are here hf tk: https://paperswithcode.com/datasets?q=chat&v=lst&o=newest") with gr.Row(): t1 = gr.Textbox(lines=1, default="", label="Chat Text:") b1 = gr.Button("Send Message") with gr.Row(): # inputs and buttons s1 = gr.State([]) s2 = gr.Markdown() with gr.Row(): file = gr.File(label="File"), df1 = gr.Dataframe(wrap=True, max_rows=1000, overflow_row_behaviour= "paginate") #b1.click(fn=chat, inputs=[t1, s1], outputs=[s1, df1, file]) b1.click(fn=chat, inputs=[t1, s1], outputs=[s1, df1]) demo.launch(debug=True, show_error=True)