|
import textwrap

import torch
import gradio as gr
import transformers
|
def wrap_text(text, width=90):
    # Wrap each line independently so existing line breaks are preserved.
    lines = text.split('\n')
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
    wrapped_text = '\n'.join(wrapped_lines)
    return wrapped_text
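
# Illustrative use: wrap_text("word " * 30, width=40) folds the repeated
# text at 40 columns while keeping any existing line breaks.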
|
def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:", max_length=512):
    # One-shot generation helper (not wired into the Gradio UI below):
    # prepend the system prompt, generate, and decode the full output.
    # `max_length` caps prompt plus generated tokens.
    formatted_input = f"{system_prompt} {user_input}"

    encodeds = tokenizer(formatted_input, return_tensors="pt", add_special_tokens=False)
    model_inputs = encodeds.to(device)

    output = model.generate(
        **model_inputs,
        max_length=max_length,
        use_cache=True,
        early_stopping=True,
        bos_token_id=model.config.bos_token_id,
        eos_token_id=model.config.eos_token_id,
        pad_token_id=model.config.eos_token_id,
        temperature=0.1,
        do_sample=True,
    )

    response_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return response_text
|
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "OpenLLM-France/Claire-Mistral-7B-0.1"

tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",           # place weights automatically across available devices
    torch_dtype=torch.bfloat16,
    load_in_4bit=True,           # 4-bit quantization; requires the bitsandbytes package
)
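
# Optional sanity check (illustrative only; the prompt below is a made-up
# example, not part of the Space). Uncomment to try a one-off completion
# now that the model and tokenizer are loaded:
# print(wrap_text(multimodal_prompt("Bonjour, comment allez-vous ?")))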
|
class ChatBot:
    def __init__(self):
        self.history = []

    def predict(self, user_input, system_prompt):
        formatted_input = f"{system_prompt} {user_input}"

        # Move the encoded turn onto the model's device
        # (device_map="auto" may have placed the weights on GPU).
        user_input_ids = tokenizer.encode(formatted_input, return_tensors="pt").to(model.device)

        # Append the new turn to the running conversation, if there is one.
        if len(self.history) > 0:
            chat_history_ids = torch.cat([self.history, user_input_ids], dim=-1)
        else:
            chat_history_ids = user_input_ids

        # max_length counts prompt plus generated tokens, so a long history
        # leaves less room for the reply.
        response = model.generate(input_ids=chat_history_ids, max_length=512, pad_token_id=tokenizer.eos_token_id)

        # Keep the full sequence (prompt and reply) so the next turn has context.
        self.history = response

        # Decode only the newly generated tokens, not the echoed history.
        response_text = tokenizer.decode(response[0][chat_history_ids.shape[-1]:], skip_special_tokens=True)
        return response_text
|
bot = ChatBot() |
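
# Direct call (illustrative): Gradio passes the two textboxes below
# positionally as (user_input, system_prompt), so the first part of the
# dialogue arrives as `system_prompt` and is prepended to the prompt.
# E.g., once `examples` is defined below:
# bot.predict(examples[0][0], examples[0][1])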
|
title = "👋🏻Welcome to Tonic's Claire Chat🚀"
description = "You can use this Space to test out the current model ([ClaireLLM](https://huggingface.co/OpenLLM-France/Claire-Mistral-7B-0.1)) or duplicate this Space and use it for any other model on 🤗HuggingFace. Join me on [Discord to build together](https://discord.gg/nXx5wbX9)."
examples = [["[Estragon :] On va voir. Tiens. Ils prennent chacun un bout de la corde et tirent. La corde se casse. Ils manquent de tomber.", "[Vladimir :] Fais voir quand même. (Estragon dénoue la corde qui maintient son pantalon. Celui-ci, beaucoup trop large, lui tombe autour des chevilles. Ils regardent la corde.) À la rigueur ça pourrait aller. Mais est-elle solide ?"]]
|
iface = gr.Interface(
    fn=bot.predict,
    title=title,
    description=description,
    examples=examples,
    # These are passed in order to bot.predict as (user_input, system_prompt).
    inputs=[
        gr.Textbox(label="Second part of the dialogue"),
        gr.Textbox(label="First part of the dialogue"),
    ],
    outputs=gr.Textbox(label="Claire LLM Dialogue"),
    theme="ParityError/Anime",
)
|
iface.launch() |
|
|