import gradio as gr

from ctransformers import AutoModelForCausalLM

# System prompt prepended to every user message. The original template text
# was not included in this listing; an empty string keeps the script runnable.
PROMPT_TEMPLATE = ""


def load_llm():
    # Download the quantized TinyLlama GGUF file from the Hugging Face Hub
    # (cached locally after the first run) and load it for CPU-only inference.
    llm = AutoModelForCausalLM.from_pretrained(
        "s3nh/PY007-TinyLlama-1.1B-Chat-v0.2-GGUF",
        model_file="PY007-TinyLlama-1.1B-Chat-v0.2.Q4_K_M.gguf",
        model_type="llama",
        gpu_layers=0,  # 0 = no layers offloaded to a GPU
        max_new_tokens=1096,
        repetition_penalty=1.13,
        temperature=0.1,
    )
    return llm


# Load the model once at start-up rather than on every chat request.
llm = load_llm()


def llm_function(message, chat_history):
    # Wrap the user message in Llama-style instruction tags. The closing </s>
    # is omitted so the model is not handed an end-of-sequence marker before
    # it has generated a reply.
    formatted_message = PROMPT_TEMPLATE + f"<s>[INST] {message} [/INST]"
    response = llm(formatted_message)
    return response
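

# Optional streaming variant (a sketch, not part of the original app). With
# ctransformers, calling the model with stream=True yields tokens as they are
# generated, and gr.ChatInterface accepts a generator function that yields the
# growing reply. The name llm_function_stream is ours; pass
# fn=llm_function_stream to gr.ChatInterface below to try it.
def llm_function_stream(message, chat_history):
    formatted_message = PROMPT_TEMPLATE + f"<s>[INST] {message} [/INST]"
    partial = ""
    for token in llm(formatted_message, stream=True):
        partial += token
        yield partial  # Gradio shows each yielded string as the reply so far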
title = "这里是小兮辞" |
|
|
|

examples = [
    'What is yellow fever?',
]

gr.ChatInterface(
    fn=llm_function,
    title=title,
    examples=examples,
).launch()
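
# launch() starts a local web server (http://127.0.0.1:7860 by default) and
# serves the chat UI; pass share=True to launch() for a temporary public link.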