# TinySEA / app.py — Gradio chat Space serving a TinyLlama GGUF model.
import gradio as gr
import time
from ctransformers import AutoModelForCausalLM # Please ensure this import is correct
from huggingface_hub import hf_hub_download
# System/prefix text prepended to every user message before it is sent to the
# model. Must be a *string*: llm_function concatenates it with an f-string.
# (The original `()` was an empty tuple, which made that concatenation raise
# TypeError on the first message.)
PROMPT_TEMPLATE = ""
def load_llm():
    """Load (and memoize) the TinyLlama GGUF chat model via ctransformers.

    The loaded model is cached on the function object so repeated calls —
    llm_function calls this once per chat message — do not re-read the
    multi-hundred-MB GGUF weights from disk each time.

    Returns:
        The ctransformers ``AutoModelForCausalLM`` instance, callable on a
        prompt string.
    """
    # Reuse the already-loaded model if present; loading GGUF weights is slow.
    if getattr(load_llm, "_cached_llm", None) is None:
        # Set gpu_layers to the number of layers to offload to GPU.
        # 0 = CPU-only inference (no GPU acceleration available on this Space).
        load_llm._cached_llm = AutoModelForCausalLM.from_pretrained(
            "s3nh/PY007-TinyLlama-1.1B-Chat-v0.2-GGUF",
            model_file="PY007-TinyLlama-1.1B-Chat-v0.2.Q4_K_M.gguf",
            model_type="llama",
            gpu_layers=0,
            max_new_tokens=1096,
            repetition_penalty=1.13,
            temperature=0.1,
        )
    return load_llm._cached_llm
def llm_function(message, chat_history):
    """Gradio ChatInterface callback: generate a model reply for *message*.

    Args:
        message: The user's latest chat message (plain text).
        chat_history: Prior conversation turns supplied by Gradio. Currently
            unused — every reply is stateless and sees only *message*.

    Returns:
        The model's generated text for the formatted prompt.
    """
    llm = load_llm()
    # Guard: PROMPT_TEMPLATE must be a string for this concatenation. The
    # module previously defined it as an empty tuple, which made `+` raise
    # TypeError; fall back to no prefix if it is not a string.
    prefix = PROMPT_TEMPLATE if isinstance(PROMPT_TEMPLATE, str) else ""
    # Llama-style instruction wrapping expected by this chat fine-tune.
    formatted_message = prefix + f"<s>[INST]{message}[/INST]</s>"
    return llm(formatted_message)
# --- UI configuration for the Gradio chat front-end ---
title = "这里是小兮辞"
examples = [
    'What is yellow fever.',
]

# Build the chat UI around llm_function and start the web server
# (blocks until the Space/process is stopped).
chat_ui = gr.ChatInterface(
    fn=llm_function,
    title=title,
    examples=examples,
)
chat_ui.launch()