# File size: 1,039 Bytes
# (extraction artifact removed: git blob hashes and a line-number gutter
#  were fused into the file and would break the Python parser)
import gradio as gr
import time
from ctransformers import AutoModelForCausalLM # Please ensure this import is correct
from huggingface_hub import hf_hub_download
# System-prompt text prepended to every user message before it is sent to
# the model.  Must be a *string*: the original `PROMPT_TEMPLATE = ()` was an
# empty tuple, so `PROMPT_TEMPLATE + f"<s>[INST]...` in llm_function raised
# `TypeError: can only concatenate tuple (not "str") to tuple`.
PROMPT_TEMPLATE = ""
def load_llm():
    """Download (on first use) and load the TinyLlama chat model.

    Uses ctransformers to fetch the quantized GGUF weights from the
    Hugging Face Hub and instantiate a llama-type causal LM.

    Returns:
        The loaded ctransformers model object, callable on a prompt string.
    """
    # gpu_layers=0 keeps inference entirely on the CPU; raise it to
    # offload that many transformer layers to the GPU when available.
    generation_settings = {
        "model_file": "PY007-TinyLlama-1.1B-Chat-v0.2.Q4_K_M.gguf",
        "model_type": "llama",
        "gpu_layers": 0,
        "max_new_tokens": 1096,
        "repetition_penalty": 1.13,
        "temperature": 0.1,
    }
    return AutoModelForCausalLM.from_pretrained(
        "s3nh/PY007-TinyLlama-1.1B-Chat-v0.2-GGUF",
        **generation_settings,
    )
def llm_function(message, chat_history):
    """Gradio ChatInterface callback: generate a model reply for *message*.

    Parameters:
        message: The user's latest chat message (str).
        chat_history: Prior chat turns supplied by Gradio; currently unused.

    Returns:
        The model's generated text for this message.
    """
    # Fix: the original called load_llm() on every message, re-loading the
    # multi-hundred-MB model from disk per chat turn.  Cache the loaded
    # model on the function object so it is loaded exactly once.
    llm = getattr(llm_function, "_llm", None)
    if llm is None:
        llm = llm_function._llm = load_llm()
    # Wrap the user message in the Llama instruction-chat format, with the
    # (optional) system prompt template prepended.
    formatted_message = PROMPT_TEMPLATE + f"<s>[INST]{message}[/INST]</s>"
    return llm(formatted_message)
# --- Gradio chat UI ---

# Heading shown above the chat widget.
title = "这里是小兮辞"

# Clickable example prompts offered to the user in the interface.
examples = [
    'What is yellow fever.',
]

# Build the chat interface around the LLM callback and start the web server.
demo = gr.ChatInterface(
    fn=llm_function,
    title=title,
    examples=examples,
)
demo.launch()