import gradio as gr
from ctransformers import AutoModelForCausalLM

# System prompt prepended to every user message; left empty here so the model
# only sees the instruction-tagged user input.
PROMPT_TEMPLATE = ""

def load_llm():
    # gpu_layers controls how many layers are offloaded to the GPU.
    # Set it to 0 if no GPU acceleration is available on your system.
    llm = AutoModelForCausalLM.from_pretrained(
        "s3nh/PY007-TinyLlama-1.1B-Chat-v0.2-GGUF",
        model_file="PY007-TinyLlama-1.1B-Chat-v0.2.Q4_K_M.gguf",
        model_type="llama",
        gpu_layers=0,
        max_new_tokens=1096,
        repetition_penalty=1.13,
        temperature=0.1,
    )
    return llm

# Load the model once at startup rather than on every message.
llm = load_llm()

def llm_function(message, chat_history):
    # Wrap the user message in Llama-style instruction tags and prepend the prompt template.
    formatted_message = PROMPT_TEMPLATE + f"<s>[INST]{message}[/INST]</s>"
    response = llm(formatted_message)
    return response

title = "这里是小兮辞"  # "This is Xiaoxici" (display name of the chatbot)

examples = [
    'What is yellow fever?',
]

gr.ChatInterface(
    fn=llm_function,
    title=title,
    examples=examples
).launch()
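
# Usage sketch (the filename app.py is illustrative, not part of the original source):
#   pip install gradio ctransformers
#   python app.py
# Gradio then serves the chat UI at a local URL (by default http://127.0.0.1:7860).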