import gradio as gr

from ctransformers import AutoModelForCausalLM

# System prompt prepended to every user message. The original template text
# was not included in this listing; an empty string keeps the script runnable.
PROMPT_TEMPLATE = ""


def load_llm():
    # Download the quantized TinyLlama GGUF file from the Hugging Face Hub
    # (cached locally after the first run) and load it for CPU-only inference.
    llm = AutoModelForCausalLM.from_pretrained(
        "s3nh/PY007-TinyLlama-1.1B-Chat-v0.2-GGUF",
        model_file="PY007-TinyLlama-1.1B-Chat-v0.2.Q4_K_M.gguf",
        model_type="llama",
        gpu_layers=0,  # 0 = no layers offloaded to a GPU
        max_new_tokens=1096,
        repetition_penalty=1.13,
        temperature=0.1,
    )
    return llm


# Load the model once at start-up rather than on every chat request.
llm = load_llm()


def llm_function(message, chat_history):
    # Wrap the user message in Llama-style instruction tags. The closing </s>
    # is omitted so the model is not handed an end-of-sequence marker before
    # it has generated a reply.
    formatted_message = PROMPT_TEMPLATE + f"<s>[INST] {message} [/INST]"
    response = llm(formatted_message)
    return response
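

# Optional streaming variant (a sketch, not part of the original app). With
# ctransformers, calling the model with stream=True yields tokens as they are
# generated, and gr.ChatInterface accepts a generator function that yields the
# growing reply. The name llm_function_stream is ours; pass
# fn=llm_function_stream to gr.ChatInterface below to try it.
def llm_function_stream(message, chat_history):
    formatted_message = PROMPT_TEMPLATE + f"<s>[INST] {message} [/INST]"
    partial = ""
    for token in llm(formatted_message, stream=True):
        partial += token
        yield partial  # Gradio shows each yielded string as the reply so far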
title = "这里是小兮辞" |
|
|
|

examples = [
    'What is yellow fever?',
]

gr.ChatInterface(
    fn=llm_function,
    title=title,
    examples=examples,
).launch()
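
# launch() starts a local web server (http://127.0.0.1:7860 by default) and
# serves the chat UI; pass share=True to launch() for a temporary public link.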