# Hugging Face page residue (not code) — preserved as comments so the file parses:
# CMLL's picture
# Update app.py
# cdddf91 verified
# raw
# history blame
# 1.99 kB
# --- Runtime dependency setup ----------------------------------------------
import os
import subprocess
import sys
import urllib.request

# Install required libraries at startup (typical for a Space without a
# requirements.txt).  `sys.executable -m pip` guarantees the packages are
# installed into the interpreter actually running this script, whereas a
# bare `pip` on PATH may belong to a different Python.
subprocess.check_call([sys.executable, "-m", "pip", "install", "llama-cpp-python"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio"])

# --- Model download ---------------------------------------------------------
model_url = "https://huggingface.co/CMLL/ZhongJing-2-1_8b-GGUF/resolve/main/ZhongJing1_5-1_8b-fp16.gguf"
model_path = "ggml-model.gguf"
if not os.path.exists(model_path):
    # stdlib download instead of shelling out to `wget`, which is not
    # guaranteed to be present in the container image.
    urllib.request.urlretrieve(model_url, model_path)
# Launch the web UI
import gradio as gr
import copy
import time  # NOTE(review): unused in the visible code — kept in case another chunk uses it
from llama_cpp import Llama

# Load the GGUF model downloaded above.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,  # context window size in tokens
    n_gpu_layers=100, # remove (or set to 0) when running on CPU only
)
# NOTE(review): this module-level list is shadowed by the `history`
# parameter of generate_text below and is never read — likely dead code.
history = []

# System prompt prepended to every conversation transcript.
system_message = """
You are a helpful TCM medical assistant named 仲景中医大语言模型.
"""
def generate_text(message, history):
    """Stream a model completion for *message* given the chat *history*.

    Parameters
    ----------
    message : str
        The latest user utterance.
    history : list of (user, assistant) pairs
        Prior turns, supplied and managed by gr.ChatInterface.

    Yields
    ------
    str
        The assistant reply accumulated so far; Gradio re-renders the chat
        bubble on every yield.
    """
    # Build a flat USER/ASSISTANT transcript prompt (join avoids the
    # quadratic cost of repeated string concatenation).
    parts = [f"{system_message}"]
    for interaction in history:
        parts.append("\nUSER: " + str(interaction[0]) + "\nASSISTANT: " + str(interaction[1]))
    parts.append("\nUSER: " + str(message) + "\nASSISTANT: ")
    input_prompt = "".join(parts)

    output = llm.create_completion(
        input_prompt,
        temperature=0.7,
        top_p=0.3,
        top_k=40,
        repeat_penalty=1.1,
        max_tokens=1024,
        # Stop before the model starts inventing the next turn itself.
        stop=[
            "ASSISTANT:",
            "USER:",
            "SYSTEM:",
        ],
        stream=True,
    )

    reply = ""
    for chunk in output:
        # Each streamed chunk is only read, so no deepcopy is needed.
        reply += chunk["choices"][0]["text"]
        yield reply
    # Intentionally no manual `history.append(...)` here: gr.ChatInterface
    # maintains the conversation state itself, and mutating the list it
    # passes in can duplicate turns in the prompt on subsequent messages.
# --- Gradio app wiring ------------------------------------------------------
# NOTE: `retry_btn`/`undo_btn`/`clear_btn` were deprecated in Gradio 4 and
# removed in Gradio 5; since the install above is unpinned, passing them
# would raise TypeError at startup, so they are omitted here.
demo = gr.ChatInterface(
    generate_text,
    title="ZhongJingGPT-V2-1_8B-GGUF chatbot using llama-cpp-python",
    description="",
    # NOTE(review): the example is Japanese general knowledge ("name the
    # prefectures of Shikoku") while the model is a TCM assistant — confirm
    # this is the intended sample prompt.
    examples=["日本の四国にある県名を挙げてください。"],
    # Caching runs the model once per example at build time.
    cache_examples=True,
)
# Gradio 4 removed queue(concurrency_count=...); the replacement keyword is
# `default_concurrency_limit` (same meaning: events processed at once).
demo.queue(default_concurrency_limit=1, max_size=5)
demo.launch(debug=True, share=True)