# Hugging Face page residue (not code) — preserved as comments so the file parses:
# CMLL's picture
# Update app.py
# cdddf91 verified
# raw
# history blame
# 1.99 kB
# --- Runtime dependency setup ----------------------------------------------
import os
import subprocess
import sys
import urllib.request

# Install required libraries at startup (typical for a Space without a
# requirements.txt).  `sys.executable -m pip` guarantees the packages are
# installed into the interpreter actually running this script, whereas a
# bare `pip` on PATH may belong to a different Python.
subprocess.check_call([sys.executable, "-m", "pip", "install", "llama-cpp-python"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio"])

# --- Model download ---------------------------------------------------------
model_url = "https://huggingface.co/CMLL/ZhongJing-2-1_8b-GGUF/resolve/main/ZhongJing1_5-1_8b-fp16.gguf"
model_path = "ggml-model.gguf"
if not os.path.exists(model_path):
    # stdlib download instead of shelling out to `wget`, which is not
    # guaranteed to be present in the container image.
    urllib.request.urlretrieve(model_url, model_path)
# Launch the web UI
import gradio as gr
import copy
import time  # NOTE(review): unused in the visible code — kept in case another chunk uses it
from llama_cpp import Llama

# Load the GGUF model downloaded above.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,  # context window size in tokens
    n_gpu_layers=100, # remove (or set to 0) when running on CPU only
)
# NOTE(review): this module-level list is shadowed by the `history`
# parameter of generate_text below and is never read — likely dead code.
history = []

# System prompt prepended to every conversation transcript.
system_message = """
You are a helpful TCM medical assistant named 仲景中医大语言模型.
"""
def generate_text(message, history):
    """Stream a model completion for *message* given the chat *history*.

    Parameters
    ----------
    message : str
        The latest user utterance.
    history : list of (user, assistant) pairs
        Prior turns, supplied and managed by gr.ChatInterface.

    Yields
    ------
    str
        The assistant reply accumulated so far; Gradio re-renders the chat
        bubble on every yield.
    """
    # Build a flat USER/ASSISTANT transcript prompt (join avoids the
    # quadratic cost of repeated string concatenation).
    parts = [f"{system_message}"]
    for interaction in history:
        parts.append("\nUSER: " + str(interaction[0]) + "\nASSISTANT: " + str(interaction[1]))
    parts.append("\nUSER: " + str(message) + "\nASSISTANT: ")
    input_prompt = "".join(parts)

    output = llm.create_completion(
        input_prompt,
        temperature=0.7,
        top_p=0.3,
        top_k=40,
        repeat_penalty=1.1,
        max_tokens=1024,
        # Stop before the model starts inventing the next turn itself.
        stop=[
            "ASSISTANT:",
            "USER:",
            "SYSTEM:",
        ],
        stream=True,
    )

    reply = ""
    for chunk in output:
        # Each streamed chunk is only read, so no deepcopy is needed.
        reply += chunk["choices"][0]["text"]
        yield reply
    # Intentionally no manual `history.append(...)` here: gr.ChatInterface
    # maintains the conversation state itself, and mutating the list it
    # passes in can duplicate turns in the prompt on subsequent messages.
# --- Gradio app wiring ------------------------------------------------------
# NOTE: `retry_btn`/`undo_btn`/`clear_btn` were deprecated in Gradio 4 and
# removed in Gradio 5; since the install above is unpinned, passing them
# would raise TypeError at startup, so they are omitted here.
demo = gr.ChatInterface(
    generate_text,
    title="ZhongJingGPT-V2-1_8B-GGUF chatbot using llama-cpp-python",
    description="",
    # NOTE(review): the example is Japanese general knowledge ("name the
    # prefectures of Shikoku") while the model is a TCM assistant — confirm
    # this is the intended sample prompt.
    examples=["日本の四国にある県名を挙げてください。"],
    # Caching runs the model once per example at build time.
    cache_examples=True,
)
# Gradio 4 removed queue(concurrency_count=...); the replacement keyword is
# `default_concurrency_limit` (same meaning: events processed at once).
demo.queue(default_concurrency_limit=1, max_size=5)
demo.launch(debug=True, share=True)