Update app.py
app.py CHANGED
@@ -1,45 +1,33 @@
-#import subprocess
-
-# Remove the llama.cpp directory if it exists
-#subprocess.run(["rm", "-rf", "llama.cpp"])
-
-# Clone the llama.cpp repository
-#subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"])
-
-# Change directory to llama.cpp
-#subprocess.run(["make", "LLAMA_CUBLAS=1"], cwd="llama.cpp")
-
-# Download the zephyr-7b-beta model
-#subprocess.run(["wget", "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q6_K.gguf"])
-
-# Run the server
-#subprocess.run(["./server", "-m", "zephyr-7b-beta.Q6_K.gguf", "-ngl", "9999", "-c", "0", "--port", "12345"], cwd="llama.cpp")
-
 # Install the libraries
-
-
-
+import os
+import subprocess
+
+# Install the required libraries
+subprocess.check_call(["pip", "install", "llama-cpp-python"])
+subprocess.check_call(["pip", "install", "gradio"])
 
 # Download the model
-
+model_url = "https://huggingface.co/CMLL/ZhongJing-2-1_8b-GGUF/resolve/main/ZhongJing1_5-1_8b-fp16.gguf"
+model_path = "ggml-model.gguf"
+if not os.path.exists(model_path):
+    subprocess.check_call(["wget", model_url, "-O", model_path])
 
 # Launch the web UI
-import os
 import gradio as gr
 import copy
 import time
 from llama_cpp import Llama
 
 llm = Llama(
-    model_path=
+    model_path=model_path,
     n_ctx=2048,
-    n_gpu_layers=100, #remove when running on CPU
+    n_gpu_layers=100, # remove when running on CPU
 )
 
 history = []
 
 system_message = """
-
+You are a helpful TCM medical assistant named 仲景中医大语言模型.
 """
 
 def generate_text(message, history):
@@ -68,7 +56,7 @@ def generate_text(message, history):
         temp += stream["choices"][0]["text"]
         yield temp
 
-    history
+    history.append((message, temp))
 
 
 demo = gr.ChatInterface(
@@ -84,3 +72,4 @@ demo = gr.ChatInterface(
 demo.queue(concurrency_count=1, max_size=5)
 demo.launch(debug=True, share=True)
 
+
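The commit replaces the commented-out llama.cpp build steps with a pure-Python setup: dependencies are installed at startup via subprocess, and the GGUF weights are fetched with wget only when the file is missing. The same download could be done with huggingface_hub, which handles caching and resumed downloads; the sketch below is an alternative under that assumption, not what the commit actually uses (repo_id and filename are read off the model_url above).

    # Alternative sketch: fetch the same GGUF file via huggingface_hub
    # instead of wget. This is an assumption, not the Space's actual code.
    from huggingface_hub import hf_hub_download

    model_path = hf_hub_download(
        repo_id="CMLL/ZhongJing-2-1_8b-GGUF",
        filename="ZhongJing1_5-1_8b-fp16.gguf",
    )  # returns the local path of the cached file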
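The diff shows only the edges of generate_text: the streaming accumulation into temp, the yield, and the new history.append. A minimal sketch of a handler consistent with those context lines follows; the ChatML-style prompt tags and the sampling parameters are assumptions for illustration, not the Space's actual values.

    def generate_text(message, history):
        # Build a prompt from the system message and prior turns.
        # The tag format here is an assumed template.
        prompt = f"<|system|>\n{system_message}</s>\n"
        for user_turn, bot_turn in history:
            prompt += f"<|user|>\n{user_turn}</s>\n<|assistant|>\n{bot_turn}</s>\n"
        prompt += f"<|user|>\n{message}</s>\n<|assistant|>\n"

        temp = ""
        # With stream=True, llama-cpp-python yields completion chunks;
        # each chunk's text is in choices[0]["text"], matching the
        # context lines in the diff above.
        for stream in llm(
            prompt,
            max_tokens=512,    # assumed cap
            temperature=0.7,   # assumed sampling setting
            stop=["</s>"],
            stream=True,
        ):
            temp += stream["choices"][0]["text"]
            yield temp         # Gradio renders each partial response

        history.append((message, temp))

One version note: demo.queue(concurrency_count=1, max_size=5) is the Gradio 3.x signature; Gradio 4 removed concurrency_count in favor of default_concurrency_limit, so the Gradio version pinned for the Space determines whether this call succeeds.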