##### `🦙_Alpaca.py`
##### Alpaca Model
##### https://github.com/seemanne/llamacpypy
##### https://github.com/shaunabanana/llama.py
##### https://github.com/thomasantony/llamacpp-python
##### https://github.com/abetlen/llama-cpp-python
##### Please reach out to [email protected] for any questions
#### Loading needed Python libraries
import streamlit as st
#from llamacpypy import Llama
#import llamacpp
from llama_cpp import Llama
import os
import subprocess
import sys
#### Streamlit initial setup
st.set_page_config(
    page_title = "🦙 Alpaca",
    page_icon = "images/Logo.png",
    layout = "centered",
    initial_sidebar_state = "expanded"
)
#### Functions of the Python Wrapper
def llama_stream(
    prompt = '',
    skip_prompt = True,
    trim_prompt = 0,
    executable = 'pages/llama.cpp/main',
    model = 'models/7B/ggml-model-q4_0.bin',
    threads = 4,
    temperature = 0.7,
    top_k = 40,
    top_p = 0.5,
    repeat_last_n = 256,
    repeat_penalty = 1.17647,
    n = 4096,
    interactive = False,
    reverse_prompt = "User:"
):
    # Build the llama.cpp command line from the given model and sampling parameters
    command = [
        executable,
        '-m', model,
        '-t', str(threads),
        '--temp', str(temperature),
        '--top_k', str(top_k),
        '--top_p', str(top_p),
        '--repeat_last_n', str(repeat_last_n),
        '--repeat_penalty', str(repeat_penalty),
        '-n', str(n),
        '-p', prompt
    ]
    if interactive:
        command += ['-i', '-r', reverse_prompt]
    process = subprocess.Popen(
        command,
        stdin = subprocess.PIPE,
        stdout = subprocess.PIPE,
        stderr = subprocess.PIPE,
    )
    # Read stdout byte by byte and yield each piece as soon as it decodes as UTF-8
    token = b''
    generated = ''
    while True:
        token += process.stdout.read(1)
        if token:  # neither empty string nor None
            try:
                decoded = token.decode('utf-8')
                trimmed_prompt = prompt
                if trim_prompt > 0:
                    trimmed_prompt = prompt[:-trim_prompt]
                prompt_finished = generated.startswith(trimmed_prompt)
                reverse_prompt_encountered = generated.endswith(reverse_prompt)
                if not skip_prompt or prompt_finished:
                    yield decoded
                if interactive and prompt_finished and reverse_prompt_encountered:
                    user_input = input()
                    process.stdin.write(user_input.encode('utf-8') + b'\n')
                    process.stdin.flush()
                generated += decoded
                token = b''
            except UnicodeDecodeError:
                # Multi-byte character not complete yet, keep reading
                continue
        elif process.poll() is not None:
            return
def llama(
    prompt = '',
    stream = False,
    skip_prompt = False,
    trim_prompt = 0,
    executable = 'pages/llama.cpp/main',
    model = 'models/7B/ggml-model-q4_0.bin',
    threads = 4,
    temperature = 0.7,
    top_k = 40,
    top_p = 0.5,
    repeat_last_n = 256,
    repeat_penalty = 1.17647,
    n = 4096,
    interactive = False,
    reverse_prompt = "User:"
):
    streamer = llama_stream(
        prompt = prompt,
        skip_prompt = skip_prompt,
        trim_prompt = trim_prompt,
        executable = executable,
        model = model,
        threads = threads,
        temperature = temperature,
        top_k = top_k,
        top_p = top_p,
        repeat_last_n = repeat_last_n,
        repeat_penalty = repeat_penalty,
        n = n,
        interactive = interactive,
        reverse_prompt = reverse_prompt
    )
    if stream:
        return streamer
    else:
        return ''.join(list(streamer))
### Python Wrapper (functions above)
#text = []
#for token in llama(prompt = 'What is your purpose?', repeat_penalty = 1.05, skip_prompt = False, stream = True, interactive = False):
#    print(token, end = '', flush = True)
#    text.append(token)
#st.subheader('Debug')
#st.experimental_show(text[0])
#st.experimental_show(text[1])
#st.subheader('Answer')
#st.write(''.join(text))
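### Streamlit hook-up (sketch)
# The commented-out lines below are a minimal sketch, not part of the original app: they show how
# the llama() wrapper above could feed a Streamlit text input, assuming the default executable and
# model paths passed to llama() exist in this Space.
#user_prompt = st.text_input('Prompt', value = 'What is your purpose?')
#if st.button('Generate'):
#    placeholder = st.empty()
#    answer = ''
#    for token in llama(prompt = user_prompt, stream = True, skip_prompt = True):
#        answer += token
#        placeholder.write(answer)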
### llamacpypy
#llama = Llama(model_name = 'models/7B/ggml-model-q4_0.bin', warm_start = True)
#llama.load_model()
#var = llama.generate("This is the weather report, we are reporting a clown fiesta happening at backer street. The clowns ")
#st.write(var)
### llamacpp
#model_path = "./models/7B/ggml-model-q4_0.bin"
#params = llamacpp.gpt_params(model_path, 4096, 40, 0.1, 0.7, 2.0)
#model = llamacpp.PyLLAMA(model_path, params)
#text = model.predict("Hello, I'm a llama.", 10)
#st.write(text)
#params = llamacpp.gpt_params('./models/7B/ggml-model-q4_0.bin', # model
#                             512,     # ctx_size
#                             100,     # n_predict
#                             40,      # top_k
#                             0.95,    # top_p
#                             0.85,    # temp
#                             1.30,    # repeat_penalty
#                             -1,      # seed
#                             8,       # threads
#                             64,      # repeat_last_n
#                             8,       # batch_size
#)
#model = llamacpp.PyLLAMA(params)
#model.add_bos() # Adds "beginning of string" token
#model.update_input("A llama is a")
#model.print_startup_stats()
#model.prepare_context()
#model.ingest_all_pending_input(True)
#while not model.is_finished():
#    text, is_finished = model.infer_text()
#    print(text, end="")
#    if is_finished:
#        break
# Flush stdout
#sys.stdout.flush()
#model.print_end_stats()
### Llama cpp
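# Note: llama_cpp.Llama expects model_path to point to a local GGML model file on disk; the string
# below looks like a Hugging Face repo id, so the model would need to be downloaded to a local file
# first for this call to succeed.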
llm = Llama(model_path = "xfh/alpaca.cpp_65b_ggml")
output = llm("Q: Name the planets in the solar system? A: ", max_tokens = 32, stop = ["Q:", "\n"], echo = True)
st.write(output)
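### Streaming variant (sketch)
# Commented-out sketch, not part of the original app: assuming the installed llama-cpp-python
# version supports stream = True, the completion can be rendered token by token instead of
# writing the full response dict at once.
#placeholder = st.empty()
#streamed = ''
#for chunk in llm("Q: Name the planets in the solar system? A: ", max_tokens = 32, stop = ["Q:", "\n"], stream = True):
#    streamed += chunk["choices"][0]["text"]
#    placeholder.write(streamed)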