Spaces:

DrBenjamin
/

AI_Demo

Runtime error

File size: 5,501 Bytes

##### `🦙_Alpaca.py`
##### Alpaca Model
##### https://github.com/seemanne/llamacpypy
##### https://github.com/shaunabanana/llama.py
##### https://github.com/thomasantony/llamacpp-python
##### https://github.com/abetlen/llama-cpp-python
##### Please reach out to [email protected] for any questions
#### Loading needed Python libraries
import streamlit as st
#from llamacpypy import Llama
#import llamacpp
from llama_cpp import Llama
import os
import subprocess
import sys




#### Streamlit page configuration: tab title, icon, layout and sidebar state
st.set_page_config(
    page_title="🦙 Alpaca",
    page_icon="images/Logo.png",
    layout="centered",
    initial_sidebar_state="expanded",
)




#### Functions of the Python Wrapper
def llama_stream(
        prompt = '',
        skip_prompt = True,
        trim_prompt = 0,
        executable = 'pages/llama.cpp/main',
        model = 'models/7B/ggml-model-q4_0.bin',
        threads = 4,
        temperature = 0.7,
        top_k = 40,
        top_p = 0.5,
        repeat_last_n = 256,
        repeat_penalty = 1.17647,
        n = 4096,
        interactive = False,
        reverse_prompt = "User:"
):
    """Run the llama.cpp executable and yield its stdout as decoded text.

    The executable is started as a subprocess and its standard output is read
    one byte at a time, so every character is yielded as soon as it has been
    completely received.

    Args:
        prompt: Text passed to the executable with `-p`.
        skip_prompt: If true, nothing is yielded until the text received so
            far starts with the (optionally trimmed) prompt.
        trim_prompt: Number of trailing characters removed from `prompt`
            before it is compared with the received text (0 = no trimming).
        executable: Path of the program to run.
        model: Value passed with `-m`.
        threads: Value passed with `-t`.
        temperature: Value passed with `--temp`.
        top_k: Value passed with `--top_k`.
        top_p: Value passed with `--top_p`.
        repeat_last_n: Value passed with `--repeat_last_n`.
        repeat_penalty: Value passed with `--repeat_penalty`.
        n: Value passed with `-n`.
        interactive: If true, `-i -r <reverse_prompt>` is appended to the
            command and a line read with `input()` is forwarded to the
            subprocess whenever the reverse prompt has been seen.
        reverse_prompt: Marker text used in interactive mode.

    Yields:
        str: Decoded pieces of the subprocess output. Bytes that are not
        valid UTF-8 are replaced with U+FFFD instead of stalling the stream.
    """
    # Local import keeps this function self-contained.
    import codecs

    command = [
        executable,
        '-m', model,
        '-t', str(threads),
        '--temp', str(temperature),
        '--top_k', str(top_k),
        '--top_p', str(top_p),
        '--repeat_last_n', str(repeat_last_n),
        '--repeat_penalty', str(repeat_penalty),
        '-n', str(n),
        '-p', prompt
    ]
    if interactive:
        command += ['-i', '-r', reverse_prompt]

    process = subprocess.Popen(
        command,
        stdin = subprocess.PIPE,
        stdout = subprocess.PIPE,
        # stderr is never consumed here; an unread PIPE can fill up and block
        # the child process, so its output is discarded instead.
        stderr = subprocess.DEVNULL,
    )

    # The prompt comparison target does not change while streaming.
    trimmed_prompt = prompt[:-trim_prompt] if trim_prompt > 0 else prompt

    # The incremental decoder buffers incomplete multi-byte sequences and
    # replaces undecodable bytes, so a bad or truncated sequence can no longer
    # stall the loop forever.
    decoder = codecs.getincrementaldecoder('utf-8')(errors = 'replace')
    generated = ''
    reached_eof = False
    try:
        while True:
            chunk = process.stdout.read(1)
            reached_eof = not chunk  # an empty read means stdout was closed
            decoded = decoder.decode(chunk, final = reached_eof)
            if decoded:
                # Both checks look at the text received *before* this piece,
                # so the last character of the prompt itself is not yielded.
                # NOTE(review): this also means the reverse-prompt check only
                # fires once another piece arrives after the marker - confirm
                # that this matches the executable's interactive behaviour.
                prompt_finished = generated.startswith(trimmed_prompt)
                reverse_prompt_encountered = generated.endswith(reverse_prompt)
                if not skip_prompt or prompt_finished:
                    yield decoded
                if interactive and prompt_finished and reverse_prompt_encountered:
                    user_input = input()
                    process.stdin.write(user_input.encode('utf-8') + b'\n')
                    process.stdin.flush()
                generated += decoded
            if reached_eof:
                return
    finally:
        # If the consumer stopped iterating early, stop the child as well.
        if not reached_eof and process.poll() is None:
            process.terminate()
        try:
            process.stdin.close()
        except OSError:
            # The child may already have gone away; nothing left to flush.
            pass
        process.stdout.close()
        process.wait()


def llama(
        prompt = '',
        stream = False,
        skip_prompt = False,
        trim_prompt = 0,
        executable = 'pages/llama.cpp/main',
        model = 'models/7B/ggml-model-q4_0.bin',
        threads = 4,
        temperature = 0.7,
        top_k = 40,
        top_p = 0.5,
        repeat_last_n = 256,
        repeat_penalty = 1.17647,
        n = 4096,
        interactive = False,
        reverse_prompt = "User:"
):
    """Convenience wrapper around `llama_stream`.

    Every argument except `stream` is forwarded unchanged to `llama_stream`.

    Args:
        stream: If true, the generator created by `llama_stream` is returned
            so the caller can consume the output piece by piece. Otherwise
            the generator is exhausted and the pieces are joined.

    Returns:
        The `llama_stream` generator when `stream` is true, else the complete
        output as a single string.
    """
    # Collect the forwarded options once so the call below stays short.
    options = {
        'prompt': prompt,
        'skip_prompt': skip_prompt,
        'trim_prompt': trim_prompt,
        'executable': executable,
        'model': model,
        'threads': threads,
        'temperature': temperature,
        'top_k': top_k,
        'top_p': top_p,
        'repeat_last_n': repeat_last_n,
        'repeat_penalty': repeat_penalty,
        'n': n,
        'interactive': interactive,
        'reverse_prompt': reverse_prompt,
    }
    token_stream = llama_stream(**options)

    if stream:
        return token_stream
    return ''.join(token_stream)



### Python Wrapper (functions above)
#text = []
#for token in llama(prompt = 'What is your purpose?', repeat_penalty = 1.05, skip_prompt = False, interactive = False):
#    print(token, end = '', flush = True)
#    text.append(token)
#st.subheader('Debug')
#st.experimental_show(text[0])
#st.experimental_show(text[1])
#st.subheader('Answer')
#st.write(''.join(text))



### llamacpypy
#llama = Llama(model_name = 'models/7B/ggml-model-q4_0.bin', warm_start = True)
#llama.load_model()
#var = llama.generate("This is the weather report, we are reporting a clown fiesta happening at backer street. The clowns ")
#st.write(var)



### llamacpp
#model_path = "./models/7B/ggml-model-q4_0.bin"
#params = llamacpp.gpt_params(model_path, 4096, 40, 0.1, 0.7, 2.0)
#model = llamacpp.PyLLAMA(model_path, params)
#text = model.predict("Hello, I'm a llama.", 10)
#st.write(text)
#params = llamacpp.gpt_params('./models/7B/ggml-model-q4_0.bin',  # model,
#    512,  # ctx_size
#    100,  # n_predict
#    40,  # top_k
#    0.95,  # top_p
#    0.85,  # temp
#    1.30,  # repeat_penalty
#    -1,  # seed
#    8,  # threads
#    64,  # repeat_last_n
#    8,  # batch_size
#)
#model = llamacpp.PyLLAMA(params)
#model.add_bos()     # Adds the "beginning of sequence" (BOS) token
#model.update_input("A llama is a")
#model.print_startup_stats()
#model.prepare_context()

#model.ingest_all_pending_input(True)
#while not model.is_finished():
#    text, is_finished = model.infer_text()
#    print(text, end="")

#    if is_finished:
#        break

# Flush stdout
#sys.stdout.flush()
#model.print_end_stats()



### Llama cpp
# Load the model through llama-cpp-python.
# NOTE(review): "xfh/alpaca.cpp_65b_ggml" looks like a repository id rather
# than a local model file - confirm that this path resolves at runtime.
llm = Llama(model_path="xfh/alpaca.cpp_65b_ggml")

# Run one fixed question through the model (at most 32 tokens, with "Q:" and
# a newline passed as stop sequences and echo enabled).
output = llm(
    "Q: Name the planets in the solar system? A: ",
    max_tokens=32,
    stop=["Q:", "\n"],
    echo=True,
)

# Show the raw result object on the page.
st.write(output)