# LLM_Ariphes/app.py
import os

from llama_cpp import Llama

# Configure the GGUF cache directory before loading the model
os.environ['GGUF_CACHE'] = '/tmp/gguf_cache'
os.makedirs('/tmp/gguf_cache', exist_ok=True)
# Load the GGUF model (expects ninja-v1-nsfw-rp.gguf alongside this script)
llm = Llama(
    model_path="ninja-v1-nsfw-rp.gguf",
    n_ctx=2048,    # Context window size in tokens
    n_threads=4,   # Number of CPU threads for inference
)
def generate_text(prompt, max_tokens=560):
    """Run a single-turn chat completion and return the model's reply text."""
    output = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=0.7,
    )
    return output['choices'][0]['message']['content']
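
# Minimal usage sketch (not part of the original file): the prompt below is
# hypothetical and simply demonstrates calling generate_text() directly once
# the model above has loaded.
if __name__ == "__main__":
    reply = generate_text("Introduce yourself in one sentence.")
    print(reply)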