The snippet below loads the GGUF build with `llama-cpp-python`. It first sets up a local cache directory:

```python
from llama_cpp import Llama
import os

# Configure a local cache directory for model files
os.environ['GGUF_CACHE'] = '/tmp/gguf_cache'
os.makedirs('/tmp/gguf_cache', exist_ok=True)
```
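If the `.gguf` file is not already present locally, one way to fetch it is with `huggingface_hub` (a minimal sketch; the `repo_id` and `filename` below are placeholders, not the actual repository name):

```python
from huggingface_hub import hf_hub_download

# Assumption: substitute the real GGUF repository id and filename
model_path = hf_hub_download(
    repo_id="your-org/ninja-v1-nsfw-rp-gguf",
    filename="ninja-v1-nsfw-rp.gguf",
    cache_dir='/tmp/gguf_cache',
)
```

The returned `model_path` can then be passed to the `Llama` constructor in place of the hardcoded filename below.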
```python
# Load the GGUF model
llm = Llama(
    model_path="ninja-v1-nsfw-rp.gguf",
    n_ctx=2048,   # context window size in tokens
    n_threads=4   # CPU threads used for inference
)
```
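If a GPU-enabled build of `llama-cpp-python` is installed, layers can be offloaded at load time. A sketch using the standard `n_gpu_layers` constructor argument, where `-1` offloads every layer:

```python
# Same load, but offloading all layers to the GPU (requires a CUDA/Metal build)
llm = Llama(
    model_path="ninja-v1-nsfw-rp.gguf",
    n_ctx=2048,
    n_threads=4,
    n_gpu_layers=-1  # -1 = offload all layers
)
```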
```python
def generate_text(prompt, max_tokens=560):
    """Run a single-turn chat completion and return the reply text."""
    output = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=0.7
    )
    return output['choices'][0]['message']['content']
```
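A minimal usage example (the prompt here is just an illustration):

```python
if __name__ == "__main__":
    reply = generate_text("Introduce yourself in one short paragraph.")
    print(reply)
```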