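The snippet below loads a local GGUF build of the model with llama-cpp-python and wraps single-turn chat completion in a small helper: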
from llama_cpp import Llama
import os

# Point GGUF_CACHE at a writable cache directory and make sure it exists
os.environ['GGUF_CACHE'] = '/tmp/gguf_cache'
os.makedirs('/tmp/gguf_cache', exist_ok=True)

# Load GGUF model
llm = Llama(
    model_path="ninja-v1-nsfw-rp.gguf",
    n_ctx=2048,  # Context window
    n_threads=4  # CPU threads
)

def generate_text(prompt, max_tokens=560):
    """Send a single-turn user prompt to the model and return the reply text."""
    output = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=0.7  # Moderate sampling randomness; lower for more deterministic output
    )
    return output['choices'][0]['message']['content']
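
A minimal usage sketch, assuming the model file above loaded successfully; the prompt string and token limit here are purely illustrative:

if __name__ == "__main__":
    # Hypothetical prompt for illustration; any user message works
    reply = generate_text("Introduce yourself in one short paragraph.", max_tokens=128)
    print(reply)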