Runtime error
Update app.py
app.py
CHANGED
@@ -1,22 +1,49 @@
+from ctransformers import AutoModelForCausalLM
 import os
 
+# Configure cache
 os.environ['HF_HOME'] = '/tmp/cache'
 
+# Load GGUF model
+model = AutoModelForCausalLM.from_pretrained(
+    "mradermacher/Ninja-v1-NSFW-RP-GGUF",
+    model_file="ninja-v1.Q5_K_M.gguf",  # Medium quantization
+    model_type="llama",
+    gpu_layers=0,  # CPU only
+    context_length=4096  # Max context size
 )
 
+def generate_chat_completion(messages, max_tokens=560, temperature=0.7):
+    """Generate chat response in OpenAI format"""
+    # Format messages as prompt
+    prompt = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
+    prompt += "\nassistant:"
+
+    # Generate response
+    response = model(
         prompt,
+        max_new_tokens=max_tokens,
+        temperature=temperature,
+        stop=["</s>", "user:", "system:"],
+        stream=False
     )
+
+    return {
+        "id": f"chatcmpl-{os.urandom(8).hex()}",
+        "object": "chat.completion",
+        "created": int(time.time()),
+        "model": "Ninja-v1-NSFW-RP",
+        "choices": [{
+            "index": 0,
+            "message": {
+                "role": "assistant",
+                "content": response
+            },
+            "finish_reason": "stop"
+        }],
+        "usage": {
+            "prompt_tokens": len(prompt.split()),
+            "completion_tokens": len(response.split()),
+            "total_tokens": len(prompt.split()) + len(response.split())
+        }
+    }
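Note: the updated code builds the completion timestamp with int(time.time()), but app.py only imports os, so the first request would raise NameError: name 'time' is not defined — one plausible cause of the Space's runtime-error status. Adding import time next to import os at the top of app.py would address that. Below is a rough local smoke test, assuming that import fix is in place; the file name and message contents are illustrative and not part of this commit.

# smoke_test.py (hypothetical) -- run from the Space's working directory
# Assumes app.py has been patched with `import time` as noted above.
from app import generate_chat_completion  # importing app.py loads the GGUF model

messages = [
    {"role": "system", "content": "You are a roleplay assistant."},
    {"role": "user", "content": "Introduce yourself in one sentence."},
]

completion = generate_chat_completion(messages, max_tokens=64)
print(completion["choices"][0]["message"]["content"])
print(completion["usage"])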