Runtime error
Update app.py
app.py CHANGED
@@ -1,61 +1,20 @@
+from huggingface_hub import hf_hub_download
 from ctransformers import AutoModelForCausalLM
 import os
 
-# Configure cache
 os.environ['HF_HOME'] = '/tmp/cache'
 
-#
-model = AutoModelForCausalLM.from_pretrained(
-    "mradermacher/Ninja-v1-NSFW-RP-GGUF",
-
-
-    gpu_layers=0,  # CPU only
-    context_length=4096  # Max context size
-)
-
-def generate_chat_completion(messages, max_tokens=1080, temperature=0.8):
-    """Generate chat response in OpenAI format"""
-    # Format messages as prompt
-    prompt = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
-    prompt += "\nassistant:"
-
-    # Generate response
-    response = model(
-        prompt,
-        max_new_tokens=max_tokens,
-        temperature=temperature,
-        stop=["</s>", "user:", "system:"],
-        stream=False
-    )
-
-    return {
-        "id": f"chatcmpl-{os.urandom(8).hex()}",
-        "object": "chat.completion",
-        "created": int(time.time()),
-        "model": "Ninja-v1-NSFW-RP",
-        "choices": [{
-            "index": 0,
-            "message": {
-                "role": "assistant",
-                "content": response
-            },
-            "finish_reason": "stop"
-        }],
-        "usage": {
-            "prompt_tokens": len(prompt.split()),
-            "completion_tokens": len(response.split()),
-            "total_tokens": len(prompt.split()) + len(response.split())
-        }
-    }
-
-from transformers import AutoTokenizer
-
-# Add after model loading
-tokenizer = AutoTokenizer.from_pretrained(
-    "mradermacher/Ninja-v1-NSFW-RP-GGUF",
-    use_fast=False
+# Download model explicitly
+model_path = hf_hub_download(
+    repo_id="mradermacher/Ninja-v1-NSFW-RP-GGUF",
+    filename="ninja-v1.Q5_K_M.gguf",
+    revision="main"
 )
 
-#
-
-
+# Load from local path
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,  # Use downloaded path
+    model_type="llama",
+    gpu_layers=0,
+    context_length=4096
+)
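After this change, app.py only downloads and loads the model; the OpenAI-style generate_chat_completion helper is removed. A minimal smoke test of the new loading path might look like the sketch below. It assumes app.py has already executed (so model exists); the prompt and sampling values are purely illustrative, and the list_repo_files check is included only because filename is hard-coded while the repo hosts several quantizations, so a typo there would make hf_hub_download fail at startup.

from huggingface_hub import list_repo_files

# Confirm the hard-coded GGUF filename actually exists in the repo
# (assumption: this exact filename is published there).
available = list_repo_files("mradermacher/Ninja-v1-NSFW-RP-GGUF")
assert "ninja-v1.Q5_K_M.gguf" in available, f"not found; repo has: {available}"

# Exercise the loaded model with the same call style the removed
# generate_chat_completion helper used.
reply = model(
    "user: Hello!\nassistant:",  # illustrative prompt
    max_new_tokens=64,
    temperature=0.8,
    stop=["</s>", "user:", "system:"],
)
print(reply)

Passing a concrete local path into AutoModelForCausalLM.from_pretrained also sidesteps ctransformers' own Hub file resolution, which has to guess which GGUF to use in a multi-file repo; that guess is a plausible source of the Space's runtime error, though the commit itself doesn't say.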