Euryeth committed on
Commit 57d8e73 · verified · 1 Parent(s): 3ce5700

Update app.py

Files changed (1)
  1. app.py +13 -54
app.py CHANGED
@@ -1,61 +1,20 @@
+from huggingface_hub import hf_hub_download
 from ctransformers import AutoModelForCausalLM
 import os
 
-# Configure cache
 os.environ['HF_HOME'] = '/tmp/cache'
 
-# Load GGUF model
-model = AutoModelForCausalLM.from_pretrained(
-    "mradermacher/Ninja-v1-NSFW-RP-GGUF",
-    model_file="ninja-v1.Q5_K_M.gguf",  # Medium quantization
-    model_type="llama",
-    gpu_layers=0,  # CPU only
-    context_length=4096  # Max context size
-)
-
-def generate_chat_completion(messages, max_tokens=1080, temperature=0.8):
-    """Generate chat response in OpenAI format"""
-    # Format messages as prompt
-    prompt = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
-    prompt += "\nassistant:"
-
-    # Generate response
-    response = model(
-        prompt,
-        max_new_tokens=max_tokens,
-        temperature=temperature,
-        stop=["</s>", "user:", "system:"],
-        stream=False
-    )
-
-    return {
-        "id": f"chatcmpl-{os.urandom(8).hex()}",
-        "object": "chat.completion",
-        "created": int(time.time()),
-        "model": "Ninja-v1-NSFW-RP",
-        "choices": [{
-            "index": 0,
-            "message": {
-                "role": "assistant",
-                "content": response
-            },
-            "finish_reason": "stop"
-        }],
-        "usage": {
-            "prompt_tokens": len(prompt.split()),
-            "completion_tokens": len(response.split()),
-            "total_tokens": len(prompt.split()) + len(response.split())
-        }
-    }
-
-from transformers import AutoTokenizer
-
-# Add after model loading
-tokenizer = AutoTokenizer.from_pretrained(
-    "mradermacher/Ninja-v1-NSFW-RP-GGUF",
-    use_fast=False
+# Download model explicitly
+model_path = hf_hub_download(
+    repo_id="mradermacher/Ninja-v1-NSFW-RP-GGUF",
+    filename="ninja-v1.Q5_K_M.gguf",
+    revision="main"
 )
 
-# In generate_chat_completion():
-prompt_tokens = len(tokenizer.encode(prompt))
-completion_tokens = len(tokenizer.encode(response))
+# Load from local path
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,  # Use downloaded path
+    model_type="llama",
+    gpu_layers=0,
+    context_length=4096
+)
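
For reference, the model object returned by the new loading code is a ctransformers LLM, which is invoked directly as a callable to generate text. Below is a minimal usage sketch, not part of this commit; the prompt string and sampling values are illustrative assumptions that echo the generate_chat_completion helper the commit removes.

# Hypothetical usage sketch (not in the commit): query the model loaded above.
prompt = "user: Hello!\nassistant:"    # assumed role-prefixed prompt format
response = model(
    prompt,
    max_new_tokens=256,                # assumed cap; the removed helper used 1080
    temperature=0.8,                   # same default as the removed helper
    stop=["</s>", "user:", "system:"]  # stop strings taken from the removed helper
)
print(response)  # ctransformers returns the completion as a plain string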