Euryeth committed (verified)
Commit 3d4be89 · 1 Parent(s): b438a2d

Update app.py

Files changed (1):
  1. app.py +41 -14
app.py CHANGED
@@ -1,22 +1,49 @@
-from transformers import pipeline
+from ctransformers import AutoModelForCausalLM
 import os

-# Cache setup
+# Configure cache
 os.environ['HF_HOME'] = '/tmp/cache'

-model = pipeline(
-    "text-generation",
-    model="gpt2",
-    device=-1  # Force CPU
+# Load GGUF model
+model = AutoModelForCausalLM.from_pretrained(
+    "mradermacher/Ninja-v1-NSFW-RP-GGUF",
+    model_file="ninja-v1.Q5_K_M.gguf",  # Medium quantization
+    model_type="llama",
+    gpu_layers=0,  # CPU only
+    context_length=4096  # Max context size
 )

-def generate_text(prompt, max_new_tokens=560, max_context=1080):
-    """Generate text with precise token control"""
-    output = model(
+def generate_chat_completion(messages, max_tokens=560, temperature=0.7):
+    """Generate chat response in OpenAI format"""
+    # Format messages as prompt
+    prompt = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
+    prompt += "\nassistant:"
+
+    # Generate response
+    response = model(
         prompt,
-        max_new_tokens=max_new_tokens,  # Response tokens (560)
-        max_length=min(max_context, 1024),  # GPT-2's max context is 1024
-        truncation=True,
-        pad_token_id=50256  # Explicitly set to avoid warnings
+        max_new_tokens=max_tokens,
+        temperature=temperature,
+        stop=["</s>", "user:", "system:"],
+        stream=False
     )
-    return output[0]["generated_text"]
+
+    return {
+        "id": f"chatcmpl-{os.urandom(8).hex()}",
+        "object": "chat.completion",
+        "created": int(time.time()),
+        "model": "Ninja-v1-NSFW-RP",
+        "choices": [{
+            "index": 0,
+            "message": {
+                "role": "assistant",
+                "content": response
+            },
+            "finish_reason": "stop"
+        }],
+        "usage": {
+            "prompt_tokens": len(prompt.split()),
+            "completion_tokens": len(response.split()),
+            "total_tokens": len(prompt.split()) + len(response.split())
+        }
+    }
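
As committed, generate_chat_completion builds its "created" field with time.time() but app.py never imports time, so the first call raises a NameError; adding import time next to import os fixes that. The usage block also reports whitespace-split word counts, not true token counts. The sketch below is a hypothetical smoke test, not part of this commit, showing how the function might be exercised from the bottom of app.py once the import is added; the example messages and printed fields are illustrative assumptions only.

if __name__ == "__main__":
    # Hypothetical smoke test, assuming "import time" has been added at the
    # top of app.py so the "created" timestamp resolves.
    messages = [
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Say hello in one sentence."},
    ]
    completion = generate_chat_completion(messages, max_tokens=64)

    # The reply sits where OpenAI-style clients expect it.
    print(completion["choices"][0]["message"]["content"])
    # Whitespace-based estimates, not real token counts.
    print(completion["usage"])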