Euryeth committed on
Commit d5c6c7d · verified · 1 Parent(s): c293898

Update app.py

Files changed (1)
app.py  +16 -15
app.py CHANGED
@@ -1,21 +1,22 @@
-from llama_cpp import Llama
+from transformers import pipeline
 import os
 
-# Configure cache
-os.environ['GGUF_CACHE'] = '/tmp/gguf_cache'
-os.makedirs('/tmp/gguf_cache', exist_ok=True)
+# Cache setup
+os.environ['HF_HOME'] = '/tmp/cache'
 
-# Load GGUF model
-llm = Llama(
-    model_path="ninja-v1-nsfw-rp.gguf",
-    n_ctx=2048,  # Context window
-    n_threads=4  # CPU threads
+model = pipeline(
+    "text-generation",
+    model="gpt2",
+    device=-1  # Force CPU
 )
 
-def generate_text(prompt, max_tokens=560):
-    output = llm.create_chat_completion(
-        messages=[{"role": "user", "content": prompt}],
-        max_tokens=max_tokens,
-        temperature=0.7
+def generate_text(prompt, max_new_tokens=560, max_context=1080):
+    """Generate text with precise token control"""
+    output = model(
+        prompt,
+        max_new_tokens=max_new_tokens,  # Response tokens (560)
+        max_length=min(max_context, 1024),  # GPT-2's max context is 1024
+        truncation=True,
+        pad_token_id=50256  # Explicitly set to avoid warnings
     )
-    return output['choices'][0]['message']['content']
+    return output[0]["generated_text"]
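For a quick sanity check, the updated helper could be exercised as in the sketch below. The __main__ guard and the sample prompt are illustrative additions, not part of this commit; the sketch assumes the updated app.py above has been run or imported so that model and generate_text are defined.

# Illustrative usage sketch (not part of the commit); assumes the updated
# app.py above, so that `model` and `generate_text` are already defined.
if __name__ == "__main__":
    sample_prompt = "Once upon a time"  # hypothetical test prompt
    # Note: when both max_new_tokens and max_length are supplied, recent
    # transformers releases let max_new_tokens take precedence and emit a
    # warning, so the response length is governed by max_new_tokens here.
    print(generate_text(sample_prompt, max_new_tokens=60))

Since GPT-2's context window is 1024 tokens, the min(max_context, 1024) clamp in generate_text keeps the prompt-plus-response request within the model's limit.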