Euryeth committed (verified)
Commit c293898 · 1 Parent(s): 000ae17

Update app.py

Files changed (1)
  1. app.py +16 -15
app.py CHANGED
@@ -1,20 +1,21 @@
+from llama_cpp import Llama
 import os
-from transformers import pipeline
 
-# Force cache location BEFORE importing transformers
-os.environ['TRANSFORMERS_CACHE'] = '/tmp/cache'
-os.environ['HF_HOME'] = '/tmp/cache'
-os.environ['HF_DATASETS_CACHE'] = '/tmp/cache'
-os.environ['HUGGINGFACE_HUB_CACHE'] = '/tmp/cache'
+# Configure cache
+os.environ['GGUF_CACHE'] = '/tmp/gguf_cache'
+os.makedirs('/tmp/gguf_cache', exist_ok=True)
 
-# Now import pipeline
-model = pipeline(
-    "text-generation",
-    model="distilgpt2",  # Smaller model for reliability
-    device=-1  # Force CPU
+# Load GGUF model
+llm = Llama(
+    model_path="ninja-v1-nsfw-rp.gguf",
+    n_ctx=2048,    # Context window
+    n_threads=4    # CPU threads
 )
 
-def generate_text(prompt, max_length=100):
-    """Generate text from a prompt"""
-    output = model(prompt, max_length=max_length)
-    return output[0]["generated_text"]
+def generate_text(prompt, max_tokens=560):
+    output = llm.create_chat_completion(
+        messages=[{"role": "user", "content": prompt}],
+        max_tokens=max_tokens,
+        temperature=0.7
+    )
+    return output['choices'][0]['message']['content']
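
For reference, a minimal smoke test of the updated helper might look like the sketch below. It is not part of the commit and assumes llama-cpp-python is installed and that ninja-v1-nsfw-rp.gguf is present in the working directory, since importing app builds the Llama instance immediately.

# Hypothetical smoke test for the new generate_text() helper (assumption:
# the GGUF file sits next to app.py, so importing app loads the model).
from app import generate_text

if __name__ == "__main__":
    reply = generate_text("Say hello in one short sentence.", max_tokens=32)
    print(reply)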