Euryeth committed
Commit c0fe323 · verified · 1 Parent(s): 034a4f9

Update app.py

Files changed (1): app.py (+35, -14)
app.py CHANGED
@@ -1,20 +1,41 @@
-from huggingface_hub import hf_hub_download
-from ctransformers import AutoModelForCausalLM
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+import torch
 import os
 
 os.environ['HF_HOME'] = '/tmp/cache'
 
-# Download model explicitly
-model_path = hf_hub_download(
-    repo_id="mradermacher/Ninja-v1-NSFW-RP-GGUF",
-    filename="ninja-v1.Q4_K_M.gguf",  # Different quantization
-    revision="main"
+# Load model and tokenizer
+model_id = "Disya/DS-R1-Qwen3-8B-ArliAI-RpR-v4-exl2-8bpw-h8"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",
+    torch_dtype=torch.float16
 )
 
-# Load from local path
-model = AutoModelForCausalLM.from_pretrained(
-    model_path,  # Use downloaded path
-    model_type="llama",
-    gpu_layers=0,
-    context_length=4096
-)
+# Create text generation pipeline
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    max_new_tokens=1080
+)
+
+def generate_chat_completion(messages, max_tokens=1080, temperature=0.8):
+    """Generate chat response in OpenAI format"""
+    # Format messages as a prompt
+    prompt = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+
+    # Generate response
+    outputs = pipe(
+        prompt,
+        max_new_tokens=max_tokens,
+        temperature=temperature,
+        do_sample=True
+    )
+
+    return outputs[0]["generated_text"]
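For reference, a minimal usage sketch of the new generate_chat_completion helper. The caller below is hypothetical and not part of the commit; it assumes OpenAI-style message dicts, which is the structure tokenizer.apply_chat_template expects:

    # Hypothetical caller, not part of the commit.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a one-line greeting."},
    ]
    text = generate_chat_completion(messages, max_tokens=256, temperature=0.8)
    print(text)

Note that with a string prompt, the transformers text-generation pipeline includes the prompt itself in "generated_text" by default, so the newly generated tokens are the tail of the returned string.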