Euryeth committed
Commit c0fe323 · verified · 1 Parent(s): 034a4f9

Update app.py

Files changed (1): app.py (+35, -14)
app.py CHANGED
@@ -1,20 +1,41 @@
-from huggingface_hub import hf_hub_download
-from ctransformers import AutoModelForCausalLM
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+import torch
 import os
 
 os.environ['HF_HOME'] = '/tmp/cache'
 
-# Download model explicitly
-model_path = hf_hub_download(
-    repo_id="mradermacher/Ninja-v1-NSFW-RP-GGUF",
-    filename="ninja-v1.Q4_K_M.gguf",  # Different quantization
-    revision="main"
+# Load model and tokenizer
+model_id = "Disya/DS-R1-Qwen3-8B-ArliAI-RpR-v4-exl2-8bpw-h8"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",
+    torch_dtype=torch.float16
 )
 
-# Load from local path
-model = AutoModelForCausalLM.from_pretrained(
-    model_path,  # Use downloaded path
-    model_type="llama",
-    gpu_layers=0,
-    context_length=4096
-)
+# Create text generation pipeline
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    max_new_tokens=1080
+)
+
+def generate_chat_completion(messages, max_tokens=1080, temperature=0.8):
+    """Generate chat response in OpenAI format"""
+    # Format messages as a prompt
+    prompt = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+
+    # Generate response
+    outputs = pipe(
+        prompt,
+        max_new_tokens=max_tokens,
+        temperature=temperature,
+        do_sample=True
+    )
+
+    return outputs[0]["generated_text"]
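For reference, a minimal usage sketch of the new generate_chat_completion helper. The caller below is hypothetical and not part of the commit; it assumes OpenAI-style message dicts, which is the structure tokenizer.apply_chat_template expects:

    # Hypothetical caller, not part of the commit.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a one-line greeting."},
    ]
    text = generate_chat_completion(messages, max_tokens=256, temperature=0.8)
    print(text)

Note that with a string prompt, the transformers text-generation pipeline includes the prompt itself in "generated_text" by default, so the newly generated tokens are the tail of the returned string.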