Euryeth committed on
Commit 18e3582 · verified · 1 Parent(s): dc5df57

Update app.py

Files changed (1)
  1. app.py +20 -25
app.py CHANGED
@@ -1,41 +1,36 @@
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from transformers import pipeline
 import torch
 import os
 
+# Configure cache
 os.environ['HF_HOME'] = '/tmp/cache'
 
-# Load model and tokenizer
-model_id = "Disya/DS-R1-Qwen3-8B-ArliAI-RpR-v4-exl2-8bpw-h8"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    device_map="auto",
-    torch_dtype=torch.float16
-)
+# Use a reliable model that works in Spaces
+MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2" # 8K context, good performance
 
-# Create text generation pipeline
-pipe = pipeline(
+# Initialize the pipeline with your settings
+generator = pipeline(
     "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    max_new_tokens=1080
+    model=MODEL_NAME,
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+    max_new_tokens=560
 )
 
-def generate_chat_completion(messages, max_tokens=1080, temperature=0.8):
-    """Generate chat response in OpenAI format"""
-    # Format messages as prompt
-    prompt = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
+def generate_chat_completion(messages, max_tokens=560, temperature=0.8):
+    """Generate chat response with precise control"""
+    # Format messages as instruction prompt
+    prompt = "\n".join([f"{msg['role'].capitalize()}: {msg['content']}" for msg in messages])
+    prompt += "\nAssistant:"
 
-    # Generate response
-    outputs = pipe(
+    # Generate response with your settings
+    response = generator(
         prompt,
         max_new_tokens=max_tokens,
         temperature=temperature,
+        top_p=0.95,
+        repetition_penalty=1.15,
         do_sample=True
     )
 
-    return outputs[0]["generated_text"]
+    return response[0]['generated_text'].replace(prompt, "").strip()
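
For reference, a minimal usage sketch (not part of the commit) showing how the updated generate_chat_completion could be called with an OpenAI-style messages list; the example messages and parameter values here are illustrative assumptions, not taken from the repository.

# Hypothetical caller, assuming app.py has been imported or run in the same process.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize the benefits of unit tests."},
]

# The helper builds a "Role: content" prompt ending in "Assistant:",
# generates with the configured pipeline, and strips the prompt from the output.
reply = generate_chat_completion(messages, max_tokens=256, temperature=0.7)
print(reply)

The visible design change in this commit is that the old version relied on tokenizer.apply_chat_template for prompt formatting, while the new version formats the prompt manually as "Role: content" lines and removes the echoed prompt from the pipeline output before returning it.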