Spaces:
Runtime error
Update llm.py
llm.py
CHANGED
@@ -1,74 +1,41 @@
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-import time
-
-# Option 1: Ultra-fast CPU model (best for response speed)
-def generate_answer_fast(context, question):
-    """
-    Uses DistilGPT-2 (smallest version) with optimizations
-    ~2-5 seconds on CPU for 60 tokens
-    """
-    tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
-    model = AutoModelForCausalLM.from_pretrained("distilgpt2")
-
-    prompt = f"""You are a helpful AI assistant. Using the context, answer the question conversationally.
-
-Context:
-{context[:
-
-Question: {question}
-Answer:"""
-
-    inputs = tokenizer(prompt, return_tensors="pt")
-
-    # Generate
-    outputs = model.generate(
-        inputs.input_ids,
-        max_new_tokens=60,
-        num_beams=1,
-        do_sample=True,
-        pad_token_id=tokenizer.eos_token_id,
-        temperature=0.7,
-        top_k=20,
-        early_stopping=True
-    )
-
-    # Extract only the new text
-    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return full_text.split("Answer:")[-1].strip()
-
-# Option 2: Better quality CPU model (balance between speed & quality)
-def generate_answer_quality(context, question):
-    """
-    Uses DialoGPT-small - conversational but still CPU-friendly
-    ~5-10 seconds on CPU for 80 tokens
-    """
-    qa_pipeline = pipeline(
-        "text-generation",
-        model="microsoft/DialoGPT-small",
-        tokenizer="microsoft/DialoGPT-small"
-    )
-
-    prompt = f"""Context: {context[:1200]}
-Question: {question}
-Assistant:"""
-
-    response = qa_pipeline(
-        prompt,
-        max_new_tokens=80,
-        num_beams=1,
-        temperature=0.8,
-        top_k=30,
-        do_sample=True,
-        pad_token_id=50256,  # DialoGPT pad token
-        no_repeat_ngram_size=2
-    )
-
-    return response[0]['generated_text'].split("Assistant:")[-1].strip()
-
-# Choose one based on priority
-def generate_answer(context, question):
-    start_time = time.time()
-    result = generate_answer_fast(context, question)  # For fastest response
-    # result = generate_answer_quality(context, question)  # For better conversation
-    print(f"Generation took: {time.time() - start_time:.2f}s")
-    return result
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+# Load CPU-optimized model
+tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
+model = AutoModelForCausalLM.from_pretrained("distilgpt2")
+
+def generate_answer(context, question, max_new_tokens=100):
+    """Generate answer with CPU optimizations"""
+    # Create concise prompt
+    prompt = f"""Based on the context, answer the question conversationally.
+
+Context:
+{context[:1000]}
+
+Question: {question}
+Answer:"""
+
+    # Tokenize with truncation
+    inputs = tokenizer(
+        prompt,
+        return_tensors="pt",
+        max_length=512,
+        truncation=True
+    )
+
+    # Generate with CPU-optimized settings
+    outputs = model.generate(
+        inputs.input_ids,
+        max_new_tokens=max_new_tokens,
+        num_beams=1,        # Faster than beam search
+        do_sample=True,     # More natural responses
+        temperature=0.7,    # Balance creativity/focus
+        top_k=40,           # Focus on likely tokens
+        top_p=0.9,          # Nucleus sampling
+        pad_token_id=tokenizer.eos_token_id,
+        early_stopping=True
+    )
+
+    # Extract only the new text
+    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return full_text.split("Answer:")[-1].strip()
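The update loads the distilgpt2 tokenizer and model once at module level instead of inside every call, collapses the earlier generate_answer_fast / generate_answer_quality variants into a single generate_answer(), truncates the tokenized prompt to 512 tokens, and adds top_p nucleus sampling. A minimal usage sketch follows, assuming the Space's app code imports this llm.py; the example context and question strings below are made up for illustration and are not part of the commit:

# Illustrative only: calling the updated helper from the Space's app code.
from llm import generate_answer

# Pretend this text came from the Space's retrieval step (hypothetical example).
context = (
    "Hugging Face Spaces can run on free CPU hardware. "
    "Small models such as distilgpt2 keep response times low on CPU."
)
question = "Why pick a small model for a CPU Space?"

# distilgpt2 is loaded once when llm.py is imported, so each call here only
# pays the per-request generation cost (100 new tokens by default).
answer = generate_answer(context, question, max_new_tokens=80)
print(answer)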