Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -2,23 +2,25 @@ import torch
|
|
2 |
import gradio as gr
|
3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
from transformers import BitsAndBytesConfig
|
|
|
|
|
5 |
|
6 |
-
# Function to load a quantized model
|
7 |
def load_quantized_model(model_id="meta-llama/Llama-3.1-8B-Instruct"):
    """Load a 4-bit quantized causal language model and its tokenizer.

    Args:
        model_id: Hugging Face Hub repository to load. Defaults to the
            checkpoint this app was originally hard-coded to use.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # The tokenizer must be created here: the function returns it, and
    # returning an undefined name raises NameError at import time.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Build the config through its constructor rather than from_dict(), whose
    # signature has differed between transformers releases.
    config = BitsAndBytesConfig(load_in_4bit=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=config,
    )
    return model, tokenizer
|
12 |
|
13 |
# Load the model and tokenizer once at import time; generate_response reads
# these module-level names on every call.
model, tokenizer = load_quantized_model()
|
14 |
|
15 |
-
|
16 |
def generate_response(prompt, *, max_new_tokens=256):
    """Generate text for ``prompt`` using the module-level model and tokenizer.

    Args:
        prompt: Input text to tokenize and feed to the model.
        max_new_tokens: Upper bound on the number of tokens generated in
            addition to the prompt. Keyword-only so the single positional
            text input used by the Gradio interface keeps working.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    # Follow the device the model was actually loaded on instead of assuming
    # "cuda", so inputs and weights always end up on the same device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Without an explicit limit, generate() falls back to the library's short
    # default length, which can truncate or entirely suppress the answer.
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
20 |
|
21 |
-
# Gradio interface
|
22 |
iface = gr.Interface(
|
23 |
fn=generate_response,
|
24 |
inputs="text",
|
|
|
2 |
import gradio as gr
|
3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
from transformers import BitsAndBytesConfig
|
5 |
+
import os
|
6 |
+
# Hugging Face access token read from the environment; os.getenv returns None
# when HUGGINGFACE_TOKEN is unset. Passed as `token=` to from_pretrained().
token = os.getenv("HUGGINGFACE_TOKEN")
|
7 |
|
|
|
8 |
def load_quantized_model(model_id="meta-llama/Llama-3.1-8B-Instruct"):
    """Load a 4-bit quantized causal language model and its tokenizer.

    Args:
        model_id: Hugging Face Hub repository to load. Defaults to the
            checkpoint this app was originally hard-coded to use.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # `token` is the module-level value read from HUGGINGFACE_TOKEN; it is
    # forwarded so repositories that require authentication can be downloaded.
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
    # Build the config through its constructor rather than from_dict(), whose
    # signature has differed between transformers releases.
    config = BitsAndBytesConfig(load_in_4bit=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=config,
        # Let the library place the quantized weights on the available
        # accelerator(s) explicitly rather than relying on an implicit default.
        device_map="auto",
        token=token,
    )
    return model, tokenizer
|
14 |
|
15 |
# Load the model and tokenizer once at import time; generate_response reads
# these module-level names on every call.
model, tokenizer = load_quantized_model()
|
16 |
|
17 |
+
|
18 |
def generate_response(prompt, *, max_new_tokens=256):
    """Simple prediction function for Gradio.

    Tokenizes ``prompt``, runs generation on the module-level ``model`` and
    decodes the first returned sequence.

    Args:
        prompt: Input text to tokenize and feed to the model.
        max_new_tokens: Upper bound on the number of tokens generated in
            addition to the prompt. Keyword-only so the single positional
            text input used by the Gradio interface keeps working.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    # Follow the device the model was actually loaded on instead of assuming
    # "cuda", so inputs and weights always end up on the same device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Without an explicit limit, generate() falls back to the library's short
    # default length, which can truncate or entirely suppress the answer.
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
23 |
|
|
|
24 |
iface = gr.Interface(
|
25 |
fn=generate_response,
|
26 |
inputs="text",
|