arjunanand13 committed
Commit 96ffa43 · verified · 1 Parent(s): 677d60b

Update app.py

Files changed (1): app.py (+7, -5)
app.py CHANGED
@@ -2,23 +2,25 @@ import torch
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers import BitsAndBytesConfig
+import os
+token = os.getenv("HUGGINGFACE_TOKEN")
 
-# Function to load a quantized model
 def load_quantized_model():
-    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
+    """Function to load a quantized model."""
+    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct", token=token)
     config = BitsAndBytesConfig.from_dict({"load_in_4bit": True})
-    model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B-Instruct", quantization_config=config)
+    model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B-Instruct", quantization_config=config, token=token)
     return model, tokenizer
 
 model, tokenizer = load_quantized_model()
 
-# Simple prediction function for Gradio
+
 def generate_response(prompt):
+    """Simple prediction function for Gradio."""
     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
     outputs = model.generate(**inputs)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-# Gradio interface
 iface = gr.Interface(
     fn=generate_response,
     inputs="text",