Deepan13 committed on
Commit
f23b1f5
·
1 Parent(s): 90c0044

Switch to GPTQ model, remove BitsAndBytesConfig

Browse files
Files changed (2) hide show
  1. app.py +4 -6
  2. requirements.txt +2 -3
app.py CHANGED
@@ -1,14 +1,12 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
3
 
4
- model_id = "TheBloke/CodeLlama-7B-GPTQ" # Example 4-bit quantized model
5
-
6
- bnb_config = BitsAndBytesConfig(load_in_4bit=True, device_map="auto")
7
 
8
  tokenizer = AutoTokenizer.from_pretrained(model_id)
9
  model = AutoModelForCausalLM.from_pretrained(
10
  model_id,
11
- quantization_config=bnb_config,
12
  device_map="auto"
13
  )
14
 
@@ -22,6 +20,6 @@ gr.Interface(
22
  fn=generate_response,
23
  inputs=gr.Textbox(lines=5, label="Your prompt"),
24
  outputs=gr.Textbox(label="Code Llama response"),
25
- title="Code Llama Demo",
26
  description="Ask questions or request code snippets!"
27
  ).launch()
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
 
4
+ # Use a pre-quantized GPTQ model; no BitsAndBytesConfig is needed
5
+ model_id = "TheBloke/CodeLlama-7B-GPTQ"
 
6
 
7
  tokenizer = AutoTokenizer.from_pretrained(model_id)
8
  model = AutoModelForCausalLM.from_pretrained(
9
  model_id,
 
10
  device_map="auto"
11
  )
12
 
 
20
  fn=generate_response,
21
  inputs=gr.Textbox(lines=5, label="Your prompt"),
22
  outputs=gr.Textbox(label="Code Llama response"),
23
+ title="Code Llama Demo (GPTQ)",
24
  description="Ask questions or request code snippets!"
25
  ).launch()
requirements.txt CHANGED
@@ -1,4 +1,3 @@
1
- transformers
2
- gradio
3
  accelerate
4
- bitsandbytes
 
1
+ transformers==4.39.3
 
2
  accelerate
3
+ gradio