Samuel L Meyers committed
Commit 2a8c299 · 1 Parent(s): fe36794

Use quantized model

Files changed (1): app.py (+6 -10)
app.py CHANGED

@@ -7,7 +7,7 @@ import torch
 from conversation import get_default_conv_template
 import gradio as gr
 from pyllamacpp.model import Model
-from urllib import request, response, urlopen, error, parse, robotparser
+import wget
 
 """
 
@@ -25,17 +25,13 @@ while True:
     break
 """
 
-talkers = {
-    "m3b": {
-        "tokenizer": AutoTokenizer.from_pretrained("GeneZC/MiniChat-3B", use_fast=False),
-        "model": AutoModelForCausalLM.from_pretrained("GeneZC/MiniChat-3B", device_map="auto", low_cpu_mem_usage=True),
-        "conv": get_default_conv_template("minichat")
-    }
-}
+from huggingface_hub import hf_hub_download
 
-request.urlretrieve("https://huggingface.co/GGUF/MiniChat-3B/resolve/main/ggml-model-q8_0.bin", "minichat-3b-q8_0.gguf")
+model_path = "minichat-3b.q8_0.gguf"
 
-lcpp_model = Model(model_path="minichat-3b-q8_0.gguf")
+hf_hub_download(repo_id="afrideva/MiniChat-3B-GGUF", filename=model_path)
+
+lcpp_model = Model(model_path=model_path)
 
 def m3b_talk(text):
     resp = ""
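
A note on the new download step: hf_hub_download saves the file into the local Hugging Face cache and returns the path to the cached copy; it does not write filename into the current working directory, so passing the bare filename to Model only works if the file already happens to exist there. A minimal sketch of the same download-and-load flow that uses the returned path instead (same repo and filename as the commit; the gguf_path variable name is ours):

from huggingface_hub import hf_hub_download
from pyllamacpp.model import Model

# Download the quantized GGUF weights; the file lands in the local
# Hugging Face cache and hf_hub_download returns its absolute path.
gguf_path = hf_hub_download(
    repo_id="afrideva/MiniChat-3B-GGUF",
    filename="minichat-3b.q8_0.gguf",
)

# Load the quantized model from the returned cache path.
lcpp_model = Model(model_path=gguf_path)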