DesiredName committed
Commit 1aba7f3 · verified · 1 Parent(s): 0132f9a

Update app.py

Files changed (1)
  1. app.py +5 -14
app.py CHANGED
@@ -1,22 +1,11 @@
  from fastapi import FastAPI
  import uvicorn
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+ from transformers import AutoModelForCausalLM, AutoTokenizer
 
- model_name = "TheBloke/Wizard-Vicuna-13B-Uncensored-HF"
+ model_name = "Tap-M/Luna-AI-Llama2-Uncensored"
 
- # Configure 4-bit quantization
- bnb_config = BitsAndBytesConfig(
-     load_in_4bit=True,                     # Enable 4-bit quantization
-     bnb_4bit_quant_type="nf4",             # Use 4-bit NormalFloat (optimal)
-     bnb_4bit_compute_dtype="float16",      # Faster computation with float16
-     bnb_4bit_use_double_quant=True,        # Extra compression
-     llm_int8_enable_fp32_cpu_offload=True
- )
-
- # Load model with quantization
  model = AutoModelForCausalLM.from_pretrained(
      model_name,              # Example model
-     quantization_config=bnb_config,
      device_map="auto",       # Auto-distribute across GPU/CPU
      trust_remote_code=True   # Required for some models
  )
@@ -33,7 +22,9 @@ def greet_json():
 
  @app.get("/message")
  async def message(input: str):
-     inputs = tokenizer(input, return_tensors="pt", padding=True, truncation=True)
+     prompt = "USER:" + input + "\nASSISTANT:"
+
+     inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
 
      output = model.generate(
          input_ids=inputs["input_ids"],
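
For readers skimming the diff, here is a minimal sketch of what the whole app.py plausibly looks like after this commit. Everything outside the two hunks is an assumption: the tokenizer setup, the `app = FastAPI()` line, the body of `greet_json()`, the tail of `model.generate()` (attention_mask, max_new_tokens), and the uvicorn port are filled in from common FastAPI-on-Spaces conventions, not taken from the committed file.

```python
from fastapi import FastAPI
import uvicorn
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Tap-M/Luna-AI-Llama2-Uncensored"

# The commit drops 4-bit quantization: the model now loads in full precision.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",       # Auto-distribute across GPU/CPU
    trust_remote_code=True   # Required for some models
)

# Assumption: the tokenizer must be created somewhere in app.py for the
# tokenizer(...) call below to work; the diff never shows that line.
tokenizer = AutoTokenizer.from_pretrained(model_name)

app = FastAPI()  # Assumption: app construction sits outside the diffed hunks


@app.get("/")
def greet_json():
    # Assumption: only the name appears in the hunk header
    # "def greet_json():"; the body is not shown in the diff.
    return {"Hello": "World!"}


@app.get("/message")
async def message(input: str):
    # The commit wraps the raw query string in the USER:/ASSISTANT:
    # turn format that Luna-AI-Llama2-Uncensored expects.
    prompt = "USER:" + input + "\nASSISTANT:"

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)

    output = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],  # Assumption: diff cuts off here
        max_new_tokens=256,                       # Assumed generation cap
    )
    return {"response": tokenizer.decode(output[0], skip_special_tokens=True)}


if __name__ == "__main__":
    # Assumption: 7860 is the conventional Hugging Face Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860)
```

If the sketch matches the rest of the file, the endpoint can be exercised with, e.g., `curl "http://localhost:7860/message?input=Hello"`.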