ChintanSatva committed on
Commit
3666246
·
verified ·
1 Parent(s): 7651fdb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -6
app.py CHANGED
@@ -8,7 +8,6 @@ import torch
8
  import psutil
9
  import cachetools
10
  import hashlib
11
- from bitsandbytes import quantize
12
 
13
  # Set environment variable for cache
14
  os.environ["HF_HOME"] = "/app/cache"
@@ -25,12 +24,11 @@ try:
25
  tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", cache_dir="/app/cache")
26
  model = AutoModelForCausalLM.from_pretrained(
27
  model_name,
28
- torch_dtype=torch.float16,
29
  device_map="cpu",
30
  low_cpu_mem_usage=True,
31
  cache_dir="/app/cache",
32
- trust_remote_code=True,
33
- quantization_config={"load_in_4bit": True} # 4-bit quantization
34
  )
35
  except Exception as e:
36
  logger.error(f"Failed to load BitNet model: {str(e)}")
@@ -49,7 +47,7 @@ def get_text_hash(text: str):
49
  """Generate MD5 hash of text."""
50
  return hashlib.md5(text.encode('utf-8')).hexdigest()
51
 
52
- # Simplified categories (reference only, not included in prompt)
53
  ALLOWED_CATEGORIES = [
54
  {"name": "income", "subcategories": ["dividends", "interest earned", "retirement pension", "tax refund", "unemployment", "wages", "other income"]},
55
  {"name": "transfer in", "subcategories": ["cash advances and loans", "deposit", "investment and retirement funds", "savings", "account transfer", "other transfer in"]},
@@ -103,7 +101,7 @@ Amount: {amount}
103
  inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
104
  outputs = model.generate(
105
  **inputs,
106
- max_new_tokens=100, # Reduced for speed
107
  do_sample=False,
108
  num_beams=1
109
  )
 
8
  import psutil
9
  import cachetools
10
  import hashlib
 
11
 
12
  # Set environment variable for cache
13
  os.environ["HF_HOME"] = "/app/cache"
 
24
  tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", cache_dir="/app/cache")
25
  model = AutoModelForCausalLM.from_pretrained(
26
  model_name,
27
+ torch_dtype=torch.bfloat16, # Optimized for CPU
28
  device_map="cpu",
29
  low_cpu_mem_usage=True,
30
  cache_dir="/app/cache",
31
+ trust_remote_code=True
 
32
  )
33
  except Exception as e:
34
  logger.error(f"Failed to load BitNet model: {str(e)}")
 
47
  """Generate MD5 hash of text."""
48
  return hashlib.md5(text.encode('utf-8')).hexdigest()
49
 
50
+ # Simplified categories (reference only, not in prompt)
51
  ALLOWED_CATEGORIES = [
52
  {"name": "income", "subcategories": ["dividends", "interest earned", "retirement pension", "tax refund", "unemployment", "wages", "other income"]},
53
  {"name": "transfer in", "subcategories": ["cash advances and loans", "deposit", "investment and retirement funds", "savings", "account transfer", "other transfer in"]},
 
101
  inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
102
  outputs = model.generate(
103
  **inputs,
104
+ max_new_tokens=50, # Further reduced for speed
105
  do_sample=False,
106
  num_beams=1
107
  )