Priyanshukr-1 committed on
Commit 1fb027f · verified · 1 Parent(s): 314bed8

Update app.py

Files changed (1)
  1. app.py +4 -1
app.py CHANGED
@@ -29,7 +29,8 @@ else:
     llm = Llama(
         model_path=model_path,
         n_ctx=1024,
-        n_threads=4  # Adjust for your CPU
+        n_threads=os.cpu_count(),  # Adjust for your CPU
+        n_batch=64
     )
 
 @app.get("/")
@@ -54,6 +55,8 @@ async def generate(request: Request):
 
     print("📤 Raw model response:", response)
 
+    llm.reset()
+
     return {
         "response": response["choices"][0]["message"]["content"].strip()
     }
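
For context, a minimal sketch of how the patched parts of app.py might fit together after this commit. Everything outside the diff (the placeholder model_path, the /generate route, the request body shape, and the root endpoint's response) is an assumption for illustration, not part of the change:

import os

from fastapi import FastAPI, Request
from llama_cpp import Llama

app = FastAPI()
model_path = "model.gguf"  # hypothetical placeholder; the real path is resolved earlier in app.py

llm = Llama(
    model_path=model_path,
    n_ctx=1024,
    n_threads=os.cpu_count(),  # Adjust for your CPU
    n_batch=64                 # prompt tokens evaluated per batch
)

@app.get("/")
async def root():
    return {"status": "ok"}  # assumed health-check response

@app.post("/generate")
async def generate(request: Request):
    body = await request.json()  # assumed request shape: {"prompt": "..."}
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": body.get("prompt", "")}]
    )
    print("📤 Raw model response:", response)

    llm.reset()  # reset the model's internal state between requests

    return {
        "response": response["choices"][0]["message"]["content"].strip()
    }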