ateetvatan committed
Commit e6a07cc · Parent: 5595274

formatting + removed CPU parallelism

Files changed (3):
  1. Dockerfile +0 -1
  2. app.py +2 -0
  3. model_loader.py +2 -2
Dockerfile CHANGED
@@ -8,7 +8,6 @@ RUN useradd -m -u 1000 user
 ENV HOME=/home/user
 ENV APP_HOME=/home/user/app
 ENV HF_HOME=/home/user/.hf_home
-ENV OMP_NUM_THREADS=8

 # 🔹 Set working directory
 WORKDIR $APP_HOME
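With ENV OMP_NUM_THREADS=8 gone, the container no longer pins OpenMP to eight CPU threads and the backend falls back to its own default. A small illustrative check, assuming nothing else sets the variable:

import os

# After this commit the Dockerfile no longer sets OMP_NUM_THREADS, so inside
# the container this lookup returns None and OpenMP-backed code (such as the
# GGUF backend) chooses its own default thread count.
threads = os.environ.get("OMP_NUM_THREADS")
print(threads)  # None, unless overridden at run time, e.g. `docker run -e OMP_NUM_THREADS=8 ...`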
app.py CHANGED
@@ -35,10 +35,12 @@ class PromptRequest(BaseModel):
 class ChatResponse(BaseModel):
     response: str

+
 @app.get("/")
 def root():
     return {"message": "MASX OpenChat API is running"}

+
 @app.get("/status")
 async def status():
     try:
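The new blank lines restore PEP 8's two-blank-line spacing between top-level definitions. For reference, the two endpoints they separate can be exercised like this; the base URL is a placeholder, not taken from this repo:

import requests  # assumes the `requests` package is available

BASE_URL = "http://localhost:7860"  # placeholder; substitute the deployed Space URL

# Root endpoint: simple liveness message defined in app.py
print(requests.get(f"{BASE_URL}/").json())       # {"message": "MASX OpenChat API is running"}

# Status endpoint: whatever the async status() handler reports
print(requests.get(f"{BASE_URL}/status").json())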
model_loader.py CHANGED
@@ -11,8 +11,8 @@ load_dotenv()
 # === High-Precision GGUF Model Configuration ===
 MODEL_REPO = os.getenv("MODEL_REPO", "TheBloke/openchat-3.5-0106-GGUF")
 MODEL_FILE = os.getenv("MODEL_FILE", "openchat-3.5-0106.Q8_0.gguf")
-MODEL_TYPE = os.getenv("MODEL_TYPE", "mistral") # OpenChat 3.5 is Mistral-compatible
-CTX_LEN = int(os.getenv("CTX_LEN", "8192")) # Use full 8K context
+MODEL_TYPE = os.getenv("MODEL_TYPE", "mistral")  # OpenChat 3.5 is Mistral-compatible
+CTX_LEN = int(os.getenv("CTX_LEN", "8192"))  # Use full 8K context

 # === Load Model ===
 model = AutoModelForCausalLM.from_pretrained(
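The hunk ends mid-call at AutoModelForCausalLM.from_pretrained(. A minimal sketch of how the reformatted settings are typically passed to a ctransformers-style loader follows; the keyword names are assumptions, not lines taken from the rest of model_loader.py:

import os
from ctransformers import AutoModelForCausalLM  # assumed loader, per the GGUF config above

MODEL_REPO = os.getenv("MODEL_REPO", "TheBloke/openchat-3.5-0106-GGUF")
MODEL_FILE = os.getenv("MODEL_FILE", "openchat-3.5-0106.Q8_0.gguf")
MODEL_TYPE = os.getenv("MODEL_TYPE", "mistral")
CTX_LEN = int(os.getenv("CTX_LEN", "8192"))

# Sketch only: the keyword arguments below are assumptions about the
# ctransformers API, not part of this commit.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    model_file=MODEL_FILE,
    model_type=MODEL_TYPE,       # OpenChat 3.5 is Mistral-compatible
    context_length=CTX_LEN,      # full 8K context
)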