Reality123b committed
Commit 3b63b4a · verified · 1 Parent(s): da78b12

Update app.py

Files changed (1): app.py (+29 -125)

app.py CHANGED
@@ -1,99 +1,3 @@
-# from fastapi import FastAPI, HTTPException
-# from pydantic import BaseModel
-# from transformers import AutoModelForCausalLM, AutoTokenizer
-# import torch
-# from huggingface_hub import snapshot_download
-# from safetensors.torch import load_file
-
-# class ModelInput(BaseModel):
-#     prompt: str
-#     max_new_tokens: int = 50
-
-# app = FastAPI()
-
-# # Define model paths
-# base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
-# adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
-
-# try:
-#     # First load the base model
-#     print("Loading base model...")
-#     model = AutoModelForCausalLM.from_pretrained(
-#         base_model_path,
-#         torch_dtype=torch.float16,
-#         trust_remote_code=True,
-#         device_map="auto"
-#     )
-
-#     # Load tokenizer from base model
-#     print("Loading tokenizer...")
-#     tokenizer = AutoTokenizer.from_pretrained(base_model_path)
-
-#     # Download adapter weights
-#     print("Downloading adapter weights...")
-#     adapter_path_local = snapshot_download(adapter_path)
-
-#     # Load the safetensors file
-#     print("Loading adapter weights...")
-#     state_dict = load_file(f"{adapter_path_local}/adapter_model.safetensors")
-
-#     # Load state dict into model
-#     model.load_state_dict(state_dict, strict=False)
-
-#     print("Model and adapter loaded successfully!")
-
-# except Exception as e:
-#     print(f"Error during model loading: {e}")
-#     raise
-
-# def generate_response(model, tokenizer, instruction, max_new_tokens=128):
-#     """Generate a response from the model based on an instruction."""
-#     try:
-#         messages = [{"role": "user", "content": instruction}]
-#         input_text = tokenizer.apply_chat_template(
-#             messages, tokenize=False, add_generation_prompt=True
-#         )
-
-#         inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
-#         outputs = model.generate(
-#             inputs,
-#             max_new_tokens=max_new_tokens,
-#             temperature=0.2,
-#             top_p=0.9,
-#             do_sample=True,
-#         )
-
-#         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-#         return response
-
-#     except Exception as e:
-#         raise ValueError(f"Error generating response: {e}")
-
-# @app.post("/generate")
-# async def generate_text(input: ModelInput):
-#     try:
-#         response = generate_response(
-#             model=model,
-#             tokenizer=tokenizer,
-#             instruction=input.prompt,
-#             max_new_tokens=input.max_new_tokens
-#         )
-#         return {"generated_text": response}
-
-#     except Exception as e:
-#         raise HTTPException(status_code=500, detail=str(e))
-
-# @app.get("/")
-# async def root():
-#     return {"message": "Welcome to the Model API!"}
-
-
-
-
-
-
-# //////////////////////////////////////////
-
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -111,38 +15,38 @@ app = FastAPI()
 base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
 adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
 
-try:
-    # Load the base model
-    print("Loading base model...")
-    model = AutoModelForCausalLM.from_pretrained(
-        base_model_path,
-        torch_dtype=torch.float16,
-        trust_remote_code=True,
-        device_map="auto"
-    )
-
-    # Load tokenizer
-    print("Loading tokenizer...")
-    tokenizer = AutoTokenizer.from_pretrained(base_model_path)
-
-    # Download adapter weights
-    print("Downloading adapter weights...")
-    adapter_path_local = snapshot_download(repo_id=adapter_path)
-
-    # Load the safetensors file
-    print("Loading adapter weights...")
-    adapter_file = f"{adapter_path_local}/adapter_model.safetensors"
-    state_dict = load_file(adapter_file)
-
-    # Load state dict into model
-    print("Applying adapter weights...")
-    model.load_state_dict(state_dict, strict=False)
-
-    print("Model and adapter loaded successfully!")
-
-except Exception as e:
-    print(f"Error during model loading: {e}")
-    raise
+# Load the model and tokenizer
+def load_model_and_tokenizer():
+    try:
+        print("Loading base model...")
+        model = AutoModelForCausalLM.from_pretrained(
+            base_model_path,
+            torch_dtype=torch.float16,
+            trust_remote_code=True,
+            device_map="auto"
+        )
+
+        print("Loading tokenizer...")
+        tokenizer = AutoTokenizer.from_pretrained(base_model_path)
+
+        print("Downloading adapter weights...")
+        adapter_path_local = snapshot_download(repo_id=adapter_path)
+
+        print("Loading adapter weights...")
+        adapter_file = f"{adapter_path_local}/adapter_model.safetensors"
+        state_dict = load_file(adapter_file)
+
+        print("Applying adapter weights...")
+        model.load_state_dict(state_dict, strict=False)
+
+        print("Model and adapter loaded successfully!")
+
+        return model, tokenizer
+    except Exception as e:
+        print(f"Error during model loading: {e}")
+        raise
+
+model, tokenizer = load_model_and_tokenizer()
 
 def generate_response(model, tokenizer, instruction, max_new_tokens=2048):
     """Generate a response from the model based on an instruction."""
@@ -154,7 +58,7 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=2048):
             truncation=True,
             max_length=tokenizer.model_max_length
         ).to(model.device)
-
+
         # Generate response
        outputs = model.generate(
             inputs,
@@ -167,13 +71,14 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=2048):
         # Decode and strip input prompt from response
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         generated_text = response[len(instruction):].strip()
-
+
         print(f"Instruction: {instruction}") # Debugging line
         print(f"Generated Response: {generated_text}") # Debugging line
 
         return generated_text
 
     except Exception as e:
+        print(f"Error generating response: {e}")
         raise ValueError(f"Error generating response: {e}")
 
 
@@ -194,4 +99,3 @@ async def generate_text(input: ModelInput):
 @app.get("/")
 async def root():
     return {"message": "Welcome to the Model API!"}
-
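
One detail worth flagging about the model.load_state_dict(state_dict, strict=False) call that both the old and new versions rely on: with strict=False, PyTorch silently skips keys that don't line up, so an adapter file whose keys don't match the base model would appear to load cleanly. A minimal sketch of the return value that makes this visible (the toy module and tensors are stand-ins for illustration, not part of app.py):

import torch
from torch import nn

# Toy stand-ins for app.py's `model` and `state_dict`, just to show the return value.
model = nn.Linear(4, 4)
state_dict = {"weight": torch.zeros(4, 4), "not_in_model": torch.zeros(1)}

# load_state_dict returns a NamedTuple of (missing_keys, unexpected_keys)
# that strict=False otherwise hides from the caller.
result = model.load_state_dict(state_dict, strict=False)
print(result.missing_keys)     # parameters the file did not provide (here: ['bias'])
print(result.unexpected_keys)  # file entries the model ignored (here: ['not_in_model'])

Logging those two lists after applying the adapter would confirm the weights actually landed on the base model.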
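
For reference, a minimal client sketch for the two routes app.py exposes after this commit. The launch command, host, port, and example prompt are assumptions (uvicorn defaults), not part of the commit:

# Hypothetical client for the FastAPI app above; assumes it was started with
# something like `uvicorn app:app --host 0.0.0.0 --port 8000`.
import requests

BASE_URL = "http://localhost:8000"  # assumption: adjust to the actual deployment

# GET / returns the welcome message defined in app.py.
print(requests.get(f"{BASE_URL}/").json())

# POST /generate takes a JSON body matching the ModelInput schema.
payload = {"prompt": "What is the capital of France?", "max_new_tokens": 128}
resp = requests.post(f"{BASE_URL}/generate", json=payload, timeout=300)
resp.raise_for_status()
print(resp.json()["generated_text"])

A generous timeout is deliberate: with max_new_tokens defaulting to 2048 in generate_response, a single request can take a while on CPU-only hardware.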