Reality123b committed
Commit 3003014 · verified · 1 Parent(s): 498ae97

Update app.py

Files changed (1):
  app.py +15 -158
app.py CHANGED
@@ -1,105 +1,7 @@
-# from fastapi import FastAPI, HTTPException
-# from pydantic import BaseModel
-# from transformers import AutoModelForCausalLM, AutoTokenizer
-# import torch
-# from huggingface_hub import snapshot_download
-# from safetensors.torch import load_file
-
-# class ModelInput(BaseModel):
-#     prompt: str
-#     max_new_tokens: int = 50
-
-# app = FastAPI()
-
-# # Define model paths
-# base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
-# adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
-
-# try:
-#     # First load the base model
-#     print("Loading base model...")
-#     model = AutoModelForCausalLM.from_pretrained(
-#         base_model_path,
-#         torch_dtype=torch.float16,
-#         trust_remote_code=True,
-#         device_map="auto"
-#     )
-
-#     # Load tokenizer from base model
-#     print("Loading tokenizer...")
-#     tokenizer = AutoTokenizer.from_pretrained(base_model_path)
-
-#     # Download adapter weights
-#     print("Downloading adapter weights...")
-#     adapter_path_local = snapshot_download(adapter_path)
-
-#     # Load the safetensors file
-#     print("Loading adapter weights...")
-#     state_dict = load_file(f"{adapter_path_local}/adapter_model.safetensors")
-
-#     # Load state dict into model
-#     model.load_state_dict(state_dict, strict=False)
-
-#     print("Model and adapter loaded successfully!")
-
-# except Exception as e:
-#     print(f"Error during model loading: {e}")
-#     raise
-
-# def generate_response(model, tokenizer, instruction, max_new_tokens=128):
-#     """Generate a response from the model based on an instruction."""
-#     try:
-#         messages = [{"role": "user", "content": instruction}]
-#         input_text = tokenizer.apply_chat_template(
-#             messages, tokenize=False, add_generation_prompt=True
-#         )
-
-#         inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
-#         outputs = model.generate(
-#             inputs,
-#             max_new_tokens=max_new_tokens,
-#             temperature=0.2,
-#             top_p=0.9,
-#             do_sample=True,
-#         )
-
-#         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-#         return response
-
-#     except Exception as e:
-#         raise ValueError(f"Error generating response: {e}")
-
-# @app.post("/generate")
-# async def generate_text(input: ModelInput):
-#     try:
-#         response = generate_response(
-#             model=model,
-#             tokenizer=tokenizer,
-#             instruction=input.prompt,
-#             max_new_tokens=input.max_new_tokens
-#         )
-#         return {"generated_text": response}
-
-#     except Exception as e:
-#         raise HTTPException(status_code=500, detail=str(e))
-
-# @app.get("/")
-# async def root():
-#     return {"message": "Welcome to the Model API!"}
-
-
-
-
-
-
-# //////////////////////////////////////////
-
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import pipeline, TextStreamer
 import torch
-from huggingface_hub import snapshot_download
-from safetensors.torch import load_file
 
 class ModelInput(BaseModel):
     prompt: str
@@ -107,62 +9,21 @@ class ModelInput(BaseModel):
 
 app = FastAPI()
 
-# Define model paths
-base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
-adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
-
-try:
-    # Load the base model
-    print("Loading base model...")
-    model = AutoModelForCausalLM.from_pretrained(
-        base_model_path,
-        torch_dtype=torch.float16,
-        trust_remote_code=True,
-        device_map="auto"
-    )
-
-    # Load tokenizer
-    print("Loading tokenizer...")
-    tokenizer = AutoTokenizer.from_pretrained(base_model_path)
-
-    # Download adapter weights
-    print("Downloading adapter weights...")
-    adapter_path_local = snapshot_download(repo_id=adapter_path)
-
-    # Load the safetensors file
-    print("Loading adapter weights...")
-    adapter_file = f"{adapter_path_local}/adapter_model.safetensors"
-    state_dict = load_file(adapter_file)
-
-    # Load state dict into model
-    print("Applying adapter weights...")
-    model.load_state_dict(state_dict, strict=False)
-
-    print("Model and adapter loaded successfully!")
-
-except Exception as e:
-    print(f"Error during model loading: {e}")
-    raise
-
-def generate_response(model, tokenizer, instruction, max_new_tokens=2048):
-    """Generate a response from the model based on an instruction."""
+# Initialize text generation pipeline
+generator = pipeline(
+    "text-generation",
+    model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+    device=-1  # Run on CPU; set device=0 to use the first GPU
+)
+
+# Create text streamer
+streamer = TextStreamer(generator.tokenizer, skip_prompt=True)
+
+def generate_response(prompt: str, max_new_tokens: int = 2048):
     try:
-        # Format input for the model
-        inputs = tokenizer.encode(instruction, return_tensors="pt").to(model.device)
-
-        # Generate response
-        outputs = model.generate(
-            inputs,
-            max_new_tokens=max_new_tokens,
-            temperature=0.7,
-            top_p=0.9,
-            do_sample=True,
-        )
-
-        # Decode and return the output
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return response
-
+        messages = [{"role": "user", "content": prompt}]
+        output = generator(messages, max_new_tokens=max_new_tokens, do_sample=False, streamer=streamer)
+        return output[0]["generated_text"][-1]["content"]
     except Exception as e:
         raise ValueError(f"Error generating response: {e}")
 
@@ -170,17 +31,13 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=2048):
 async def generate_text(input: ModelInput):
     try:
         response = generate_response(
-            model=model,
-            tokenizer=tokenizer,
-            instruction=input.prompt,
+            prompt=input.prompt,
             max_new_tokens=input.max_new_tokens
         )
        return {"generated_text": response}
-
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
 @app.get("/")
 async def root():
-    return {"message": "Welcome to the Model API!"}
-
+    return {"message": "Welcome to the Streaming Model API!"}
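A note on the new generation path, since the diff is terse: when a transformers text-generation pipeline is called with a list of chat messages, each returned record's "generated_text" is the full message list (the input turns plus the newly generated assistant turn), which is why the handler returns output[0]["generated_text"][-1]["content"]. The TextStreamer prints tokens to the server's stdout as they are generated. A minimal standalone sketch of the same call shape (same model as the commit; the example prompt is illustrative, not from the source):

    from transformers import TextStreamer, pipeline

    # Same setup as the new app.py: CPU pipeline plus a stdout token streamer.
    generator = pipeline(
        "text-generation",
        model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        device=-1,  # CPU; use device=0 for the first GPU
    )
    streamer = TextStreamer(generator.tokenizer, skip_prompt=True)

    messages = [{"role": "user", "content": "What is 2 + 2?"}]
    output = generator(messages, max_new_tokens=64, do_sample=False, streamer=streamer)

    # "generated_text" holds the whole chat history; the last element is
    # the assistant reply that was just generated.
    print(output[0]["generated_text"][-1]["content"])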