Sushwetabm committed
Commit 6d5a8ce · 1 Parent(s): cf9564b

switched the model to Salesforce/codet5p-220m

Files changed (4)
  1. analyzer.py +23 -101
  2. main.py +3 -2
  3. model.py +135 -100
  4. setup.py +101 -85
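
For reference, a minimal sketch (not part of the diff) of the encoder-decoder pattern this commit switches to: CodeT5+ is a seq2seq model, so it is loaded with AutoModelForSeq2SeqLM and prompted with plain text instead of a chat template. The prompt and snippet below are illustrative only; the real prompt template lives in analyzer.py.

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_id = "Salesforce/codet5p-220m"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Illustrative prompt; analyzer.py builds the real one.
prompt = "Fix the following buggy code:\ndef add(a, b):\n    return a - b"
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))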
analyzer.py CHANGED
@@ -211,116 +211,38 @@ logger.addHandler(handler)
 def analyze_code(tokenizer, model, language, code):
     start_time = time.time()
 
-    messages = [
-        {
-            "role": "system",
-            "content": (
-                "You are a helpful and expert-level AI code reviewer and bug fixer. "
-                "Your task is to analyze the given buggy code in the specified programming language, "
-                "identify bugs (logical, syntax, runtime, etc.), and fix them. "
-                "Return a JSON object with the following keys:\n\n"
-                "1. 'bug_analysis': a list of objects, each containing:\n"
-                " - 'line_number': the line number (approximate if needed)\n"
-                " - 'error_message': a short name of the bug\n"
-                " - 'explanation': short explanation of the problem\n"
-                " - 'fix_suggestion': how to fix it\n"
-                "2. 'corrected_code': the entire corrected code block.\n\n"
-                "Respond only with a JSON block, no extra commentary."
-            )
-        },
-        {
-            "role": "user",
-            "content": f"💻 Language: {language}\n🐞 Buggy Code:\n```{language.lower()}\n{code.strip()}\n```"
-        }
-    ]
+    prompt = (
+        f"Language: {language}\n"
+        f"Task: Fix the following buggy code and explain the bugs.\n"
+        f"Input Code:\n{code.strip()}\n\n"
+        f"Respond with a JSON like this:\n"
+        f"{{\n"
+        f" \"bug_analysis\": [{{\"line_number\": X, \"error_message\": \"...\", \"explanation\": \"...\", \"fix_suggestion\": \"...\"}}],\n"
+        f" \"corrected_code\": \"...\"\n"
+        f"}}"
+    )
 
     try:
-        logger.info("📦 Tokenizing input...")
-        inputs = tokenizer.apply_chat_template(
-            messages,
-            add_generation_prompt=True,
-            return_tensors="pt"
-        ).to(model.device)
-
-        attention_mask = (inputs != tokenizer.pad_token_id).long()
-
-        logger.info("⚙️ Starting generation...")
-        generation_start = time.time()
-        outputs = model.generate(
-            inputs,
-            attention_mask=attention_mask,
-            max_new_tokens=1024,
-            do_sample=False,
-            pad_token_id=tokenizer.eos_token_id,
-            eos_token_id=tokenizer.eos_token_id
-        )
-        generation_time = time.time() - generation_start
-        logger.info(f"⚡ Generation completed in {generation_time:.2f} seconds")
-
-        logger.info("📝 Decoding response...")
-        response = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
-
-        logger.info(f"📄 Response length: {len(response)} characters")
-        logger.info(f"🔍 First 100 chars: {response[:100]}...")
-
-        # Attempt to parse as JSON
-        logger.info("🔍 Attempting to parse JSON...")
-        cleaned_response = response.strip()
-        if cleaned_response.startswith('```json'):
-            cleaned_response = cleaned_response[7:]
-        elif cleaned_response.startswith('```'):
-            cleaned_response = cleaned_response[3:]
-        if cleaned_response.endswith('```'):
-            cleaned_response = cleaned_response[:-3]
-
-        cleaned_response = cleaned_response.strip()
-
-        json_output = json.loads(cleaned_response)
-
-        total_time = time.time() - start_time
-        logger.info(f"✅ Analysis completed successfully in {total_time:.2f} seconds")
-
-        # Validate and patch missing keys
-        if not isinstance(json_output, dict):
-            raise ValueError("Parsed response is not a dictionary")
-
-        if 'bug_analysis' not in json_output:
-            logger.warning("⚠️ Missing 'bug_analysis' key, adding empty list")
-            json_output['bug_analysis'] = []
-
-        if 'corrected_code' not in json_output:
-            logger.warning("⚠️ Missing 'corrected_code' key, adding original code")
-            json_output['corrected_code'] = code
-
-        return json_output
-
-    except json.JSONDecodeError as e:
-        logger.error(f"❌ JSON decode error: {e}")
-        logger.error(f"📄 Raw response: {repr(response)}")
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)
+        output = model.generate(**inputs, max_new_tokens=1024)
+        response = tokenizer.decode(output[0], skip_special_tokens=True)
+
+        # Clean response if needed
+        cleaned = response.strip().strip("```json").strip("```").strip()
+        json_output = json.loads(cleaned)
+
         return {
-            "bug_analysis": [{
-                "line_number": 1,
-                "error_message": "Analysis parsing failed",
-                "explanation": "The AI model returned a response that couldn't be parsed as JSON",
-                "fix_suggestion": "Please try again or check the code format"
-            }],
-            "corrected_code": code,
-            "raw_output": response,
-            "parsing_error": str(e)
+            "bug_analysis": json_output.get("bug_analysis", []),
+            "corrected_code": json_output.get("corrected_code", code)
         }
 
     except Exception as e:
-        total_time = time.time() - start_time
-        logger.error(f"❌ Analysis failed after {total_time:.2f} seconds: {str(e)}")
-        logger.error(f"💥 Exception type: {type(e).__name__}")
         return {
             "bug_analysis": [{
-                "line_number": 1,
-                "error_message": "Analysis failed",
-                "explanation": f"An error occurred during analysis: {str(e)}",
-                "fix_suggestion": "Please try again or contact support"
+                "line_number": 0,
+                "error_message": "Failed to parse",
+                "explanation": str(e),
+                "fix_suggestion": "Try simplifying the code."
             }],
-            "corrected_code": code,
-            "error": str(e),
-            "error_type": type(e).__name__
+            "corrected_code": code
         }
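
A hedged usage sketch of the rewritten analyze_code (module paths and the sample snippet are assumptions, not taken from the diff): it now takes (tokenizer, model, language, code) and always returns a dict with "bug_analysis" and "corrected_code", falling back to the original code when the model output cannot be parsed as JSON.

from model import load_model_sync   # main.py's get_model() is assumed to wrap this loader
from analyzer import analyze_code   # module name assumed from the file name

tokenizer, model = load_model_sync()
buggy = "def divide(a, b):\n    return a / 0"   # illustrative buggy snippet
result = analyze_code(tokenizer, model, "Python", buggy)

for issue in result["bug_analysis"]:
    print(issue.get("line_number"), issue.get("error_message"), issue.get("fix_suggestion"))
print(result["corrected_code"])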
main.py CHANGED
@@ -295,7 +295,7 @@ async def analyze(req: AnalyzeRequest):
 
     try:
         tokenizer, model = get_model()
-        result = analyze_code(req.language, req.code, tokenizer, model)
+        result = analyze_code(tokenizer, model, req.language, req.code)
 
         if result is None:
             raise HTTPException(status_code=500, detail="Model failed to return any response.")
@@ -350,7 +350,8 @@ async def analyze_for_frontend(req: AnalyzeRequest):
 
     try:
         tokenizer, model = get_model()
-        result = analyze_code(req.language, req.code, tokenizer, model)
+        result = analyze_code(tokenizer, model, req.language, req.code)
+
 
         if result is None:
             return {
model.py CHANGED
@@ -1,124 +1,159 @@
-# model.py - Optimized version
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
-from functools import lru_cache
-import os
-import asyncio
-from concurrent.futures import ThreadPoolExecutor
-import logging
-
-logger = logging.getLogger(__name__)
-
-# Global variables to store loaded model
-_tokenizer = None
-_model = None
-_model_loading = False
-_model_loaded = False
-
-@lru_cache(maxsize=1)
-def get_model_config():
-    """Cache model configuration"""
-    return {
-        "model_id": "deepseek-ai/deepseek-coder-1.3b-instruct",
-        "torch_dtype": torch.bfloat16,
-        "device_map": "auto",
-        "trust_remote_code": True,
-        # Add these optimizations
-        "low_cpu_mem_usage": True,
-        "use_cache": True,
-    }
-
-def load_model_sync():
-    """Synchronous model loading with optimizations"""
-    global _tokenizer, _model, _model_loaded
-
-    if _model_loaded:
-        return _tokenizer, _model
-
-    config = get_model_config()
-    model_id = config["model_id"]
-
-    logger.info(f"🔧 Loading model {model_id}...")
-
-    try:
-        # Set cache directory to avoid re-downloading
-        cache_dir = os.environ.get("TRANSFORMERS_CACHE", "./model_cache")
-        os.makedirs(cache_dir, exist_ok=True)
-
-        # Load tokenizer first (faster)
-        logger.info("📝 Loading tokenizer...")
-        _tokenizer = AutoTokenizer.from_pretrained(
-            model_id,
-            trust_remote_code=config["trust_remote_code"],
-            cache_dir=cache_dir,
-            use_fast=True,  # Use fast tokenizer if available
-        )
-
-        # Load model with optimizations
-        logger.info("🧠 Loading model...")
-        _model = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            trust_remote_code=config["trust_remote_code"],
-            torch_dtype=config["torch_dtype"],
-            device_map=config["device_map"],
-            low_cpu_mem_usage=config["low_cpu_mem_usage"],
-            cache_dir=cache_dir,
-            offload_folder="offload",
-            offload_state_dict=True
-        )
-
-        # Set to evaluation mode
-        _model.eval()
-
-        _model_loaded = True
-        logger.info("✅ Model loaded successfully!")
-        return _tokenizer, _model
-
-    except Exception as e:
-        logger.error(f"❌ Failed to load model: {e}")
-        raise
-
-async def load_model_async():
-    """Asynchronous model loading"""
-    global _model_loading
-
-    if _model_loaded:
-        return _tokenizer, _model
-
-    if _model_loading:
-        # Wait for ongoing loading to complete
-        while _model_loading and not _model_loaded:
-            await asyncio.sleep(0.1)
-        return _tokenizer, _model
-
-    _model_loading = True
-
-    try:
-        # Run model loading in thread pool to avoid blocking
-        loop = asyncio.get_event_loop()
-        with ThreadPoolExecutor(max_workers=1) as executor:
-            tokenizer, model = await loop.run_in_executor(
-                executor, load_model_sync
-            )
-        return tokenizer, model
-    finally:
-        _model_loading = False
-
-def get_model():
-    """Get the loaded model (for synchronous access)"""
-    if not _model_loaded:
-        return load_model_sync()
-    return _tokenizer, _model
-
-def is_model_loaded():
-    """Check if model is loaded"""
-    return _model_loaded
-
-def get_model_info():
-    """Get model information without loading"""
-    config = get_model_config()
-    return {
-        "model_id": config["model_id"],
-        "loaded": _model_loaded,
-        "loading": _model_loading,
-    }
+# # model.py - Optimized version
+# from transformers import AutoTokenizer, AutoModelForCausalLM
+# ... (the entire previous implementation above, kept verbatim as a commented-out block) ...
+
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from functools import lru_cache
+import logging
+
+logger = logging.getLogger(__name__)
+_model_loaded = False
+_tokenizer = None
+_model = None
+
+@lru_cache(maxsize=1)
+def get_model_config():
+    return {
+        "model_id": "Salesforce/codet5p-220m",
+        "trust_remote_code": True
+    }
+
+def load_model_sync():
+    global _tokenizer, _model, _model_loaded
+
+    if _model_loaded:
+        return _tokenizer, _model
+
+    config = get_model_config()
+    model_id = config["model_id"]
+
+    try:
+        _tokenizer = AutoTokenizer.from_pretrained(model_id)
+        _model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
+
+        _model.eval()
+        _model_loaded = True
+        return _tokenizer, _model
+
+    except Exception as e:
+        logger.error(f"❌ Failed to load model: {e}")
+        raise
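
The slimmed-down loader keeps the same lazy, module-level caching behaviour as before; a small usage sketch (the module name is assumed from the file name):

from model import load_model_sync

tokenizer, model = load_model_sync()    # first call downloads/loads Salesforce/codet5p-220m
tokenizer2, model2 = load_model_sync()  # later calls return the cached objects
assert model is model2 and tokenizer is tokenizer2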
setup.py CHANGED
@@ -1,106 +1,122 @@
-#!/usr/bin/env python3
-"""
-Quick setup script to optimize your existing ML microservice.
-Run this to set up caching and pre-download the model.
-"""
-
-import os
-import sys
-import logging
-from pathlib import Path
-
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-def setup_cache_directory():
-    """Create cache directory for models"""
-    cache_dir = Path("./model_cache")
-    cache_dir.mkdir(exist_ok=True)
-    logger.info(f"✅ Cache directory created: {cache_dir.absolute()}")
-    return cache_dir
-
-def set_environment_variables():
-    """Set environment variables for optimization"""
-    env_vars = {
-        "TRANSFORMERS_CACHE": "./model_cache",
-        "HF_HOME": "./model_cache",
-        "TORCH_HOME": "./model_cache",
-        "TOKENIZERS_PARALLELISM": "false",
-        "OMP_NUM_THREADS": "4"
-    }
-
-    for key, value in env_vars.items():
-        os.environ[key] = value
-        logger.info(f"Set {key}={value}")
-
-def pre_download_model():
-    """Pre-download the model to cache"""
-    try:
-        from transformers import AutoTokenizer, AutoModelForCausalLM
-
-        model_id = "deepseek-ai/deepseek-coder-1.3b-instruct"
-        cache_dir = "./model_cache"
-
-        logger.info(f"🔧 Pre-downloading model: {model_id}")
-        logger.info("This may take a few minutes on first run...")
-
-        # Download tokenizer
-        logger.info("📝 Downloading tokenizer...")
-        tokenizer = AutoTokenizer.from_pretrained(
-            model_id,
-            cache_dir=cache_dir,
-            trust_remote_code=True
-        )
-
-        # Download model
-        logger.info("🧠 Downloading model...")
-        model = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            cache_dir=cache_dir,
-            trust_remote_code=True,
-            torch_dtype="auto",  # Let it choose the best dtype
-            low_cpu_mem_usage=True,
-        )
-
-        logger.info("✅ Model downloaded and cached successfully!")
-        logger.info(f"📁 Model cached in: {Path(cache_dir).absolute()}")
-
-        # Test that everything works
-        logger.info("🧪 Testing model loading...")
-        del model, tokenizer  # Free memory
-
-        return True
-
-    except Exception as e:
-        logger.error(f"❌ Failed to pre-download model: {e}")
-        return False
-
-def main():
-    """Main setup function"""
-    logger.info("🚀 Setting up ML Microservice Optimizations")
-    logger.info("=" * 50)
-
-    # Step 1: Setup cache directory
-    setup_cache_directory()
-
-    # Step 2: Set environment variables
-    set_environment_variables()
-
-    # Step 3: Pre-download model
-    success = pre_download_model()
-
-    if success:
-        logger.info("\n✅ Setup completed successfully!")
-        logger.info("📋 Next steps:")
-        logger.info("1. Replace your main.py with the optimized version")
-        logger.info("2. Replace your model.py with the optimized version")
-        logger.info("3. Run: python main.py")
-        logger.info("\n🚀 Your server will now start much faster!")
-    else:
-        logger.error("\n❌ Setup failed!")
-        logger.error("Please check your internet connection and try again.")
-        sys.exit(1)
+# #!/usr/bin/env python3
+# """
+# Quick setup script to optimize your existing ML microservice.
+# Run this to set up caching and pre-download the model.
+# """
+# ... (the rest of the previous setup script above, kept verbatim as a commented-out block) ...
+
+# setup.py
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import os
+
+MODEL_ID = "Salesforce/codet5p-220m"
+
+def download_model():
+    print(f"[SETUP] Downloading model: {MODEL_ID}")
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
+    print("[SETUP] Model and tokenizer downloaded ✅")
 
 if __name__ == "__main__":
-    main()
+    os.makedirs("model_cache", exist_ok=True)
+    download_model()
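
One assumption worth flagging (not stated in the diff): download_model() warms the default Hugging Face cache, while __main__ creates a local model_cache directory. If that directory is meant to hold the weights, the download would have to point at it explicitly; a sketch under that assumption, with cache_dir as an illustrative addition:

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

MODEL_ID = "Salesforce/codet5p-220m"

def download_model(cache_dir: str = "./model_cache") -> None:
    # cache_dir here is hypothetical; the committed script relies on the default HF cache location
    AutoTokenizer.from_pretrained(MODEL_ID, cache_dir=cache_dir)
    AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID, cache_dir=cache_dir)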