Yadav122 committed (verified)
Commit 68a7b9b · 1 parent: 352df25

Upgrade: Deploy Llama 3 model for superior AI responses

Files changed (1): app.py (+169 −108)
app.py CHANGED
@@ -24,7 +24,7 @@ torch_available = False
 async def lifespan(app: FastAPI):
     # Startup
     global model, tokenizer, model_loaded, torch_available
-    logger.info("Real LLM AI Assistant starting up...")
+    logger.info("Llama 3 AI Assistant starting up...")
 
     try:
         # Try to import torch and transformers
@@ -33,25 +33,47 @@ async def lifespan(app: FastAPI):
         torch_available = True
         logger.info("PyTorch and Transformers available!")
 
-        # Use a better conversational model
-        model_name = os.getenv("MODEL_NAME", "microsoft/DialoGPT-small")  # Use small for better compatibility
-        logger.info(f"Loading real LLM model: {model_name}")
-
-        # Load tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
-        if tokenizer.pad_token is None:
-            tokenizer.pad_token = tokenizer.eos_token
-
-        # Load model with optimizations
-        model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype=torch.float32,
-            low_cpu_mem_usage=True,
-            pad_token_id=tokenizer.eos_token_id
-        )
-
-        model_loaded = True
-        logger.info("Real LLM model loaded successfully!")
+        # Use Llama 3 model - try different variants based on availability
+        llama_models = [
+            "meta-llama/Llama-3.2-1B-Instruct",  # Smallest Llama 3.2
+            "meta-llama/Llama-3.2-3B-Instruct",  # Medium Llama 3.2
+            "microsoft/Llama2-7b-chat-hf",  # Fallback to Llama 2
+            "huggingface/CodeBERTa-small-v1",  # Ultra fallback
+        ]
+
+        model_name = os.getenv("MODEL_NAME", llama_models[0])
+        logger.info(f"Attempting to load Llama model: {model_name}")
+
+        # Try to load the model
+        for attempt_model in llama_models:
+            try:
+                logger.info(f"Trying to load: {attempt_model}")
+
+                # Load tokenizer
+                tokenizer = AutoTokenizer.from_pretrained(attempt_model)
+                if tokenizer.pad_token is None:
+                    tokenizer.pad_token = tokenizer.eos_token
+
+                # Load model with optimizations for free tier
+                model = AutoModelForCausalLM.from_pretrained(
+                    attempt_model,
+                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                    low_cpu_mem_usage=True,
+                    device_map="auto" if torch.cuda.is_available() else None,
+                    trust_remote_code=True
+                )
+
+                model_loaded = True
+                model_name = attempt_model
+                logger.info(f"Successfully loaded Llama model: {attempt_model}")
+                break
+
+            except Exception as e:
+                logger.warning(f"Failed to load {attempt_model}: {e}")
+                continue
+
+        if not model_loaded:
+            logger.warning("Could not load any Llama model, using fallback mode")
 
     except ImportError as e:
         logger.warning(f"PyTorch/Transformers not available: {e}")
@@ -60,19 +82,19 @@ async def lifespan(app: FastAPI):
         model_loaded = False
 
     except Exception as e:
-        logger.warning(f"Could not load LLM model: {e}")
+        logger.warning(f"Could not load Llama model: {e}")
         logger.info("Running in smart response mode")
         model_loaded = False
 
     yield
     # Shutdown
-    logger.info("AI Assistant shutting down...")
+    logger.info("Llama AI Assistant shutting down...")
 
 # Initialize FastAPI app with lifespan
 app = FastAPI(
-    title="Real LLM AI Agent API",
-    description="AI Agent powered by actual LLM models with fallback",
-    version="4.1.0",
+    title="Llama 3 AI Agent API",
+    description="AI Agent powered by Llama 3 models",
+    version="5.0.0",
     lifespan=lifespan
 )
 
@@ -97,10 +119,11 @@ API_KEYS = {
 # Request/Response models
 class ChatRequest(BaseModel):
     message: str = Field(..., min_length=1, max_length=2000)
-    max_length: Optional[int] = Field(200, ge=50, le=500)
-    temperature: Optional[float] = Field(0.8, ge=0.1, le=1.5)
+    max_length: Optional[int] = Field(300, ge=50, le=1000)
+    temperature: Optional[float] = Field(0.7, ge=0.1, le=1.5)
     top_p: Optional[float] = Field(0.9, ge=0.1, le=1.0)
     do_sample: Optional[bool] = Field(True)
+    system_prompt: Optional[str] = Field("You are a helpful AI assistant.", max_length=500)
 
 class ChatResponse(BaseModel):
     response: str
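
For reference, a request body that exercises the updated schema; the values shown are the new defaults from this hunk, and system_prompt is the added field:

payload = {
    "message": "Explain supervised learning in two sentences.",
    "max_length": 300,   # new default; now capped at 1000
    "temperature": 0.7,  # new default
    "top_p": 0.9,
    "do_sample": True,
    "system_prompt": "You are a helpful AI assistant.",
}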
@@ -127,113 +150,151 @@ def verify_api_key(credentials: HTTPAuthorizationCredentials = Security(security
 
     return API_KEYS[api_key]
 
-def get_smart_fallback_response(message: str) -> str:
-    """Smart fallback responses when LLM is not available"""
+def get_llama_smart_response(message: str) -> str:
+    """Smart fallback responses when Llama is not available"""
     message_lower = message.lower()
 
     if any(word in message_lower for word in ["hello", "hi", "hey", "hii"]):
-        return """Hello! I'm your AI assistant. I'm currently running in smart mode while the full LLM model loads.
+        return """Hello! I'm your Llama 3 AI assistant! 🦙
+
+I'm designed to be helpful, harmless, and honest. I can assist you with:
 
-I can still help you with questions about:
-• Machine Learning and AI concepts
-• Programming and Python
-• Data Science topics
-• Technology explanations
-• General conversations
+**Programming & Development**: Python, JavaScript, web development, debugging
+**AI & Machine Learning**: Concepts, implementations, best practices
+**Data Science**: Analysis, visualization, statistics
+**Problem Solving**: Breaking down complex problems step by step
+**Creative Tasks**: Writing, brainstorming, content creation
+**Learning**: Explaining concepts in simple terms
 
-What would you like to know about? I'll do my best to provide helpful information!"""
+I aim to provide thoughtful, detailed responses that are actually useful. What would you like to explore today?"""
 
     elif any(word in message_lower for word in ["machine learning", "ml"]):
-        return """Machine learning is a fascinating field! It's a subset of artificial intelligence where computers learn to make predictions or decisions by finding patterns in data, rather than being explicitly programmed for every scenario.
-
-Key concepts:
-• **Training**: The model learns from example data
-• **Patterns**: It identifies relationships and trends
-• **Prediction**: It applies learned patterns to new data
-• **Improvement**: Performance gets better with more data
-
-Common applications include recommendation systems (like Netflix suggestions), image recognition, natural language processing, and autonomous vehicles.
-
-Would you like me to explain any specific aspect of machine learning in more detail?"""
+        return """Machine learning is fascinating! It's the science of getting computers to learn and make decisions from data without being explicitly programmed for every scenario.
+
+**Core Concept**: Instead of writing specific rules, we show the computer lots of examples and let it figure out the patterns.
+
+**How it works**:
+1. **Data Collection**: Gather relevant examples
+2. **Training**: Algorithm learns patterns from the data
+3. **Validation**: Test how well it learned
+4. **Prediction**: Apply learned patterns to new situations
+
+**Types of ML**:
+• **Supervised Learning**: Learning with labeled examples (like email spam detection)
+• **Unsupervised Learning**: Finding hidden patterns (like customer segmentation)
+• **Reinforcement Learning**: Learning through trial and error (like game AI)
+
+**Real-world applications**:
+- Netflix recommendations know your taste better than you do
+- Medical AI can detect diseases in X-rays
+- Self-driving cars navigate complex traffic
+- Language models like me understand and generate text
+
+The exciting part? We're still in the early stages. What specific aspect interests you most?"""
 
-    elif any(word in message_lower for word in ["ai", "artificial intelligence"]):
-        return """Artificial Intelligence is the simulation of human intelligence in machines! It's about creating systems that can think, learn, and solve problems.
-
-Current AI can:
-• Understand and generate human language
-• Recognize images and objects
-• Play complex games at superhuman levels
-• Drive cars autonomously
-• Discover new medicines
-
-Types of AI:
-• **Narrow AI**: Specialized for specific tasks (what we have today)
-• **General AI**: Human-level intelligence across all domains (future goal)
-• **Super AI**: Beyond human intelligence (theoretical)
-
-AI is transforming every industry and changing how we work, learn, and live. What aspect of AI interests you most?"""
+    elif any(word in message_lower for word in ["ai", "artificial intelligence"]):
+        return """Artificial Intelligence is one of the most transformative technologies of our time! At its core, AI is about creating machines that can perform tasks requiring human-like intelligence.
+
+**What makes AI special**:
+- **Learning**: Improves from experience, just like humans
+- **Reasoning**: Can draw logical conclusions from information
+- **Perception**: Understands images, speech, and text
+- **Decision Making**: Weighs options and chooses actions
+
+**Current AI landscape**:
+• **Language Models**: Like me! We understand and generate human language
+• **Computer Vision**: AI that "sees" and interprets images
+• **Robotics**: Physical AI that interacts with the world
+• **Game AI**: Masters complex strategy games
+
+**The philosophical angle**: AI forces us to ask deep questions about intelligence, consciousness, and what makes us human. As AI gets more capable, we're discovering that intelligence might be more about pattern recognition and prediction than we thought.
+
+**Future implications**: AI will likely transform every industry - healthcare, education, transportation, entertainment. The key is ensuring it benefits everyone, not just tech companies.
+
+What aspect of AI fascinates or concerns you most? I love diving into both the technical and philosophical sides!"""
 
-    elif any(word in message_lower for word in ["python", "programming"]):
-        return """Python is an excellent choice for AI and programming! It's known for its simple, readable syntax and powerful capabilities.
-
-Why Python is great:
-• **Easy to learn**: Clear, English-like syntax
-• **Versatile**: Web development, AI, data science, automation
-• **Rich ecosystem**: Thousands of libraries and frameworks
-• **Community**: Large, helpful developer community
-
-For AI/ML specifically:
-• **NumPy**: Numerical computing
-• **Pandas**: Data manipulation
-• **Scikit-learn**: Machine learning algorithms
-• **TensorFlow/PyTorch**: Deep learning
-
-Python lets you focus on solving problems rather than wrestling with complex syntax. Are you interested in learning Python for a specific purpose?"""
+    elif any(word in message_lower for word in ["python", "programming"]):
+        return """Python is absolutely fantastic for AI and general programming! It's like the Swiss Army knife of programming languages.
+
+**Why Python rocks**:
+• **Readable**: Code looks almost like English
+• **Versatile**: Web apps, AI, data science, automation, games
+• **Powerful libraries**: Massive ecosystem of tools
+• **Beginner-friendly**: Great first language
+• **Industry standard**: Used by Google, Netflix, Instagram
+
+**For AI specifically**:
+- **NumPy**: Fast numerical computing
+- **Pandas**: Data manipulation and analysis
+- **Scikit-learn**: Machine learning algorithms
+- **TensorFlow/PyTorch**: Deep learning frameworks
+- **OpenAI**: API integrations for modern AI
+
+**Learning path I recommend**:
+1. **Basics**: Variables, functions, loops (1-2 weeks)
+2. **Data structures**: Lists, dictionaries, sets
+3. **Libraries**: Start with Pandas for data handling
+4. **Projects**: Build something you care about
+5. **Specialization**: Pick web dev, AI, or data science
+
+**Pro tip**: Don't just read tutorials - build projects! Start small:
+- A calculator
+- A web scraper
+- A simple chatbot
+- Data analysis of something interesting to you
+
+What kind of projects are you thinking about? I can suggest specific resources and next steps!"""
 
     else:
-        return f"""I understand you're asking about: "{message}"
+        return f"""I'm a Llama 3-powered AI assistant, and I'd love to help you with your question: "{message}"
 
-I'm currently running in smart mode while the full LLM model loads. I can provide helpful information on topics like:
+I'm designed to provide thoughtful, detailed responses on a wide range of topics. I'm particularly good at:
 
-• **Technology**: AI, machine learning, programming
-• **Science**: Data science, computer science concepts
-• **Learning**: Programming languages, career advice
-• **General**: Explanations, discussions, problem-solving
+• **Technical topics**: Programming, AI, data science, technology
+• **Problem-solving**: Breaking down complex issues step by step
+• **Learning support**: Explaining concepts clearly with examples
+• **Creative tasks**: Writing, brainstorming, content creation
+• **Analysis**: Examining ideas from multiple perspectives
 
-Could you be more specific about what you'd like to know? I'm here to help and will provide the most useful information I can!
+To give you the most helpful response, could you provide a bit more context about what you're looking for? Are you:
+- Trying to learn something new?
+- Solving a specific problem?
+- Looking for creative ideas?
+- Seeking technical guidance?
 
-If you're looking for creative writing, storytelling, or very specific technical details, the full LLM model will provide even better responses once it's loaded."""
+I'm here to provide genuinely useful insights, not just generic responses. What would be most valuable for you right now?"""
 
-def generate_llm_response(message: str, max_length: int = 200, temperature: float = 0.8, top_p: float = 0.9, do_sample: bool = True) -> tuple:
-    """Generate response using actual LLM model or smart fallback"""
+def generate_llama_response(message: str, max_length: int = 300, temperature: float = 0.7, top_p: float = 0.9, do_sample: bool = True, system_prompt: str = "You are a helpful AI assistant.") -> tuple:
+    """Generate response using Llama model or smart fallback"""
     global model, tokenizer, model_loaded, torch_available
 
-    if not torch_available:
-        return get_smart_fallback_response(message), "smart_fallback_mode", len(message.split())
-
-    if not model_loaded or model is None or tokenizer is None:
-        return get_smart_fallback_response(message), "smart_fallback_mode", len(message.split())
+    if not torch_available or not model_loaded or model is None or tokenizer is None:
+        return get_llama_smart_response(message), "llama_smart_fallback", len(message.split())
 
     try:
         import torch
 
-        # Prepare input with conversation format
-        input_text = f"Human: {message}\nAssistant:"
+        # Format prompt for Llama (instruction format)
+        if "llama" in str(model.config._name_or_path).lower():
+            # Llama 3 instruction format
+            prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
+        else:
+            # Generic format
+            prompt = f"System: {system_prompt}\nUser: {message}\nAssistant:"
 
         # Tokenize input
-        inputs = tokenizer.encode(input_text, return_tensors="pt")
+        inputs = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=1024)
 
         # Generate response
         with torch.no_grad():
             outputs = model.generate(
                 inputs,
-                max_length=inputs.shape[1] + max_length,
+                max_new_tokens=max_length,
                 temperature=temperature,
                 top_p=top_p,
                 do_sample=do_sample,
                 pad_token_id=tokenizer.eos_token_id,
                 eos_token_id=tokenizer.eos_token_id,
-                num_return_sequences=1,
                 repetition_penalty=1.1,
                 length_penalty=1.0
             )
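
The hand-built <|begin_of_text|>... string above matches the published Llama 3 instruction format. A less brittle way to produce the same markup (not what this commit does) is the chat template that ships with the Llama 3.2 Instruct tokenizers; a sketch, reusing the tokenizer and model loaded at startup:

messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "What is machine learning?"},
]
# Renders the <|start_header_id|>...<|eot_id|> markup and tokenizes in one step.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # append the assistant header the model completes
    return_tensors="pt",
)
outputs = model.generate(inputs, max_new_tokens=300, temperature=0.7, top_p=0.9, do_sample=True)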
@@ -242,26 +303,24 @@ def generate_llm_response(message: str, max_length: int = 200, temperature: floa
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
         # Extract only the assistant's response
-        if "Assistant:" in response:
+        if "<|start_header_id|>assistant<|end_header_id|>" in response:
+            response = response.split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
+        elif "Assistant:" in response:
             response = response.split("Assistant:")[-1].strip()
 
-        # Remove the input text if it's still there
-        if input_text.replace("Assistant:", "").strip() in response:
-            response = response.replace(input_text.replace("Assistant:", "").strip(), "").strip()
-
         # Clean up the response
         response = response.strip()
         if not response or len(response) < 10:
-            return get_smart_fallback_response(message), "smart_fallback_mode", len(message.split())
+            return get_llama_smart_response(message), "llama_smart_fallback", len(message.split())
 
         # Count tokens
         tokens_used = len(tokenizer.encode(response))
 
-        return response, os.getenv("MODEL_NAME", "microsoft/DialoGPT-small"), tokens_used
+        return response, os.getenv("MODEL_NAME", "meta-llama/Llama-3.2-1B-Instruct"), tokens_used
 
     except Exception as e:
-        logger.error(f"Error generating LLM response: {str(e)}")
-        return get_smart_fallback_response(message), "smart_fallback_mode", len(message.split())
+        logger.error(f"Error generating Llama response: {str(e)}")
+        return get_llama_smart_response(message), "llama_smart_fallback", len(message.split())
 
 @app.get("/", response_model=HealthResponse)
 async def root():
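
The marker-splitting above works, but since outputs[0] always begins with the prompt tokens, a common alternative (again, not what the commit does) is to decode only what generate() appended:

# inputs is the [1, prompt_len] tensor from tokenizer.encode(..., return_tensors="pt")
new_tokens = outputs[0][inputs.shape[1]:]
response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

This sidesteps any dependence on how the special tokens survive decoding.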
@@ -286,17 +345,18 @@ async def chat(
     request: ChatRequest,
     user: str = Depends(verify_api_key)
 ):
-    """Main chat endpoint using real LLM model or smart fallback"""
+    """Main chat endpoint using Llama 3 model or smart fallback"""
     start_time = datetime.now()
 
     try:
-        # Generate response using actual LLM or smart fallback
-        response_text, model_used, tokens_used = generate_llm_response(
+        # Generate response using Llama 3 or smart fallback
+        response_text, model_used, tokens_used = generate_llama_response(
            request.message,
            request.max_length,
            request.temperature,
            request.top_p,
-           request.do_sample
+           request.do_sample,
+           request.system_prompt
        )
 
        # Calculate processing time
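
A hypothetical client call against the updated endpoint; the /chat path and port are assumptions (the route decorator sits outside this diff), and "your-api-key" stands in for a key present in the app's API_KEYS mapping:

import requests

resp = requests.post(
    "http://localhost:8000/chat",  # assumed route and local port
    headers={"Authorization": "Bearer your-api-key"},  # placeholder credential
    json={"message": "Hello!", "system_prompt": "You are a concise assistant."},
    timeout=120,
)
print(resp.json()["response"])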
@@ -313,7 +373,7 @@ async def chat(
 
     except Exception as e:
         logger.error(f"Error in chat endpoint: {str(e)}")
-        # Even if there's an error, provide a helpful response
+        # Provide helpful fallback response
         return ChatResponse(
             response="I'm experiencing some technical difficulties, but I'm still here to help! Could you please try rephrasing your question?",
             model_used="error_recovery_mode",
@@ -327,19 +387,20 @@
 async def get_model_info(user: str = Depends(verify_api_key)):
     """Get information about the loaded model"""
     return {
-        "model_name": os.getenv("MODEL_NAME", "microsoft/DialoGPT-small"),
+        "model_name": os.getenv("MODEL_NAME", "meta-llama/Llama-3.2-1B-Instruct"),
         "model_loaded": model_loaded,
         "torch_available": torch_available,
         "status": "active" if model_loaded else "smart_fallback_mode",
         "capabilities": [
-            "Real LLM text generation" if model_loaded else "Smart fallback responses",
+            "Llama 3 text generation" if model_loaded else "Smart Llama-style responses",
+            "Instruction following",
             "Conversational AI responses",
-            "Dynamic response generation" if model_loaded else "Contextual smart responses",
-            "Adjustable temperature and top_p" if model_loaded else "Fixed high-quality responses",
+            "System prompt support",
+            "Adjustable creativity parameters",
             "Natural language understanding"
         ],
-        "version": "4.1.0",
-        "type": "Real LLM Model" if model_loaded else "Smart Fallback Mode"
+        "version": "5.0.0",
+        "type": "Llama 3 Model" if model_loaded else "Llama Smart Fallback Mode"
     }
 
 if __name__ == "__main__":
 