TuringsSolutions committed
Commit 223bfba · verified · 1 Parent(s): dbbd401

Update app.py

Files changed (1)
  1. app.py +24 -47
app.py CHANGED
@@ -5,11 +5,9 @@
 # This script has been updated to run as a Hugging Face Space.
 #
 # Key Upgrades from the original script:
-# 1. **Hugging Face Model Integration**: Uses the 'google/gemma-3n-E4B' model
-# from the Hugging Face Hub for argument extraction.
-# 2. **Environment Variable Management**: Securely accesses the
-# HUGGING_FACE_HUB_TOKEN using os.environ.get(), which is the standard
-# for Hugging Face Spaces.
+# 1. **Hugging Face Model Integration**: Uses the fast 'Qwen/Qwen2-0.5B-Instruct'
+# model from the Hugging Face Hub for argument extraction.
+# 2. **Simplified Setup**: This model does not require a Hugging Face token.
 # 3. **Standard Dependencies**: All dependencies are managed via a
 # `requirements.txt` file.
 #
@@ -40,40 +38,19 @@ print("✅ Embedding model loaded.")
 
 # --- Configuration for Hugging Face Model-based Argument Extraction ---
 try:
-    HF_TOKEN = os.environ.get('HUGGING_FACE_HUB_TOKEN')
-    if HF_TOKEN is None:
-        raise ValueError("HUGGING_FACE_HUB_TOKEN secret not found.")
-
     print("⚙️ Loading Hugging Face model for argument extraction...")
-    # Using the user-specified Gemma model
-    model_id = "google/gemma-3n-E4B"
-
-    hf_tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
-
-    # --------------------------------------------------------------------------
-    # ✅ FIX: Manually set the chat template for the Gemma model.
-    # This is required because the specified model does not have a default
-    # template set in its tokenizer config on the Hugging Face Hub.
-    # --------------------------------------------------------------------------
-    gemma_template = (
-        "{% for message in messages %}"
-        "{{'<start_of_turn>' + message['role'] + '\n' + message['content'] + '<end_of_turn>\n'}}"
-        "{% endfor %}"
-        "{% if add_generation_prompt %}"
-        "{{ '<start_of_turn>model\n' }}"
-        "{% endif %}"
-    )
-    hf_tokenizer.chat_template = gemma_template
-    # --------------------------------------------------------------------------
+    # Using the fast Qwen2 0.5B Instruct model
+    model_id = "Qwen/Qwen2-0.5B-Instruct"
 
+    hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
     hf_model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        token=HF_TOKEN,
         torch_dtype=torch.bfloat16, # Use bfloat16 for efficiency
         device_map="auto" # Automatically use GPU if available
     )
     USE_HF_LLM = True
-    print(f"✅ Successfully loaded '{model_id}' model and set chat template.")
+    # The Qwen2 tokenizer has a built-in chat template, so we don't need to set it manually.
+    print(f"✅ Successfully loaded '{model_id}' model.")
 
 except Exception as e:
     USE_HF_LLM = False
@@ -289,27 +266,27 @@ def extract_arguments_hf(user_prompt: str, tool: Tool):
     Uses a local Hugging Face model to extract structured arguments.
     """
     system_prompt = f"""
-    You are an expert at extracting structured data from natural language.
-    Your task is to analyze the user's prompt and extract the arguments required to call the tool: '{tool.name}'.
+    You are an expert at extracting structured data from natural language.
+    Your task is to analyze the user's prompt and extract the arguments required to call the tool: '{tool.name}'.
 
-    You must adhere to the following JSON schema for the arguments:
-    {json.dumps(tool.args_schema, indent=2)}
+    You must adhere to the following JSON schema for the arguments:
+    {json.dumps(tool.args_schema, indent=2)}
 
-    - If a value is not present in the prompt for a non-required field, omit it from the JSON.
-    - If a required value is missing, return a JSON object with an "error" key explaining what is missing.
-    - Today's date is {datetime.now().strftime('%Y-%m-%d')}. If the user says "tomorrow", use {(datetime.now() + timedelta(days=1)).strftime('%Y-%m-%d')}.
-    - Respond ONLY with a valid JSON object. Do not include any other text, explanation, or markdown code blocks.
-    """
+    - If a value is not present in the prompt for a non-required field, omit it from the JSON.
+    - If a required value is missing, return a JSON object with an "error" key explaining what is missing.
+    - Today's date is {datetime.now().strftime('%Y-%m-%d')}. If the user says "tomorrow", use {(datetime.now() + timedelta(days=1)).strftime('%Y-%m-%d')}.
+    - Respond ONLY with a valid JSON object. Do not include any other text, explanation, or markdown code blocks.
+    """
 
-    # Gemma instruction-following format
+    # Qwen2 instruction-following format
     chat = [
-        # Gemma does not use a 'system' role. Instructions are part of the first user message.
-        {"role": "user", "content": f"{system_prompt}\n\nUser Prompt: \"{user_prompt}\""},
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt},
     ]
 
-    prompt = hf_tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
-
     try:
+        # The tokenizer for Qwen2 has a built-in chat template.
+        prompt = hf_tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
         inputs = hf_tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt").to(hf_model.device)
 
         # Generate with the model
@@ -338,7 +315,7 @@ def execute_tool(user_prompt: str):
     selected_tool, score, _ = find_best_tool(user_prompt)
 
     if USE_HF_LLM:
-        print(f"⚙️ Selected Tool: {selected_tool.name}. Extracting arguments with Gemma...")
+        print(f"⚙️ Selected Tool: {selected_tool.name}. Extracting arguments with Qwen2...")
         extracted_args = extract_arguments_hf(user_prompt, selected_tool)
     else:
         # Fallback if the model failed to load
@@ -445,7 +422,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🛠️ Tool World: Advanced Prototype (Hugging Face Version)")
     gr.Markdown(
         "Enter a natural language command. The system will select the best tool, "
-        "extract structured arguments with **google/gemma-3n-E4B**, and execute it."
+        "extract structured arguments with **Qwen/Qwen2-0.5B-Instruct**, and execute it."
    )
 
    with gr.Row():
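
Since the commit drops the hand-written Gemma Jinja template in favor of the template that ships with the Qwen2 tokenizer, a quick check like the following (not part of the commit; the example messages are placeholders) shows what that built-in template produces:

```python
# Sketch only, not from the commit: confirm the Qwen2 tokenizer ships its own
# chat template, which is why the manual Gemma-style template was removed.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
print(tok.chat_template is not None)  # True; the template comes from the Hub config

messages = [  # hypothetical messages for illustration
    {"role": "system", "content": "You extract structured JSON arguments."},
    {"role": "user", "content": "Book a table for two tomorrow at 7pm."},
]
print(tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
# Renders ChatML-style turns (<|im_start|>role ... <|im_end|>) and ends with an
# open assistant turn, so no hand-written Jinja template is required.
```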
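Putting the pieces of the new path together, a minimal end-to-end sketch of the Qwen2 argument-extraction flow might look as follows. The generation settings (`max_new_tokens`, greedy decoding) and the direct `json.loads` on the reply are assumptions; that part of `app.py` is not shown in this diff.

```python
# Minimal sketch of the new extraction path set up by this commit
# (generation and JSON-parsing details are assumed, not taken from the diff).
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Qwen/Qwen2-0.5B-Instruct"
hf_tokenizer = AutoTokenizer.from_pretrained(model_id)  # no HF token required
hf_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # bfloat16 for efficiency
    device_map="auto",           # GPU if available
)

def extract_arguments(system_prompt: str, user_prompt: str) -> dict:
    # Qwen2 supports a real 'system' role, so the schema instructions no longer
    # need to be folded into the first user message as they were for Gemma.
    chat = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    prompt = hf_tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
    inputs = hf_tokenizer.encode(
        prompt, add_special_tokens=False, return_tensors="pt"
    ).to(hf_model.device)
    outputs = hf_model.generate(inputs, max_new_tokens=256, do_sample=False)  # assumed settings
    reply = hf_tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
    return json.loads(reply)  # assumes the model obeys the JSON-only instruction
```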