wjnwjn59 committed
Commit 487fa97 · 1 Parent(s): 6408fab

update sampling

Files changed (3)
  1. app.py +3 -20
  2. src/chat.py +0 -0
  3. src/llm/chat.py +1 -1
app.py CHANGED
@@ -1,49 +1,34 @@
-# app.py
 import os, base64, json, uuid, torch, gradio as gr
 from pathlib import Path
+from src.llm.chat import FunctionCallingChat
 
-# === Your vision-LLM stack (imported from src/… as organised earlier) ===
-from src.llm.chat import FunctionCallingChat  # wrapper around Llama-3.2-1B
-chatbot = FunctionCallingChat()  # load once at start-up
+chatbot = FunctionCallingChat()
 
-# -------- helpers --------------------------------------------------------
 def image_to_base64(image_path: str):
     with open(image_path, "rb") as f:
         return base64.b64encode(f.read()).decode("utf-8")
 
-
 def save_uploaded_image(pil_img) -> Path:
-    """Persist uploaded PIL image to ./static/ and return the file path."""
     Path("static").mkdir(exist_ok=True)
     filename = f"upload_{uuid.uuid4().hex[:8]}.png"
     path = Path("static") / filename
     pil_img.save(path)
     return path
 
-
-# -------- inference ------------------------------------------------------
 def inference(pil_img, prompt, task):
-    """
-    • pil_img : uploaded PIL image
-    • prompt  : optional free-form request
-    • task    : "Detection" | "Segmentation" | "Auto"
-    Returns plain-text JSON with the LLM tool-call and its results.
-    """
     if pil_img is None:
         return "❗ Please upload an image first."
 
     img_path = save_uploaded_image(pil_img)
 
-    # Build user message for the LLM
     if task == "Detection":
         user_msg = f"Please detect objects in the image '{img_path}'."
     elif task == "Segmentation":
         user_msg = f"Please segment objects in the image '{img_path}'."
-    else:  # Auto / custom
+    else:
         prompt = prompt.strip() or "Analyse this image."
         user_msg = f"{prompt} (image: '{img_path}')"
 
-    # Run chat → tool calls → tool execution
     out = chatbot(user_msg)
     txt = (
         "### 🔧 Raw tool-call \n"
@@ -53,8 +38,6 @@ def inference(pil_img, prompt, task):
     )
     return txt
 
-
-# -------- UI (unchanged shell) ------------------------------------------
 def create_header():
     with gr.Row():
         with gr.Column(scale=1):
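
For reviewers, a quick smoke test of the slimmed-down inference path. This is a sketch, not part of the commit: it assumes app.py can be imported without launching the Gradio UI, and that (as the diff shows) FunctionCallingChat is instantiated at import time.

# Hypothetical smoke test for the refactored app.py (not in this commit).
from PIL import Image
from app import inference  # importing app.py also loads FunctionCallingChat

# A blank image stands in for a real upload.
img = Image.new("RGB", (224, 224), "white")

# "Detection"/"Segmentation" route to canned prompts; any other task takes the
# free-form branch, which falls back to "Analyse this image." when prompt is empty.
print(inference(img, "", "Detection"))
print(inference(img, "", "Auto"))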
src/chat.py DELETED
File without changes
src/llm/chat.py CHANGED
@@ -31,7 +31,7 @@ class FunctionCallingChat:
         ]
 
         generation_cfg = GenerationConfig(
-            max_new_tokens=512, temperature=0.5, top_p=0.95, do_sample=True
+            max_new_tokens=512, temperature=0.2, top_p=0.95, do_sample=True
         )
 
         tokenized = self.tokenizer.apply_chat_template(
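
The substantive change is the sampling temperature, lowered from 0.5 to 0.2 while keeping top_p=0.95 and do_sample=True: a sharper token distribution makes the model's tool-call output more repeatable, at the cost of some diversity. Below is a minimal standalone sketch of the new settings; the model id is a placeholder assumption (the diff only shows that the app wraps a Llama-3.2-1B), not something this commit specifies.

# Standalone sketch of the updated GenerationConfig (model id is an assumption).
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

model_id = "meta-llama/Llama-3.2-1B-Instruct"  # placeholder; substitute the repo's checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

generation_cfg = GenerationConfig(
    max_new_tokens=512,
    temperature=0.2,  # was 0.5; lower temperature -> more deterministic tool calls
    top_p=0.95,
    do_sample=True,
)

inputs = tokenizer("Please detect objects in the image 'static/demo.png'.", return_tensors="pt")
output = model.generate(**inputs, generation_config=generation_cfg)
print(tokenizer.decode(output[0], skip_special_tokens=True))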