Final_Assignment_Template

Sleeping

gdms committed on Jun 3

Commit

9102dbe

1 Parent(s): 7c695a7

Tool para remover medidas em receitas

Files changed (3) hide show

agent.py CHANGED Viewed

@@ -20,7 +20,7 @@ class Agent:
         print("--> Audio Agent")
         self.audio_agent = create_react_agent(
             model=init_chat_model("openai:gpt-4o-mini"),
-            tools=[extract_text_from_url_tool, extract_text_from_file_tool],
             prompt= AUDIO_AGENT_PROMPT,
             name="audio_agent",
         )

         print("--> Audio Agent")
         self.audio_agent = create_react_agent(
             model=init_chat_model("openai:gpt-4o-mini"),
+            tools=[extract_text_from_url_tool, extract_text_from_file_tool, clean_ingredient_measure_tool],
             prompt= AUDIO_AGENT_PROMPT,
             name="audio_agent",
         )

constantes.py CHANGED Viewed

@@ -32,6 +32,12 @@ HUGGINGFACE_DATASET_URL_TEMPLATE = (
     "https://huggingface.co/datasets/gdms/gaia/resolve/main/{filename}"
 )
 LISTA_TASKS_PROCESSAR = [
      "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
      "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",

     "https://huggingface.co/datasets/gdms/gaia/resolve/main/{filename}"
 )
+# Lowercase unit-of-measure words. clean_ingredient_measure_tool (tools.py)
+# lowercases each word of an ingredient and drops it when it is in this set,
+# so entries must be lowercase and singular/plural forms are listed separately.
+MEASURE_WORDS = {
+    "dash", "pinch", "cup", "cups", "tablespoon", "tablespoons",
+    "teaspoon", "teaspoons", "pound", "pounds", "oz", "ounce", "ounces",
+    "grams", "gram", "ml", "liter", "liters", "slice", "slices"
+}
 LISTA_TASKS_PROCESSAR = [
      "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
      "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",

tools.py CHANGED Viewed

	@@ -673,3 +673,31 @@ def filter_vegetables_from_list_tool(items: list[str]) -> list[str]:
673
674	return result
675

     return result
+def clean_ingredient_measure_tool(ingredients: list[str]) -> list[str]:
+    """
+        Strips words that indicate measurements or quantities from a list of ingredients
+        and returns only the cleaned ingredient names, without duplicates and in alphabetical order.
+        The function should be used when extracting ingredients from audio or text
+        contains units such as "dash", "pinch", "cup", etc., and when it is necessary to
+        keep only the descriptive names of the ingredients for a shopping list or display.
+        Parameters:
+        - ingredients: list of strings, where each string is an ingredient extracted from the audio or transcript.
+        Returns:
+        - List of strings with the names of the ingredients cleaned, without units of measurement and sorted alphabetically.
+    """
+    cleaned = []
+    for ingredient in ingredients:
+        words = ingredient.split()
+        filtered_words = [word for word in words if word.lower() not in MEASURE_WORDS]
+        cleaned_ingredient = ' '.join(filtered_words).strip()
+        if cleaned_ingredient:
+            cleaned.append(cleaned_ingredient)
+    # Remove duplicatas e ordena
+    return sorted(set(cleaned))