gdms committed on
Commit
9102dbe
·
1 Parent(s): 7c695a7

Tool para remover medidas em receitas

Browse files
Files changed (3) hide show
  1. agent.py +1 -1
  2. constantes.py +6 -0
  3. tools.py +28 -0
agent.py CHANGED
@@ -20,7 +20,7 @@ class Agent:
20
  print("--> Audio Agent")
21
  self.audio_agent = create_react_agent(
22
  model=init_chat_model("openai:gpt-4o-mini"),
23
- tools=[extract_text_from_url_tool, extract_text_from_file_tool],
24
  prompt= AUDIO_AGENT_PROMPT,
25
  name="audio_agent",
26
  )
 
20
  print("--> Audio Agent")
21
  self.audio_agent = create_react_agent(
22
  model=init_chat_model("openai:gpt-4o-mini"),
23
+ tools=[extract_text_from_url_tool, extract_text_from_file_tool, clean_ingredient_measure_tool],
24
  prompt= AUDIO_AGENT_PROMPT,
25
  name="audio_agent",
26
  )
constantes.py CHANGED
@@ -32,6 +32,12 @@ HUGGINGFACE_DATASET_URL_TEMPLATE = (
32
  "https://huggingface.co/datasets/gdms/gaia/resolve/main/{filename}"
33
  )
34
 
 
 
 
 
 
 
35
  LISTA_TASKS_PROCESSAR = [
36
  "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
37
  "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
 
32
  "https://huggingface.co/datasets/gdms/gaia/resolve/main/{filename}"
33
  )
34
 
35
# Lower-case words treated as units of measure and removed from ingredient
# descriptions. Singular and plural forms are listed explicitly because the
# lookup is an exact word match.
MEASURE_WORDS = {
    "dash", "dashes", "pinch", "pinches",
    "cup", "cups", "tablespoon", "tablespoons",
    "teaspoon", "teaspoons", "pound", "pounds", "oz", "ounce", "ounces",
    "grams", "gram", "ml", "liter", "liters", "slice", "slices",
}
40
+
41
  LISTA_TASKS_PROCESSAR = [
42
  "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
43
  "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
tools.py CHANGED
@@ -673,3 +673,31 @@ def filter_vegetables_from_list_tool(items: list[str]) -> list[str]:
673
 
674
  return result
675
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
673
 
674
  return result
675
 
676
+
677
def clean_ingredient_measure_tool(ingredients: list[str]) -> list[str]:
    """
    Remove measurement-unit words from a list of ingredient strings.

    Each ingredient is split on whitespace and every word found in
    MEASURE_WORDS is dropped. The comparison is case-insensitive and ignores
    punctuation attached to the word, so "Cups," is treated like "cups".
    All other words are kept exactly as given; only MEASURE_WORDS is
    consulted, so a numeric quantity such as "2" is removed only if that
    set lists it.

    Use this tool when ingredients extracted from audio or text still contain
    units such as "dash", "pinch" or "cup" and only the descriptive ingredient
    names are needed, e.g. for a shopping list or for display.

    Parameters:
    - ingredients: list of strings, where each string is an ingredient extracted from the audio or transcript.

    Returns:
    - List of cleaned ingredient strings, without duplicates and sorted in Python's default string order. Ingredients left empty after cleaning are omitted.
    """
    # Punctuation that may cling to a unit word in a transcript ("cups,", "(oz)").
    attached_punctuation = ".,;:!?()[]\"'"

    cleaned = set()
    for ingredient in ingredients:
        kept_words = [
            word
            for word in ingredient.split()
            if word.lower().strip(attached_punctuation) not in MEASURE_WORDS
        ]
        # An ingredient made only of unit words leaves nothing worth keeping.
        if kept_words:
            cleaned.add(" ".join(kept_words))

    # The set has already removed duplicates; sorted() fixes the output order.
    return sorted(cleaned)
703
+