Spaces:
Sleeping
Sleeping
Tool para remover medidas em receitas
Browse files
- agent.py +1 -1
- constantes.py +6 -0
- tools.py +28 -0
agent.py
CHANGED
@@ -20,7 +20,7 @@ class Agent:
|
|
20 |
print("--> Audio Agent")
|
21 |
self.audio_agent = create_react_agent(
|
22 |
model=init_chat_model("openai:gpt-4o-mini"),
|
23 |
-
tools=[extract_text_from_url_tool, extract_text_from_file_tool],
|
24 |
prompt= AUDIO_AGENT_PROMPT,
|
25 |
name="audio_agent",
|
26 |
)
|
|
|
20 |
print("--> Audio Agent")
|
21 |
self.audio_agent = create_react_agent(
|
22 |
model=init_chat_model("openai:gpt-4o-mini"),
|
23 |
+
tools=[extract_text_from_url_tool, extract_text_from_file_tool, clean_ingredient_measure_tool],
|
24 |
prompt= AUDIO_AGENT_PROMPT,
|
25 |
name="audio_agent",
|
26 |
)
|
constantes.py
CHANGED
@@ -32,6 +32,12 @@ HUGGINGFACE_DATASET_URL_TEMPLATE = (
|
|
32 |
"https://huggingface.co/datasets/gdms/gaia/resolve/main/{filename}"
|
33 |
)
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
LISTA_TASKS_PROCESSAR = [
|
36 |
"8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
|
37 |
"a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
|
|
|
32 |
"https://huggingface.co/datasets/gdms/gaia/resolve/main/{filename}"
|
33 |
)
|
34 |
|
35 |
+
# Lower-case unit words that clean_ingredient_measure_tool drops from
# ingredient strings; membership is tested against the lower-cased word.
MEASURE_WORDS = {
    # informal amounts
    "dash",
    "pinch",
    "slice",
    "slices",
    # volume
    "cup",
    "cups",
    "tablespoon",
    "tablespoons",
    "teaspoon",
    "teaspoons",
    "ml",
    "liter",
    "liters",
    # weight
    "pound",
    "pounds",
    "oz",
    "ounce",
    "ounces",
    "gram",
    "grams",
}
|
40 |
+
|
41 |
LISTA_TASKS_PROCESSAR = [
|
42 |
"8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
|
43 |
"a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
|
tools.py
CHANGED
@@ -673,3 +673,31 @@ def filter_vegetables_from_list_tool(items: list[str]) -> list[str]:
|
|
673 |
|
674 |
return result
|
675 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
673 |
|
674 |
return result
|
675 |
|
676 |
+
|
677 |
+
def clean_ingredient_measure_tool(ingredients: list[str]) -> list[str]:
    """
    Strip measurement units and numeric quantities from ingredient strings.

    Use this when ingredients extracted from audio or text still carry units
    such as "dash", "pinch" or "cup" and only the descriptive ingredient names
    are needed (for example for a shopping list or for display).

    Each ingredient is split on whitespace. A word is dropped when, ignoring
    case and surrounding punctuation, it is listed in MEASURE_WORDS or is a
    purely numeric quantity such as "2", "1/2", "1.5" or "½". Every other word
    is kept unchanged and the remaining words are re-joined with single spaces.

    Parameters:
    - ingredients: list of strings, one ingredient per string. An empty or
      missing list yields an empty result.

    Returns:
    - List of cleaned ingredient names. Entries that became empty are removed,
      duplicates are collapsed case-insensitively (the first spelling seen is
      kept) and the result is sorted alphabetically without regard to case.
    """
    if not ingredients:
        return []

    # Punctuation that may cling to a word ("cup,", "(cups)") and must not
    # prevent it from being recognised as a unit or a quantity.
    edge_punctuation = ".,;:!?()[]\"'"
    # Separators that can appear inside a quantity: "1/2", "1.5", "1,5", "1-2".
    drop_separators = str.maketrans("", "", "/.,-")

    def is_measure_or_quantity(word: str) -> bool:
        token = word.strip(edge_punctuation).lower()
        if token in MEASURE_WORDS:
            return True
        digits = token.translate(drop_separators)
        # str.isnumeric also accepts vulgar fractions such as "½".
        return bool(digits) and digits.isnumeric()

    # Keyed by case-folded name so "Salt" and "salt" count as one ingredient.
    unique: dict[str, str] = {}
    for ingredient in ingredients:
        name = " ".join(
            word for word in ingredient.split() if not is_measure_or_quantity(word)
        )
        if name:
            unique.setdefault(name.casefold(), name)

    # Case-insensitive ordering keeps capitalised names from sorting first.
    return sorted(unique.values(), key=str.casefold)
|
703 |
+
|