from contextlib import redirect_stderr, redirect_stdout
import io
import json
import os
import re
import subprocess
import traceback
from typing import Dict, List, Literal, Optional
import google.generativeai as genai
import cv2
import pandas as pd
from pydantic import BaseModel
import requests
from audio_util import Audio_Util
from constantes import *
from file_util import File_Util
from image_util import Image_Util
from tavily import TavilyClient
from web_util import Web_Util
from wikipedia_util import Wikipedia_Historical_Page, Wikipedia_Util
class Video_Util:
    @staticmethod
    def download_video_from_url(url: str, output_path: str, video_file_name: str) -> str:
        """Downloads a YouTube video using yt-dlp.

        Returns the path of the downloaded file, or False on failure."""
        video_path = f'{output_path}/{video_file_name}.%(ext)s'
        print(f"Downloading video from {url} to {video_path}...")
        try:
            # yt-dlp command to download the best mp4 format
            command = [
                'yt-dlp',
                "--cookies", YOUTUBE_COOKIE_PATH,
                '-f', 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
                '-o', video_path,
                url
            ]
            result = subprocess.run(command, check=True, capture_output=True, text=True)
            lista_arquivos = File_Util.retirar_sufixo_codec_arquivo(output_path)
            print("Video download completed successfully.")
            return f"{output_path}/{lista_arquivos[0]}"
        except subprocess.CalledProcessError as e:
            print(f"Error downloading the video: {e}")
            print(f"Error output: {e.stderr}")
            return False
        except FileNotFoundError:
            print("Error: the 'yt-dlp' command was not found. Make sure it is installed and on the system PATH.")
            print("You can install it with: pip install yt-dlp")
            return False
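# Illustrative usage sketch (the URL is a placeholder; OUTPUT_VIDEO_PATH and
# VIDEO_FILE_NAME are expected to come from constantes):
#
#   path = Video_Util.download_video_from_url(
#       "https://www.youtube.com/watch?v=...", OUTPUT_VIDEO_PATH, VIDEO_FILE_NAME)
#   if not path:
#       ...  # the download failed (the function returns False on error)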
def execute_python_code_tool(code_path: str) -> str:
    """
    Executes the Python code found at code_path.
    Args:
        code_path: Path to the Python file.
    Returns:
        Execution result.
    """
    saida = io.StringIO()
    erros = io.StringIO()
    final_code_path = File_Util.baixa_arquivo_task(code_path)
    print(f"Executing code at {final_code_path}...")
    try:
        with open(final_code_path, 'r', encoding='utf-8') as f:
            codigo = f.read()
        # Capture stdout and stderr using context managers
        with redirect_stdout(saida), redirect_stderr(erros):
            exec(codigo, {'__name__': '__main__'})
        # Collect the captured output
        saida_valor = saida.getvalue()
        erro_valor = erros.getvalue()
        if erro_valor:
            return f"[EXECUTION ERROR]:\n{erro_valor}"
        return saida_valor if saida_valor.strip() else "[NO OUTPUT]"
    except Exception:
        return f"[EXCEPTION DURING EXECUTION]:\n{traceback.format_exc()}"
def chess_image_to_fen_tool(image_path: str, current_player: Literal["black", "white"]) -> str:
    """
    Converts a chess board image to FEN (Forsyth-Edwards Notation).
    Args:
        image_path: Path to the image file.
        current_player: Whose turn it is to play. Must be either 'black' or 'white'.
    Returns:
        JSON with the FEN (Forsyth-Edwards Notation) string representing the current board position.
    """
    print("Image to FEN invoked with the following parameters:")
    print(f"image_path: {image_path}")
    print(f"current_player: {current_player}")
    if current_player not in ["black", "white"]:
        raise ValueError("current_player must be 'black' or 'white'")
    final_image_path = os.path.join(AGENTS_FILES_PATH, image_path)
    base64_image = Image_Util.encode_image_to_base64(final_image_path)
    if not base64_image:
        raise ValueError("Failed to encode image to base64.")
    base64_image_encoded = f"data:image/jpeg;base64,{base64_image}"
    url = CHESSVISION_TO_FEN_URL
    payload = {
        "board_orientation": "predict",
        "cropped": False,
        "current_player": current_player,
        "image": base64_image_encoded,
        "predict_turn": False
    }
    response = requests.post(url, json=payload)
    if response.status_code == 200:
        dados = response.json()
        if dados.get("success"):
            print(f"Chessvision response: {dados}")
            fen = dados.get("result")
            fen = fen.replace("_", " ")  # the API returns '_' in place of spaces
            return json.dumps({"fen": fen})
        else:
            raise Exception("Request succeeded, but the prediction failed.")
    else:
        raise Exception(f"Request error: {response.status_code}")
def chess_fen_get_best_next_move_tool(fen: str, current_player: Literal["black", "white"]) -> str:
    """
    Returns the best move in algebraic notation.
    Args:
        fen: FEN (Forsyth-Edwards Notation) string.
        current_player: Whose turn it is to play. Must be either 'black' or 'white'.
    Returns:
        Best move in algebraic notation.
    """
    if not fen:
        raise ValueError("fen must be provided.")
    if current_player not in ["black", "white"]:
        raise ValueError("current_player must be 'black' or 'white'")
    url = CHESS_MOVE_API
    payload = {
        "fen": fen
    }
    print(f"Requesting the best move from {CHESS_MOVE_API} - {payload}")
    response = requests.post(url, json=payload)
    if response.status_code == 200:
        # print(f"Best move response --> {response.text}")
        dados = response.json()
        move_algebric_notation = dados.get("san")
        move = dados.get("text")
        print(f"Best move according to chess-api.com -> {move}")
        return move_algebric_notation
    else:
        raise Exception(f"Request error: {response.status_code}")
def extract_frames_from_video_to_files(url: str) -> List[str]:
    """
    Extracts frames from a video and stores them in temporary files.
    Args:
        url: URL to the video.
    Returns:
        List of frame file paths.
    """
    File_Util.create_or_clear_output_directory(OUTPUT_VIDEO_PATH)
    File_Util.create_or_clear_output_directory(OUTPUT_IMAGE_PATH)
    video_download_file_name = Video_Util.download_video_from_url(url, OUTPUT_VIDEO_PATH, VIDEO_FILE_NAME)
    if not video_download_file_name:
        raise ValueError("Failed to download video.")
    print(f"Extracting frames from {video_download_file_name} every {FRAME_INTERVAL_SECONDS} seconds...")
    if not os.path.exists(video_download_file_name):
        print(f"Error: video file not found at {video_download_file_name}")
        return []
    cap = cv2.VideoCapture(video_download_file_name)
    if not cap.isOpened():
        print(f"Error opening the video file: {video_download_file_name}")
        return []
    # Check the resolution
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print(f"Original video resolution: {width}x{height}")
    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps == 0:
        print("Error: could not read the video FPS. Falling back to a default of 30.")
        fps = 30  # default value in case the read fails
    # Kept as a float (instead of int(fps * interval_sec)) to allow fractional intervals
    frame_interval = fps * FRAME_INTERVAL_SECONDS
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print(f"Video FPS: {fps:.2f}, frame interval: {frame_interval}, total frames: {total_frames}")
    extracted_frames_paths = []
    saved_frame_index = 5  # the relevant content never starts right away; initial offset to skip the introduction
    while True:
        # Position of the next frame to read: the frame *after* the time interval
        target_frame_pos = int(saved_frame_index * frame_interval)
        if target_frame_pos >= total_frames:
            break  # stop if the next target frame is past the end of the video
        if saved_frame_index < INICIO_FRAME_IMPORTANTE or saved_frame_index > FIM_FRAME_IMPORTANTE:
            print(f"Skipping frame {saved_frame_index}")
            saved_frame_index += 1
            continue  # avoid unnecessary inference cost on irrelevant frames
        cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame_pos)
        ret, frame = cap.read()
        if not ret:
            print(f"Could not read the frame at position {target_frame_pos}. It may be the end of the video or an error.")
            break  # stop if the frame cannot be read
        # Resizing (e.g. cv2.resize(frame, (1280, 720))) was removed because it could
        # reduce sharpness and affect the downstream result
        # Compute the timestamp in seconds
        timestamp_sec = target_frame_pos / fps
        # Save the frame
        frame_filename = f"frame_{saved_frame_index:04d}_time_{timestamp_sec:.2f}s.png"
        frame_path = os.path.join(OUTPUT_IMAGE_PATH, frame_filename)
        try:
            # Save with maximum quality (no PNG compression)
            cv2.imwrite(frame_path, frame, [cv2.IMWRITE_PNG_COMPRESSION, 0])
            extracted_frames_paths.append(frame_path)
            print(f"Frame saved: {frame_path} (timestamp: {timestamp_sec:.2f}s)")
            saved_frame_index += 1
        except Exception as e:
            print(f"Error saving frame {frame_path}: {e}")
            # Continue to the next interval even if one frame fails
        # Safety check to avoid an infinite loop if the position logic goes wrong
        if saved_frame_index > (total_frames / frame_interval) + 2:
            print("Warning: the number of saved frames exceeds the expected amount. Stopping extraction.")
            break
    cap.release()
    print(f"Frame extraction completed. Total frames saved: {len(extracted_frames_paths)}")
    return extracted_frames_paths
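# Worked example of the frame-position arithmetic above (values are illustrative; the
# real FRAME_INTERVAL_SECONDS comes from constantes): with fps = 30 and
# FRAME_INTERVAL_SECONDS = 0.5, frame_interval = 15.0, so saved_frame_index = 5 maps to
# target_frame_pos = int(5 * 15.0) = 75, i.e. a timestamp of 75 / 30 = 2.5 s.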
def count_birds_species(image_path: str) -> int:
    bird_count_prompt = """You are a world-class expert in avian species classification. Analyze the provided image and determine how many
    **distinct bird species** are present. Consider size, shape, plumage, coloration, and beak structure. Focus only on
    visible morphological differences. Return a **single integer** with no explanation. Do not count individuals of the same species.
    If unsure, assume that bird is a different species."""
    if not GEMINI_API_KEY:
        raise ValueError("GEMINI_API_KEY must be defined.")
    base64_image = Image_Util.encode_image_to_base64(image_path)
    genai.configure(api_key=GEMINI_API_KEY)
    model = genai.GenerativeModel(GEMINI_MODEL)
    print(f"Sending frame for analysis with {GEMINI_MODEL}...")
    try:
        response = model.generate_content(
            contents=[
                {
                    "role": "user",
                    "parts": [
                        {"text": bird_count_prompt},
                        {"inline_data": {
                            "mime_type": "image/jpeg",
                            "data": base64_image
                        }}
                    ]
                }
            ],
            generation_config={
                "temperature": 0.0,
                "max_output_tokens": 500
            })
        # Extract the response content
        analysis_result = response.text.strip()
        print(f"Analysis received: {analysis_result}")
        return int(analysis_result)
    except Exception as e:
        print(f"Error calling the Gemini API: {e}")
        raise
def bird_video_count_tool(url: str) -> int:
    """
    Count different species of birds in a video.
    Args:
        url: URL to the video.
    Returns:
        Count of different species of birds.
    """
    frames_path_list = extract_frames_from_video_to_files(url)
    if not frames_path_list:
        raise ValueError("Failed to extract frames.")
    max_species: int = 0
    for frame_path in frames_path_list:
        species_count = count_birds_species(frame_path)
        if species_count > max_species:
            max_species = species_count
    return max_species
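# Note on the logic above: the tool reports the maximum species count observed in any
# single frame, not the union of species across frames, so species that never appear
# together in one frame are not added up.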
def extract_text_from_url_tool(audio_url: str) -> str:
    """
    Extracts text from an audio URL using the OpenAI Whisper API.
    Args:
        audio_url: URL to the audio file.
    Returns:
        Text extracted from the audio at the URL.
    """
    if not audio_url:
        raise ValueError("'audio_url' must be provided.")
    if not OUTPUT_AUDIO_PATH:
        raise ValueError("OUTPUT_AUDIO_PATH must be defined.")
    File_Util.create_or_clear_output_directory(OUTPUT_AUDIO_PATH)
    audio_download_file_name = Audio_Util.download_audio_from_url(audio_url, OUTPUT_AUDIO_PATH, AUDIO_FILENAME)
    if not audio_download_file_name:
        raise ValueError("Failed to download audio.")
    transcript = Audio_Util.extract_text_from_audio_file(audio_download_file_name)
    return transcript
def extract_text_from_file_tool(audio_file_name: str) -> str:
    """
    Extracts text from an audio file using the OpenAI Whisper API.
    Args:
        audio_file_name: Name of the audio file.
    Returns:
        Text extracted from the audio file.
    """
    if not audio_file_name:
        raise ValueError("'audio_file_name' must be provided.")
    if not OUTPUT_AUDIO_PATH:
        raise ValueError("OUTPUT_AUDIO_PATH must be defined.")
    treated_path = f"{AGENTS_FILES_PATH}/{audio_file_name}"
    transcript = Audio_Util.extract_text_from_audio_file(treated_path)
    return transcript
class Search_Web_Result(BaseModel):
    page_title: str
    page_url: str
    page_html_content: str
    page_markdown_content: str
def search_web_tool(query: str,
                    wikipedia_has_priority: bool,
                    wikipedia_historical_date: Optional[str] = None,
                    convert_to_markdown: bool = True
                    ) -> List[Search_Web_Result]:
    """
    Searches the web for the pages most relevant to the topic, returning a list of
    Search_Web_Result (title, URL, HTML content and Markdown content).
    Args:
        query: The main topic or question to search for.
        wikipedia_has_priority: If true, prioritize results from Wikipedia.
        wikipedia_historical_date: Optional date used to fetch historical Wikipedia data.
        convert_to_markdown: If true, convert the HTML content to Markdown.
    Returns:
        A list of Search_Web_Result objects sorted by relevance.
    """
    return_list: List[Search_Web_Result] = []
    try:
        tavily = TavilyClient(api_key=TAVILY_API_KEY)
    except Exception as e:
        print(f"Error initializing the Tavily client: {e}")
        raise
    print(f"\n--- Searching for '{query}' using Tavily ---")
    print(f"Wikipedia priority: {wikipedia_has_priority}")
    print(f"Wikipedia date: {wikipedia_historical_date}")
    print(f"Converting HTML to Markdown: {convert_to_markdown}")
    try:
        response = tavily.search(query=query, search_depth="basic", max_results=10)
        search_results = response.get('results', [])
    except Exception as e:
        print(f"Error searching with Tavily: {e}")
        raise
    if not search_results:
        print("No results found by the Tavily search.")
        return []
    if wikipedia_has_priority:
        print("Wikipedia priority enabled. Filtering Tavily results for Wikipedia...")
        return _processa_resultado_wikipedia(search_results, wikipedia_historical_date, convert_to_markdown)
    print("Using the first 5 general results.")
    urls_to_process = [res['url'] for res in search_results[:5]]
    print(f"\n--- Processing {len(urls_to_process)} selected URLs ---")
    for url in urls_to_process:
        title, html_content = Web_Util.download_html(url)
        if not title or not html_content:
            raise AssertionError(f"Failed to process URL: {url}")
        md_content = ""
        if convert_to_markdown:
            md_content = Web_Util.convert_html_to_markdown(title, html_content)
            if not md_content:
                raise AssertionError(f"Failed to convert URL: {url}, html: {html_content}")
        return_list.append(Search_Web_Result(
            page_title=title,
            page_url=url,
            page_html_content=html_content if not convert_to_markdown else "",
            page_markdown_content=md_content
        ))
    return return_list
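# Illustrative call (the query string is only an example):
#
#   results = search_web_tool("history of the Python programming language",
#                             wikipedia_has_priority=True, wikipedia_historical_date=None)
#   for r in results:
#       print(r.page_title, r.page_url)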
def _processa_resultado_wikipedia(search_results: List[dict], wikipedia_historical_date: Optional[str],
                                  convert_to_markdown: bool) -> List[Search_Web_Result]:
    """
    Handles the search results when Wikipedia has priority.
    Args:
        search_results: List of results returned by the Tavily search.
        wikipedia_historical_date: The date used to fetch a historical Wikipedia revision.
        convert_to_markdown: If true, convert the HTML content to Markdown.
    Returns:
        List of processed results.
    """
    print("Wikipedia priority enabled. Filtering Tavily results for Wikipedia...")
    wiki_urls = [res['url'] for res in search_results if Web_Util.is_wikipedia_url(res['url'])]
    if not wiki_urls:
        print("No Wikipedia URL found in the results.")
        return []
    # Take the first Wikipedia result
    first_wiki_url = wiki_urls[0]
    page_title_guess = first_wiki_url.split('/')[-1].replace('_', ' ')
    page_check = Wikipedia_Util.wiki_executor.page(page_title_guess)
    if not page_check.exists():
        raise AssertionError(f"Page '{page_title_guess}' not found on Wikipedia.")
    page_title = None
    page_url = None
    historical_wiki_info: Optional[Wikipedia_Historical_Page] = None
    if not wikipedia_historical_date:
        page_title = page_title_guess
        page_url = first_wiki_url
    else:
        # Fetch the historical revision
        historical_wiki_info = Wikipedia_Util.get_wikipedia_page_historical_content(page_check.title, wikipedia_historical_date)
        print(f"Historical Wikipedia revision data - {historical_wiki_info}")
        page_title = historical_wiki_info.title
        page_url = historical_wiki_info.url
    title, html_content = Web_Util.download_html(page_url)
    print(f"title {title}")
    if not html_content:
        raise AssertionError(f"The content of page {page_url} could not be downloaded; unable to continue.")
    md_content = ""
    if convert_to_markdown:
        md_content = Web_Util.convert_html_to_markdown(page_title, html_content)
    if md_content and wikipedia_historical_date:
        # Prepend revision information to the content
        header = f"# Wikipedia Content for '{historical_wiki_info.title}'\n"
        header += f"*Revision from {historical_wiki_info.timestamp} (ID: {historical_wiki_info.revision_id})*\n"
        header += f"*Regarding search date: {wikipedia_historical_date}*\n\n"
        header += "---\n\n"
        md_content = header + md_content
    return_list = [
        Search_Web_Result(
            page_title=page_title,
            page_url=page_url,
            page_html_content=html_content if not convert_to_markdown else "",
            page_markdown_content=md_content
        )
    ]
    return return_list
def text_inverter_tool(text: str) -> str:
    """
    Invert the text.
    Args:
        text: Text to be inverted.
    Returns:
        Inverted text.
    """
    return text[::-1]
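# Example: text_inverter_tool("abc") returns "cba"; applying the tool twice restores
# the original string.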
def parse_markdown_table_to_dict(markdown: str) -> dict:
    """
    Converts a binary-operation table in Markdown format to a dictionary.
    Args:
        markdown: Table in Markdown format.
    """
    linhas = markdown.strip().split('\n')
    # Strip the outer pipes and split on the inner ones
    cabecalho = [col.strip() for col in linhas[0].strip('|').split('|')]
    colunas = cabecalho[1:]  # ignore the '*' corner cell
    tabela = {}
    for linha in linhas[2:]:  # skip the header and separator rows
        partes = [p.strip() for p in linha.strip('|').split('|')]
        linha_elem = partes[0]
        valores = partes[1:]
        if len(valores) != len(colunas):
            raise ValueError(f"Error processing row '{linha_elem}': incompatible number of columns.")
        tabela[linha_elem] = dict(zip(colunas, valores))
    return tabela
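# Illustrative input/output for the parser above (the operation table itself is made up):
#
#   markdown = "|*|a|b|\n|---|---|---|\n|a|a|b|\n|b|c|b|"
#   parse_markdown_table_to_dict(markdown)
#   # -> {'a': {'a': 'a', 'b': 'b'}, 'b': {'a': 'c', 'b': 'b'}}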
def check_table_commutativity_tool(markdown: str) -> dict:
    """
    Checks whether the binary operation described by the Markdown table is commutative.
    Args:
        markdown: Table in Markdown format.
    """
    contraexemplos = []
    elementos = set()
    table = parse_markdown_table_to_dict(markdown)
    for x in table:
        for y in table:
            if x != y and table[x][y] != table[y][x]:
                contraexemplos.append((x, y))
                elementos.update([x, y])
    return {
        "counter_example": contraexemplos,
        "elements_involved": sorted(elementos)
    }
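# Continuing the parser example above, checking "|*|a|b|\n|---|---|---|\n|a|a|b|\n|b|c|b|"
# would report both ('a', 'b') and ('b', 'a') as counterexamples (each unordered pair
# shows up in both orders), because a*b = 'b' while b*a = 'c', so elements_involved
# would be ['a', 'b'].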
def get_excel_columns_tool(file_path: str) -> list[str]:
    """
    Gets the columns of an Excel file.
    Args:
        file_path: Path to the Excel file.
    Returns:
        List of column names.
    """
    final_excel_path = File_Util.baixa_arquivo_task(file_path)
    print(f"Extracting the columns of file {file_path}")
    df = pd.read_excel(final_excel_path, nrows=0)
    return df.columns.tolist()
def calculate_excel_sum_by_columns_tool(
        file_path: str,
        include_columns: list[str]
) -> float:
    """
    Calculates the sum of the values in the specified columns of an Excel file.
    Args:
        file_path: Path to the Excel file.
        include_columns: Columns included in the sum.
    """
    final_excel_path = File_Util.baixa_arquivo_task(file_path)
    print(f"Calculating the sum of {include_columns} in {final_excel_path}")
    df = pd.read_excel(final_excel_path)
    total = df[include_columns].sum().sum()  # sum each column, then sum the column totals
    return total
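# Illustrative call (the file name and column names are hypothetical):
#
#   columns = get_excel_columns_tool("sales.xlsx")
#   total = calculate_excel_sum_by_columns_tool("sales.xlsx", ["Burgers", "Fries"])
#   print(f"{total:.2f}")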
# Curated list of culinary vegetables
VEGETABLES = {
    "lettuce", "carrot", "broccoli", "spinach", "kale", "celery", "cabbage",
    "sweet potato", "radish", "turnip", "cauliflower", "beet", "onion", "garlic",
    "pea", "chard", "arugula", "basil", "parsley", "dill", "leek",
    "asparagus", "eggplant", "okra", "pumpkin", "squash", "yam", "collard green",
    "mustard green", "brussels sprout", "scallion", "fennel", "rhubarb", "artichoke",
    "endive", "escarole", "bok choy", "watercress", "turnip green"
}
COMMON_ADJECTIVES = {"fresh", "raw", "organic", "chopped", "sliced", "whole"}
def normalize_item(text: str) -> str:
    # Lowercase and remove common adjectives
    words = [w for w in re.findall(r"\w+", text.lower()) if w not in COMMON_ADJECTIVES]
    # Basic singularization
    singular = []
    for word in words:
        if word.endswith("ies"):
            singular.append(word[:-3] + "y")
        elif word.endswith("oes"):
            singular.append(word[:-2])
        elif word.endswith("s") and not word.endswith("ss"):
            singular.append(word[:-1])
        else:
            singular.append(word)
    return " ".join(singular)
def filter_vegetables_from_list_tool(items: list[str]) -> list[str]:
    """
    Returns the vegetables found in items.
    Args:
        items: List of items.
    Returns:
        List of vegetable items.
    """
    result = []
    for i in items:
        if normalize_item(i) in VEGETABLES:
            result.append(i)
    return result
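# Illustrative call (the grocery list is made up):
#
#   filter_vegetables_from_list_tool(["milk", "fresh carrots", "eggs", "broccoli"])
#   # -> ["fresh carrots", "broccoli"]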
def clean_ingredient_measure_tool(ingredients: list[str]) -> list[str]:
    """
    Strips words that indicate measurements or quantities from a list of ingredients
    and returns only the cleaned ingredient names, without duplicates and in alphabetical order.
    The function should be used when the ingredients extracted from audio or text
    contain units such as "dash", "pinch", "cup", etc., and it is necessary to
    keep only the descriptive names of the ingredients for a shopping list or display.
    Parameters:
        ingredients: List of strings, where each string is an ingredient extracted from the audio or transcript.
    Returns:
        List of strings with the cleaned ingredient names, without units of measurement and sorted alphabetically.
    """
    cleaned = []
    for ingredient in ingredients:
        words = ingredient.split()
        filtered_words = [word for word in words if word.lower() not in MEASURE_WORDS]
        cleaned_ingredient = ' '.join(filtered_words).strip()
        if cleaned_ingredient:
            cleaned.append(cleaned_ingredient)
    # Remove duplicates and sort
    return sorted(set(cleaned))
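# Illustrative call; the exact output depends on the contents of MEASURE_WORDS
# (imported from constantes), which is assumed to contain terms such as "cup" and "pinch":
#
#   clean_ingredient_measure_tool(["a cup of ripe strawberries", "a pinch of salt"])
#   # words found in MEASURE_WORDS are dropped, duplicates removed, result sorted alphabetically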