import os import sys import json import pandas as pd import gradio as gr # 1) Ajusta o path antes de importar o loader BASE_DIR = os.path.dirname(os.path.abspath(__file__)) INFERENCE_PATH = os.path.join(BASE_DIR, "smi-ted", "inference") sys.path.insert(0, INFERENCE_PATH) from smi_ted_light.load import load_smi_ted # 2) Carrega o modelo MODEL_DIR = os.path.join(INFERENCE_PATH, "smi_ted_light") model = load_smi_ted( folder=MODEL_DIR, ckpt_filename="smi-ted-Light_40.pt", vocab_filename="bert_vocab_curated.txt", ) # 3) Função única para processar SMILES simples ou CSV de SMILES def process_inputs(smiles: str, file_obj): # Se vier um arquivo CSV, processa em batch if file_obj is not None: try: df_in = pd.read_csv(file_obj.name) smiles_list = df_in.iloc[:, 0].astype(str).tolist() embeddings = [] for sm in smiles_list: vec = model.encode(sm, return_torch=True)[0].tolist() embeddings.append(vec) # Monta DataFrame de saída out_df = pd.DataFrame(embeddings) out_df.insert(0, "smiles", smiles_list) out_df.to_csv("embeddings.csv", index=False) msg = f"Batch de {len(smiles_list)} SMILES processado. Baixe em embeddings.csv." return msg, gr.update(value="embeddings.csv", visible=True) except Exception as e: return f"Erro no batch: {e}", gr.update(visible=False) # Senão, processa SMILES único smiles = smiles.strip() if not smiles: return "Digite um SMILES ou envie um arquivo CSV.", gr.update(visible=False) try: vec = model.encode(smiles, return_torch=True)[0].tolist() # Salva CSV com cabeçalho cols = ["smiles"] + [f"dim_{i}" for i in range(len(vec))] df_out = pd.DataFrame([[smiles] + vec], columns=cols) df_out.to_csv("embeddings.csv", index=False) return json.dumps(vec), gr.update(value="embeddings.csv", visible=True) except Exception as e: return f"Erro ao gerar embedding: {e}", gr.update(visible=False) # 4) Monta interface Blocks with gr.Blocks() as demo: gr.Markdown( """ # SMI-TED Embedding Generator **Modo único:** cole um SMILES na caixa à esquerda. **Modo batch:** faça upload de um CSV com várias linhas de SMILES (eles devem estar na primeira coluna). Em ambos os casos, será gerado um arquivo `embeddings.csv` para download, com a primeira coluna de SMILES e o embedding nas colunas seguintes. """ ) with gr.Row(): smiles_in = gr.Textbox(label="SMILES (modo único)", placeholder="Ex.: CCO") file_in = gr.File(label="CSV de SMILES (modo batch)", file_types=[".csv"]) gerar_btn = gr.Button("Gerar Embeddings") with gr.Row(): output_msg = gr.Textbox(label="Resposta/Embedding (JSON)", interactive=False, lines=2) download_csv = gr.File(label="Baixar embeddings.csv", visible=False) gerar_btn.click( fn=process_inputs, inputs=[smiles_in, file_in], outputs=[output_msg, download_csv] ) if __name__ == "__main__": demo.launch(server_name="0.0.0.0")