"""Gradio app that serves text embeddings from the Nomic embedding model.

At import time this module loads the ``nomic-ai/nomic-embed-text-v1.5``
tokenizer and model (onto the GPU when one is available) and builds a Gradio
interface that returns the embedding of the submitted text as JSON.

Environment variables:
    HF_TOKEN: required; passed as the Hugging Face access token.
    HF_HOME: optional; model cache directory (defaults to ``./hf_cache``).
"""

import json
import os
import time

import gradio as gr
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel

MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5"

# Resolve the cache directory once so the directory that gets created is the
# same one handed to `from_pretrained`. Reading os.environ["HF_HOME"] directly
# would raise KeyError whenever HF_HOME is unset.
HF_CACHE_DIR = os.environ.get("HF_HOME", "./hf_cache")
os.makedirs(HF_CACHE_DIR, exist_ok=True)

hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise EnvironmentError(":x: Environment variable HF_TOKEN is not set.")

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load model and tokenizer
text_tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    token=hf_token,
    cache_dir=HF_CACHE_DIR,
)
text_model = AutoModel.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    token=hf_token,
    cache_dir=HF_CACHE_DIR,
).to(device)  # Move model to GPU if available


def get_text_embeddings(text):
    """Return the embedding of *text* as a 1-D NumPy array.

    The text is tokenized (with truncation), run through the model without
    gradient tracking, and the token vectors of the last hidden state are
    mean-pooled into a single vector.

    Args:
        text: The text to embed. If a list of strings is passed, only the
            embedding of the first item is returned.

    Returns:
        The pooled embedding of the first sequence, moved to the CPU.
    """
    inputs = text_tokenizer(
        text, return_tensors="pt", padding=True, truncation=True
    ).to(device)
    with torch.no_grad():
        outputs = text_model(**inputs)

    hidden = outputs.last_hidden_state
    # Weight by the attention mask so padding positions never contribute to
    # the average. For a single un-padded string the mask is all ones and this
    # is the plain mean over tokens.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    embeddings = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
    return embeddings[0].detach().cpu().numpy()


def format_embedding(embedding):
    """Format the embedding as 'embedding: [x.xxx, x.xxx, ...]'.

    Each value is rendered with three decimal places.
    """
    formatted = ", ".join(f"{x:.3f}" for x in embedding)
    return f"embedding: [{formatted}]"


def embed_text_interface(text):
    """Gradio callback: embed *text* and return a JSON-serializable dict.

    Returns:
        A dict with "embedding" (the vector as a list of floats) and "shape"
        (the number of values in that vector).
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments.
    strt_time = time.perf_counter()
    embedding = get_text_embeddings(text)
    print(f"Total time taken by nomic to embed: {time.perf_counter()-strt_time}")

    # Convert to a plain list so the result can be serialized as JSON.
    embedding_list = embedding.tolist()
    return {
        "embedding": embedding_list,
        "shape": len(embedding_list),
    }


interface = gr.Interface(
    fn=embed_text_interface,
    inputs=gr.Textbox(label="Input Text", lines=5),
    outputs=gr.JSON(label="Embedding Vector"),  # Using JSON output
    title="Nomic Text Embeddings",
    description="Returns embeddings as a Python list",
    examples=[
        ["This is a sample text"],
        ["Another example sentence"],
    ],
)

if __name__ == "__main__":
    interface.queue(api_open=True).launch()