# import os
# import gradio as gr
# import numpy as np
# from transformers import AutoTokenizer, AutoModel
# import time 
# import torch

# # :white_check_mark: Setup environment
# os.makedirs(os.environ.get("HF_HOME", "./hf_cache"), exist_ok=True)
# hf_token = os.environ.get("HF_TOKEN")
# if not hf_token:
#     raise EnvironmentError(":x: Environment variable HF_TOKEN is not set.")

# # Check for GPU availability
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(f"Using device: {device}")

# # :white_check_mark: Load model and tokenizer
# text_tokenizer = AutoTokenizer.from_pretrained(
#     "nomic-ai/nomic-embed-text-v1.5",
#     trust_remote_code=True,
#     token=hf_token,
#     cache_dir=os.environ["HF_HOME"]
# )
# text_model = AutoModel.from_pretrained(
#     "nomic-ai/nomic-embed-text-v1.5",
#     trust_remote_code=True,
#     token=hf_token,
#     cache_dir=os.environ["HF_HOME"]
# ).to(device)  # Move model to GPU if available

# # :white_check_mark: Embedding function
# def get_text_embeddings(text):
#     """
#     Converts input text into a dense embedding using the Nomic embedding model.
#     These embeddings are used to query Qdrant for semantically relevant document chunks.
#     """
#     inputs = text_tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)  # Move inputs to same device as model
#     with torch.no_grad():  # Disable gradient calculation for inference
#         outputs = text_model(**inputs)
#     embeddings = outputs.last_hidden_state.mean(dim=1)
#     print(embeddings[0].detach().cpu().numpy())
#     return embeddings[0].detach().cpu().numpy()

# # :white_check_mark: Gradio interface function
# def embed_text_interface(text):
#     strt_time = time.time()
#     embedding = get_text_embeddings(text)
#     print(f"Total time taken by nomic to embed: {time.time()-strt_time}")
#     return embedding

# # :white_check_mark: Gradio UI
# interface = gr.Interface(
#     fn=embed_text_interface,
#     inputs=gr.Textbox(label="Enter text to embed", lines=5),
#     outputs=gr.Textbox(label="Embedding vector"),
#     title="Text Embedding with Nomic AI",
#     description="Enter some text, and get its embedding vector using Nomic's embedding model."
# )

# # :white_check_mark: Launch the app
# if __name__ == "__main__":
#     interface.launch()


import os
import gradio as gr
import numpy as np
from transformers import AutoTokenizer, AutoModel
import time 
import torch

# Setup environment
# Resolve the cache directory exactly once. The directory that gets created
# and the cache_dir handed to from_pretrained must be the same path, and the
# "./hf_cache" fallback must apply to both when HF_HOME is not set (indexing
# os.environ["HF_HOME"] directly would raise KeyError in that case).
HF_CACHE_DIR = os.environ.get("HF_HOME", "./hf_cache")
os.makedirs(HF_CACHE_DIR, exist_ok=True)

# The Hugging Face access token is mandatory for this script; fail fast at
# import time rather than part-way through a model download.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise EnvironmentError(":x: Environment variable HF_TOKEN is not set.")

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load model and tokenizer
# Single source of truth for the checkpoint so tokenizer and model cannot drift.
MODEL_ID = "nomic-ai/nomic-embed-text-v1.5"

# NOTE(review): trust_remote_code=True executes Python code shipped in the
# model repository; consider pinning a revision if that is a concern.
text_tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    token=hf_token,
    cache_dir=HF_CACHE_DIR,
)
text_model = AutoModel.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    token=hf_token,
    cache_dir=HF_CACHE_DIR,
).to(device)  # Move model to GPU if available


def get_text_embeddings(text):
    """Embed ``text`` with the module-level model and return a NumPy vector.

    The input is tokenized with padding and truncation enabled, moved to
    ``device``, and run through ``text_model`` with gradient tracking
    disabled. The per-token vectors in ``last_hidden_state`` are then
    mean-pooled along ``dim=1``. Positions the tokenizer marks as padding
    (attention mask == 0) are excluded from the average, so a padded input
    is not diluted by padding vectors; for an unpadded input this equals
    the plain mean.

    Args:
        text: Text to embed, in any form ``text_tokenizer`` accepts.

    Returns:
        The pooled embedding of the first (or only) input as a NumPy array
        on the CPU.
    """
    inputs = text_tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():
        outputs = text_model(**inputs)

    token_vectors = outputs.last_hidden_state
    attention_mask = inputs.get("attention_mask")
    if attention_mask is None:
        # No mask available from the tokenizer: fall back to an unweighted mean.
        embeddings = token_vectors.mean(dim=1)
    else:
        # Broadcast the mask over the hidden dimension and average only the
        # positions that correspond to real tokens.
        weights = attention_mask.unsqueeze(-1).to(token_vectors.dtype)
        summed = (token_vectors * weights).sum(dim=1)
        # clamp guards against division by zero for an all-masked row.
        counts = weights.sum(dim=1).clamp(min=1e-9)
        embeddings = summed / counts

    # NOTE(review): the model card describes task prefixes (e.g.
    # "search_query: ") for this checkpoint -- confirm whether callers are
    # expected to prepend one before calling this function.
    return embeddings[0].detach().cpu().numpy()

def format_embedding(embedding):
    """Render an embedding as the text ``embedding: [v1, v2, ...]``.

    Each value is printed in fixed-point notation with three digits after
    the decimal point, and values are separated by a comma and a space.

    Args:
        embedding: Iterable of numbers.

    Returns:
        The formatted string; an empty iterable yields ``embedding: []``.
    """
    rendered_values = []
    for value in embedding:
        rendered_values.append(format(value, ".3f"))
    body = ", ".join(rendered_values)
    return "embedding: [" + body + "]"
import json

def embed_text_interface(text):
    """Gradio callback: embed ``text`` and package the result for JSON output.

    Calls ``get_text_embeddings``, prints the wall-clock time the call took,
    and converts the returned array to a plain list.

    Args:
        text: The text entered in the UI.

    Returns:
        A dict with ``"embedding"`` (the vector as a list) and ``"shape"``
        (the length of that list).
    """
    began_at = time.time()
    vector = get_text_embeddings(text)
    elapsed = time.time() - began_at
    print(f"Total time taken by nomic to embed: {elapsed}")

    # A list (not an ndarray) is returned so the result is JSON-serialisable.
    values = vector.tolist()
    return {"embedding": values, "shape": len(values)}

# Build the UI pieces first so the Interface call below reads as plain wiring.
_input_box = gr.Textbox(label="Input Text", lines=5)
_output_view = gr.JSON(label="Embedding Vector")  # Using JSON output
_example_rows = [
    ["This is a sample text"],
    ["Another example sentence"],
]

# Gradio app: one text box in, the dict from embed_text_interface out.
interface = gr.Interface(
    fn=embed_text_interface,
    inputs=_input_box,
    outputs=_output_view,
    title="Nomic Text Embeddings",
    description="Returns embeddings as a Python list",
    examples=_example_rows,
)

if __name__ == "__main__":
    # Enable the request queue (with api_open=True), then start the server.
    queued_app = interface.queue(api_open=True)
    queued_app.launch()