AreejMehboob committed on
Commit f68fb03 · verified · 1 Parent(s): 97a4dbc

Update app.py

Files changed (1)
  1. app.py +42 -24
app.py CHANGED
@@ -1,29 +1,47 @@
 
  import gradio as gr
- from sentence_transformers import SentenceTransformer
-
- # Load the Nomic embedding model
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
-
- def get_embedding(text):
-     """Generate an embedding for the input text using Nomic encoder."""
-     if not text.strip():
-         return "Please provide some text."
-
-     # Generate embedding
-     embedding = model.encode([text])[0]  # Get the first (and only) embedding
-
-     # Return embedding as list (more user-friendly in the UI)
-     return embedding.tolist()
-
- # Create Gradio interface
  interface = gr.Interface(
-     fn=get_embedding,
-     inputs=gr.Textbox(lines=5, placeholder="Enter text to embed..."),
-     outputs=gr.JSON(),
-     title="Text Embedding with Nomic Encoder",
-     description="Enter text to get its embedding vector using the Nomic Encoder model."
  )
-
- # Launch the interface
  if __name__ == "__main__":
      interface.launch()
 
+ import os
  import gradio as gr
+ import numpy as np
+ from transformers import AutoTokenizer, AutoModel
+ # ✅ Setup environment
+ os.makedirs(os.environ.get("HF_HOME", "./hf_cache"), exist_ok=True)
+ hf_token = os.environ.get("HF_TOKEN")
+ if not hf_token:
+     raise EnvironmentError("❌ Environment variable HF_TOKEN is not set.")
+ # ✅ Load model and tokenizer
+ text_tokenizer = AutoTokenizer.from_pretrained(
+     "nomic-ai/nomic-embed-text-v1.5",
+     trust_remote_code=True,
+     token=hf_token,
+     cache_dir=os.environ["HF_HOME"]
+ )
+ text_model = AutoModel.from_pretrained(
+     "nomic-ai/nomic-embed-text-v1.5",
+     trust_remote_code=True,
+     token=hf_token,
+     cache_dir=os.environ["HF_HOME"]
+ )
+ # ✅ Embedding function
+ def get_text_embeddings(text):
+     """
+     Converts input text into a dense embedding using the Nomic embedding model.
+     These embeddings are used to query Qdrant for semantically relevant document chunks.
+     """
+     inputs = text_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+     outputs = text_model(**inputs)
+     embeddings = outputs.last_hidden_state.mean(dim=1)
+     return embeddings[0].detach().numpy()
+ # ✅ Gradio interface function
+ def embed_text_interface(text):
+     embedding = get_text_embeddings(text)
+     return str(embedding)
+ # ✅ Gradio UI
  interface = gr.Interface(
+     fn=embed_text_interface,
+     inputs=gr.Textbox(label="Enter text to embed", lines=5),
+     outputs=gr.Textbox(label="Embedding vector"),
+     title="Text Embedding with Nomic AI",
+     description="Enter some text, and get its embedding vector using Nomic's embedding model."
  )
+ # ✅ Launch the app
  if __name__ == "__main__":
      interface.launch()
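
Two details in the added code are worth flagging: cache_dir=os.environ["HF_HOME"] raises a KeyError when HF_HOME is unset (the os.makedirs call above it already falls back to ./hf_cache), and mean(dim=1) averages over padding positions once inputs are batched. The snippet below is a minimal sketch of a hardened embedding path, not part of the commit: it assumes the same nomic-ai/nomic-embed-text-v1.5 checkpoint with torch and transformers (plus the model's remote-code dependencies) installed, reuses the name get_text_embeddings for illustration, and omits the HF_TOKEN handling from the commit.

import os
import torch
from transformers import AutoTokenizer, AutoModel

# Resolve the cache directory once so makedirs and from_pretrained agree,
# avoiding the KeyError when HF_HOME is unset.
cache_dir = os.environ.get("HF_HOME", "./hf_cache")
os.makedirs(cache_dir, exist_ok=True)

tokenizer = AutoTokenizer.from_pretrained(
    "nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True, cache_dir=cache_dir
)
model = AutoModel.from_pretrained(
    "nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True, cache_dir=cache_dir
)
model.eval()

def get_text_embeddings(text):
    """Embed a single string, ignoring padding positions in the mean pool."""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model(**inputs)
    # Masked mean pooling: zero out padded positions before averaging.
    mask = inputs["attention_mask"].unsqueeze(-1).float()
    summed = (outputs.last_hidden_state * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)
    return (summed / counts)[0].numpy()

For a single unpadded string this matches the committed behavior; the attention mask only changes the result once padded batches are embedded.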