AreejMehboob committed on
Commit f68fb03 · verified · 1 Parent(s): 97a4dbc

Update app.py

Files changed (1)
  1. app.py +42 -24
app.py CHANGED
@@ -1,29 +1,47 @@
 
  import gradio as gr
- from sentence_transformers import SentenceTransformer
-
- # Load the Nomic embedding model
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
-
- def get_embedding(text):
-     """Generate an embedding for the input text using Nomic encoder."""
-     if not text.strip():
-         return "Please provide some text."
-
-     # Generate embedding
-     embedding = model.encode([text])[0]  # Get the first (and only) embedding
-
-     # Return embedding as list (more user-friendly in the UI)
-     return embedding.tolist()
-
- # Create Gradio interface
  interface = gr.Interface(
-     fn=get_embedding,
-     inputs=gr.Textbox(lines=5, placeholder="Enter text to embed..."),
-     outputs=gr.JSON(),
-     title="Text Embedding with Nomic Encoder",
-     description="Enter text to get its embedding vector using the Nomic Encoder model."
  )
-
- # Launch the interface
  if __name__ == "__main__":
      interface.launch()
 
+ import os
  import gradio as gr
+ import numpy as np
+ from transformers import AutoTokenizer, AutoModel
+ # ✅ Setup environment
+ os.makedirs(os.environ.get("HF_HOME", "./hf_cache"), exist_ok=True)
+ hf_token = os.environ.get("HF_TOKEN")
+ if not hf_token:
+     raise EnvironmentError("❌ Environment variable HF_TOKEN is not set.")
+ # ✅ Load model and tokenizer
+ text_tokenizer = AutoTokenizer.from_pretrained(
+     "nomic-ai/nomic-embed-text-v1.5",
+     trust_remote_code=True,
+     token=hf_token,
+     cache_dir=os.environ["HF_HOME"]
+ )
+ text_model = AutoModel.from_pretrained(
+     "nomic-ai/nomic-embed-text-v1.5",
+     trust_remote_code=True,
+     token=hf_token,
+     cache_dir=os.environ["HF_HOME"]
+ )
+ # ✅ Embedding function
+ def get_text_embeddings(text):
+     """
+     Converts input text into a dense embedding using the Nomic embedding model.
+     These embeddings are used to query Qdrant for semantically relevant document chunks.
+     """
+     inputs = text_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+     outputs = text_model(**inputs)
+     embeddings = outputs.last_hidden_state.mean(dim=1)
+     return embeddings[0].detach().numpy()
+ # ✅ Gradio interface function
+ def embed_text_interface(text):
+     embedding = get_text_embeddings(text)
+     return str(embedding)
+ # ✅ Gradio UI
  interface = gr.Interface(
+     fn=embed_text_interface,
+     inputs=gr.Textbox(label="Enter text to embed", lines=5),
+     outputs=gr.Textbox(label="Embedding vector"),
+     title="Text Embedding with Nomic AI",
+     description="Enter some text, and get its embedding vector using Nomic's embedding model."
  )
+ # ✅ Launch the app
  if __name__ == "__main__":
      interface.launch()
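
Two details in the added code are worth flagging: cache_dir=os.environ["HF_HOME"] raises a KeyError when HF_HOME is unset (the os.makedirs call above it already falls back to ./hf_cache), and mean(dim=1) averages over padding positions once inputs are batched. The snippet below is a minimal sketch of a hardened embedding path, not part of the commit: it assumes the same nomic-ai/nomic-embed-text-v1.5 checkpoint with torch and transformers (plus the model's remote-code dependencies) installed, reuses the name get_text_embeddings for illustration, and omits the HF_TOKEN handling from the commit.

import os
import torch
from transformers import AutoTokenizer, AutoModel

# Resolve the cache directory once so makedirs and from_pretrained agree,
# avoiding the KeyError when HF_HOME is unset.
cache_dir = os.environ.get("HF_HOME", "./hf_cache")
os.makedirs(cache_dir, exist_ok=True)

tokenizer = AutoTokenizer.from_pretrained(
    "nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True, cache_dir=cache_dir
)
model = AutoModel.from_pretrained(
    "nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True, cache_dir=cache_dir
)
model.eval()

def get_text_embeddings(text):
    """Embed a single string, ignoring padding positions in the mean pool."""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model(**inputs)
    # Masked mean pooling: zero out padded positions before averaging.
    mask = inputs["attention_mask"].unsqueeze(-1).float()
    summed = (outputs.last_hidden_state * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)
    return (summed / counts)[0].numpy()

For a single unpadded string this matches the committed behavior; the attention mask only changes the result once padded batches are embedded.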