Patryk Ptasiński committed on
Commit
3726350
·
1 Parent(s): 1be0f7d
Files changed (2) hide show
  1. app.py +16 -1
  2. test_models.sh +0 -45
app.py CHANGED
@@ -1,11 +1,23 @@
1
  from typing import List, Dict, Any
2
  import json
 
3
 
4
  import gradio as gr
5
  from fastapi import FastAPI
6
  from fastapi.responses import JSONResponse
7
  from sentence_transformers import SentenceTransformer
8
 
 
 
 
 
 
 
 
 
 
 
 
9
  # Available models
10
  MODELS = {
11
  "nomic-ai/nomic-embed-text-v1.5": {"trust_remote_code": True},
@@ -58,12 +70,14 @@ def load_model(model_name: str):
58
  # Load the new model
59
  trust_remote_code = MODELS.get(model_name, {}).get("trust_remote_code", False)
60
  try:
 
61
  current_model = SentenceTransformer(
62
  model_name,
63
  trust_remote_code=trust_remote_code,
64
- device='cpu'
65
  )
66
  current_model_name = model_name
 
67
  except Exception as e:
68
  raise ValueError(f"Failed to load model '{model_name}': {str(e)}")
69
 
@@ -148,6 +162,7 @@ with gr.Blocks(title="Multi-Model Text Embeddings", css="""
148
  """) as app:
149
  gr.Markdown("# Multi-Model Text Embeddings")
150
  gr.Markdown("Generate embeddings for your text using 28+ state-of-the-art embedding models including top MTEB performers like NV-Embed-v2, gte-Qwen2-7B-instruct, Nomic, BGE, Snowflake, IBM Granite, Qwen3, Stella, and more.")
 
151
 
152
  # Model selector dropdown (allows custom input)
153
  model_dropdown = gr.Dropdown(
 
1
  from typing import List, Dict, Any
2
  import json
3
+ import torch
4
 
5
  import gradio as gr
6
  from fastapi import FastAPI
7
  from fastapi.responses import JSONResponse
8
  from sentence_transformers import SentenceTransformer
9
 
10
+ # Device detection - use GPU if available, otherwise CPU
11
+ def get_device():
12
+ if torch.cuda.is_available():
13
+ print("🚀 GPU detected - using CUDA for acceleration")
14
+ return 'cuda'
15
+ else:
16
+ print("💻 Using CPU for inference")
17
+ return 'cpu'
18
+
19
+ DEVICE = get_device()
20
+
21
  # Available models
22
  MODELS = {
23
  "nomic-ai/nomic-embed-text-v1.5": {"trust_remote_code": True},
 
70
  # Load the new model
71
  trust_remote_code = MODELS.get(model_name, {}).get("trust_remote_code", False)
72
  try:
73
+ print(f"Loading model '{model_name}' on {DEVICE}")
74
  current_model = SentenceTransformer(
75
  model_name,
76
  trust_remote_code=trust_remote_code,
77
+ device=DEVICE
78
  )
79
  current_model_name = model_name
80
+ print(f"✅ Model '{model_name}' loaded successfully on {DEVICE}")
81
  except Exception as e:
82
  raise ValueError(f"Failed to load model '{model_name}': {str(e)}")
83
 
 
162
  """) as app:
163
  gr.Markdown("# Multi-Model Text Embeddings")
164
  gr.Markdown("Generate embeddings for your text using 28+ state-of-the-art embedding models including top MTEB performers like NV-Embed-v2, gte-Qwen2-7B-instruct, Nomic, BGE, Snowflake, IBM Granite, Qwen3, Stella, and more.")
165
+ gr.Markdown(f"**Device**: {DEVICE.upper()} {'🚀' if DEVICE == 'cuda' else '💻'}")
166
 
167
  # Model selector dropdown (allows custom input)
168
  model_dropdown = gr.Dropdown(
test_models.sh DELETED
@@ -1,45 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Test script for all embedding models
4
- BASE_URL="https://ipepe-nomic-embeddings.hf.space"
5
- TEST_TEXT="Hello world test"
6
-
7
- echo "Testing all embedding models..."
8
- echo "================================="
9
-
10
- # Get list of models
11
- MODELS=$(curl -s "${BASE_URL}/models" | grep -o '"[^"]*"' | grep -E "(nomic|BAAI|sentence|Snowflake|granite|Qwen|stella|nvidia|Alibaba|intfloat)" | tr -d '"')
12
-
13
- # Test each model
14
- for model in $MODELS; do
15
- echo "Testing: $model"
16
-
17
- # Test with 30 second timeout
18
- response=$(timeout 30 curl -X POST "${BASE_URL}/embed" \
19
- -H "Content-Type: application/json" \
20
- -d "{\"text\": \"$TEST_TEXT\", \"model\": \"$model\"}" \
21
- -w "\nHTTP_STATUS:%{http_code}" \
22
- -s 2>/dev/null)
23
-
24
- if [ $? -eq 124 ]; then
25
- echo " ❌ TIMEOUT (>30s)"
26
- else
27
- status=$(echo "$response" | grep "HTTP_STATUS" | cut -d: -f2)
28
-
29
- if [ "$status" = "200" ]; then
30
- # Check if response contains embedding
31
- if echo "$response" | grep -q '"embedding":\['; then
32
- echo " ✅ SUCCESS"
33
- else
34
- echo " ⚠️ PARTIAL - No embedding in response"
35
- fi
36
- else
37
- # Extract error message
38
- error_msg=$(echo "$response" | grep -o '"error":"[^"]*"' | cut -d'"' -f4)
39
- echo " ❌ ERROR ($status): $error_msg"
40
- fi
41
- fi
42
- echo ""
43
- done
44
-
45
- echo "Testing complete!"