Prathamesh1420 committed on
Commit
8077ead
·
verified ·
1 Parent(s): 9fd14cb

Create retriever.py

Browse files
Files changed (1) hide show
  1. retriever.py +30 -0
retriever.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from sentence_transformers import SentenceTransformer
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+
6
+ model = SentenceTransformer("all-MiniLM-L6-v2")  # module-level encoder; build_embeddings and find_video both call model.encode
7
+
8
def build_embeddings(video_folder="data/videos", output_file="data/video_metadata.json"):
    """Embed every video filename in *video_folder* and save the result as JSON.

    Each file whose extension is ``.mp4``, ``.mov`` or ``.avi`` (matched
    case-insensitively) gets a description derived from its filename: the
    extension is dropped and underscores become spaces. The description is
    encoded with the module-level ``model`` and stored next to the filename.

    Args:
        video_folder: Directory that is listed for video files.
        output_file: Path of the JSON file to write. Missing parent
            directories are created.

    The written JSON is a list of objects with the keys ``"file"``,
    ``"description"`` and ``"embedding"`` (the embedding as a plain list).
    """
    video_extensions = (".mp4", ".mov", ".avi")
    metadata = []

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # metadata file (and therefore tie-breaking at lookup time) reproducible.
    for file in sorted(os.listdir(video_folder)):
        stem, extension = os.path.splitext(file)
        if extension.lower() not in video_extensions:
            continue

        # Strip only the real suffix: chained str.replace() calls would also
        # delete ".mp4"/".mov"/".avi" occurring in the middle of a name.
        description = stem.replace("_", " ")
        embedding = model.encode(description).tolist()
        metadata.append({
            "file": file,
            "description": description,
            "embedding": embedding,
        })

    # Make sure the destination directory exists before opening the file.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(metadata, f)
21
+
22
def find_video(user_query, metadata_file="data/video_metadata.json"):
    """Return the stored video whose description best matches *user_query*.

    The metadata file is expected to hold the list written by
    ``build_embeddings``: objects with ``"file"``, ``"description"`` and
    ``"embedding"`` keys. The query is encoded with the module-level
    ``model`` and compared against every stored embedding by cosine
    similarity.

    Args:
        user_query: Free-text query to match against the descriptions.
        metadata_file: Path of the JSON metadata file to read.

    Returns:
        A ``(file, description)`` tuple for the most similar entry. When
        several entries tie, the first one in the metadata file wins.

    Raises:
        ValueError: If the metadata file contains no entries.
    """
    with open(metadata_file, "r", encoding="utf-8") as f:
        videos = json.load(f)

    # Fail early with an actionable message instead of the opaque
    # "max() arg is an empty sequence", and before paying for an encode call.
    if not videos:
        raise ValueError(
            f"No video metadata found in {metadata_file!r}; "
            "run build_embeddings() first."
        )

    query_embedding = model.encode(user_query).reshape(1, -1)

    # One vectorised call yields an (n_videos, 1) matrix; take its only column.
    stored_embeddings = [v["embedding"] for v in videos]
    similarities = cosine_similarity(stored_embeddings, query_embedding)[:, 0]

    # argmax returns the first maximum, matching list.index(max(...)).
    best = videos[int(similarities.argmax())]
    return best["file"], best["description"]